Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 00017 /* 00018 * readfile.c: reads image on file into memory 00019 * 00020 * Top-level functions for reading images from file 00021 * PIXA *pixaReadFiles() 00022 * PIXA *pixaReadFilesSA() 00023 * PIX *pixRead() 00024 * PIX *pixReadWithHint() 00025 * PIX *pixReadIndexed() 00026 * PIX *pixReadStream() 00027 * 00028 * Read header information from file 00029 * l_int32 pixReadHeader() 00030 * 00031 * Format finders 00032 * l_int32 findFileFormat() 00033 * l_int32 findFileFormatStream() 00034 * l_int32 findFileFormatBuffer() 00035 * l_int32 fileFormatIsTiff() 00036 * 00037 * Read from memory 00038 * PIX *pixReadMem() 00039 * l_int32 pixReadHeaderMem() 00040 * 00041 * Test function for I/O with different formats 00042 * l_int32 ioFormatTest() 00043 */ 00044 00045 #include <string.h> 00046 #include "allheaders.h" 00047 00048 /* choose type of PIX to be generated */ 00049 enum { 00050 READ_24_BIT_COLOR = 0, /* read in as 24 (really 32) bit pix */ 00051 CONVERT_TO_PALETTE = 1, /* convert to 8 bit colormapped pix */ 00052 READ_GRAY = 2 /* read gray only */ 00053 }; 00054 00055 /* Output files for ioFormatTest(). 00056 * Note that the test for jpeg is not yet implemented */ 00057 static const char *FILE_BMP = "/tmp/junkout.bmp"; 00058 static const char *FILE_PNG = "/tmp/junkout.png"; 00059 static const char *FILE_PNM = "/tmp/junkout.pnm"; 00060 static const char *FILE_G3 = "/tmp/junkout_g3.tif"; 00061 static const char *FILE_G4 = "/tmp/junkout_g4.tif"; 00062 static const char *FILE_RLE = "/tmp/junkout_rle.tif"; 00063 static const char *FILE_PB = "/tmp/junkout_packbits.tif"; 00064 static const char *FILE_LZW = "/tmp/junkout_lzw.tif"; 00065 static const char *FILE_ZIP = "/tmp/junkout_zip.tif"; 00066 static const char *FILE_TIFF = "/tmp/junkout.tif"; 00067 static const char *FILE_JPG = "/tmp/junkout.jpg"; 00068 00069 /* I found these from the source code to the unix file */ 00070 /* command. man 1 file */ 00071 static const char JP2K_CODESTREAM[4] = { 0xff, 0x4f, 0xff, 0x51 }; 00072 static const char JP2K_IMAGE_DATA[12] = { 0x00, 0x00, 0x00, 0x0C, 00073 0x6A, 0x50, 0x20, 0x20, 00074 0x0D, 0x0A, 0x87, 0x0A }; 00075 00076 /*---------------------------------------------------------------------* 00077 * Top-level functions for reading images from file * 00078 *---------------------------------------------------------------------*/ 00079 /*! 00080 * pixaReadFiles() 00081 * 00082 * Input: dirname 00083 * substr (<optional> substring filter on filenames; can be null) 00084 * Return: pixa, or null on error 00085 * 00086 * Notes: 00087 * (1) @dirname is the full path for the directory. 00088 * (2) @substr is the part of the file name (excluding 00089 * the directory) that is to be matched. All matching 00090 * filenames are read into the Pixa. If substr is NULL, 00091 * all filenames are read into the Pixa. 00092 */ 00093 PIXA * 00094 pixaReadFiles(const char *dirname, 00095 const char *substr) 00096 { 00097 PIXA *pixa; 00098 SARRAY *sa; 00099 00100 PROCNAME("pixaReadFiles"); 00101 00102 if (!dirname) 00103 return (PIXA *)ERROR_PTR("dirname not defined", procName, NULL); 00104 00105 if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL) 00106 return (PIXA *)ERROR_PTR("sa not made", procName, NULL); 00107 00108 pixa = pixaReadFilesSA(sa); 00109 sarrayDestroy(&sa); 00110 return pixa; 00111 } 00112 00113 00114 /*! 00115 * pixaReadFilesSA() 00116 * 00117 * Input: sarray (full pathnames for all files) 00118 * Return: pixa, or null on error 00119 */ 00120 PIXA * 00121 pixaReadFilesSA(SARRAY *sa) 00122 { 00123 char *str; 00124 l_int32 i, n; 00125 PIX *pix; 00126 PIXA *pixa; 00127 00128 PROCNAME("pixaReadFilesSA"); 00129 00130 if (!sa) 00131 return (PIXA *)ERROR_PTR("sa not defined", procName, NULL); 00132 00133 n = sarrayGetCount(sa); 00134 pixa = pixaCreate(n); 00135 for (i = 0; i < n; i++) { 00136 str = sarrayGetString(sa, i, L_NOCOPY); 00137 if ((pix = pixRead(str)) == NULL) { 00138 L_WARNING_STRING("pix not read from file %s", procName, str); 00139 continue; 00140 } 00141 pixaAddPix(pixa, pix, L_INSERT); 00142 } 00143 00144 return pixa; 00145 } 00146 00147 00148 /*! 00149 * pixRead() 00150 * 00151 * Input: filename (with full pathname or in local directory) 00152 * Return: pix if OK; null on error 00153 */ 00154 PIX * 00155 pixRead(const char *filename) 00156 { 00157 FILE *fp; 00158 PIX *pix; 00159 00160 PROCNAME("pixRead"); 00161 00162 if (!filename) 00163 return (PIX *)ERROR_PTR("filename not defined", procName, NULL); 00164 00165 if ((fp = fopenReadStream(filename)) == NULL) 00166 return (PIX *)ERROR_PTR("image file not found", procName, NULL); 00167 if ((pix = pixReadStream(fp, 0)) == NULL) { 00168 fclose(fp); 00169 return (PIX *)ERROR_PTR("pix not read", procName, NULL); 00170 } 00171 00172 /* Close the stream except if GIF under windows, because 00173 * DGifCloseFile() closes the windows file stream! */ 00174 if (pixGetInputFormat(pix) != IFF_GIF) 00175 fclose(fp); 00176 #ifndef _WIN32 00177 else /* gif file */ 00178 fclose(fp); 00179 #endif /* ! _WIN32 */ 00180 00181 return pix; 00182 } 00183 00184 00185 /*! 00186 * pixReadWithHint() 00187 * 00188 * Input: filename (with full pathname or in local directory) 00189 * hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint) 00190 * Return: pix if OK; null on error 00191 * 00192 * Notes: 00193 * (1) The hint is not binding, but may be used to optimize jpeg decoding. 00194 * Use 0 for no hinting. 00195 */ 00196 PIX * 00197 pixReadWithHint(const char *filename, 00198 l_int32 hint) 00199 { 00200 FILE *fp; 00201 PIX *pix; 00202 00203 PROCNAME("pixReadWithHint"); 00204 00205 if (!filename) 00206 return (PIX *)ERROR_PTR("filename not defined", procName, NULL); 00207 00208 if ((fp = fopenReadStream(filename)) == NULL) 00209 return (PIX *)ERROR_PTR("image file not found", procName, NULL); 00210 pix = pixReadStream(fp, hint); 00211 fclose(fp); 00212 00213 if (!pix) 00214 return (PIX *)ERROR_PTR("image not returned", procName, NULL); 00215 return pix; 00216 } 00217 00218 00219 /*! 00220 * pixReadIndexed() 00221 * 00222 * Input: sarray (of full pathnames) 00223 * index (into pathname array) 00224 * Return: pix if OK; null if not found 00225 * 00226 * Notes: 00227 * (1) This function is useful for selecting image files from a 00228 * directory, where the integer @index is embedded into 00229 * the file name. 00230 * (2) This is typically done by generating the sarray using 00231 * getNumberedPathnamesInDirectory(), so that the @index 00232 * pathname would have the number @index in it. The size 00233 * of the sarray should be the largest number (plus 1) appearing 00234 * in the file names, respecting the constraints in the 00235 * call to getNumberedPathnamesInDirectory(). 00236 * (3) Consequently, for some indices into the sarray, there may 00237 * be no pathnames in the directory containing that number. 00238 * By convention, we place empty C strings ("") in those 00239 * locations in the sarray, and it is not an error if such 00240 * a string is encountered and no pix is returned. 00241 * Therefore, the caller must verify that a pix is returned. 00242 * (4) See convertSegmentedPagesToPS() in src/psio1.c for an 00243 * example of usage. 00244 */ 00245 PIX * 00246 pixReadIndexed(SARRAY *sa, 00247 l_int32 index) 00248 { 00249 char *fname; 00250 l_int32 n; 00251 PIX *pix; 00252 00253 PROCNAME("pixReadIndexed"); 00254 00255 if (!sa) 00256 return (PIX *)ERROR_PTR("sa not defined", procName, NULL); 00257 n = sarrayGetCount(sa); 00258 if (index < 0 || index >= n) 00259 return (PIX *)ERROR_PTR("index out of bounds", procName, NULL); 00260 00261 fname = sarrayGetString(sa, index, L_NOCOPY); 00262 if (fname[0] == '\0') 00263 return NULL; 00264 00265 if ((pix = pixRead(fname)) == NULL) { 00266 L_ERROR_STRING("pix not read from file %s", procName, fname); 00267 return NULL; 00268 } 00269 00270 return pix; 00271 } 00272 00273 00274 /*! 00275 * pixReadStream() 00276 * 00277 * Input: fp (file stream) 00278 * hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint) 00279 * Return: pix if OK; null on error 00280 * 00281 * Notes: 00282 * (1) The hint only applies to jpeg. 00283 */ 00284 PIX * 00285 pixReadStream(FILE *fp, 00286 l_int32 hint) 00287 { 00288 l_int32 format; 00289 PIX *pix; 00290 00291 PROCNAME("pixReadStream"); 00292 00293 if (!fp) 00294 return (PIX *)ERROR_PTR("stream not defined", procName, NULL); 00295 pix = NULL; 00296 00297 findFileFormatStream(fp, &format); 00298 switch (format) 00299 { 00300 case IFF_BMP: 00301 if ((pix = pixReadStreamBmp(fp)) == NULL ) 00302 return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL); 00303 break; 00304 00305 case IFF_JFIF_JPEG: 00306 if ((pix = pixReadStreamJpeg(fp, READ_24_BIT_COLOR, 1, NULL, hint)) 00307 == NULL) 00308 return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL); 00309 break; 00310 00311 case IFF_PNG: 00312 if ((pix = pixReadStreamPng(fp)) == NULL) 00313 return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL); 00314 break; 00315 00316 case IFF_TIFF: 00317 case IFF_TIFF_PACKBITS: 00318 case IFF_TIFF_RLE: 00319 case IFF_TIFF_G3: 00320 case IFF_TIFF_G4: 00321 case IFF_TIFF_LZW: 00322 case IFF_TIFF_ZIP: 00323 if ((pix = pixReadStreamTiff(fp, 0)) == NULL) /* page 0 by default */ 00324 return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL); 00325 break; 00326 00327 case IFF_PNM: 00328 if ((pix = pixReadStreamPnm(fp)) == NULL) 00329 return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL); 00330 break; 00331 00332 case IFF_GIF: 00333 if ((pix = pixReadStreamGif(fp)) == NULL) 00334 return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL); 00335 break; 00336 00337 case IFF_JP2: 00338 return (PIX *)ERROR_PTR("jp2: format not supported", procName, NULL); 00339 break; 00340 00341 case IFF_WEBP: 00342 if ((pix = pixReadStreamWebP(fp)) == NULL) 00343 return (PIX *)ERROR_PTR("webp: no pix returned", procName, NULL); 00344 break; 00345 00346 case IFF_SPIX: 00347 if ((pix = pixReadStreamSpix(fp)) == NULL) 00348 return (PIX *)ERROR_PTR("spix: no pix returned", procName, NULL); 00349 break; 00350 00351 case IFF_UNKNOWN: 00352 return (PIX *)ERROR_PTR( "Unknown format: no pix returned", 00353 procName, NULL); 00354 break; 00355 } 00356 00357 if (pix) 00358 pixSetInputFormat(pix, format); 00359 return pix; 00360 } 00361 00362 00363 00364 /*---------------------------------------------------------------------* 00365 * Read header information from file * 00366 *---------------------------------------------------------------------*/ 00367 /*! 00368 * pixReadHeader() 00369 * 00370 * Input: filename (with full pathname or in local directory) 00371 * &format (<optional return> file format) 00372 * &w, &h (<optional returns> width and height) 00373 * &bps <optional return> bits/sample 00374 * &spp <optional return> samples/pixel (1, 3 or 4) 00375 * &iscmap (<optional return> 1 if cmap exists; 0 otherwise) 00376 * Return: 0 if OK, 1 on error 00377 * 00378 * Notes: 00379 * (1) This reads the actual headers for jpeg, png, tiff and pnm. 00380 * For bmp and gif, we cheat and read the entire file into a pix, 00381 * from which we extract the "header" information. 00382 */ 00383 l_int32 00384 pixReadHeader(const char *filename, 00385 l_int32 *pformat, 00386 l_int32 *pw, 00387 l_int32 *ph, 00388 l_int32 *pbps, 00389 l_int32 *pspp, 00390 l_int32 *piscmap) 00391 { 00392 l_int32 format, ret, w, h, d, bps, spp, iscmap; 00393 l_int32 type; /* ignored */ 00394 FILE *fp; 00395 PIX *pix; 00396 00397 PROCNAME("pixReadHeader"); 00398 00399 if (pw) *pw = 0; 00400 if (ph) *ph = 0; 00401 if (pbps) *pbps = 0; 00402 if (pspp) *pspp = 0; 00403 if (piscmap) *piscmap = 0; 00404 if (pformat) *pformat = 0; 00405 iscmap = 0; /* init to false */ 00406 if (!filename) 00407 return ERROR_INT("filename not defined", procName, 1); 00408 00409 if ((fp = fopenReadStream(filename)) == NULL) 00410 return ERROR_INT("image file not found", procName, 1); 00411 findFileFormatStream(fp, &format); 00412 fclose(fp); 00413 00414 switch (format) 00415 { 00416 case IFF_BMP: /* cheating: reading the entire file */ 00417 if ((pix = pixRead(filename)) == NULL) 00418 return ERROR_INT( "bmp: pix not read", procName, 1); 00419 pixGetDimensions(pix, &w, &h, &d); 00420 pixDestroy(&pix); 00421 bps = (d == 32) ? 8 : d; 00422 spp = (d == 32) ? 3 : 1; 00423 break; 00424 00425 case IFF_JFIF_JPEG: 00426 ret = readHeaderJpeg(filename, &w, &h, &spp, NULL, NULL); 00427 bps = 8; 00428 if (ret) 00429 return ERROR_INT( "jpeg: no header info returned", procName, 1); 00430 break; 00431 00432 case IFF_PNG: 00433 ret = readHeaderPng(filename, &w, &h, &bps, &spp, &iscmap); 00434 if (ret) 00435 return ERROR_INT( "png: no header info returned", procName, 1); 00436 break; 00437 00438 case IFF_TIFF: 00439 case IFF_TIFF_PACKBITS: 00440 case IFF_TIFF_RLE: 00441 case IFF_TIFF_G3: 00442 case IFF_TIFF_G4: 00443 case IFF_TIFF_LZW: 00444 case IFF_TIFF_ZIP: 00445 /* Reading page 0 by default; possibly redefine format */ 00446 ret = readHeaderTiff(filename, 0, &w, &h, &bps, &spp, NULL, &iscmap, 00447 &format); 00448 if (ret) 00449 return ERROR_INT( "tiff: no header info returned", procName, 1); 00450 break; 00451 00452 case IFF_PNM: 00453 ret = readHeaderPnm(filename, NULL, &w, &h, &d, &type, &bps, &spp); 00454 if (ret) 00455 return ERROR_INT( "pnm: no header info returned", procName, 1); 00456 break; 00457 00458 case IFF_GIF: /* cheating: reading the entire file */ 00459 if ((pix = pixRead(filename)) == NULL) 00460 return ERROR_INT( "gif: pix not read", procName, 1); 00461 pixGetDimensions(pix, &w, &h, &d); 00462 pixDestroy(&pix); 00463 iscmap = 1; /* always colormapped; max 256 colors */ 00464 spp = 1; 00465 bps = d; 00466 break; 00467 00468 case IFF_JP2: 00469 return ERROR_INT("jp2: format not supported", procName, 1); 00470 break; 00471 00472 case IFF_WEBP: 00473 ret = readHeaderWebP(filename, &w, &h); 00474 bps = 8; 00475 spp = 3; 00476 if (ret) 00477 return ERROR_INT( "pnm: no header info returned", procName, 1); 00478 break; 00479 00480 case IFF_SPIX: 00481 ret = readHeaderSpix(filename, &w, &h, &bps, &spp, &iscmap); 00482 if (ret) 00483 return ERROR_INT( "spix: no header info returned", procName, 1); 00484 break; 00485 00486 case IFF_UNKNOWN: 00487 L_ERROR_STRING("unknown format in file %s", procName, filename); 00488 return 1; 00489 break; 00490 } 00491 00492 if (pw) *pw = w; 00493 if (ph) *ph = h; 00494 if (pbps) *pbps = bps; 00495 if (pspp) *pspp = spp; 00496 if (piscmap) *piscmap = iscmap; 00497 if (pformat) *pformat = format; 00498 return 0; 00499 } 00500 00501 00502 /*---------------------------------------------------------------------* 00503 * Format finders * 00504 *---------------------------------------------------------------------*/ 00505 /*! 00506 * findFileFormat() 00507 * 00508 * Input: filename 00509 * &format (<return>) 00510 * Return: 0 if OK, 1 on error or if format is not recognized 00511 */ 00512 l_int32 00513 findFileFormat(const char *filename, 00514 l_int32 *pformat) 00515 { 00516 l_int32 ret; 00517 FILE *fp; 00518 00519 PROCNAME("findFileFormat"); 00520 00521 if (!pformat) 00522 return ERROR_INT("&format not defined", procName, 1); 00523 *pformat = IFF_UNKNOWN; 00524 if (!filename) 00525 return ERROR_INT("filename not defined", procName, 1); 00526 00527 if ((fp = fopenReadStream(filename)) == NULL) 00528 return ERROR_INT("image file not found", procName, 1); 00529 ret = findFileFormatStream(fp, pformat); 00530 fclose(fp); 00531 return ret; 00532 } 00533 00534 00535 /*! 00536 * findFileFormatStream() 00537 * 00538 * Input: fp (file stream) 00539 * &format (<return>) 00540 * Return: 0 if OK, 1 on error or if format is not recognized 00541 * 00542 * Notes: 00543 * (1) Important: Side effect -- this resets fp to BOF. 00544 */ 00545 l_int32 00546 findFileFormatStream(FILE *fp, 00547 l_int32 *pformat) 00548 { 00549 l_uint8 firstbytes[12]; 00550 l_int32 format; 00551 00552 PROCNAME("findFileFormatStream"); 00553 00554 if (!pformat) 00555 return ERROR_INT("&format not defined", procName, 1); 00556 *pformat = IFF_UNKNOWN; 00557 if (!fp) 00558 return ERROR_INT("stream not defined", procName, 1); 00559 00560 rewind(fp); 00561 if (fnbytesInFile(fp) < 12) 00562 return ERROR_INT("truncated file", procName, 1); 00563 00564 if (fread((char *)&firstbytes, 1, 12, fp) != 12) 00565 return ERROR_INT("failed to read first 12 bytes of file", procName, 1); 00566 rewind(fp); 00567 00568 findFileFormatBuffer(firstbytes, &format); 00569 if (format == IFF_TIFF) { 00570 findTiffCompression(fp, &format); 00571 rewind(fp); 00572 } 00573 *pformat = format; 00574 if (format == IFF_UNKNOWN) 00575 return 1; 00576 else 00577 return 0; 00578 } 00579 00580 00581 /*! 00582 * findFileFormatBuffer() 00583 * 00584 * Input: byte buffer (at least 12 bytes in size; we can't check) 00585 * &format (<return>) 00586 * Return: 0 if OK, 1 on error or if format is not recognized 00587 * 00588 * Notes: 00589 * (1) This determines the file format from the first 12 bytes in 00590 * the compressed data stream, which are stored in memory. 00591 * (2) For tiff files, this returns IFF_TIFF. The specific tiff 00592 * compression is then determined using findTiffCompression(). 00593 */ 00594 l_int32 00595 findFileFormatBuffer(const l_uint8 *buf, 00596 l_int32 *pformat) 00597 { 00598 l_uint16 twobytepw; 00599 00600 PROCNAME("findFileFormatBuffer"); 00601 00602 if (!pformat) 00603 return ERROR_INT("&format not defined", procName, 1); 00604 *pformat = IFF_UNKNOWN; 00605 if (!buf) 00606 return ERROR_INT("byte buffer not defined", procName, 0); 00607 00608 /* Check the bmp and tiff 2-byte header ids */ 00609 ((char *)(&twobytepw))[0] = buf[0]; 00610 ((char *)(&twobytepw))[1] = buf[1]; 00611 00612 if (convertOnBigEnd16(twobytepw) == BMP_ID) { 00613 *pformat = IFF_BMP; 00614 return 0; 00615 } 00616 00617 if (twobytepw == TIFF_BIGEND_ID || twobytepw == TIFF_LITTLEEND_ID) { 00618 *pformat = IFF_TIFF; 00619 return 0; 00620 } 00621 00622 /* Check for the p*m 2-byte header ids */ 00623 if ((buf[0] == 'P' && buf[1] == '4') || /* newer packed */ 00624 (buf[0] == 'P' && buf[1] == '1')) { /* old format */ 00625 *pformat = IFF_PNM; 00626 return 0; 00627 } 00628 00629 if ((buf[0] == 'P' && buf[1] == '5') || /* newer */ 00630 (buf[0] == 'P' && buf[1] == '2')) { /* old */ 00631 *pformat = IFF_PNM; 00632 return 0; 00633 } 00634 00635 if ((buf[0] == 'P' && buf[1] == '6') || /* newer */ 00636 (buf[0] == 'P' && buf[1] == '3')) { /* old */ 00637 *pformat = IFF_PNM; 00638 return 0; 00639 } 00640 00641 /* Consider the first 11 bytes of the standard JFIF JPEG header: 00642 * - The first two bytes are the most important: 0xffd8. 00643 * - The next two bytes are the jfif marker: 0xffe0. 00644 * Not all jpeg files have this marker. 00645 * - The next two bytes are the header length. 00646 * - The next 5 bytes are a null-terminated string. 00647 * For JFIF, the string is "JFIF", naturally. For others it 00648 * can be "Exif" or just about anything else. 00649 * - Because of all this variability, we only check the first 00650 * two byte marker. All jpeg files are identified as 00651 * IFF_JFIF_JPEG. */ 00652 if (buf[0] == 0xff && buf[1] == 0xd8) { 00653 *pformat = IFF_JFIF_JPEG; 00654 return 0; 00655 } 00656 00657 /* Check for the 8 byte PNG signature (png_signature in png.c): 00658 * {137, 80, 78, 71, 13, 10, 26, 10} */ 00659 if (buf[0] == 137 && buf[1] == 80 && buf[2] == 78 && buf[3] == 71 && 00660 buf[4] == 13 && buf[5] == 10 && buf[6] == 26 && buf[7] == 10) { 00661 *pformat = IFF_PNG; 00662 return 0; 00663 } 00664 00665 /* Look for "GIF87a" or "GIF89a" */ 00666 if (buf[0] == 'G' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == '8' && 00667 (buf[4] == '7' || buf[4] == '9') && buf[5] == 'a') { 00668 *pformat = IFF_GIF; 00669 return 0; 00670 } 00671 00672 /* Check for both types of jp2k file */ 00673 if (strncmp((const char *)buf, JP2K_CODESTREAM, 4) == 0 || 00674 strncmp((const char *)buf, JP2K_IMAGE_DATA, 12) == 0) { 00675 *pformat = IFF_JP2; 00676 return 0; 00677 } 00678 00679 /* Check for webp */ 00680 if (buf[0] == 'R' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == 'F' && 00681 buf[8] == 'W' && buf[9] == 'E' && buf[10] == 'B' && buf[11] == 'P') { 00682 *pformat = IFF_WEBP; 00683 return 0; 00684 } 00685 00686 /* Check for "spix" serialized pix */ 00687 if (buf[0] == 's' && buf[1] == 'p' && buf[2] == 'i' && buf[3] == 'x') { 00688 *pformat = IFF_SPIX; 00689 return 0; 00690 } 00691 00692 /* File format identifier not found; unknown */ 00693 return 1; 00694 } 00695 00696 00697 /*! 00698 * fileFormatIsTiff() 00699 * 00700 * Input: fp (file stream) 00701 * Return: 1 if file is tiff; 0 otherwise or on error 00702 */ 00703 l_int32 00704 fileFormatIsTiff(FILE *fp) 00705 { 00706 l_int32 format; 00707 00708 PROCNAME("fileFormatIsTiff"); 00709 00710 if (!fp) 00711 return ERROR_INT("stream not defined", procName, 0); 00712 00713 findFileFormatStream(fp, &format); 00714 if (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || 00715 format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || 00716 format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || 00717 format == IFF_TIFF_ZIP) 00718 return 1; 00719 else 00720 return 0; 00721 } 00722 00723 00724 /*---------------------------------------------------------------------* 00725 * Read from memory * 00726 *---------------------------------------------------------------------*/ 00727 /*! 00728 * pixReadMem() 00729 * 00730 * Input: data (const; encoded) 00731 * datasize (size of data) 00732 * Return: pix, or null on error 00733 * 00734 * Notes: 00735 * (1) This is a variation of pixReadStream(), where the data is read 00736 * from a memory buffer rather than a file. 00737 * (2) On windows, this will only read tiff formatted files from 00738 * memory. For other formats, it requires fmemopen(3). 00739 * Attempts to read those formats will fail at runtime. 00740 * (3) findFileFormatBuffer() requires up to 8 bytes to decide on 00741 * the format. That determines the constraint here. 00742 */ 00743 PIX * 00744 pixReadMem(const l_uint8 *data, 00745 size_t size) 00746 { 00747 l_int32 format; 00748 PIX *pix; 00749 00750 PROCNAME("pixReadMem"); 00751 00752 if (!data) 00753 return (PIX *)ERROR_PTR("data not defined", procName, NULL); 00754 if (size < 8) 00755 return (PIX *)ERROR_PTR("size < 8", procName, NULL); 00756 pix = NULL; 00757 00758 findFileFormatBuffer(data, &format); 00759 switch (format) 00760 { 00761 case IFF_BMP: 00762 if ((pix = pixReadMemBmp(data, size)) == NULL ) 00763 return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL); 00764 break; 00765 00766 case IFF_JFIF_JPEG: 00767 if ((pix = pixReadMemJpeg(data, size, READ_24_BIT_COLOR, 1, NULL, 0)) 00768 == NULL) 00769 return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL); 00770 break; 00771 00772 case IFF_PNG: 00773 if ((pix = pixReadMemPng(data, size)) == NULL) 00774 return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL); 00775 break; 00776 00777 case IFF_TIFF: 00778 case IFF_TIFF_PACKBITS: 00779 case IFF_TIFF_RLE: 00780 case IFF_TIFF_G3: 00781 case IFF_TIFF_G4: 00782 case IFF_TIFF_LZW: 00783 case IFF_TIFF_ZIP: 00784 /* Reading page 0 by default */ 00785 if ((pix = pixReadMemTiff(data, size, 0)) == NULL) 00786 return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL); 00787 break; 00788 00789 case IFF_PNM: 00790 if ((pix = pixReadMemPnm(data, size)) == NULL) 00791 return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL); 00792 break; 00793 00794 case IFF_GIF: 00795 if ((pix = pixReadMemGif(data, size)) == NULL) 00796 return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL); 00797 break; 00798 00799 case IFF_JP2: 00800 return (PIX *)ERROR_PTR("jp2: format not supported", procName, NULL); 00801 break; 00802 00803 case IFF_SPIX: 00804 if ((pix = pixReadMemSpix(data, size)) == NULL) 00805 return (PIX *)ERROR_PTR("spix: no pix returned", procName, NULL); 00806 break; 00807 00808 case IFF_UNKNOWN: 00809 return (PIX *)ERROR_PTR("Unknown format: no pix returned", 00810 procName, NULL); 00811 break; 00812 } 00813 00814 /* Set the input format. For tiff reading from memory we lose 00815 * the actual input format; for 1 bpp, default to G4. */ 00816 if (pix) { 00817 if (format == IFF_TIFF && pixGetDepth(pix) == 1) 00818 format = IFF_TIFF_G4; 00819 pixSetInputFormat(pix, format); 00820 } 00821 00822 return pix; 00823 } 00824 00825 00826 /*! 00827 * pixReadHeaderMem() 00828 * 00829 * Input: data (const; encoded) 00830 * datasize (size of data) 00831 * &format (<optional returns> image format) 00832 * &w, &h (<optional returns> width and height) 00833 * &bps <optional return> bits/sample 00834 * &spp <optional return> samples/pixel (1, 3 or 4) 00835 * &iscmap (<optional return> 1 if cmap exists; 0 otherwise) 00836 * Return: 0 if OK, 1 on error 00837 * 00838 * Notes: 00839 * (1) This reads the actual headers for jpeg, png, tiff and pnm. 00840 * For bmp and gif, we cheat and read all the data into a pix, 00841 * from which we extract the "header" information. 00842 * (2) On windows, this will only read tiff formatted files from 00843 * memory. For other formats, it requires fmemopen(3). 00844 * Attempts to read those formats will fail at runtime. 00845 * (3) findFileFormatBuffer() requires up to 8 bytes to decide on 00846 * the format. That determines the constraint here. 00847 */ 00848 l_int32 00849 pixReadHeaderMem(const l_uint8 *data, 00850 size_t size, 00851 l_int32 *pformat, 00852 l_int32 *pw, 00853 l_int32 *ph, 00854 l_int32 *pbps, 00855 l_int32 *pspp, 00856 l_int32 *piscmap) 00857 { 00858 l_int32 format, ret, w, h, d, bps, spp, iscmap; 00859 l_int32 type; /* not used */ 00860 PIX *pix; 00861 00862 PROCNAME("pixReadHeaderMem"); 00863 00864 if (pw) *pw = 0; 00865 if (ph) *ph = 0; 00866 if (pbps) *pbps = 0; 00867 if (pspp) *pspp = 0; 00868 if (piscmap) *piscmap = 0; 00869 if (pformat) *pformat = 0; 00870 iscmap = 0; /* init to false */ 00871 if (!data) 00872 return ERROR_INT("data not defined", procName, 1); 00873 if (size < 8) 00874 return ERROR_INT("size < 8", procName, 1); 00875 00876 findFileFormatBuffer(data, &format); 00877 00878 switch (format) 00879 { 00880 case IFF_BMP: /* cheating: read the pix */ 00881 if ((pix = pixReadMemBmp(data, size)) == NULL) 00882 return ERROR_INT( "bmp: pix not read", procName, 1); 00883 pixGetDimensions(pix, &w, &h, &d); 00884 pixDestroy(&pix); 00885 bps = (d == 32) ? 8 : d; 00886 spp = (d == 32) ? 3 : 1; 00887 break; 00888 00889 case IFF_JFIF_JPEG: 00890 ret = readHeaderMemJpeg(data, size, &w, &h, &spp, NULL, NULL); 00891 bps = 8; 00892 if (ret) 00893 return ERROR_INT( "jpeg: no header info returned", procName, 1); 00894 break; 00895 00896 case IFF_PNG: 00897 ret = sreadHeaderPng(data, &w, &h, &bps, &spp, &iscmap); 00898 if (ret) 00899 return ERROR_INT( "png: no header info returned", procName, 1); 00900 break; 00901 00902 case IFF_TIFF: 00903 case IFF_TIFF_PACKBITS: 00904 case IFF_TIFF_RLE: 00905 case IFF_TIFF_G3: 00906 case IFF_TIFF_G4: 00907 case IFF_TIFF_LZW: 00908 case IFF_TIFF_ZIP: 00909 /* Reading page 0 by default; possibly redefine format */ 00910 ret = readHeaderMemTiff(data, size, 0, &w, &h, &bps, &spp, 00911 NULL, &iscmap, &format); 00912 if (ret) 00913 return ERROR_INT( "tiff: no header info returned", procName, 1); 00914 break; 00915 00916 case IFF_PNM: 00917 ret = sreadHeaderPnm(data, size, &w, &h, &d, &type, &bps, &spp); 00918 if (ret) 00919 return ERROR_INT( "pnm: no header info returned", procName, 1); 00920 break; 00921 00922 case IFF_GIF: /* cheating: read the pix */ 00923 if ((pix = pixReadMemGif(data, size)) == NULL) 00924 return ERROR_INT( "gif: pix not read", procName, 1); 00925 pixGetDimensions(pix, &w, &h, &d); 00926 pixDestroy(&pix); 00927 iscmap = 1; /* always colormapped; max 256 colors */ 00928 spp = 1; 00929 bps = d; 00930 break; 00931 00932 case IFF_JP2: 00933 return ERROR_INT("jp2: format not supported", procName, 1); 00934 break; 00935 00936 case IFF_SPIX: 00937 ret = sreadHeaderSpix((l_uint32 *)data, &w, &h, &bps, 00938 &spp, &iscmap); 00939 if (ret) 00940 return ERROR_INT( "pnm: no header info returned", procName, 1); 00941 break; 00942 00943 case IFF_UNKNOWN: 00944 return ERROR_INT("unknown format; no data returned", procName, 1); 00945 break; 00946 } 00947 00948 if (pw) *pw = w; 00949 if (ph) *ph = h; 00950 if (pbps) *pbps = bps; 00951 if (pspp) *pspp = spp; 00952 if (piscmap) *piscmap = iscmap; 00953 if (pformat) *pformat = format; 00954 return 0; 00955 } 00956 00957 00958 /*---------------------------------------------------------------------* 00959 * Test function for I/O with different formats * 00960 *---------------------------------------------------------------------*/ 00961 #ifdef HAVE_CONFIG_H 00962 #include "config_auto.h" 00963 #endif /* HAVE_CONFIG_H */ 00964 00965 /*! 00966 * ioFormatTest() 00967 * 00968 * Input: filename (input file) 00969 * Return: 0 if OK; 1 on error or if the test fails 00970 * 00971 * Notes: 00972 * (1) This writes and reads a set of output files losslessly 00973 * in different formats to /tmp, and tests that the 00974 * result before and after is unchanged. 00975 * (2) This should work properly on input images of any depth, 00976 * with and without colormaps. 00977 * (3) All supported formats are tested for bmp, png, tiff and 00978 * non-ascii pnm. Ascii pnm also works (but who'd ever want 00979 * to use it?) We allow 2 bpp bmp, although it's not 00980 * supported elsewhere. And we don't support reading 00981 * 16 bpp png, although this can be turned on in pngio.c. 00982 * (4) This silently skips png or tiff testing if HAVE_LIBPNG 00983 * or HAVE_LIBTIFF are 0, respectively. 00984 */ 00985 l_int32 00986 ioFormatTest(const char *filename) 00987 { 00988 l_int32 d, equal, problems; 00989 PIX *pixs, *pixc, *pixt, *pixt2; 00990 PIXCMAP *cmap; 00991 00992 PROCNAME("ioFormatTest"); 00993 00994 if (!filename) 00995 return ERROR_INT("filename not defined", procName, 1); 00996 00997 if ((pixs = pixRead(filename)) == NULL) 00998 return ERROR_INT("pixs not made", procName, 1); 00999 01000 /* Note that the reader automatically removes colormaps 01001 * from 1 bpp BMP images, but not from 8 bpp BMP images. 01002 * Therefore, if our 8 bpp image initially doesn't have a 01003 * colormap, we are going to need to remove it from any 01004 * pix read from a BMP file. */ 01005 pixc = pixClone(pixs); /* laziness */ 01006 cmap = pixGetColormap(pixc); /* colormap; can be NULL */ 01007 d = pixGetDepth(pixc); 01008 01009 problems = FALSE; 01010 01011 /* ----------------------- BMP -------------------------- */ 01012 01013 /* BMP works for 1, 2, 4, 8 and 32 bpp images. 01014 * It always writes colormaps for 1 and 8 bpp, so we must 01015 * remove it after readback if the input image doesn't have 01016 * a colormap. Although we can write/read 2 bpp BMP, nobody 01017 * else can read them! */ 01018 if (d == 1 || d == 8) { 01019 L_INFO("write/read bmp", procName); 01020 pixWrite(FILE_BMP, pixc, IFF_BMP); 01021 pixt = pixRead(FILE_BMP); 01022 if (!cmap) 01023 pixt2 = pixRemoveColormap(pixt, REMOVE_CMAP_BASED_ON_SRC); 01024 else 01025 pixt2 = pixClone(pixt); 01026 pixEqual(pixc, pixt2, &equal); 01027 if (!equal) { 01028 L_INFO(" **** bad bmp image ****", procName); 01029 problems = TRUE; 01030 } 01031 pixDestroy(&pixt); 01032 pixDestroy(&pixt2); 01033 } 01034 01035 if (d == 2 || d == 4 || d == 32) { 01036 L_INFO("write/read bmp", procName); 01037 pixWrite(FILE_BMP, pixc, IFF_BMP); 01038 pixt = pixRead(FILE_BMP); 01039 pixEqual(pixc, pixt, &equal); 01040 if (!equal) { 01041 L_INFO(" **** bad bmp image ****", procName); 01042 problems = TRUE; 01043 } 01044 pixDestroy(&pixt); 01045 } 01046 01047 /* ----------------------- PNG -------------------------- */ 01048 #if HAVE_LIBPNG 01049 /* PNG works for all depths, but here, because we strip 01050 * 16 --> 8 bpp on reading, we don't test png for 16 bpp. */ 01051 if (d != 16) { 01052 L_INFO("write/read png", procName); 01053 pixWrite(FILE_PNG, pixc, IFF_PNG); 01054 pixt = pixRead(FILE_PNG); 01055 pixEqual(pixc, pixt, &equal); 01056 if (!equal) { 01057 L_INFO(" **** bad png image ****", procName); 01058 problems = TRUE; 01059 } 01060 pixDestroy(&pixt); 01061 } 01062 #endif /* HAVE_LIBPNG */ 01063 01064 /* ----------------------- TIFF -------------------------- */ 01065 #if HAVE_LIBTIFF 01066 /* TIFF works for 1, 2, 4, 8, 16 and 32 bpp images. 01067 * Because 8 bpp tiff always writes 256 entry colormaps, the 01068 * colormap sizes may be different for 8 bpp images with 01069 * colormap; we are testing if the image content is the same. 01070 * Likewise, the 2 and 4 bpp tiff images with colormaps 01071 * have colormap sizes 4 and 16, rsp. This test should 01072 * work properly on the content, regardless of the number 01073 * of color entries in pixc. */ 01074 01075 /* tiff uncompressed works for all pixel depths */ 01076 L_INFO("write/read uncompressed tiff", procName); 01077 pixWrite(FILE_TIFF, pixc, IFF_TIFF); 01078 pixt = pixRead(FILE_TIFF); 01079 pixEqual(pixc, pixt, &equal); 01080 if (!equal) { 01081 L_INFO(" **** bad tiff uncompressed image ****", procName); 01082 problems = TRUE; 01083 } 01084 pixDestroy(&pixt); 01085 01086 /* tiff lzw works for all pixel depths */ 01087 L_INFO("write/read lzw compressed tiff", procName); 01088 pixWrite(FILE_LZW, pixc, IFF_TIFF_LZW); 01089 pixt = pixRead(FILE_LZW); 01090 pixEqual(pixc, pixt, &equal); 01091 if (!equal) { 01092 L_INFO(" **** bad tiff lzw compressed image ****", procName); 01093 problems = TRUE; 01094 } 01095 pixDestroy(&pixt); 01096 01097 /* tiff adobe deflate (zip) works for all pixel depths */ 01098 L_INFO("write/read zip compressed tiff", procName); 01099 pixWrite(FILE_ZIP, pixc, IFF_TIFF_ZIP); 01100 pixt = pixRead(FILE_ZIP); 01101 pixEqual(pixc, pixt, &equal); 01102 if (!equal) { 01103 L_INFO(" **** bad tiff zip compressed image ****", procName); 01104 problems = TRUE; 01105 } 01106 pixDestroy(&pixt); 01107 01108 /* tiff g4, g3, rle and packbits work for 1 bpp */ 01109 if (d == 1) { 01110 L_INFO("write/read g4 compressed tiff", procName); 01111 pixWrite(FILE_G4, pixc, IFF_TIFF_G4); 01112 pixt = pixRead(FILE_G4); 01113 pixEqual(pixc, pixt, &equal); 01114 if (!equal) { 01115 L_INFO(" **** bad tiff g4 image ****", procName); 01116 problems = TRUE; 01117 } 01118 pixDestroy(&pixt); 01119 01120 L_INFO("write/read g3 compressed tiff", procName); 01121 pixWrite(FILE_G3, pixc, IFF_TIFF_G3); 01122 pixt = pixRead(FILE_G3); 01123 pixEqual(pixc, pixt, &equal); 01124 if (!equal) { 01125 L_INFO(" **** bad tiff g3 image ****", procName); 01126 problems = TRUE; 01127 } 01128 pixDestroy(&pixt); 01129 01130 L_INFO("write/read rle compressed tiff", procName); 01131 pixWrite(FILE_RLE, pixc, IFF_TIFF_RLE); 01132 pixt = pixRead(FILE_RLE); 01133 pixEqual(pixc, pixt, &equal); 01134 if (!equal) { 01135 L_INFO(" **** bad tiff rle image ****", procName); 01136 problems = TRUE; 01137 } 01138 pixDestroy(&pixt); 01139 01140 L_INFO("write/read packbits compressed tiff", procName); 01141 pixWrite(FILE_PB, pixc, IFF_TIFF_PACKBITS); 01142 pixt = pixRead(FILE_PB); 01143 pixEqual(pixc, pixt, &equal); 01144 if (!equal) { 01145 L_INFO(" **** bad tiff packbits image ****", procName); 01146 problems = TRUE; 01147 } 01148 pixDestroy(&pixt); 01149 } 01150 #endif /* HAVE_LIBTIFF */ 01151 01152 /* ----------------------- PNM -------------------------- */ 01153 01154 /* pnm works for 1, 2, 4, 8, 16 and 32 bpp. 01155 * pnm doesn't have colormaps, so when we write colormapped 01156 * pix out as pnm, the colormap is removed. Thus for the test, 01157 * we must remove the colormap from pixc before testing. */ 01158 L_INFO("write/read pnm", procName); 01159 pixWrite(FILE_PNM, pixc, IFF_PNM); 01160 pixt = pixRead(FILE_PNM); 01161 if (cmap) 01162 pixt2 = pixRemoveColormap(pixc, REMOVE_CMAP_BASED_ON_SRC); 01163 else 01164 pixt2 = pixClone(pixc); 01165 pixEqual(pixt, pixt2, &equal); 01166 if (!equal) { 01167 L_INFO(" **** bad pnm image ****", procName); 01168 problems = TRUE; 01169 } 01170 pixDestroy(&pixt); 01171 pixDestroy(&pixt2); 01172 01173 if (problems == FALSE) 01174 L_INFO("All formats read and written OK!", procName); 01175 01176 pixDestroy(&pixc); 01177 pixDestroy(&pixs); 01178 return problems; 01179 }