Leptonica 1.68
C Image Processing Library

readfile.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 
00017 /*
00018  *  readfile.c:  reads image on file into memory
00019  *
00020  *      Top-level functions for reading images from file
00021  *           PIXA      *pixaReadFiles()
00022  *           PIXA      *pixaReadFilesSA()
00023  *           PIX       *pixRead()
00024  *           PIX       *pixReadWithHint()
00025  *           PIX       *pixReadIndexed()
00026  *           PIX       *pixReadStream()
00027  *
00028  *      Read header information from file
00029  *           l_int32    pixReadHeader()
00030  *
00031  *      Format finders
00032  *           l_int32    findFileFormat()
00033  *           l_int32    findFileFormatStream()
00034  *           l_int32    findFileFormatBuffer()
00035  *           l_int32    fileFormatIsTiff()
00036  *
00037  *      Read from memory
00038  *           PIX       *pixReadMem()
00039  *           l_int32    pixReadHeaderMem()
00040  *
00041  *      Test function for I/O with different formats 
00042  *           l_int32    ioFormatTest()
00043  */
00044 
00045 #include <string.h>
00046 #include "allheaders.h"
00047 
/*
 *  Selects the kind of PIX a reader should produce.  In this file,
 *  READ_24_BIT_COLOR is the value handed to the jpeg readers.
 */
enum {
    READ_24_BIT_COLOR  = 0,    /* full color: 24 bit data in a 32 bit pix */
    CONVERT_TO_PALETTE = 1,    /* 8 bit pix with a colormap */
    READ_GRAY          = 2     /* grayscale only */
};

/*
 *  Pathnames of the files written by ioFormatTest().
 *  The jpeg part of that test is not yet implemented.
 */
static const char *FILE_BMP  =  "/tmp/junkout.bmp";
static const char *FILE_PNG  =  "/tmp/junkout.png";
static const char *FILE_PNM  =  "/tmp/junkout.pnm";
static const char *FILE_G3   =  "/tmp/junkout_g3.tif";
static const char *FILE_G4   =  "/tmp/junkout_g4.tif";
static const char *FILE_RLE  =  "/tmp/junkout_rle.tif";
static const char *FILE_PB   =  "/tmp/junkout_packbits.tif";
static const char *FILE_LZW  =  "/tmp/junkout_lzw.tif";
static const char *FILE_ZIP  =  "/tmp/junkout_zip.tif";
static const char *FILE_TIFF =  "/tmp/junkout.tif";
static const char *FILE_JPG  =  "/tmp/junkout.jpg";

/*
 *  Leading bytes of the two kinds of jp2k file, as found in the source
 *  of the unix file(1) command.  findFileFormatBuffer() compares the
 *  start of the data against both of them.
 */
static const char JP2K_CODESTREAM[4] = { 0xff, 0x4f, 0xff, 0x51 };
static const char JP2K_IMAGE_DATA[12] = {
    0x00, 0x00, 0x00, 0x0C,
    0x6A, 0x50, 0x20, 0x20,
    0x0D, 0x0A, 0x87, 0x0A
};
00075 
00076 /*---------------------------------------------------------------------*
00077  *          Top-level functions for reading images from file           *
00078  *---------------------------------------------------------------------*/
00079 /*!
00080  *  pixaReadFiles()
00081  *
00082  *      Input:  dirname
00083  *              substr (<optional> substring filter on filenames; can be null)
00084  *      Return: pixa, or null on error
00085  *
00086  *  Notes:
00087  *      (1) @dirname is the full path for the directory.
00088  *      (2) @substr is the part of the file name (excluding
00089  *          the directory) that is to be matched.  All matching
00090  *          filenames are read into the Pixa.  If substr is NULL,
00091  *          all filenames are read into the Pixa.
00092  */
00093 PIXA *
00094 pixaReadFiles(const char  *dirname,
00095               const char  *substr)
00096 {
00097 PIXA    *pixa;
00098 SARRAY  *sa;
00099 
00100     PROCNAME("pixaReadFiles");
00101 
00102     if (!dirname)
00103         return (PIXA *)ERROR_PTR("dirname not defined", procName, NULL);
00104 
00105     if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
00106         return (PIXA *)ERROR_PTR("sa not made", procName, NULL);
00107 
00108     pixa = pixaReadFilesSA(sa);
00109     sarrayDestroy(&sa);
00110     return pixa;
00111 }
00112 
00113 
00114 /*!
00115  *  pixaReadFilesSA()
00116  *
00117  *      Input:  sarray (full pathnames for all files)
00118  *      Return: pixa, or null on error
00119  */
00120 PIXA *
00121 pixaReadFilesSA(SARRAY  *sa)
00122 {
00123 char    *str;
00124 l_int32  i, n;
00125 PIX     *pix;
00126 PIXA    *pixa;
00127 
00128     PROCNAME("pixaReadFilesSA");
00129 
00130     if (!sa)
00131         return (PIXA *)ERROR_PTR("sa not defined", procName, NULL);
00132 
00133     n = sarrayGetCount(sa);
00134     pixa = pixaCreate(n);
00135     for (i = 0; i < n; i++) {
00136         str = sarrayGetString(sa, i, L_NOCOPY);
00137         if ((pix = pixRead(str)) == NULL) {
00138             L_WARNING_STRING("pix not read from file %s", procName, str);
00139             continue;
00140         }
00141         pixaAddPix(pixa, pix, L_INSERT);
00142     }
00143 
00144     return pixa;
00145 }
00146 
00147 
00148 /*!
00149  *  pixRead()
00150  *
00151  *      Input:  filename (with full pathname or in local directory)
00152  *      Return: pix if OK; null on error
00153  */
00154 PIX *
00155 pixRead(const char  *filename)
00156 {
00157 FILE  *fp;
00158 PIX   *pix;
00159 
00160     PROCNAME("pixRead");
00161 
00162     if (!filename)
00163         return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
00164 
00165     if ((fp = fopenReadStream(filename)) == NULL)
00166         return (PIX *)ERROR_PTR("image file not found", procName, NULL);
00167     if ((pix = pixReadStream(fp, 0)) == NULL) {
00168         fclose(fp);
00169         return (PIX *)ERROR_PTR("pix not read", procName, NULL);
00170     }
00171 
00172         /* Close the stream except if GIF under windows, because
00173          * DGifCloseFile() closes the windows file stream! */
00174     if (pixGetInputFormat(pix) != IFF_GIF)
00175         fclose(fp);
00176 #ifndef _WIN32
00177     else  /* gif file */
00178         fclose(fp);
00179 #endif  /* ! _WIN32 */
00180 
00181     return pix;
00182 }
00183 
00184 
00185 /*!
00186  *  pixReadWithHint()
00187  *
00188  *      Input:  filename (with full pathname or in local directory)
00189  *              hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint)
00190  *      Return: pix if OK; null on error
00191  *
00192  *  Notes:
00193  *      (1) The hint is not binding, but may be used to optimize jpeg decoding.
00194  *          Use 0 for no hinting.
00195  */
00196 PIX *
00197 pixReadWithHint(const char  *filename,
00198                 l_int32      hint)
00199 {
00200 FILE  *fp;
00201 PIX   *pix;
00202 
00203     PROCNAME("pixReadWithHint");
00204 
00205     if (!filename)
00206         return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
00207 
00208     if ((fp = fopenReadStream(filename)) == NULL)
00209         return (PIX *)ERROR_PTR("image file not found", procName, NULL);
00210     pix = pixReadStream(fp, hint);
00211     fclose(fp);
00212 
00213     if (!pix)
00214         return (PIX *)ERROR_PTR("image not returned", procName, NULL);
00215     return pix;
00216 }
00217 
00218 
00219 /*!
00220  *  pixReadIndexed()
00221  *
00222  *      Input:  sarray (of full pathnames)
00223  *              index (into pathname array)
00224  *      Return: pix if OK; null if not found
00225  *
00226  *  Notes:
00227  *      (1) This function is useful for selecting image files from a
00228  *          directory, where the integer @index is embedded into
00229  *          the file name.
00230  *      (2) This is typically done by generating the sarray using
00231  *          getNumberedPathnamesInDirectory(), so that the @index
00232  *          pathname would have the number @index in it.  The size
00233  *          of the sarray should be the largest number (plus 1) appearing
00234  *          in the file names, respecting the constraints in the
00235  *          call to getNumberedPathnamesInDirectory().
00236  *      (3) Consequently, for some indices into the sarray, there may
00237  *          be no pathnames in the directory containing that number.
00238  *          By convention, we place empty C strings ("") in those
00239  *          locations in the sarray, and it is not an error if such
00240  *          a string is encountered and no pix is returned.
00241  *          Therefore, the caller must verify that a pix is returned.
00242  *      (4) See convertSegmentedPagesToPS() in src/psio1.c for an
00243  *          example of usage.
00244  */
00245 PIX *
00246 pixReadIndexed(SARRAY  *sa,
00247                l_int32  index)
00248 {
00249 char    *fname;
00250 l_int32  n;
00251 PIX     *pix;
00252 
00253     PROCNAME("pixReadIndexed");
00254 
00255     if (!sa)
00256         return (PIX *)ERROR_PTR("sa not defined", procName, NULL);
00257     n = sarrayGetCount(sa);
00258     if (index < 0 || index >= n)
00259         return (PIX *)ERROR_PTR("index out of bounds", procName, NULL);
00260 
00261     fname = sarrayGetString(sa, index, L_NOCOPY);
00262     if (fname[0] == '\0')
00263         return NULL;
00264 
00265     if ((pix = pixRead(fname)) == NULL) {
00266         L_ERROR_STRING("pix not read from file %s", procName, fname);
00267         return NULL;
00268     }
00269 
00270     return pix;
00271 }
00272 
00273 
00274 /*!
00275  *  pixReadStream()
00276  *
00277  *      Input:  fp (file stream)
00278  *              hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint)
00279  *      Return: pix if OK; null on error
00280  *
00281  *  Notes:
00282  *      (1) The hint only applies to jpeg.
00283  */
00284 PIX *
00285 pixReadStream(FILE    *fp,
00286               l_int32  hint)
00287 {
00288 l_int32  format;
00289 PIX     *pix;
00290 
00291     PROCNAME("pixReadStream");
00292 
00293     if (!fp)
00294         return (PIX *)ERROR_PTR("stream not defined", procName, NULL);
00295     pix = NULL;
00296 
00297     findFileFormatStream(fp, &format);
00298     switch (format)
00299     {
00300     case IFF_BMP:
00301         if ((pix = pixReadStreamBmp(fp)) == NULL )
00302             return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL);
00303         break;
00304 
00305     case IFF_JFIF_JPEG:
00306         if ((pix = pixReadStreamJpeg(fp, READ_24_BIT_COLOR, 1, NULL, hint))
00307                 == NULL)
00308             return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL);
00309         break;
00310 
00311     case IFF_PNG:
00312         if ((pix = pixReadStreamPng(fp)) == NULL)
00313             return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL);
00314         break;
00315 
00316     case IFF_TIFF:
00317     case IFF_TIFF_PACKBITS:
00318     case IFF_TIFF_RLE:
00319     case IFF_TIFF_G3:
00320     case IFF_TIFF_G4:
00321     case IFF_TIFF_LZW:
00322     case IFF_TIFF_ZIP:
00323         if ((pix = pixReadStreamTiff(fp, 0)) == NULL)  /* page 0 by default */
00324             return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL);
00325         break;
00326 
00327     case IFF_PNM:
00328         if ((pix = pixReadStreamPnm(fp)) == NULL)
00329             return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL);
00330         break;
00331 
00332     case IFF_GIF:
00333         if ((pix = pixReadStreamGif(fp)) == NULL)
00334             return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL);
00335         break;
00336 
00337     case IFF_JP2:
00338         return (PIX *)ERROR_PTR("jp2: format not supported", procName, NULL);
00339         break;
00340 
00341     case IFF_WEBP:
00342         if ((pix = pixReadStreamWebP(fp)) == NULL)
00343             return (PIX *)ERROR_PTR("webp: no pix returned", procName, NULL);
00344         break;
00345 
00346     case IFF_SPIX:
00347         if ((pix = pixReadStreamSpix(fp)) == NULL)
00348             return (PIX *)ERROR_PTR("spix: no pix returned", procName, NULL);
00349         break;
00350 
00351     case IFF_UNKNOWN:
00352         return (PIX *)ERROR_PTR( "Unknown format: no pix returned",
00353                 procName, NULL);
00354         break;
00355     }
00356 
00357     if (pix)
00358         pixSetInputFormat(pix, format);
00359     return pix;
00360 }
00361 
00362 
00363 
00364 /*---------------------------------------------------------------------*
00365  *                     Read header information from file               *
00366  *---------------------------------------------------------------------*/
00367 /*!
00368  *  pixReadHeader()
00369  *
00370  *      Input:  filename (with full pathname or in local directory)
00371  *              &format (<optional return> file format)
00372  *              &w, &h (<optional returns> width and height)
00373  *              &bps <optional return> bits/sample
00374  *              &spp <optional return> samples/pixel (1, 3 or 4)
00375  *              &iscmap (<optional return> 1 if cmap exists; 0 otherwise)
00376  *      Return: 0 if OK, 1 on error
00377  *
00378  *  Notes:
00379  *      (1) This reads the actual headers for jpeg, png, tiff and pnm.
00380  *          For bmp and gif, we cheat and read the entire file into a pix,
00381  *          from which we extract the "header" information.
00382  */
00383 l_int32
00384 pixReadHeader(const char  *filename,
00385               l_int32     *pformat,
00386               l_int32     *pw,
00387               l_int32     *ph,
00388               l_int32     *pbps,
00389               l_int32     *pspp,
00390               l_int32     *piscmap)
00391 {
00392 l_int32  format, ret, w, h, d, bps, spp, iscmap;
00393 l_int32  type;  /* ignored */
00394 FILE    *fp;
00395 PIX     *pix;
00396 
00397     PROCNAME("pixReadHeader");
00398 
00399     if (pw) *pw = 0;
00400     if (ph) *ph = 0;
00401     if (pbps) *pbps = 0;
00402     if (pspp) *pspp = 0;
00403     if (piscmap) *piscmap = 0;
00404     if (pformat) *pformat = 0;
00405     iscmap = 0;  /* init to false */
00406     if (!filename)
00407         return ERROR_INT("filename not defined", procName, 1);
00408 
00409     if ((fp = fopenReadStream(filename)) == NULL)
00410         return ERROR_INT("image file not found", procName, 1);
00411     findFileFormatStream(fp, &format);
00412     fclose(fp);
00413 
00414     switch (format)
00415     {
00416     case IFF_BMP:  /* cheating: reading the entire file */
00417         if ((pix = pixRead(filename)) == NULL)
00418             return ERROR_INT( "bmp: pix not read", procName, 1);
00419         pixGetDimensions(pix, &w, &h, &d);
00420         pixDestroy(&pix);
00421         bps = (d == 32) ? 8 : d;
00422         spp = (d == 32) ? 3 : 1;
00423         break;
00424 
00425     case IFF_JFIF_JPEG:
00426         ret = readHeaderJpeg(filename, &w, &h, &spp, NULL, NULL);
00427         bps = 8;
00428         if (ret)
00429             return ERROR_INT( "jpeg: no header info returned", procName, 1);
00430         break;
00431 
00432     case IFF_PNG:
00433         ret = readHeaderPng(filename, &w, &h, &bps, &spp, &iscmap);
00434         if (ret)
00435             return ERROR_INT( "png: no header info returned", procName, 1);
00436         break;
00437 
00438     case IFF_TIFF:
00439     case IFF_TIFF_PACKBITS:
00440     case IFF_TIFF_RLE:
00441     case IFF_TIFF_G3:
00442     case IFF_TIFF_G4:
00443     case IFF_TIFF_LZW:
00444     case IFF_TIFF_ZIP:
00445             /* Reading page 0 by default; possibly redefine format */
00446         ret = readHeaderTiff(filename, 0, &w, &h, &bps, &spp, NULL, &iscmap,
00447                              &format);
00448         if (ret)
00449             return ERROR_INT( "tiff: no header info returned", procName, 1);
00450         break;
00451 
00452     case IFF_PNM:
00453         ret = readHeaderPnm(filename, NULL, &w, &h, &d, &type, &bps, &spp);
00454         if (ret)
00455             return ERROR_INT( "pnm: no header info returned", procName, 1);
00456         break;
00457 
00458     case IFF_GIF:  /* cheating: reading the entire file */
00459         if ((pix = pixRead(filename)) == NULL)
00460             return ERROR_INT( "gif: pix not read", procName, 1);
00461         pixGetDimensions(pix, &w, &h, &d);
00462         pixDestroy(&pix);
00463         iscmap = 1;  /* always colormapped; max 256 colors */
00464         spp = 1;
00465         bps = d;
00466         break;
00467 
00468     case IFF_JP2:
00469         return ERROR_INT("jp2: format not supported", procName, 1);
00470         break;
00471 
00472     case IFF_WEBP:
00473         ret = readHeaderWebP(filename, &w, &h);
00474         bps = 8;
00475         spp = 3;
00476         if (ret)
00477             return ERROR_INT( "pnm: no header info returned", procName, 1);
00478         break;
00479 
00480     case IFF_SPIX:
00481         ret = readHeaderSpix(filename, &w, &h, &bps, &spp, &iscmap);
00482         if (ret)
00483             return ERROR_INT( "spix: no header info returned", procName, 1);
00484         break;
00485 
00486     case IFF_UNKNOWN:
00487         L_ERROR_STRING("unknown format in file %s", procName, filename);
00488         return 1;
00489         break;
00490     }
00491 
00492     if (pw) *pw = w;
00493     if (ph) *ph = h;
00494     if (pbps) *pbps = bps;
00495     if (pspp) *pspp = spp;
00496     if (piscmap) *piscmap = iscmap;
00497     if (pformat) *pformat = format;
00498     return 0;
00499 }
00500 
00501 
00502 /*---------------------------------------------------------------------*
00503  *                            Format finders                           *
00504  *---------------------------------------------------------------------*/
00505 /*!
00506  *  findFileFormat()
00507  *
00508  *      Input:  filename
00509  *              &format (<return>)
00510  *      Return: 0 if OK, 1 on error or if format is not recognized
00511  */
00512 l_int32
00513 findFileFormat(const char  *filename,
00514                l_int32     *pformat)
00515 {
00516 l_int32  ret;
00517 FILE    *fp;
00518 
00519     PROCNAME("findFileFormat");
00520 
00521     if (!pformat)
00522         return ERROR_INT("&format not defined", procName, 1);
00523     *pformat = IFF_UNKNOWN;
00524     if (!filename)
00525         return ERROR_INT("filename not defined", procName, 1);
00526 
00527     if ((fp = fopenReadStream(filename)) == NULL)
00528         return ERROR_INT("image file not found", procName, 1);
00529     ret = findFileFormatStream(fp, pformat);
00530     fclose(fp);
00531     return ret;
00532 }
00533 
00534 
00535 /*!
00536  *  findFileFormatStream()
00537  *
00538  *      Input:  fp (file stream)
00539  *              &format (<return>)
00540  *      Return: 0 if OK, 1 on error or if format is not recognized
00541  *
00542  *  Notes:
00543  *      (1) Important: Side effect -- this resets fp to BOF.
00544  */
00545 l_int32
00546 findFileFormatStream(FILE     *fp,
00547                      l_int32  *pformat)
00548 {
00549 l_uint8  firstbytes[12];
00550 l_int32  format;
00551 
00552     PROCNAME("findFileFormatStream");
00553 
00554     if (!pformat)
00555         return ERROR_INT("&format not defined", procName, 1);
00556     *pformat = IFF_UNKNOWN;
00557     if (!fp)
00558         return ERROR_INT("stream not defined", procName, 1);
00559 
00560     rewind(fp);
00561     if (fnbytesInFile(fp) < 12)
00562         return ERROR_INT("truncated file", procName, 1);
00563 
00564     if (fread((char *)&firstbytes, 1, 12, fp) != 12)
00565         return ERROR_INT("failed to read first 12 bytes of file", procName, 1);
00566     rewind(fp);
00567 
00568     findFileFormatBuffer(firstbytes, &format);
00569     if (format == IFF_TIFF) {
00570         findTiffCompression(fp, &format);
00571         rewind(fp);
00572     }
00573     *pformat = format;
00574     if (format == IFF_UNKNOWN)
00575         return 1;
00576     else
00577         return 0;
00578 }
00579 
00580 
00581 /*!
00582  *  findFileFormatBuffer()
00583  *
00584  *      Input:  byte buffer (at least 12 bytes in size; we can't check)
00585  *              &format (<return>)
00586  *      Return: 0 if OK, 1 on error or if format is not recognized
00587  *
00588  *  Notes:
00589  *      (1) This determines the file format from the first 12 bytes in
00590  *          the compressed data stream, which are stored in memory.
00591  *      (2) For tiff files, this returns IFF_TIFF.  The specific tiff
00592  *          compression is then determined using findTiffCompression().
00593  */
00594 l_int32
00595 findFileFormatBuffer(const l_uint8  *buf,
00596                      l_int32        *pformat)
00597 {
00598 l_uint16  twobytepw;
00599 
00600     PROCNAME("findFileFormatBuffer");
00601 
00602     if (!pformat)
00603         return ERROR_INT("&format not defined", procName, 1);
00604     *pformat = IFF_UNKNOWN;
00605     if (!buf)
00606         return ERROR_INT("byte buffer not defined", procName, 0);
00607 
00608         /* Check the bmp and tiff 2-byte header ids */
00609     ((char *)(&twobytepw))[0] = buf[0];
00610     ((char *)(&twobytepw))[1] = buf[1];
00611 
00612     if (convertOnBigEnd16(twobytepw) == BMP_ID) {
00613         *pformat = IFF_BMP;
00614         return 0;
00615     }
00616 
00617     if (twobytepw == TIFF_BIGEND_ID || twobytepw == TIFF_LITTLEEND_ID) {
00618         *pformat = IFF_TIFF;
00619         return 0;
00620     }
00621 
00622         /* Check for the p*m 2-byte header ids */
00623     if ((buf[0] == 'P' && buf[1] == '4') || /* newer packed */
00624         (buf[0] == 'P' && buf[1] == '1')) {  /* old format */
00625         *pformat = IFF_PNM;
00626         return 0;
00627     }
00628 
00629     if ((buf[0] == 'P' && buf[1] == '5') || /* newer */
00630         (buf[0] == 'P' && buf[1] == '2')) {  /* old */
00631         *pformat = IFF_PNM;
00632         return 0;
00633     }
00634 
00635     if ((buf[0] == 'P' && buf[1] == '6') || /* newer */
00636         (buf[0] == 'P' && buf[1] == '3')) {  /* old */
00637         *pformat = IFF_PNM;
00638         return 0;
00639     }
00640 
00641         /*  Consider the first 11 bytes of the standard JFIF JPEG header:
00642          *    - The first two bytes are the most important:  0xffd8.
00643          *    - The next two bytes are the jfif marker: 0xffe0.
00644          *      Not all jpeg files have this marker.
00645          *    - The next two bytes are the header length.
00646          *    - The next 5 bytes are a null-terminated string.
00647          *      For JFIF, the string is "JFIF", naturally.  For others it
00648          *      can be "Exif" or just about anything else.
00649          *    - Because of all this variability, we only check the first
00650          *      two byte marker.  All jpeg files are identified as
00651          *      IFF_JFIF_JPEG.  */
00652     if (buf[0] == 0xff && buf[1] == 0xd8) {
00653         *pformat = IFF_JFIF_JPEG;
00654         return 0;
00655     }
00656 
00657         /* Check for the 8 byte PNG signature (png_signature in png.c):
00658          *       {137, 80, 78, 71, 13, 10, 26, 10}      */
00659     if (buf[0] == 137 && buf[1] == 80  && buf[2] == 78  && buf[3] == 71  &&
00660         buf[4] == 13  && buf[5] == 10  && buf[6] == 26  && buf[7] == 10) {
00661         *pformat = IFF_PNG;
00662         return 0;
00663     }
00664 
00665         /* Look for "GIF87a" or "GIF89a" */
00666     if (buf[0] == 'G' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == '8' &&
00667         (buf[4] == '7' || buf[4] == '9') && buf[5] == 'a') {
00668         *pformat = IFF_GIF;
00669         return 0;
00670     }
00671 
00672         /* Check for both types of jp2k file */
00673     if (strncmp((const char *)buf, JP2K_CODESTREAM, 4) == 0 ||
00674         strncmp((const char *)buf, JP2K_IMAGE_DATA, 12) == 0) {
00675         *pformat = IFF_JP2;
00676         return 0;
00677     }
00678 
00679         /* Check for webp */
00680     if (buf[0] == 'R' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == 'F' &&
00681         buf[8] == 'W' && buf[9] == 'E' && buf[10] == 'B' && buf[11] == 'P') {
00682         *pformat = IFF_WEBP;
00683         return 0;
00684     }
00685 
00686         /* Check for "spix" serialized pix */
00687     if (buf[0] == 's' && buf[1] == 'p' && buf[2] == 'i' && buf[3] == 'x') {
00688         *pformat = IFF_SPIX;
00689         return 0;
00690     }
00691 
00692         /* File format identifier not found; unknown */
00693     return 1;
00694 }
00695 
00696 
00697 /*!
00698  *  fileFormatIsTiff()
00699  *
00700  *      Input:  fp (file stream)
00701  *      Return: 1 if file is tiff; 0 otherwise or on error
00702  */
00703 l_int32
00704 fileFormatIsTiff(FILE  *fp)
00705 {
00706 l_int32  format;
00707 
00708     PROCNAME("fileFormatIsTiff");
00709 
00710     if (!fp)
00711         return ERROR_INT("stream not defined", procName, 0);
00712 
00713     findFileFormatStream(fp, &format);
00714     if (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
00715         format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
00716         format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
00717         format == IFF_TIFF_ZIP)
00718         return 1;
00719     else
00720         return 0;
00721 }
00722 
00723 
00724 /*---------------------------------------------------------------------*
00725  *                            Read from memory                         *
00726  *---------------------------------------------------------------------*/
00727 /*!
00728  *  pixReadMem()
00729  *
00730  *      Input:  data (const; encoded)
00731  *              datasize (size of data)
00732  *      Return: pix, or null on error
00733  *
00734  *  Notes:
00735  *      (1) This is a variation of pixReadStream(), where the data is read
00736  *          from a memory buffer rather than a file.
00737  *      (2) On windows, this will only read tiff formatted files from
00738  *          memory.  For other formats, it requires fmemopen(3).
00739  *          Attempts to read those formats will fail at runtime.
00740  *      (3) findFileFormatBuffer() requires up to 8 bytes to decide on
00741  *          the format.  That determines the constraint here.
00742  */
00743 PIX *
00744 pixReadMem(const l_uint8  *data,
00745            size_t          size)
00746 {
00747 l_int32  format;
00748 PIX     *pix;
00749 
00750     PROCNAME("pixReadMem");
00751 
00752     if (!data)
00753         return (PIX *)ERROR_PTR("data not defined", procName, NULL);
00754     if (size < 8)
00755         return (PIX *)ERROR_PTR("size < 8", procName, NULL);
00756     pix = NULL;
00757 
00758     findFileFormatBuffer(data, &format);
00759     switch (format)
00760     {
00761     case IFF_BMP:
00762         if ((pix = pixReadMemBmp(data, size)) == NULL )
00763             return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL);
00764         break;
00765 
00766     case IFF_JFIF_JPEG:
00767         if ((pix = pixReadMemJpeg(data, size, READ_24_BIT_COLOR, 1, NULL, 0))
00768                 == NULL)
00769             return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL);
00770         break;
00771 
00772     case IFF_PNG:
00773         if ((pix = pixReadMemPng(data, size)) == NULL)
00774             return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL);
00775         break;
00776 
00777     case IFF_TIFF:
00778     case IFF_TIFF_PACKBITS:
00779     case IFF_TIFF_RLE:
00780     case IFF_TIFF_G3:
00781     case IFF_TIFF_G4:
00782     case IFF_TIFF_LZW:
00783     case IFF_TIFF_ZIP:
00784             /* Reading page 0 by default */
00785         if ((pix = pixReadMemTiff(data, size, 0)) == NULL)
00786             return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL);
00787         break;
00788 
00789     case IFF_PNM:
00790         if ((pix = pixReadMemPnm(data, size)) == NULL)
00791             return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL);
00792         break;
00793 
00794     case IFF_GIF:
00795         if ((pix = pixReadMemGif(data, size)) == NULL)
00796             return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL);
00797         break;
00798 
00799     case IFF_JP2:
00800         return (PIX *)ERROR_PTR("jp2: format not supported", procName, NULL);
00801         break;
00802 
00803     case IFF_SPIX:
00804         if ((pix = pixReadMemSpix(data, size)) == NULL)
00805             return (PIX *)ERROR_PTR("spix: no pix returned", procName, NULL);
00806         break;
00807 
00808     case IFF_UNKNOWN:
00809         return (PIX *)ERROR_PTR("Unknown format: no pix returned",
00810                 procName, NULL);
00811         break;
00812     }
00813 
00814         /* Set the input format.  For tiff reading from memory we lose
00815          * the actual input format; for 1 bpp, default to G4.  */
00816     if (pix) {
00817         if (format == IFF_TIFF && pixGetDepth(pix) == 1)
00818             format = IFF_TIFF_G4;
00819         pixSetInputFormat(pix, format);
00820     }
00821 
00822     return pix;
00823 }
00824 
00825 
00826 /*!
00827  *  pixReadHeaderMem()
00828  *
00829  *      Input:  data (const; encoded)
00830  *              datasize (size of data)
00831  *              &format (<optional returns> image format)
00832  *              &w, &h (<optional returns> width and height)
00833  *              &bps <optional return> bits/sample
00834  *              &spp <optional return> samples/pixel (1, 3 or 4)
00835  *              &iscmap (<optional return> 1 if cmap exists; 0 otherwise)
00836  *      Return: 0 if OK, 1 on error
00837  *
00838  *  Notes:
00839  *      (1) This reads the actual headers for jpeg, png, tiff and pnm.
00840  *          For bmp and gif, we cheat and read all the data into a pix,
00841  *          from which we extract the "header" information.
00842  *      (2) On windows, this will only read tiff formatted files from
00843  *          memory.  For other formats, it requires fmemopen(3).
00844  *          Attempts to read those formats will fail at runtime.
00845  *      (3) findFileFormatBuffer() requires up to 8 bytes to decide on
00846  *          the format.  That determines the constraint here.
00847  */
00848 l_int32
00849 pixReadHeaderMem(const l_uint8  *data,
00850                  size_t          size,
00851                  l_int32        *pformat,
00852                  l_int32        *pw,
00853                  l_int32        *ph,
00854                  l_int32        *pbps,
00855                  l_int32        *pspp,
00856                  l_int32        *piscmap)
00857 {
00858 l_int32  format, ret, w, h, d, bps, spp, iscmap;
00859 l_int32  type;  /* not used */
00860 PIX     *pix;
00861 
00862     PROCNAME("pixReadHeaderMem");
00863 
00864     if (pw) *pw = 0;
00865     if (ph) *ph = 0;
00866     if (pbps) *pbps = 0;
00867     if (pspp) *pspp = 0;
00868     if (piscmap) *piscmap = 0;
00869     if (pformat) *pformat = 0;
00870     iscmap = 0;  /* init to false */
00871     if (!data)
00872         return ERROR_INT("data not defined", procName, 1);
00873     if (size < 8)
00874         return ERROR_INT("size < 8", procName, 1);
00875 
00876     findFileFormatBuffer(data, &format);
00877 
00878     switch (format)
00879     {
00880     case IFF_BMP:  /* cheating: read the pix */
00881         if ((pix = pixReadMemBmp(data, size)) == NULL)
00882             return ERROR_INT( "bmp: pix not read", procName, 1);
00883         pixGetDimensions(pix, &w, &h, &d);
00884         pixDestroy(&pix);
00885         bps = (d == 32) ? 8 : d;
00886         spp = (d == 32) ? 3 : 1;
00887         break;
00888 
00889     case IFF_JFIF_JPEG:
00890         ret = readHeaderMemJpeg(data, size, &w, &h, &spp, NULL, NULL);
00891         bps = 8;
00892         if (ret)
00893             return ERROR_INT( "jpeg: no header info returned", procName, 1);
00894         break;
00895 
00896     case IFF_PNG:
00897         ret = sreadHeaderPng(data, &w, &h, &bps, &spp, &iscmap);
00898         if (ret)
00899             return ERROR_INT( "png: no header info returned", procName, 1);
00900         break;
00901 
00902     case IFF_TIFF:
00903     case IFF_TIFF_PACKBITS:
00904     case IFF_TIFF_RLE:
00905     case IFF_TIFF_G3:
00906     case IFF_TIFF_G4:
00907     case IFF_TIFF_LZW:
00908     case IFF_TIFF_ZIP:
00909             /* Reading page 0 by default; possibly redefine format */
00910         ret = readHeaderMemTiff(data, size, 0, &w, &h, &bps, &spp,
00911                                 NULL, &iscmap, &format);
00912         if (ret)
00913             return ERROR_INT( "tiff: no header info returned", procName, 1);
00914         break;
00915 
00916     case IFF_PNM:
00917         ret = sreadHeaderPnm(data, size, &w, &h, &d, &type, &bps, &spp);
00918         if (ret)
00919             return ERROR_INT( "pnm: no header info returned", procName, 1);
00920         break;
00921 
00922     case IFF_GIF:  /* cheating: read the pix */
00923         if ((pix = pixReadMemGif(data, size)) == NULL)
00924             return ERROR_INT( "gif: pix not read", procName, 1);
00925         pixGetDimensions(pix, &w, &h, &d);
00926         pixDestroy(&pix);
00927         iscmap = 1;  /* always colormapped; max 256 colors */
00928         spp = 1;
00929         bps = d;
00930         break;
00931 
00932     case IFF_JP2:
00933         return ERROR_INT("jp2: format not supported", procName, 1);
00934         break;
00935 
00936     case IFF_SPIX:
00937         ret = sreadHeaderSpix((l_uint32 *)data, &w, &h, &bps,
00938                                &spp, &iscmap);
00939         if (ret)
00940             return ERROR_INT( "pnm: no header info returned", procName, 1);
00941         break;
00942 
00943     case IFF_UNKNOWN:
00944         return ERROR_INT("unknown format; no data returned", procName, 1);
00945         break;
00946     }
00947 
00948     if (pw) *pw = w;
00949     if (ph) *ph = h;
00950     if (pbps) *pbps = bps;
00951     if (pspp) *pspp = spp;
00952     if (piscmap) *piscmap = iscmap;
00953     if (pformat) *pformat = format;
00954     return 0;
00955 }
00956 
00957 
00958 /*---------------------------------------------------------------------*
00959  *             Test function for I/O with different formats            *
00960  *---------------------------------------------------------------------*/
00961 #ifdef HAVE_CONFIG_H
00962 #include "config_auto.h"
00963 #endif  /* HAVE_CONFIG_H */
00964 
00965 /*!
00966  *  ioFormatTest()
00967  *
00968  *      Input:  filename (input file)
00969  *      Return: 0 if OK; 1 on error or if the test fails
00970  *
00971  *  Notes:
00972  *      (1) This writes and reads a set of output files losslessly
00973  *          in different formats to /tmp, and tests that the
00974  *          result before and after is unchanged.
00975  *      (2) This should work properly on input images of any depth,
00976  *          with and without colormaps.
00977  *      (3) All supported formats are tested for bmp, png, tiff and
00978  *          non-ascii pnm.  Ascii pnm also works (but who'd ever want
00979  *          to use it?)   We allow 2 bpp bmp, although it's not
00980  *          supported elsewhere.  And we don't support reading
00981  *          16 bpp png, although this can be turned on in pngio.c.
00982  *      (4) This silently skips png or tiff testing if HAVE_LIBPNG
00983  *          or HAVE_LIBTIFF are 0, respectively.
00984  */
00985 l_int32
00986 ioFormatTest(const char  *filename)
00987 {
00988 l_int32   d, equal, problems;
00989 PIX      *pixs, *pixc, *pixt, *pixt2;
00990 PIXCMAP  *cmap;
00991 
00992     PROCNAME("ioFormatTest");
00993 
00994     if (!filename)
00995         return ERROR_INT("filename not defined", procName, 1);
00996 
00997     if ((pixs = pixRead(filename)) == NULL)
00998         return ERROR_INT("pixs not made", procName, 1);
00999 
01000         /* Note that the reader automatically removes colormaps
01001          * from 1 bpp BMP images, but not from 8 bpp BMP images.
01002          * Therefore, if our 8 bpp image initially doesn't have a
01003          * colormap, we are going to need to remove it from any
01004          * pix read from a BMP file. */
01005     pixc = pixClone(pixs);  /* laziness */
01006     cmap = pixGetColormap(pixc);  /* colormap; can be NULL */
01007     d = pixGetDepth(pixc);
01008 
01009     problems = FALSE;
01010 
01011         /* ----------------------- BMP -------------------------- */
01012 
01013         /* BMP works for 1, 2, 4, 8 and 32 bpp images.
01014          * It always writes colormaps for 1 and 8 bpp, so we must
01015          * remove it after readback if the input image doesn't have
01016          * a colormap.  Although we can write/read 2 bpp BMP, nobody
01017          * else can read them! */
01018     if (d == 1 || d == 8) {
01019         L_INFO("write/read bmp", procName);
01020         pixWrite(FILE_BMP, pixc, IFF_BMP);
01021         pixt = pixRead(FILE_BMP);
01022         if (!cmap)
01023             pixt2 = pixRemoveColormap(pixt, REMOVE_CMAP_BASED_ON_SRC);
01024         else
01025             pixt2 = pixClone(pixt);
01026         pixEqual(pixc, pixt2, &equal);
01027         if (!equal) {
01028             L_INFO("   **** bad bmp image ****", procName);
01029             problems = TRUE;
01030         }
01031         pixDestroy(&pixt);
01032         pixDestroy(&pixt2);
01033     }
01034 
01035     if (d == 2 || d == 4 || d == 32) {
01036         L_INFO("write/read bmp", procName);
01037         pixWrite(FILE_BMP, pixc, IFF_BMP);
01038         pixt = pixRead(FILE_BMP);
01039         pixEqual(pixc, pixt, &equal);
01040         if (!equal) {
01041             L_INFO("   **** bad bmp image ****", procName);
01042             problems = TRUE;
01043         }
01044         pixDestroy(&pixt);
01045     }
01046 
01047         /* ----------------------- PNG -------------------------- */
01048 #if HAVE_LIBPNG
01049         /* PNG works for all depths, but here, because we strip
01050          * 16 --> 8 bpp on reading, we don't test png for 16 bpp. */
01051     if (d != 16) {
01052         L_INFO("write/read png", procName);
01053         pixWrite(FILE_PNG, pixc, IFF_PNG);
01054         pixt = pixRead(FILE_PNG);
01055         pixEqual(pixc, pixt, &equal);
01056         if (!equal) {
01057             L_INFO("   **** bad png image ****", procName);
01058             problems = TRUE;
01059         }
01060         pixDestroy(&pixt);
01061     }
01062 #endif  /* HAVE_LIBPNG */
01063 
01064         /* ----------------------- TIFF -------------------------- */
01065 #if HAVE_LIBTIFF
01066         /* TIFF works for 1, 2, 4, 8, 16 and 32 bpp images.
01067          * Because 8 bpp tiff always writes 256 entry colormaps, the
01068          * colormap sizes may be different for 8 bpp images with
01069          * colormap; we are testing if the image content is the same.
01070          * Likewise, the 2 and 4 bpp tiff images with colormaps
01071          * have colormap sizes 4 and 16, rsp.  This test should
01072          * work properly on the content, regardless of the number
01073          * of color entries in pixc. */
01074 
01075         /* tiff uncompressed works for all pixel depths */
01076     L_INFO("write/read uncompressed tiff", procName);
01077     pixWrite(FILE_TIFF, pixc, IFF_TIFF);
01078     pixt = pixRead(FILE_TIFF);
01079     pixEqual(pixc, pixt, &equal);
01080     if (!equal) {
01081         L_INFO("   **** bad tiff uncompressed image ****", procName);
01082         problems = TRUE;
01083     }
01084     pixDestroy(&pixt);
01085 
01086         /* tiff lzw works for all pixel depths */
01087     L_INFO("write/read lzw compressed tiff", procName);
01088     pixWrite(FILE_LZW, pixc, IFF_TIFF_LZW);
01089     pixt = pixRead(FILE_LZW);
01090     pixEqual(pixc, pixt, &equal);
01091     if (!equal) {
01092         L_INFO("   **** bad tiff lzw compressed image ****", procName);
01093         problems = TRUE;
01094     }
01095     pixDestroy(&pixt);
01096 
01097         /* tiff adobe deflate (zip) works for all pixel depths */
01098     L_INFO("write/read zip compressed tiff", procName);
01099     pixWrite(FILE_ZIP, pixc, IFF_TIFF_ZIP);
01100     pixt = pixRead(FILE_ZIP);
01101     pixEqual(pixc, pixt, &equal);
01102     if (!equal) {
01103         L_INFO("   **** bad tiff zip compressed image ****", procName);
01104         problems = TRUE;
01105     }
01106     pixDestroy(&pixt);
01107 
01108         /* tiff g4, g3, rle and packbits work for 1 bpp */
01109     if (d == 1) {
01110         L_INFO("write/read g4 compressed tiff", procName);
01111         pixWrite(FILE_G4, pixc, IFF_TIFF_G4);
01112         pixt = pixRead(FILE_G4);
01113         pixEqual(pixc, pixt, &equal);
01114         if (!equal) {
01115             L_INFO("   **** bad tiff g4 image ****", procName);
01116             problems = TRUE;
01117         }
01118         pixDestroy(&pixt);
01119 
01120         L_INFO("write/read g3 compressed tiff", procName);
01121         pixWrite(FILE_G3, pixc, IFF_TIFF_G3);
01122         pixt = pixRead(FILE_G3);
01123         pixEqual(pixc, pixt, &equal);
01124         if (!equal) {
01125             L_INFO("   **** bad tiff g3 image ****", procName);
01126             problems = TRUE;
01127         }
01128         pixDestroy(&pixt);
01129 
01130         L_INFO("write/read rle compressed tiff", procName);
01131         pixWrite(FILE_RLE, pixc, IFF_TIFF_RLE);
01132         pixt = pixRead(FILE_RLE);
01133         pixEqual(pixc, pixt, &equal);
01134         if (!equal) {
01135             L_INFO("   **** bad tiff rle image ****", procName);
01136             problems = TRUE;
01137         }
01138         pixDestroy(&pixt);
01139 
01140         L_INFO("write/read packbits compressed tiff", procName);
01141         pixWrite(FILE_PB, pixc, IFF_TIFF_PACKBITS);
01142         pixt = pixRead(FILE_PB);
01143         pixEqual(pixc, pixt, &equal);
01144         if (!equal) {
01145             L_INFO("   **** bad tiff packbits image ****", procName);
01146             problems = TRUE;
01147         }
01148         pixDestroy(&pixt);
01149     }
01150 #endif  /* HAVE_LIBTIFF */
01151 
01152         /* ----------------------- PNM -------------------------- */
01153 
01154         /* pnm works for 1, 2, 4, 8, 16 and 32 bpp.
01155          * pnm doesn't have colormaps, so when we write colormapped
01156          * pix out as pnm, the colormap is removed.  Thus for the test,
01157          * we must remove the colormap from pixc before testing.  */
01158     L_INFO("write/read pnm", procName);
01159     pixWrite(FILE_PNM, pixc, IFF_PNM);
01160     pixt = pixRead(FILE_PNM);
01161     if (cmap)
01162         pixt2 = pixRemoveColormap(pixc, REMOVE_CMAP_BASED_ON_SRC);
01163     else
01164         pixt2 = pixClone(pixc);
01165     pixEqual(pixt, pixt2, &equal);
01166     if (!equal) {
01167         L_INFO("   **** bad pnm image ****", procName);
01168         problems = TRUE;
01169     }
01170     pixDestroy(&pixt);
01171     pixDestroy(&pixt2);
01172 
01173     if (problems == FALSE)
01174         L_INFO("All formats read and written OK!", procName);
01175 
01176     pixDestroy(&pixc);
01177     pixDestroy(&pixs);
01178     return problems;
01179 }
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines