Leptonica/psio1_8c_source.html

00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015
00016 /*
00017  *  psio1.c
00018  *
00019  *    |=============================================================|
00020  *    |                         Important note                      |
00021  *    |=============================================================|
00022  *    | Some of these functions require libtiff, libjpeg and libz.  |
00023  *    | If you do not have these libraries, you must set            |
00024  *    |     #define  USE_PSIO     0                                 |
00025  *    | in environ.h.  This will link psio1stub.c                   |
00026  *    |=============================================================|
00027  *
00028  *     This is a PostScript "device driver" for wrapping images
00029  *     in PostScript.  The images can be rendered by a PostScript
00030  *     interpreter for viewing, using evince or gv.  They can also be
00031  *     rasterized for printing, using gs or an embedded interpreter
00032  *     in a PostScript printer.  And they can be converted to a pdf
00033  *     using gs (ps2pdf).
00034  *
00035  *     Convert specified files to PS
00036  *          l_int32          convertFilesToPS()
00037  *          l_int32          sarrayConvertFilesToPS()
00038  *          l_int32          convertFilesFittedToPS()
00039  *          l_int32          sarrayConvertFilesFittedToPS()
00040  *          l_int32          writeImageCompressedToPSFile()
00041  *
00042  *     Convert mixed text/image files to PS
00043  *          l_int32          convertSegmentedPagesToPS()
00044  *          l_int32          pixWriteSegmentedPageToPS()
00045  *          l_int32          pixWriteMixedToPS()
00046  *
00047  *     Convert any image file to PS for embedding
00048  *          l_int32          convertToPSEmbed()
00049  *
00050  *     Write all images in a pixa out to PS
00051  *          l_int32          pixaWriteCompressedToPS()
00052  *
00053  *  These PostScript converters are used in three different ways.
00054  *
00055  *  (1) For embedding a PS file in a program like TeX.
00056  *      convertToPSEmbed() handles this for levels 1, 2 and 3 output,
00057  *      and prog/converttops wraps this in an executable.
00058  *      converttops is a generalization of Thomas Merz's jpeg2ps wrapper,
00059  *      in that it works for all types (formats, depth, colormap)
00060  *      of input images and gives PS output in one of these formats
00061  *        * level 1 (uncompressed)
00062  *        * level 2 (compressed ccittg4 or dct)
00063  *        * level 3 (compressed flate)
00064  *
00065  *  (2) For composing a set of pages with any number of images
00066  *      painted on them, in either level 2 or level 3 formats.
00067  *
00068  *  (3) For printing a page image or a set of page images, at a
00069  *      resolution that optimally fills the page, using
00070  *      convertFilesFittedToPS().
00071  *
00072  *  The top-level calls of utilities in category 2, which can compose
00073  *  multiple images on a page, and which generate a PostScript file for
00074  *  printing or display (e.g., conversion to pdf), are:
00075  *      convertFilesToPS()
00076  *      convertFilesFittedToPS()
00077  *      convertSegmentedPagesToPS()
00078  *
00079  *  All images are output with page numbers.  Bounding box hints are
00080  *  more subtle.  They must be included for embedding images in
00081  *  TeX, for example, and the low-level writers include bounding
00082  *  box hints by default.  However, these hints should not be included for
00083  *  multi-page PostScript that is composed of a sequence of images;
00084  *  consequently, they are not written when calling higher level
00085  *  functions such as convertFilesToPS(), convertFilesFittedToPS()
00086  *  and convertSegmentedPagesToPS().  The function l_psWriteBoundingBox()
00087  *  sets a flag to give low-level control over this.
00088  */
00089
00090 #include <string.h>
00091 #include "allheaders.h"
00092
00093 /* --------------------------------------------*/
00094 #if  USE_PSIO   /* defined in environ.h */
00095  /* --------------------------------------------*/
00096
00097 /*-------------------------------------------------------------*
00098  *                Convert files in a directory to PS           *
00099  *-------------------------------------------------------------*/
00100 /*
00101  *  convertFilesToPS()
00102  *
00103  *      Input:  dirin (input directory)
00104  *              substr (<optional> substring filter on filenames; can be NULL)
00105  *              res (typ. 300 or 600 ppi)
00106  *              fileout (output ps file)
00107  *      Return: 0 if OK, 1 on error
00108  *
00109  *  Notes:
00110  *      (1) This generates a PS file for all image files in a specified
00111  *          directory that contain the substr pattern to be matched.
00112  *      (2) Each image is written to a separate page in the output PS file.
00113  *      (3) All images are written compressed:
00114  *              * if tiffg4  -->  use ccittg4
00115  *              * if jpeg    -->  use dct
00116  *              * all others -->  use flate
00117  *          If the image is jpeg or tiffg4, we use the existing compressed
00118  *          strings for the encoding; otherwise, we read the image into
00119  *          a pix and flate-encode the pieces.
00120  *      (4) The resolution is often confusing.  It is interpreted
00121  *          as the resolution of the output display device:  "If the
00122  *          input image were digitized at 300 ppi, what would it
00123  *          look like when displayed at res ppi."  So, for example,
00124  *          if res = 100 ppi, then the display pixels are 3x larger
00125  *          than the 300 ppi pixels, and the image will be rendered
00126  *          3x larger.
00127  *      (5) The size of the PostScript file is independent of the resolution,
00128  *          because the entire file is encoded.  The res parameter just
00129  *          tells the PS decomposer how to render the page.  Therefore,
00130  *          for minimum file size without loss of visual information,
00131  *          if the output res is less than 300, you should downscale
00132  *          the image to the output resolution before wrapping in PS.
00133  *      (6) The "canvas" on which the image is rendered, at the given
00134  *          output resolution, is a standard page size (8.5 x 11 in).
00135  */
00136 l_int32
00137 convertFilesToPS(const char  *dirin,
00138                  const char  *substr,
00139                  l_int32      res,
00140                  const char  *fileout)
00141 {
00142 SARRAY  *sa;
00143
00144     PROCNAME("convertFilesToPS");
00145
00146     if (!dirin)
00147         return ERROR_INT("dirin not defined", procName, 1);
00148     if (!fileout)
00149         return ERROR_INT("fileout not defined", procName, 1);
00150     if (res <= 0) {
00151         L_INFO("setting res to 300 ppi", procName);
00152         res = 300;
00153     }
00154     if (res < 10 || res > 4000)
00155         L_WARNING("res is typically in the range 300-600 ppi", procName);
00156
00157         /* Get all filtered and sorted full pathnames. */
00158     sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
00159
00160         /* Generate the PS file.  Don't use bounding boxes. */
00161     l_psWriteBoundingBox(FALSE);
00162     sarrayConvertFilesToPS(sa, res, fileout);
00163     l_psWriteBoundingBox(TRUE);
00164     sarrayDestroy(&sa);
00165     return 0;
00166 }
00167
00168
00169 /*
00170  *  sarrayConvertFilesToPS()
00171  *
00172  *      Input:  sarray (of full path names)
00173  *              res (typ. 300 or 600 ppi)
00174  *              fileout (output ps file)
00175  *      Return: 0 if OK, 1 on error
00176  *
00177  *  Notes:
00178  *      (1) See convertFilesToPS()
00179  */
00180 l_int32
00181 sarrayConvertFilesToPS(SARRAY      *sa,
00182                        l_int32      res,
00183                        const char  *fileout)
00184 {
00185 char    *fname;
00186 l_int32  i, nfiles, index, firstfile, ret, format;
00187
00188     PROCNAME("sarrayConvertFilesToPS");
00189
00190     if (!sa)
00191         return ERROR_INT("sa not defined", procName, 1);
00192     if (!fileout)
00193         return ERROR_INT("fileout not defined", procName, 1);
00194     if (res <= 0) {
00195         L_INFO("setting res to 300 ppi", procName);
00196         res = 300;
00197     }
00198     if (res < 10 || res > 4000)
00199         L_WARNING("res is typically in the range 300-600 ppi", procName);
00200
00201     nfiles = sarrayGetCount(sa);
00202     firstfile = TRUE;
00203     for (i = 0, index = 0; i < nfiles; i++) {
00204         fname = sarrayGetString(sa, i, L_NOCOPY);
00205         ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL);
00206         if (ret) continue;
00207         if (format == IFF_UNKNOWN)
00208             continue;
00209
00210         writeImageCompressedToPSFile(fname, fileout, res, &firstfile, &index);
00211     }
00212
00213     return 0;
00214 }
00215
00216
00217 /*
00218  *  convertFilesFittedToPS()
00219  *
00220  *      Input:  dirin (input directory)
00221  *              substr (<optional> substring filter on filenames; can be NULL)
00222  *              xpts, ypts (desired size in printer points; use 0 for default)
00223  *              fileout (output ps file)
00224  *      Return: 0 if OK, 1 on error
00225  *
00226  *  Notes:
00227  *      (1) This generates a PS file for all files in a specified directory
00228  *          that contain the substr pattern to be matched.
00229  *      (2) Each image is written to a separate page in the output PS file.
00230  *      (3) All images are written compressed:
00231  *              * if tiffg4  -->  use ccittg4
00232  *              * if jpeg    -->  use dct
00233  *              * all others -->  use flate
00234  *          If the image is jpeg or tiffg4, we use the existing compressed
00235  *          strings for the encoding; otherwise, we read the image into
00236  *          a pix and flate-encode the pieces.
00237  *      (4) The resolution is internally determined such that the images
00238  *          are rendered, in at least one direction, at 100% of the given
00239  *          size in printer points.  Use 0.0 for xpts or ypts to get
00240  *          the default value, which is 612.0 or 792.0, rsp.
00241  *      (5) The size of the PostScript file is independent of the resolution,
00242  *          because the entire file is encoded.  The @xpts and @ypts
00243  *          parameter tells the PS decomposer how to render the page.
00244  */
00245 l_int32
00246 convertFilesFittedToPS(const char  *dirin,
00247                        const char  *substr,
00248                        l_float32    xpts,
00249                        l_float32    ypts,
00250                        const char  *fileout)
00251 {
00252 SARRAY  *sa;
00253
00254     PROCNAME("convertFilesFittedToPS");
00255
00256     if (!dirin)
00257         return ERROR_INT("dirin not defined", procName, 1);
00258     if (!fileout)
00259         return ERROR_INT("fileout not defined", procName, 1);
00260     if (xpts <= 0.0) {
00261         L_INFO("setting xpts to 612.0 ppi", procName);
00262         xpts = 612.0;
00263     }
00264     if (ypts <= 0.0) {
00265         L_INFO("setting ypts to 792.0 ppi", procName);
00266         ypts = 792.0;
00267     }
00268     if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
00269         L_WARNING("xpts,ypts are typically in the range 500-800", procName);
00270
00271         /* Get all filtered and sorted full pathnames. */
00272     sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
00273
00274         /* Generate the PS file.  Don't use bounding boxes. */
00275     l_psWriteBoundingBox(FALSE);
00276     sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout);
00277     l_psWriteBoundingBox(TRUE);
00278     sarrayDestroy(&sa);
00279     return 0;
00280 }
00281
00282
00283 /*
00284  *  sarrayConvertFilesFittedToPS()
00285  *
00286  *      Input:  sarray (of full path names)
00287  *              xpts, ypts (desired size in printer points; use 0 for default)
00288  *              fileout (output ps file)
00289  *      Return: 0 if OK, 1 on error
00290  *
00291  *  Notes:
00292  *      (1) See convertFilesFittedToPS()
00293  */
00294 l_int32
00295 sarrayConvertFilesFittedToPS(SARRAY      *sa,
00296                              l_float32    xpts,
00297                              l_float32    ypts,
00298                              const char  *fileout)
00299 {
00300 char    *fname;
00301 l_int32  ret, i, w, h, nfiles, index, firstfile, format, res;
00302
00303     PROCNAME("sarrayConvertFilesFittedToPS");
00304
00305     if (!sa)
00306         return ERROR_INT("sa not defined", procName, 1);
00307     if (!fileout)
00308         return ERROR_INT("fileout not defined", procName, 1);
00309     if (xpts <= 0.0) {
00310         L_INFO("setting xpts to 612.0", procName);
00311         xpts = 612.0;
00312     }
00313     if (ypts <= 0.0) {
00314         L_INFO("setting ypts to 792.0", procName);
00315         ypts = 792.0;
00316     }
00317     if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
00318         L_WARNING("xpts,ypts are typically in the range 500-800", procName);
00319
00320     nfiles = sarrayGetCount(sa);
00321     firstfile = TRUE;
00322     for (i = 0, index = 0; i < nfiles; i++) {
00323         fname = sarrayGetString(sa, i, L_NOCOPY);
00324         ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL);
00325         if (ret) continue;
00326         if (format == IFF_UNKNOWN)
00327             continue;
00328
00329             /* Be sure the entire image is wrapped */
00330         if (xpts * h < ypts * w)
00331             res = (l_int32)((l_float32)w * 72.0 / xpts);
00332         else
00333             res = (l_int32)((l_float32)h * 72.0 / ypts);
00334
00335         writeImageCompressedToPSFile(fname, fileout, res, &firstfile, &index);
00336     }
00337
00338     return 0;
00339 }
00340
00341
00342 /*
00343  *  writeImageCompressedToPSFile()
00344  *
00345  *      Input:  filein (input image file)
00346  *              fileout (output ps file)
00347  *              res (output printer resolution)
00348  *              &firstfile (<input and return> 1 if the first image;
00349  *                          0 otherwise)
00350  *              &index (<input and return> index of image in output ps file)
00351  *      Return: 0 if OK, 1 on error
00352  *
00353  *  Notes:
00354  *      (1) This wraps a single page image in PS.
00355  *      (2) The input file can be in any format.  It is compressed as follows:
00356  *             * if in tiffg4  -->  use ccittg4
00357  *             * if in jpeg    -->  use dct
00358  *             * all others    -->  use flate
00359  *      (3) Before the first call, set @firstpage = 1.  After writing
00360  *          the first page, it will be set to 0.
00361  *      (4) @index is incremented if the page is successfully written.
00362  */
00363 l_int32
00364 writeImageCompressedToPSFile(const char  *filein,
00365                              const char  *fileout,
00366                              l_int32      res,
00367                              l_int32     *pfirstfile,
00368                              l_int32     *pindex)
00369 {
00370 const char  *op;
00371 l_int32      format, retval;
00372
00373     PROCNAME("writeImageCompressedToPSFile");
00374
00375     if (!pfirstfile || !pindex)
00376         return ERROR_INT("&firstfile and &index not defined", procName, 1);
00377
00378     findFileFormat(filein, &format);
00379     if (format == IFF_UNKNOWN) {
00380         L_ERROR_STRING("format of %s not known", procName, filein);
00381         return 1;
00382     }
00383
00384     op = (*pfirstfile == TRUE) ? "w" : "a";
00385     if (format == IFF_JFIF_JPEG) {
00386         retval = convertJpegToPS(filein, fileout, op, 0, 0,
00387                                  res, 1.0, *pindex + 1, TRUE);
00388         if (retval == 0) {
00389             *pfirstfile = FALSE;
00390             (*pindex)++;
00391         }
00392     }
00393     else if (format == IFF_TIFF_G4) {
00394         retval = convertG4ToPS(filein, fileout, op, 0, 0,
00395                                res, 1.0, *pindex + 1, FALSE, TRUE);
00396         if (retval == 0) {
00397             *pfirstfile = FALSE;
00398             (*pindex)++;
00399         }
00400     }
00401     else {  /* all other image formats */
00402         retval = convertFlateToPS(filein, fileout, op, 0, 0,
00403                                   res, 1.0, *pindex + 1, TRUE);
00404         if (retval == 0) {
00405             *pfirstfile = FALSE;
00406             (*pindex)++;
00407         }
00408     }
00409
00410     return retval;
00411 }
00412
00413
00414 /*-------------------------------------------------------------*
00415  *              Convert mixed text/image files to PS           *
00416  *-------------------------------------------------------------*/
00417 /*
00418  *  convertSegmentedPagesToPS()
00419  *
00420  *      Input:  pagedir (input page image directory)
00421  *              pagestr (<optional> substring filter on page filenames;
00422  *                       can be NULL)
00423  *              maskdir (input mask image directory)
00424  *              maskstr (<optional> substring filter on mask filenames;
00425  *                       can be NULL)
00426  *              numpre (number of characters in name before number)
00427  *              numpost (number of characters in name after number)
00428  *              maxnum (only consider page numbers up to this value)
00429  *              textscale (scale of text output relative to pixs)
00430  *              imagescale (scale of image output relative to pixs)
00431  *              threshold (for binarization; typ. about 190; 0 for default)
00432  *              fileout (output ps file)
00433  *      Return: 0 if OK, 1 on error
00434  *
00435  *  Notes:
00436  *      (1) This generates a PS file for all page image and mask files in two
00437  *          specified directories and that contain the page numbers as
00438  *          specified below.  The two directories can be the same, in which
00439  *          case the page and mask files are differentiated by the two
00440  *          substrings for string matches.
00441  *      (2) The page images are taken in lexicographic order.
00442  *          Mask images whose numbers match the page images are used to
00443  *          segment the page images.  Page images without a matching
00444  *          mask image are scaled, thresholded and rendered entirely as text.
00445  *      (3) Each PS page is generated as a compressed representation of
00446  *          the page image, where the part of the image under the mask
00447  *          is suitably scaled and compressed as DCT (i.e., jpeg), and
00448  *          the remaining part of the page is suitably scaled, thresholded,
00449  *          compressed as G4 (i.e., tiff g4), and rendered by painting
00450  *          black through the resulting text mask.
00451  *      (4) The scaling is typically 2x down for the DCT component
00452  *          (@imagescale = 0.5) and 2x up for the G4 component
00453  *          (@textscale = 2.0).
00454  *      (5) The resolution is automatically set to fit to a
00455  *          letter-size (8.5 x 11 inch) page.
00456  *      (6) Both the DCT and the G4 encoding are PostScript level 2.
00457  *      (7) It is assumed that the page number is contained within
00458  *          the basename (the filename without directory or extension).
00459  *          @numpre is the number of characters in the basename
00460  *          preceeding the actual page numer; @numpost is the number
00461  *          following the page number.  Note: the same numbers must be
00462  *          applied to both the page and mask image names.
00463  *      (8) To render a page as is -- that is, with no thresholding
00464  *          of any pixels -- use a mask in the mask directory that is
00465  *          full size with all pixels set to 1.  If the page is 1 bpp,
00466  *          it is not necessary to have a mask.
00467  */
00468 l_int32
00469 convertSegmentedPagesToPS(const char  *pagedir,
00470                           const char  *pagestr,
00471                           const char  *maskdir,
00472                           const char  *maskstr,
00473                           l_int32      numpre,
00474                           l_int32      numpost,
00475                           l_int32      maxnum,
00476                           l_float32    textscale,
00477                           l_float32    imagescale,
00478                           l_int32      threshold,
00479                           const char  *fileout)
00480 {
00481 l_int32  pageno, i, npages;
00482 PIX     *pixs, *pixm;
00483 SARRAY  *sapage, *samask;
00484
00485     PROCNAME("convertSegmentedPagesToPS");
00486
00487     if (!pagedir)
00488         return ERROR_INT("pagedir not defined", procName, 1);
00489     if (!maskdir)
00490         return ERROR_INT("maskdir not defined", procName, 1);
00491     if (!fileout)
00492         return ERROR_INT("fileout not defined", procName, 1);
00493     if (threshold <= 0) {
00494         L_INFO("setting threshold to 190", procName);
00495         threshold = 190;
00496     }
00497
00498         /* Get numbered full pathnames; max size of sarray is maxnum */
00499     sapage = getNumberedPathnamesInDirectory(pagedir, pagestr,
00500                                              numpre, numpost, maxnum);
00501     samask = getNumberedPathnamesInDirectory(maskdir, maskstr,
00502                                              numpre, numpost, maxnum);
00503     sarrayPadToSameSize(sapage, samask, (char *)"");
00504     if ((npages = sarrayGetCount(sapage)) == 0) {
00505         sarrayDestroy(&sapage);
00506         sarrayDestroy(&samask);
00507         return ERROR_INT("no matching pages found", procName, 1);
00508     }
00509
00510         /* Generate the PS file */
00511     pageno = 1;
00512     for (i = 0; i < npages; i++) {
00513         if ((pixs = pixReadIndexed(sapage, i)) == NULL)
00514             continue;
00515         pixm = pixReadIndexed(samask, i);
00516         pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale,
00517                                   threshold, pageno, fileout);
00518         pixDestroy(&pixs);
00519         pixDestroy(&pixm);
00520         pageno++;
00521     }
00522
00523     sarrayDestroy(&sapage);
00524     sarrayDestroy(&samask);
00525     return 0;
00526 }
00527
00528
00529 /*
00530  *  pixWriteSegmentedPageToPS()
00531  *
00532  *      Input:  pixs (all depths; colormap ok)
00533  *              pixm (<optional> 1 bpp segmentation mask over image region)
00534  *              textscale (scale of text output relative to pixs)
00535  *              imagescale (scale of image output relative to pixs)
00536  *              threshold (threshold for binarization; typ. 190)
00537  *              pageno (page number in set; use 1 for new output file)
00538  *              fileout (output ps file)
00539  *      Return: 0 if OK, 1 on error
00540  *
00541  *  Notes:
00542  *      (1) This generates the PS string for a mixed text/image page,
00543  *          and adds it to an existing file if @pageno > 1.
00544  *          The PS output is determined by fitting the result to
00545  *          a letter-size (8.5 x 11 inch) page.
00546  *      (2) The two images (pixs and pixm) are at the same resolution
00547  *          (typically 300 ppi).  They are used to generate two compressed
00548  *          images, pixb and pixc, that are put directly into the output
00549  *          PS file.
00550  *      (3) pixb is the text component.  In the PostScript world, we think of
00551  *          it as a mask through which we paint black.  It is produced by
00552  *          scaling pixs by @textscale, and thresholding to 1 bpp.
00553  *      (4) pixc is the image component, which is that part of pixs under
00554  *          the mask pixm.  It is scaled from pixs by @imagescale.
00555  *      (5) Typical values are textscale = 2.0 and imagescale = 0.5.
00556  *      (6) If pixm == NULL, the page has only text.  If it is all black,
00557  *          the page is all image and has no text.
00558  *      (7) This can be used to write a multi-page PS file, by using
00559  *          sequential page numbers with the same output file.  It can
00560  *          also be used to write separate PS files for each page,
00561  *          by using different output files with @pageno = 0 or 1.
00562  */
00563 l_int32
00564 pixWriteSegmentedPageToPS(PIX         *pixs,
00565                           PIX         *pixm,
00566                           l_float32    textscale,
00567                           l_float32    imagescale,
00568                           l_int32      threshold,
00569                           l_int32      pageno,
00570                           const char  *fileout)
00571 {
00572 l_int32    alltext, notext, d, ret;
00573 l_uint32   val;
00574 l_float32  scaleratio;
00575 PIX       *pixmi, *pixmis, *pixt, *pixg, *pixsc, *pixb, *pixc;
00576
00577     PROCNAME("pixWriteSegmentedPageToPS");
00578
00579     if (!pixs)
00580         return ERROR_INT("pixs not defined", procName, 1);
00581     if (!fileout)
00582         return ERROR_INT("fileout not defined", procName, 1);
00583     if (imagescale <= 0.0 || textscale <= 0.0)
00584         return ERROR_INT("relative scales must be > 0.0", procName, 1);
00585
00586         /* Analyze the page.  Determine the ratio by which the
00587          * binary text mask is scaled relative to the image part.
00588          * If there is no image region (alltext == TRUE), the
00589          * text mask will be rendered directly to fit the page,
00590          * and scaleratio = 1.0.  */
00591     alltext = TRUE;
00592     notext = FALSE;
00593     scaleratio = 1.0;
00594     if (pixm) {
00595         pixZero(pixm, &alltext);  /* pixm empty: all text */
00596         if (alltext)
00597             pixm = NULL;  /* treat it as not existing here */
00598         else {
00599             pixmi = pixInvert(NULL, pixm);
00600             pixZero(pixmi, &notext);  /* pixm full; no text */
00601             pixDestroy(&pixmi);
00602             scaleratio = textscale / imagescale;
00603         }
00604     }
00605
00606     if (pixGetDepth(pixs) == 1) {  /* render tiff g4 */
00607         pixb = pixClone(pixs);
00608         pixc = NULL;
00609     }
00610     else {
00611         pixt = pixConvertTo8Or32(pixs, 0, 0);  /* this can be a clone of pixs */
00612
00613             /* Get the binary text mask.  Note that pixg cannot be a
00614              * clone of pixs, because it may be altered by pixSetMasked(). */
00615         pixb = NULL;
00616         if (notext == FALSE) {
00617             d = pixGetDepth(pixt);
00618             if (d == 8)
00619                 pixg = pixCopy(NULL, pixt);
00620             else  /* d == 32 */
00621                 pixg = pixConvertRGBToLuminance(pixt);
00622             if (pixm)  /* clear out the image parts */
00623                 pixSetMasked(pixg, pixm, 255);
00624             if (textscale == 1.0)
00625                 pixsc = pixClone(pixg);
00626             else if (textscale >= 0.7)
00627                 pixsc = pixScaleGrayLI(pixg, textscale, textscale);
00628             else
00629                 pixsc = pixScaleAreaMap(pixg, textscale, textscale);
00630             pixb = pixThresholdToBinary(pixsc, threshold);
00631             pixDestroy(&pixg);
00632             pixDestroy(&pixsc);
00633         }
00634
00635             /* Get the scaled image region */
00636         pixc = NULL;
00637         if (pixm) {
00638             if (imagescale == 1.0)
00639                 pixsc = pixClone(pixt);  /* can possibly be a clone of pixs */
00640             else
00641                 pixsc = pixScale(pixt, imagescale, imagescale);
00642
00643                 /* If pixm is not full, clear the pixels in pixsc
00644                  * corresponding to bg in pixm, where there can be text
00645                  * that is written through the mask pixb.  Note that
00646                  * we could skip this and use pixsc directly in
00647                  * pixWriteMixedToPS(); however, clearing these
00648                  * non-image regions to a white background will reduce
00649                  * the size of pixc (relative to pixsc), and hence
00650                  * reduce the size of the PS file that is generated.
00651                  * Use a copy so that we don't accidentally alter pixs.  */
00652             if (notext == FALSE) {
00653                 pixmis = pixScale(pixm, imagescale, imagescale);
00654                 pixmi = pixInvert(NULL, pixmis);
00655                 val = (d == 8) ? 0xff : 0xffffff00;
00656                 pixc = pixCopy(NULL, pixsc);
00657                 pixSetMasked(pixc, pixmi, val);  /* clear non-image part */
00658                 pixDestroy(&pixmis);
00659                 pixDestroy(&pixmi);
00660             }
00661             else
00662                 pixc = pixClone(pixsc);
00663             pixDestroy(&pixsc);
00664         }
00665         pixDestroy(&pixt);
00666     }
00667
00668         /* Generate the PS file.  Don't use bounding boxes. */
00669     l_psWriteBoundingBox(FALSE);
00670     ret = pixWriteMixedToPS(pixb, pixc, scaleratio, pageno, fileout);
00671     l_psWriteBoundingBox(TRUE);
00672     pixDestroy(&pixb);
00673     pixDestroy(&pixc);
00674     return ret;
00675 }
00676
00677
00678 /*
00679  *  pixWriteMixedToPS()
00680  *
00681  *      Input:  pixb (<optional> 1 bpp "mask"; typically for text)
00682  *              pixc (<optional> 8 or 32 bpp image regions)
00683  *              scale (relative scale factor for rendering pixb
00684  *                    relative to pixc; typ. 4.0)
00685  *              pageno (page number in set; use 1 for new output file)
00686  *              fileout (output ps file)
00687  *      Return: 0 if OK, 1 on error
00688  *
00689  *  Notes:
00690  *      (1) This low level function generates the PS string for a mixed
00691  *          text/image page, and adds it to an existing file if
00692  *          @pageno > 1.
00693  *      (2) The two images (pixb and pixc) are typically generated at the
00694  *          resolution that they will be rendered in the PS file.
00695  *      (3) pixb is the text component.  In the PostScript world, we think of
00696  *          it as a mask through which we paint black.
00697  *      (4) pixc is the (typically halftone) image component.  It is
00698  *          white in the rest of the page.  To minimize the size of the
00699  *          PS file, it should be rendered at a resolution that is at
00700  *          least equal to its actual resolution.
00701  *      (5) @scale gives the ratio of resolution of pixb to pixc.
00702  *          Typical resolutions are: 600 ppi for pixb, 150 ppi for pixc;
00703  *          so @scale = 4.0.  If one of the images is not defined,
00704  *          the value of @scale is ignored.
00705  *      (6) We write pixc with DCT compression (jpeg).  This is followed
00706  *          by painting the text as black through the mask pixb.  If
00707  *          pixc doesn't exist (alltext), we write the text with the
00708  *          PS "image" operator instead of the "imagemask" operator,
00709  *          because ghostscript's ps2pdf is flaky when the latter is used.
00710  *      (7) The actual output resolution is determined by fitting the
00711  *          result to a letter-size (8.5 x 11 inch) page.
00712  */
00713 l_int32
00714 pixWriteMixedToPS(PIX         *pixb,
00715                   PIX         *pixc,
00716                   l_float32    scale,
00717                   l_int32      pageno,
00718                   const char  *fileout)
00719 {
00720 const char   tnameb[] = "/tmp/junk_pix_write_mixed.tif";
00721 const char   tnamec[] = "/tmp/junk_pix_write_mixed.jpg";
00722 const char  *op;
00723 l_int32      resb, resc, endpage, maskop, ret;
00724
00725     PROCNAME("pixWriteMixedToPS");
00726
00727     if (!pixb && !pixc)
00728         return ERROR_INT("pixb and pixc both undefined", procName, 1);
00729     if (!fileout)
00730         return ERROR_INT("fileout not defined", procName, 1);
00731
00732         /* Compute the resolution that fills a letter-size page. */
00733     if (!pixc)
00734        resb = getResLetterPage(pixGetWidth(pixb), pixGetHeight(pixb), 0);
00735     else {
00736        resc = getResLetterPage(pixGetWidth(pixc), pixGetHeight(pixc), 0);
00737        if (pixb)
00738            resb = (l_int32)(scale * resc);
00739     }
00740
00741         /* Write the jpeg image first */
00742     if (pixc) {
00743         pixWrite(tnamec, pixc, IFF_JFIF_JPEG);
00744         endpage = (pixb) ? FALSE : TRUE;
00745         op = (pageno <= 1) ? "w" : "a";
00746         ret = convertJpegToPS(tnamec, fileout, op, 0, 0, resc, 1.0,
00747                               pageno, endpage);
00748         if (ret)
00749             return ERROR_INT("jpeg data not written", procName, 1);
00750     }
00751
00752         /* Write the binary data, either directly or, if there is
00753          * a jpeg image on the page, through the mask. */
00754     if (pixb) {
00755         pixWrite(tnameb, pixb, IFF_TIFF_G4);
00756         op = (pageno <= 1 && !pixc) ? "w" : "a";
00757         maskop = (pixc) ? 1 : 0;
00758         ret = convertG4ToPS(tnameb, fileout, op, 0, 0, resb, 1.0,
00759                             pageno, maskop, 1);
00760         if (ret)
00761             return ERROR_INT("tiff data not written", procName, 1);
00762     }
00763
00764     return 0;
00765 }
00766
00767
00768 /*-------------------------------------------------------------*
00769  *            Convert any image file to PS for embedding       *
00770  *-------------------------------------------------------------*/
00771 /*
00772  *  convertToPSEmbed()
00773  *
00774  *      Input:  filein (input image file -- any format)
00775  *              fileout (output ps file)
00776  *              level (compression: 1 (uncompressed), 2 or 3)
00777  *      Return: 0 if OK, 1 on error
00778  *
00779  *  Notes:
00780  *      (1) This is a wrapper function that generates a PS file with
00781  *          a bounding box, from any input image file.
00782  *      (2) Do the best job of compression given the specified level.
00783  *          @level=3 does flate compression on anything that is not
00784  *          tiffg4 (1 bpp) or jpeg (8 bpp or rgb).
00785  *      (3) If @level=2 and the file is not tiffg4 or jpeg, it will
00786  *          first be written to file as jpeg with quality = 75.
00787  *          This will remove the colormap and cause some degradation
00788  *          in the image.
00789  *      (4) The bounding box is required when a program such as TeX
00790  *          (through epsf) places and rescales the image.  It is
00791  *          sized for fitting the image to an 8.5 x 11.0 inch page.
00792  */
00793 l_int32
00794 convertToPSEmbed(const char  *filein,
00795                  const char  *fileout,
00796                  l_int32      level)
00797 {
00798 const char  nametif[] = "/tmp/junk_convert_ps_embed.tif";
00799 const char  namejpg[] = "/tmp/junk_convert_ps_embed.jpg";
00800 l_int32     d, format;
00801 PIX        *pix, *pixs;
00802
00803     PROCNAME("convertToPSEmbed");
00804
00805     if (!filein)
00806         return ERROR_INT("filein not defined", procName, 1);
00807     if (!fileout)
00808         return ERROR_INT("fileout not defined", procName, 1);
00809     if (level != 1 && level != 2 && level != 3) {
00810         L_ERROR("invalid level specified; using level 2", procName);
00811         level = 2;
00812     }
00813
00814     if (level == 1) {  /* no compression */
00815         pixWritePSEmbed(filein, fileout);
00816         return 0;
00817     }
00818
00819         /* Find the format and write out directly if in jpeg or tiff g4 */
00820     findFileFormat(filein, &format);
00821     if (format == IFF_JFIF_JPEG) {
00822         convertJpegToPSEmbed(filein, fileout);
00823         return 0;
00824     }
00825     else if (format == IFF_TIFF_G4) {
00826         convertG4ToPSEmbed(filein, fileout);
00827         return 0;
00828     }
00829     else if (format == IFF_UNKNOWN) {
00830         L_ERROR_STRING("format of %s not known", procName, filein);
00831         return 1;
00832     }
00833
00834         /* If level 3, flate encode. */
00835     if (level == 3) {
00836         convertFlateToPSEmbed(filein, fileout);
00837         return 0;
00838     }
00839
00840         /* OK, it's level 2, so we must convert to jpeg or tiff g4 */
00841     if ((pixs = pixRead(filein)) == NULL)
00842         return ERROR_INT("image not read from file", procName, 1);
00843     d = pixGetDepth(pixs);
00844     if ((d == 2 || d == 4) && !pixGetColormap(pixs))
00845         pix = pixConvertTo8(pixs, 0);
00846     else if (d == 16)
00847         pix = pixConvert16To8(pixs, 1);
00848     else
00849         pix = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC);
00850
00851     d = pixGetDepth(pix);
00852     if (d == 1) {
00853         pixWrite(nametif, pix, IFF_TIFF_G4);
00854         convertG4ToPSEmbed(nametif, fileout);
00855     }
00856     else {
00857         pixWrite(namejpg, pix, IFF_JFIF_JPEG);
00858         convertJpegToPSEmbed(namejpg, fileout);
00859     }
00860
00861     pixDestroy(&pix);
00862     pixDestroy(&pixs);
00863     return 0;
00864 }
00865
00866
00867 /*-------------------------------------------------------------*
00868  *              Write all images in a pixa out to PS           *
00869  *-------------------------------------------------------------*/
00870 /*
00871  *  pixaWriteCompressedToPS()
00872  *
00873  *      Input:  pixa (any set of images)
00874  *              fileout (output ps file)
00875  *              res (of input image)
00876  *              level (compression: 2 or 3)
00877  *      Return: 0 if OK, 1 on error
00878  *
00879  *  Notes:
00880  *      (1) This generates a PS file of multiple page images, all
00881  *          with bounding boxes.
00882  *      (2) It compresses to:
00883  *              cmap + level2:        jpeg
00884  *              cmap + level3:        flate
00885  *              1 bpp:                tiffg4
00886  *              2 or 4 bpp + level2:  jpeg
00887  *              2 or 4 bpp + level3:  flate
00888  *              8 bpp:                jpeg
00889  *              16 bpp:               flate
00890  *              32 bpp:               jpeg
00891  *      (3) To generate a pdf, use: ps2pdf <infile.ps> <outfile.pdf>
00892  */
00893 l_int32
00894 pixaWriteCompressedToPS(PIXA        *pixa,
00895                         const char  *fileout,
00896                         l_int32      res,
00897                         l_int32      level)
00898 {
00899 char     *tname, *g4_name, *jpeg_name, *png_name;
00900 l_int32   i, n, firstfile, index, writeout, d;
00901 PIX      *pix, *pixt;
00902 PIXCMAP  *cmap;
00903
00904     PROCNAME("pixaWriteCompressedToPS");
00905
00906     if (!pixa)
00907         return ERROR_INT("pixa not defined", procName, 1);
00908     if (!fileout)
00909         return ERROR_INT("fileout not defined", procName, 1);
00910     if (level != 2 && level != 3) {
00911         L_ERROR("only levels 2 and 3 permitted; using level 2", procName);
00912         level = 2;
00913     }
00914
00915     n = pixaGetCount(pixa);
00916     firstfile = TRUE;
00917     index = 0;
00918     g4_name = genTempFilename("/tmp", "temp_compr.tif", 0, 0);
00919     jpeg_name = genTempFilename("/tmp", "temp_compr.jpg", 0, 0);
00920     png_name = genTempFilename("/tmp", "temp_compr.png", 0, 0);
00921     for (i = 0; i < n; i++) {
00922         writeout = TRUE;
00923         pix = pixaGetPix(pixa, i, L_CLONE);
00924         d = pixGetDepth(pix);
00925         cmap = pixGetColormap(pix);
00926         if (d == 1) {
00927             tname = g4_name;
00928             pixWrite(tname, pix, IFF_TIFF_G4);
00929         }
00930         else if (cmap) {
00931             if (level == 2) {
00932                 pixt = pixConvertForPSWrap(pix);
00933                 tname = jpeg_name;
00934                 pixWrite(tname, pixt, IFF_JFIF_JPEG);
00935                 pixDestroy(&pixt);
00936             }
00937             else {  /* level == 3 */
00938                 tname = png_name;
00939                 pixWrite(tname, pix, IFF_PNG);
00940             }
00941         }
00942         else if (d == 16) {
00943             if (level == 2)
00944                 L_WARNING("d = 16; must write out flate", procName);
00945             tname = png_name;
00946             pixWrite(tname, pix, IFF_PNG);
00947         }
00948         else if (d == 2 || d == 4) {
00949             if (level == 2) {
00950                 pixt = pixConvertTo8(pix, 0);
00951                 tname = jpeg_name;
00952                 pixWrite(tname, pixt, IFF_JFIF_JPEG);
00953                 pixDestroy(&pixt);
00954             }
00955             else {  /* level == 3 */
00956                 tname = png_name;
00957                 pixWrite(tname, pix, IFF_PNG);
00958             }
00959         }
00960         else if (d == 8 || d == 32) {
00961             tname = jpeg_name;
00962             pixWrite(tname, pix, IFF_JFIF_JPEG);
00963         }
00964         else {  /* shouldn't happen */
00965             L_ERROR_INT("invalid depth: %d", procName, d);
00966             writeout = FALSE;
00967         }
00968         pixDestroy(&pix);
00969
00970         if (writeout)
00971             writeImageCompressedToPSFile(tname, fileout, res,
00972                                          &firstfile, &index);
00973     }
00974
00975     FREE(g4_name);
00976     FREE(jpeg_name);
00977     FREE(png_name);
00978     return 0;
00979 }
00980
00981
00982 /* --------------------------------------------*/
00983 #endif  /* USE_PSIO */
00984 /* --------------------------------------------*/
00985