Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * psio1.c 00018 * 00019 * |=============================================================| 00020 * | Important note | 00021 * |=============================================================| 00022 * | Some of these functions require libtiff, libjpeg and libz. | 00023 * | If you do not have these libraries, you must set | 00024 * | #define USE_PSIO 0 | 00025 * | in environ.h. This will link psio1stub.c | 00026 * |=============================================================| 00027 * 00028 * This is a PostScript "device driver" for wrapping images 00029 * in PostScript. The images can be rendered by a PostScript 00030 * interpreter for viewing, using evince or gv. They can also be 00031 * rasterized for printing, using gs or an embedded interpreter 00032 * in a PostScript printer. And they can be converted to a pdf 00033 * using gs (ps2pdf). 
00034 * 00035 * Convert specified files to PS 00036 * l_int32 convertFilesToPS() 00037 * l_int32 sarrayConvertFilesToPS() 00038 * l_int32 convertFilesFittedToPS() 00039 * l_int32 sarrayConvertFilesFittedToPS() 00040 * l_int32 writeImageCompressedToPSFile() 00041 * 00042 * Convert mixed text/image files to PS 00043 * l_int32 convertSegmentedPagesToPS() 00044 * l_int32 pixWriteSegmentedPageToPS() 00045 * l_int32 pixWriteMixedToPS() 00046 * 00047 * Convert any image file to PS for embedding 00048 * l_int32 convertToPSEmbed() 00049 * 00050 * Write all images in a pixa out to PS 00051 * l_int32 pixaWriteCompressedToPS() 00052 * 00053 * These PostScript converters are used in three different ways. 00054 * 00055 * (1) For embedding a PS file in a program like TeX. 00056 * convertToPSEmbed() handles this for levels 1, 2 and 3 output, 00057 * and prog/converttops wraps this in an executable. 00058 * converttops is a generalization of Thomas Merz's jpeg2ps wrapper, 00059 * in that it works for all types (formats, depth, colormap) 00060 * of input images and gives PS output in one of these formats 00061 * * level 1 (uncompressed) 00062 * * level 2 (compressed ccittg4 or dct) 00063 * * level 3 (compressed flate) 00064 * 00065 * (2) For composing a set of pages with any number of images 00066 * painted on them, in either level 2 or level 3 formats. 00067 * 00068 * (3) For printing a page image or a set of page images, at a 00069 * resolution that optimally fills the page, using 00070 * convertFilesFittedToPS(). 00071 * 00072 * The top-level calls of utilities in category 2, which can compose 00073 * multiple images on a page, and which generate a PostScript file for 00074 * printing or display (e.g., conversion to pdf), are: 00075 * convertFilesToPS() 00076 * convertFilesFittedToPS() 00077 * convertSegmentedPagesToPS() 00078 * 00079 * All images are output with page numbers. Bounding box hints are 00080 * more subtle. 
They must be included for embedding images in 00081 * TeX, for example, and the low-level writers include bounding 00082 * box hints by default. However, these hints should not be included for 00083 * multi-page PostScript that is composed of a sequence of images; 00084 * consequently, they are not written when calling higher level 00085 * functions such as convertFilesToPS(), convertFilesFittedToPS() 00086 * and convertSegmentedPagesToPS(). The function l_psWriteBoundingBox() 00087 * sets a flag to give low-level control over this. 00088 */ 00089 00090 #include <string.h> 00091 #include "allheaders.h" 00092 00093 /* --------------------------------------------*/ 00094 #if USE_PSIO /* defined in environ.h */ 00095 /* --------------------------------------------*/ 00096 00097 /*-------------------------------------------------------------* 00098 * Convert files in a directory to PS * 00099 *-------------------------------------------------------------*/ 00100 /* 00101 * convertFilesToPS() 00102 * 00103 * Input: dirin (input directory) 00104 * substr (<optional> substring filter on filenames; can be NULL) 00105 * res (typ. 300 or 600 ppi) 00106 * fileout (output ps file) 00107 * Return: 0 if OK, 1 on error 00108 * 00109 * Notes: 00110 * (1) This generates a PS file for all image files in a specified 00111 * directory that contain the substr pattern to be matched. 00112 * (2) Each image is written to a separate page in the output PS file. 00113 * (3) All images are written compressed: 00114 * * if tiffg4 --> use ccittg4 00115 * * if jpeg --> use dct 00116 * * all others --> use flate 00117 * If the image is jpeg or tiffg4, we use the existing compressed 00118 * strings for the encoding; otherwise, we read the image into 00119 * a pix and flate-encode the pieces. 00120 * (4) The resolution is often confusing. 
It is interpreted 00121 * as the resolution of the output display device: "If the 00122 * input image were digitized at 300 ppi, what would it 00123 * look like when displayed at res ppi." So, for example, 00124 * if res = 100 ppi, then the display pixels are 3x larger 00125 * than the 300 ppi pixels, and the image will be rendered 00126 * 3x larger. 00127 * (5) The size of the PostScript file is independent of the resolution, 00128 * because the entire file is encoded. The res parameter just 00129 * tells the PS decomposer how to render the page. Therefore, 00130 * for minimum file size without loss of visual information, 00131 * if the output res is less than 300, you should downscale 00132 * the image to the output resolution before wrapping in PS. 00133 * (6) The "canvas" on which the image is rendered, at the given 00134 * output resolution, is a standard page size (8.5 x 11 in). 00135 */ 00136 l_int32 00137 convertFilesToPS(const char *dirin, 00138 const char *substr, 00139 l_int32 res, 00140 const char *fileout) 00141 { 00142 SARRAY *sa; 00143 00144 PROCNAME("convertFilesToPS"); 00145 00146 if (!dirin) 00147 return ERROR_INT("dirin not defined", procName, 1); 00148 if (!fileout) 00149 return ERROR_INT("fileout not defined", procName, 1); 00150 if (res <= 0) { 00151 L_INFO("setting res to 300 ppi", procName); 00152 res = 300; 00153 } 00154 if (res < 10 || res > 4000) 00155 L_WARNING("res is typically in the range 300-600 ppi", procName); 00156 00157 /* Get all filtered and sorted full pathnames. */ 00158 sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0); 00159 00160 /* Generate the PS file. Don't use bounding boxes. */ 00161 l_psWriteBoundingBox(FALSE); 00162 sarrayConvertFilesToPS(sa, res, fileout); 00163 l_psWriteBoundingBox(TRUE); 00164 sarrayDestroy(&sa); 00165 return 0; 00166 } 00167 00168 00169 /* 00170 * sarrayConvertFilesToPS() 00171 * 00172 * Input: sarray (of full path names) 00173 * res (typ. 
300 or 600 ppi) 00174 * fileout (output ps file) 00175 * Return: 0 if OK, 1 on error 00176 * 00177 * Notes: 00178 * (1) See convertFilesToPS() 00179 */ 00180 l_int32 00181 sarrayConvertFilesToPS(SARRAY *sa, 00182 l_int32 res, 00183 const char *fileout) 00184 { 00185 char *fname; 00186 l_int32 i, nfiles, index, firstfile, ret, format; 00187 00188 PROCNAME("sarrayConvertFilesToPS"); 00189 00190 if (!sa) 00191 return ERROR_INT("sa not defined", procName, 1); 00192 if (!fileout) 00193 return ERROR_INT("fileout not defined", procName, 1); 00194 if (res <= 0) { 00195 L_INFO("setting res to 300 ppi", procName); 00196 res = 300; 00197 } 00198 if (res < 10 || res > 4000) 00199 L_WARNING("res is typically in the range 300-600 ppi", procName); 00200 00201 nfiles = sarrayGetCount(sa); 00202 firstfile = TRUE; 00203 for (i = 0, index = 0; i < nfiles; i++) { 00204 fname = sarrayGetString(sa, i, L_NOCOPY); 00205 ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL); 00206 if (ret) continue; 00207 if (format == IFF_UNKNOWN) 00208 continue; 00209 00210 writeImageCompressedToPSFile(fname, fileout, res, &firstfile, &index); 00211 } 00212 00213 return 0; 00214 } 00215 00216 00217 /* 00218 * convertFilesFittedToPS() 00219 * 00220 * Input: dirin (input directory) 00221 * substr (<optional> substring filter on filenames; can be NULL) 00222 * xpts, ypts (desired size in printer points; use 0 for default) 00223 * fileout (output ps file) 00224 * Return: 0 if OK, 1 on error 00225 * 00226 * Notes: 00227 * (1) This generates a PS file for all files in a specified directory 00228 * that contain the substr pattern to be matched. 00229 * (2) Each image is written to a separate page in the output PS file. 
00230 * (3) All images are written compressed: 00231 * * if tiffg4 --> use ccittg4 00232 * * if jpeg --> use dct 00233 * * all others --> use flate 00234 * If the image is jpeg or tiffg4, we use the existing compressed 00235 * strings for the encoding; otherwise, we read the image into 00236 * a pix and flate-encode the pieces. 00237 * (4) The resolution is internally determined such that the images 00238 * are rendered, in at least one direction, at 100% of the given 00239 * size in printer points. Use 0.0 for xpts or ypts to get 00240 * the default value, which is 612.0 or 792.0, rsp. 00241 * (5) The size of the PostScript file is independent of the resolution, 00242 * because the entire file is encoded. The @xpts and @ypts 00243 * parameter tells the PS decomposer how to render the page. 00244 */ 00245 l_int32 00246 convertFilesFittedToPS(const char *dirin, 00247 const char *substr, 00248 l_float32 xpts, 00249 l_float32 ypts, 00250 const char *fileout) 00251 { 00252 SARRAY *sa; 00253 00254 PROCNAME("convertFilesFittedToPS"); 00255 00256 if (!dirin) 00257 return ERROR_INT("dirin not defined", procName, 1); 00258 if (!fileout) 00259 return ERROR_INT("fileout not defined", procName, 1); 00260 if (xpts <= 0.0) { 00261 L_INFO("setting xpts to 612.0 ppi", procName); 00262 xpts = 612.0; 00263 } 00264 if (ypts <= 0.0) { 00265 L_INFO("setting ypts to 792.0 ppi", procName); 00266 ypts = 792.0; 00267 } 00268 if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0) 00269 L_WARNING("xpts,ypts are typically in the range 500-800", procName); 00270 00271 /* Get all filtered and sorted full pathnames. */ 00272 sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0); 00273 00274 /* Generate the PS file. Don't use bounding boxes. 
*/ 00275 l_psWriteBoundingBox(FALSE); 00276 sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout); 00277 l_psWriteBoundingBox(TRUE); 00278 sarrayDestroy(&sa); 00279 return 0; 00280 } 00281 00282 00283 /* 00284 * sarrayConvertFilesFittedToPS() 00285 * 00286 * Input: sarray (of full path names) 00287 * xpts, ypts (desired size in printer points; use 0 for default) 00288 * fileout (output ps file) 00289 * Return: 0 if OK, 1 on error 00290 * 00291 * Notes: 00292 * (1) See convertFilesFittedToPS() 00293 */ 00294 l_int32 00295 sarrayConvertFilesFittedToPS(SARRAY *sa, 00296 l_float32 xpts, 00297 l_float32 ypts, 00298 const char *fileout) 00299 { 00300 char *fname; 00301 l_int32 ret, i, w, h, nfiles, index, firstfile, format, res; 00302 00303 PROCNAME("sarrayConvertFilesFittedToPS"); 00304 00305 if (!sa) 00306 return ERROR_INT("sa not defined", procName, 1); 00307 if (!fileout) 00308 return ERROR_INT("fileout not defined", procName, 1); 00309 if (xpts <= 0.0) { 00310 L_INFO("setting xpts to 612.0", procName); 00311 xpts = 612.0; 00312 } 00313 if (ypts <= 0.0) { 00314 L_INFO("setting ypts to 792.0", procName); 00315 ypts = 792.0; 00316 } 00317 if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0) 00318 L_WARNING("xpts,ypts are typically in the range 500-800", procName); 00319 00320 nfiles = sarrayGetCount(sa); 00321 firstfile = TRUE; 00322 for (i = 0, index = 0; i < nfiles; i++) { 00323 fname = sarrayGetString(sa, i, L_NOCOPY); 00324 ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL); 00325 if (ret) continue; 00326 if (format == IFF_UNKNOWN) 00327 continue; 00328 00329 /* Be sure the entire image is wrapped */ 00330 if (xpts * h < ypts * w) 00331 res = (l_int32)((l_float32)w * 72.0 / xpts); 00332 else 00333 res = (l_int32)((l_float32)h * 72.0 / ypts); 00334 00335 writeImageCompressedToPSFile(fname, fileout, res, &firstfile, &index); 00336 } 00337 00338 return 0; 00339 } 00340 00341 00342 /* 00343 * writeImageCompressedToPSFile() 00344 * 00345 * 
Input: filein (input image file) 00346 * fileout (output ps file) 00347 * res (output printer resolution) 00348 * &firstfile (<input and return> 1 if the first image; 00349 * 0 otherwise) 00350 * &index (<input and return> index of image in output ps file) 00351 * Return: 0 if OK, 1 on error 00352 * 00353 * Notes: 00354 * (1) This wraps a single page image in PS. 00355 * (2) The input file can be in any format. It is compressed as follows: 00356 * * if in tiffg4 --> use ccittg4 00357 * * if in jpeg --> use dct 00358 * * all others --> use flate 00359 * (3) Before the first call, set @firstpage = 1. After writing 00360 * the first page, it will be set to 0. 00361 * (4) @index is incremented if the page is successfully written. 00362 */ 00363 l_int32 00364 writeImageCompressedToPSFile(const char *filein, 00365 const char *fileout, 00366 l_int32 res, 00367 l_int32 *pfirstfile, 00368 l_int32 *pindex) 00369 { 00370 const char *op; 00371 l_int32 format, retval; 00372 00373 PROCNAME("writeImageCompressedToPSFile"); 00374 00375 if (!pfirstfile || !pindex) 00376 return ERROR_INT("&firstfile and &index not defined", procName, 1); 00377 00378 findFileFormat(filein, &format); 00379 if (format == IFF_UNKNOWN) { 00380 L_ERROR_STRING("format of %s not known", procName, filein); 00381 return 1; 00382 } 00383 00384 op = (*pfirstfile == TRUE) ? 
"w" : "a"; 00385 if (format == IFF_JFIF_JPEG) { 00386 retval = convertJpegToPS(filein, fileout, op, 0, 0, 00387 res, 1.0, *pindex + 1, TRUE); 00388 if (retval == 0) { 00389 *pfirstfile = FALSE; 00390 (*pindex)++; 00391 } 00392 } 00393 else if (format == IFF_TIFF_G4) { 00394 retval = convertG4ToPS(filein, fileout, op, 0, 0, 00395 res, 1.0, *pindex + 1, FALSE, TRUE); 00396 if (retval == 0) { 00397 *pfirstfile = FALSE; 00398 (*pindex)++; 00399 } 00400 } 00401 else { /* all other image formats */ 00402 retval = convertFlateToPS(filein, fileout, op, 0, 0, 00403 res, 1.0, *pindex + 1, TRUE); 00404 if (retval == 0) { 00405 *pfirstfile = FALSE; 00406 (*pindex)++; 00407 } 00408 } 00409 00410 return retval; 00411 } 00412 00413 00414 /*-------------------------------------------------------------* 00415 * Convert mixed text/image files to PS * 00416 *-------------------------------------------------------------*/ 00417 /* 00418 * convertSegmentedPagesToPS() 00419 * 00420 * Input: pagedir (input page image directory) 00421 * pagestr (<optional> substring filter on page filenames; 00422 * can be NULL) 00423 * maskdir (input mask image directory) 00424 * maskstr (<optional> substring filter on mask filenames; 00425 * can be NULL) 00426 * numpre (number of characters in name before number) 00427 * numpost (number of characters in name after number) 00428 * maxnum (only consider page numbers up to this value) 00429 * textscale (scale of text output relative to pixs) 00430 * imagescale (scale of image output relative to pixs) 00431 * threshold (for binarization; typ. about 190; 0 for default) 00432 * fileout (output ps file) 00433 * Return: 0 if OK, 1 on error 00434 * 00435 * Notes: 00436 * (1) This generates a PS file for all page image and mask files in two 00437 * specified directories and that contain the page numbers as 00438 * specified below. 
The two directories can be the same, in which 00439 * case the page and mask files are differentiated by the two 00440 * substrings for string matches. 00441 * (2) The page images are taken in lexicographic order. 00442 * Mask images whose numbers match the page images are used to 00443 * segment the page images. Page images without a matching 00444 * mask image are scaled, thresholded and rendered entirely as text. 00445 * (3) Each PS page is generated as a compressed representation of 00446 * the page image, where the part of the image under the mask 00447 * is suitably scaled and compressed as DCT (i.e., jpeg), and 00448 * the remaining part of the page is suitably scaled, thresholded, 00449 * compressed as G4 (i.e., tiff g4), and rendered by painting 00450 * black through the resulting text mask. 00451 * (4) The scaling is typically 2x down for the DCT component 00452 * (@imagescale = 0.5) and 2x up for the G4 component 00453 * (@textscale = 2.0). 00454 * (5) The resolution is automatically set to fit to a 00455 * letter-size (8.5 x 11 inch) page. 00456 * (6) Both the DCT and the G4 encoding are PostScript level 2. 00457 * (7) It is assumed that the page number is contained within 00458 * the basename (the filename without directory or extension). 00459 * @numpre is the number of characters in the basename 00460 * preceding the actual page number; @numpost is the number 00461 * following the page number. Note: the same numbers must be 00462 * applied to both the page and mask image names. 00463 * (8) To render a page as is -- that is, with no thresholding 00464 * of any pixels -- use a mask in the mask directory that is 00465 * full size with all pixels set to 1. If the page is 1 bpp, 00466 * it is not necessary to have a mask. 
00467 */ 00468 l_int32 00469 convertSegmentedPagesToPS(const char *pagedir, 00470 const char *pagestr, 00471 const char *maskdir, 00472 const char *maskstr, 00473 l_int32 numpre, 00474 l_int32 numpost, 00475 l_int32 maxnum, 00476 l_float32 textscale, 00477 l_float32 imagescale, 00478 l_int32 threshold, 00479 const char *fileout) 00480 { 00481 l_int32 pageno, i, npages; 00482 PIX *pixs, *pixm; 00483 SARRAY *sapage, *samask; 00484 00485 PROCNAME("convertSegmentedPagesToPS"); 00486 00487 if (!pagedir) 00488 return ERROR_INT("pagedir not defined", procName, 1); 00489 if (!maskdir) 00490 return ERROR_INT("maskdir not defined", procName, 1); 00491 if (!fileout) 00492 return ERROR_INT("fileout not defined", procName, 1); 00493 if (threshold <= 0) { 00494 L_INFO("setting threshold to 190", procName); 00495 threshold = 190; 00496 } 00497 00498 /* Get numbered full pathnames; max size of sarray is maxnum */ 00499 sapage = getNumberedPathnamesInDirectory(pagedir, pagestr, 00500 numpre, numpost, maxnum); 00501 samask = getNumberedPathnamesInDirectory(maskdir, maskstr, 00502 numpre, numpost, maxnum); 00503 sarrayPadToSameSize(sapage, samask, (char *)""); 00504 if ((npages = sarrayGetCount(sapage)) == 0) { 00505 sarrayDestroy(&sapage); 00506 sarrayDestroy(&samask); 00507 return ERROR_INT("no matching pages found", procName, 1); 00508 } 00509 00510 /* Generate the PS file */ 00511 pageno = 1; 00512 for (i = 0; i < npages; i++) { 00513 if ((pixs = pixReadIndexed(sapage, i)) == NULL) 00514 continue; 00515 pixm = pixReadIndexed(samask, i); 00516 pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale, 00517 threshold, pageno, fileout); 00518 pixDestroy(&pixs); 00519 pixDestroy(&pixm); 00520 pageno++; 00521 } 00522 00523 sarrayDestroy(&sapage); 00524 sarrayDestroy(&samask); 00525 return 0; 00526 } 00527 00528 00529 /* 00530 * pixWriteSegmentedPageToPS() 00531 * 00532 * Input: pixs (all depths; colormap ok) 00533 * pixm (<optional> 1 bpp segmentation mask over image region) 00534 
* textscale (scale of text output relative to pixs) 00535 * imagescale (scale of image output relative to pixs) 00536 * threshold (threshold for binarization; typ. 190) 00537 * pageno (page number in set; use 1 for new output file) 00538 * fileout (output ps file) 00539 * Return: 0 if OK, 1 on error 00540 * 00541 * Notes: 00542 * (1) This generates the PS string for a mixed text/image page, 00543 * and adds it to an existing file if @pageno > 1. 00544 * The PS output is determined by fitting the result to 00545 * a letter-size (8.5 x 11 inch) page. 00546 * (2) The two images (pixs and pixm) are at the same resolution 00547 * (typically 300 ppi). They are used to generate two compressed 00548 * images, pixb and pixc, that are put directly into the output 00549 * PS file. 00550 * (3) pixb is the text component. In the PostScript world, we think of 00551 * it as a mask through which we paint black. It is produced by 00552 * scaling pixs by @textscale, and thresholding to 1 bpp. 00553 * (4) pixc is the image component, which is that part of pixs under 00554 * the mask pixm. It is scaled from pixs by @imagescale. 00555 * (5) Typical values are textscale = 2.0 and imagescale = 0.5. 00556 * (6) If pixm == NULL, the page has only text. If it is all black, 00557 * the page is all image and has no text. 00558 * (7) This can be used to write a multi-page PS file, by using 00559 * sequential page numbers with the same output file. It can 00560 * also be used to write separate PS files for each page, 00561 * by using different output files with @pageno = 0 or 1. 
00562 */ 00563 l_int32 00564 pixWriteSegmentedPageToPS(PIX *pixs, 00565 PIX *pixm, 00566 l_float32 textscale, 00567 l_float32 imagescale, 00568 l_int32 threshold, 00569 l_int32 pageno, 00570 const char *fileout) 00571 { 00572 l_int32 alltext, notext, d, ret; 00573 l_uint32 val; 00574 l_float32 scaleratio; 00575 PIX *pixmi, *pixmis, *pixt, *pixg, *pixsc, *pixb, *pixc; 00576 00577 PROCNAME("pixWriteSegmentedPageToPS"); 00578 00579 if (!pixs) 00580 return ERROR_INT("pixs not defined", procName, 1); 00581 if (!fileout) 00582 return ERROR_INT("fileout not defined", procName, 1); 00583 if (imagescale <= 0.0 || textscale <= 0.0) 00584 return ERROR_INT("relative scales must be > 0.0", procName, 1); 00585 00586 /* Analyze the page. Determine the ratio by which the 00587 * binary text mask is scaled relative to the image part. 00588 * If there is no image region (alltext == TRUE), the 00589 * text mask will be rendered directly to fit the page, 00590 * and scaleratio = 1.0. */ 00591 alltext = TRUE; 00592 notext = FALSE; 00593 scaleratio = 1.0; 00594 if (pixm) { 00595 pixZero(pixm, &alltext); /* pixm empty: all text */ 00596 if (alltext) 00597 pixm = NULL; /* treat it as not existing here */ 00598 else { 00599 pixmi = pixInvert(NULL, pixm); 00600 pixZero(pixmi, ¬ext); /* pixm full; no text */ 00601 pixDestroy(&pixmi); 00602 scaleratio = textscale / imagescale; 00603 } 00604 } 00605 00606 if (pixGetDepth(pixs) == 1) { /* render tiff g4 */ 00607 pixb = pixClone(pixs); 00608 pixc = NULL; 00609 } 00610 else { 00611 pixt = pixConvertTo8Or32(pixs, 0, 0); /* this can be a clone of pixs */ 00612 00613 /* Get the binary text mask. Note that pixg cannot be a 00614 * clone of pixs, because it may be altered by pixSetMasked(). 
*/ 00615 pixb = NULL; 00616 if (notext == FALSE) { 00617 d = pixGetDepth(pixt); 00618 if (d == 8) 00619 pixg = pixCopy(NULL, pixt); 00620 else /* d == 32 */ 00621 pixg = pixConvertRGBToLuminance(pixt); 00622 if (pixm) /* clear out the image parts */ 00623 pixSetMasked(pixg, pixm, 255); 00624 if (textscale == 1.0) 00625 pixsc = pixClone(pixg); 00626 else if (textscale >= 0.7) 00627 pixsc = pixScaleGrayLI(pixg, textscale, textscale); 00628 else 00629 pixsc = pixScaleAreaMap(pixg, textscale, textscale); 00630 pixb = pixThresholdToBinary(pixsc, threshold); 00631 pixDestroy(&pixg); 00632 pixDestroy(&pixsc); 00633 } 00634 00635 /* Get the scaled image region */ 00636 pixc = NULL; 00637 if (pixm) { 00638 if (imagescale == 1.0) 00639 pixsc = pixClone(pixt); /* can possibly be a clone of pixs */ 00640 else 00641 pixsc = pixScale(pixt, imagescale, imagescale); 00642 00643 /* If pixm is not full, clear the pixels in pixsc 00644 * corresponding to bg in pixm, where there can be text 00645 * that is written through the mask pixb. Note that 00646 * we could skip this and use pixsc directly in 00647 * pixWriteMixedToPS(); however, clearing these 00648 * non-image regions to a white background will reduce 00649 * the size of pixc (relative to pixsc), and hence 00650 * reduce the size of the PS file that is generated. 00651 * Use a copy so that we don't accidentally alter pixs. */ 00652 if (notext == FALSE) { 00653 pixmis = pixScale(pixm, imagescale, imagescale); 00654 pixmi = pixInvert(NULL, pixmis); 00655 val = (d == 8) ? 0xff : 0xffffff00; 00656 pixc = pixCopy(NULL, pixsc); 00657 pixSetMasked(pixc, pixmi, val); /* clear non-image part */ 00658 pixDestroy(&pixmis); 00659 pixDestroy(&pixmi); 00660 } 00661 else 00662 pixc = pixClone(pixsc); 00663 pixDestroy(&pixsc); 00664 } 00665 pixDestroy(&pixt); 00666 } 00667 00668 /* Generate the PS file. Don't use bounding boxes. 
*/ 00669 l_psWriteBoundingBox(FALSE); 00670 ret = pixWriteMixedToPS(pixb, pixc, scaleratio, pageno, fileout); 00671 l_psWriteBoundingBox(TRUE); 00672 pixDestroy(&pixb); 00673 pixDestroy(&pixc); 00674 return ret; 00675 } 00676 00677 00678 /* 00679 * pixWriteMixedToPS() 00680 * 00681 * Input: pixb (<optionall> 1 bpp "mask"; typically for text) 00682 * pixc (<optional> 8 or 32 bpp image regions) 00683 * scale (relative scale factor for rendering pixb 00684 * relative to pixc; typ. 4.0) 00685 * pageno (page number in set; use 1 for new output file) 00686 * fileout (output ps file) 00687 * Return: 0 if OK, 1 on error 00688 * 00689 * Notes: 00690 * (1) This low level function generates the PS string for a mixed 00691 * text/image page, and adds it to an existing file if 00692 * @pageno > 1. 00693 * (2) The two images (pixb and pixc) are typically generated at the 00694 * resolution that they will be rendered in the PS file. 00695 * (3) pixb is the text component. In the PostScript world, we think of 00696 * it as a mask through which we paint black. 00697 * (4) pixc is the (typically halftone) image component. It is 00698 * white in the rest of the page. To minimize the size of the 00699 * PS file, it should be rendered at a resolution that is at 00700 * least equal to its actual resolution. 00701 * (5) @scale gives the ratio of resolution of pixb to pixc. 00702 * Typical resolutions are: 600 ppi for pixb, 150 ppi for pixc; 00703 * so @scale = 4.0. If one of the images is not defined, 00704 * the value of @scale is ignored. 00705 * (6) We write pixc with DCT compression (jpeg). This is followed 00706 * by painting the text as black through the mask pixb. If 00707 * pixc doesn't exist (alltext), we write the text with the 00708 * PS "image" operator instead of the "imagemask" operator, 00709 * because ghostscript's ps2pdf is flaky when the latter is used. 
00710 * (7) The actual output resolution is determined by fitting the 00711 * result to a letter-size (8.5 x 11 inch) page. 00712 */ 00713 l_int32 00714 pixWriteMixedToPS(PIX *pixb, 00715 PIX *pixc, 00716 l_float32 scale, 00717 l_int32 pageno, 00718 const char *fileout) 00719 { 00720 const char tnameb[] = "/tmp/junk_pix_write_mixed.tif"; 00721 const char tnamec[] = "/tmp/junk_pix_write_mixed.jpg"; 00722 const char *op; 00723 l_int32 resb, resc, endpage, maskop, ret; 00724 00725 PROCNAME("pixWriteMixedToPS"); 00726 00727 if (!pixb && !pixc) 00728 return ERROR_INT("pixb and pixc both undefined", procName, 1); 00729 if (!fileout) 00730 return ERROR_INT("fileout not defined", procName, 1); 00731 00732 /* Compute the resolution that fills a letter-size page. */ 00733 if (!pixc) 00734 resb = getResLetterPage(pixGetWidth(pixb), pixGetHeight(pixb), 0); 00735 else { 00736 resc = getResLetterPage(pixGetWidth(pixc), pixGetHeight(pixc), 0); 00737 if (pixb) 00738 resb = (l_int32)(scale * resc); 00739 } 00740 00741 /* Write the jpeg image first */ 00742 if (pixc) { 00743 pixWrite(tnamec, pixc, IFF_JFIF_JPEG); 00744 endpage = (pixb) ? FALSE : TRUE; 00745 op = (pageno <= 1) ? "w" : "a"; 00746 ret = convertJpegToPS(tnamec, fileout, op, 0, 0, resc, 1.0, 00747 pageno, endpage); 00748 if (ret) 00749 return ERROR_INT("jpeg data not written", procName, 1); 00750 } 00751 00752 /* Write the binary data, either directly or, if there is 00753 * a jpeg image on the page, through the mask. */ 00754 if (pixb) { 00755 pixWrite(tnameb, pixb, IFF_TIFF_G4); 00756 op = (pageno <= 1 && !pixc) ? "w" : "a"; 00757 maskop = (pixc) ? 
1 : 0; 00758 ret = convertG4ToPS(tnameb, fileout, op, 0, 0, resb, 1.0, 00759 pageno, maskop, 1); 00760 if (ret) 00761 return ERROR_INT("tiff data not written", procName, 1); 00762 } 00763 00764 return 0; 00765 } 00766 00767 00768 /*-------------------------------------------------------------* 00769 * Convert any image file to PS for embedding * 00770 *-------------------------------------------------------------*/ 00771 /* 00772 * convertToPSEmbed() 00773 * 00774 * Input: filein (input image file -- any format) 00775 * fileout (output ps file) 00776 * level (compression: 1 (uncompressed), 2 or 3) 00777 * Return: 0 if OK, 1 on error 00778 * 00779 * Notes: 00780 * (1) This is a wrapper function that generates a PS file with 00781 * a bounding box, from any input image file. 00782 * (2) Do the best job of compression given the specified level. 00783 * @level=3 does flate compression on anything that is not 00784 * tiffg4 (1 bpp) or jpeg (8 bpp or rgb). 00785 * (3) If @level=2 and the file is not tiffg4 or jpeg, it will 00786 * first be written to file as jpeg with quality = 75. 00787 * This will remove the colormap and cause some degradation 00788 * in the image. 00789 * (4) The bounding box is required when a program such as TeX 00790 * (through epsf) places and rescales the image. It is 00791 * sized for fitting the image to an 8.5 x 11.0 inch page. 
00792 */ 00793 l_int32 00794 convertToPSEmbed(const char *filein, 00795 const char *fileout, 00796 l_int32 level) 00797 { 00798 const char nametif[] = "/tmp/junk_convert_ps_embed.tif"; 00799 const char namejpg[] = "/tmp/junk_convert_ps_embed.jpg"; 00800 l_int32 d, format; 00801 PIX *pix, *pixs; 00802 00803 PROCNAME("convertToPSEmbed"); 00804 00805 if (!filein) 00806 return ERROR_INT("filein not defined", procName, 1); 00807 if (!fileout) 00808 return ERROR_INT("fileout not defined", procName, 1); 00809 if (level != 1 && level != 2 && level != 3) { 00810 L_ERROR("invalid level specified; using level 2", procName); 00811 level = 2; 00812 } 00813 00814 if (level == 1) { /* no compression */ 00815 pixWritePSEmbed(filein, fileout); 00816 return 0; 00817 } 00818 00819 /* Find the format and write out directly if in jpeg or tiff g4 */ 00820 findFileFormat(filein, &format); 00821 if (format == IFF_JFIF_JPEG) { 00822 convertJpegToPSEmbed(filein, fileout); 00823 return 0; 00824 } 00825 else if (format == IFF_TIFF_G4) { 00826 convertG4ToPSEmbed(filein, fileout); 00827 return 0; 00828 } 00829 else if (format == IFF_UNKNOWN) { 00830 L_ERROR_STRING("format of %s not known", procName, filein); 00831 return 1; 00832 } 00833 00834 /* If level 3, flate encode. 
*/ 00835 if (level == 3) { 00836 convertFlateToPSEmbed(filein, fileout); 00837 return 0; 00838 } 00839 00840 /* OK, it's level 2, so we must convert to jpeg or tiff g4 */ 00841 if ((pixs = pixRead(filein)) == NULL) 00842 return ERROR_INT("image not read from file", procName, 1); 00843 d = pixGetDepth(pixs); 00844 if ((d == 2 || d == 4) && !pixGetColormap(pixs)) 00845 pix = pixConvertTo8(pixs, 0); 00846 else if (d == 16) 00847 pix = pixConvert16To8(pixs, 1); 00848 else 00849 pix = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); 00850 00851 d = pixGetDepth(pix); 00852 if (d == 1) { 00853 pixWrite(nametif, pix, IFF_TIFF_G4); 00854 convertG4ToPSEmbed(nametif, fileout); 00855 } 00856 else { 00857 pixWrite(namejpg, pix, IFF_JFIF_JPEG); 00858 convertJpegToPSEmbed(namejpg, fileout); 00859 } 00860 00861 pixDestroy(&pix); 00862 pixDestroy(&pixs); 00863 return 0; 00864 } 00865 00866 00867 /*-------------------------------------------------------------* 00868 * Write all images in a pixa out to PS * 00869 *-------------------------------------------------------------*/ 00870 /* 00871 * pixaWriteCompressedToPS() 00872 * 00873 * Input: pixa (any set of images) 00874 * fileout (output ps file) 00875 * res (of input image) 00876 * level (compression: 2 or 3) 00877 * Return: 0 if OK, 1 on error 00878 * 00879 * Notes: 00880 * (1) This generates a PS file of multiple page images, all 00881 * with bounding boxes. 
00882 * (2) It compresses to: 00883 * cmap + level2: jpeg 00884 * cmap + level3: flate 00885 * 1 bpp: tiffg4 00886 * 2 or 4 bpp + level2: jpeg 00887 * 2 or 4 bpp + level3: flate 00888 * 8 bpp: jpeg 00889 * 16 bpp: flate 00890 * 32 bpp: jpeg 00891 * (3) To generate a pdf, use: ps2pdf <infile.ps> <outfile.pdf> 00892 */ 00893 l_int32 00894 pixaWriteCompressedToPS(PIXA *pixa, 00895 const char *fileout, 00896 l_int32 res, 00897 l_int32 level) 00898 { 00899 char *tname, *g4_name, *jpeg_name, *png_name; 00900 l_int32 i, n, firstfile, index, writeout, d; 00901 PIX *pix, *pixt; 00902 PIXCMAP *cmap; 00903 00904 PROCNAME("pixaWriteCompressedToPS"); 00905 00906 if (!pixa) 00907 return ERROR_INT("pixa not defined", procName, 1); 00908 if (!fileout) 00909 return ERROR_INT("fileout not defined", procName, 1); 00910 if (level != 2 && level != 3) { 00911 L_ERROR("only levels 2 and 3 permitted; using level 2", procName); 00912 level = 2; 00913 } 00914 00915 n = pixaGetCount(pixa); 00916 firstfile = TRUE; 00917 index = 0; 00918 g4_name = genTempFilename("/tmp", "temp_compr.tif", 0, 0); 00919 jpeg_name = genTempFilename("/tmp", "temp_compr.jpg", 0, 0); 00920 png_name = genTempFilename("/tmp", "temp_compr.png", 0, 0); 00921 for (i = 0; i < n; i++) { 00922 writeout = TRUE; 00923 pix = pixaGetPix(pixa, i, L_CLONE); 00924 d = pixGetDepth(pix); 00925 cmap = pixGetColormap(pix); 00926 if (d == 1) { 00927 tname = g4_name; 00928 pixWrite(tname, pix, IFF_TIFF_G4); 00929 } 00930 else if (cmap) { 00931 if (level == 2) { 00932 pixt = pixConvertForPSWrap(pix); 00933 tname = jpeg_name; 00934 pixWrite(tname, pixt, IFF_JFIF_JPEG); 00935 pixDestroy(&pixt); 00936 } 00937 else { /* level == 3 */ 00938 tname = png_name; 00939 pixWrite(tname, pix, IFF_PNG); 00940 } 00941 } 00942 else if (d == 16) { 00943 if (level == 2) 00944 L_WARNING("d = 16; must write out flate", procName); 00945 tname = png_name; 00946 pixWrite(tname, pix, IFF_PNG); 00947 } 00948 else if (d == 2 || d == 4) { 00949 if (level == 2) { 
00950 pixt = pixConvertTo8(pix, 0); 00951 tname = jpeg_name; 00952 pixWrite(tname, pixt, IFF_JFIF_JPEG); 00953 pixDestroy(&pixt); 00954 } 00955 else { /* level == 3 */ 00956 tname = png_name; 00957 pixWrite(tname, pix, IFF_PNG); 00958 } 00959 } 00960 else if (d == 8 || d == 32) { 00961 tname = jpeg_name; 00962 pixWrite(tname, pix, IFF_JFIF_JPEG); 00963 } 00964 else { /* shouldn't happen */ 00965 L_ERROR_INT("invalid depth: %d", procName, d); 00966 writeout = FALSE; 00967 } 00968 pixDestroy(&pix); 00969 00970 if (writeout) 00971 writeImageCompressedToPSFile(tname, fileout, res, 00972 &firstfile, &index); 00973 } 00974 00975 FREE(g4_name); 00976 FREE(jpeg_name); 00977 FREE(png_name); 00978 return 0; 00979 } 00980 00981 00982 /* --------------------------------------------*/ 00983 #endif /* USE_PSIO */ 00984 /* --------------------------------------------*/ 00985