Leptonica 1.68
C Image Processing Library

pdfiotest.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  *  pdfiotest.c
00018  *
00019  *    Note: pdftk (pdftk.exe for Windows) is required to run the
00020  *          last part of this test.
00021  */
00022 
00023 #include <string.h>
00024 #include "allheaders.h"
00025 
00026 static void GetImageMask(PIX *pixs, l_int32 res, BOXA **pboxa,
00027                          const char *debugfile);
00028 static PIX * QuantizeNonImageRegion(PIX *pixs, PIX *pixm, l_int32 levels);
00029 
00030 
00031 main(int    argc,
00032      char **argv)
00033 {
00034 char         buffer[512];
00035 char        *tempfile1, *tempfile2;
00036 l_uint8     *data;
00037 l_int32      i, j, w, h, seq, ret, same;
00038 size_t       nbytes;
00039 const char  *title;
00040 BOX         *box;
00041 BOXA        *boxa1, *boxa2;
00042 L_BYTEA     *ba;
00043 L_PDF_DATA  *lpd;
00044 PIX         *pix1, *pix2, *pix3, *pix4, *pix5, *pix6;
00045 PIX         *pixs, *pixt, *pixg, *pixgc, *pixc;
00046 static char  mainName[] = "pdfiotest";
00047 
00048     if (argc != 1)
00049         exit(ERROR_INT("syntax: pdfiotest", mainName, 1));
00050     l_pdfSetDateAndVersion(0);
00051 
00052 #if 1
00053     /* ---------------  Single image tests  ------------------- */
00054     fprintf(stderr, "\n*** Writing single images as pdf files\n");
00055 
00056     convertToPdf("weasel2.4c.png", L_FLATE_ENCODE, 0, "/tmp/pdffile01.pdf",
00057                  0, 0, 72, NULL, 0, "weasel2.4c.png");
00058     convertToPdf("test24.jpg", L_JPEG_ENCODE, 0, "/tmp/pdffile02.pdf",
00059                  0, 0, 72, NULL, 0, "test24.jpg");
00060     convertToPdf("feyn.tif", L_G4_ENCODE, 0, "/tmp/pdffile03.pdf",
00061                  0, 0, 300, NULL, 0, "feyn.tif");
00062 
00063     pixs = pixRead("feyn.tif");
00064     pixConvertToPdf(pixs, L_G4_ENCODE, 0, "/tmp/pdffile04.pdf", 0, 0, 300,
00065                     NULL, 0, "feyn.tif");
00066     pixDestroy(&pixs);
00067 
00068     pixs = pixRead("test24.jpg");
00069     pixConvertToPdf(pixs, L_JPEG_ENCODE, 5, "/tmp/pdffile05.pdf", 0, 0, 72,
00070                     NULL, 0, "test24.jpg");
00071     pixDestroy(&pixs);
00072 
00073     pixs = pixRead("feyn.tif");
00074     pixt = pixScaleToGray2(pixs);
00075     pixWrite("junkfeyn8.png", pixt, IFF_PNG);
00076     convertToPdf("junkfeyn8.png", L_JPEG_ENCODE, 0, "/tmp/pdffile06.pdf",
00077                  0, 0, 150, NULL, 0, "junkfeyn8.png");
00078     pixDestroy(&pixs);
00079     pixDestroy(&pixt);
00080     
00081     convertToPdf("weasel4.16g.png", L_FLATE_ENCODE, 0, "/tmp/pdffile07.pdf",
00082                  0, 0, 30, NULL, 0, "weasel4.16g.png");
00083 
00084     pixs = pixRead("test24.jpg");
00085     pixg = pixConvertTo8(pixs, 0);
00086     box = boxCreate(100, 100, 100, 100);
00087     pixc = pixClipRectangle(pixs, box, NULL);
00088     pixgc = pixClipRectangle(pixg, box, NULL);
00089     pixWrite("junkpix32.jpg", pixc, IFF_JFIF_JPEG);
00090     pixWrite("junkpix8.jpg", pixgc, IFF_JFIF_JPEG);
00091     convertToPdf("junkpix32.jpg", L_FLATE_ENCODE, 0, "/tmp/pdffile08.pdf",
00092                  0, 0, 72, NULL, 0, "junkpix32.jpg");
00093     convertToPdf("junkpix8.jpg", L_FLATE_ENCODE, 0, "/tmp/pdffile09.pdf",
00094                  0, 0, 72, NULL, 0, "junkpix8.jpg");
00095     pixDestroy(&pixs);
00096     pixDestroy(&pixg);
00097     pixDestroy(&pixc);
00098     pixDestroy(&pixgc);
00099     boxDestroy(&box);
00100 #endif
00101 
00102 
00103 #if 1
00104     /* ---------------  Multiple image tests  ------------------- */
00105     fprintf(stderr, "\n*** Writing multiple images as single page pdf files\n");
00106 
00107     pix1 = pixRead("feyn-fract.tif");
00108     pix2 = pixRead("weasel8.240c.png");
00109 
00110 /*    l_pdfSetDateAndVersion(0); */
00111         /* First, write the 1 bpp image through the mask onto the weasels */
00112     for (i = 0; i < 5; i++) {
00113         for (j = 0; j < 10; j++) {
00114             seq = (i == 0 && j == 0) ? L_FIRST_IMAGE : L_NEXT_IMAGE;
00115             title = (i == 0 && j == 0) ? "feyn-fract.tif" : NULL;
00116             pixConvertToPdf(pix2, L_FLATE_ENCODE, 0, NULL, 100 * j,
00117                             100 * i, 70, &lpd, seq, title);
00118         }
00119     }
00120     pixConvertToPdf(pix1, L_G4_ENCODE, 0, "/tmp/pdffile10.pdf", 0, 0, 80, &lpd,
00121                     L_LAST_IMAGE, NULL);
00122 
00123         /* Now, write the 1 bpp image over the weasels */
00124     l_pdfSetG4ImageMask(0);
00125     for (i = 0; i < 5; i++) {
00126         for (j = 0; j < 10; j++) {
00127             seq = (i == 0 && j == 0) ? L_FIRST_IMAGE : L_NEXT_IMAGE;
00128             title = (i == 0 && j == 0) ? "feyn-fract.tif" : NULL;
00129             pixConvertToPdf(pix2, L_FLATE_ENCODE, 0, NULL, 100 * j,
00130                             100 * i, 70, &lpd, seq, title);
00131         }
00132     }
00133     pixConvertToPdf(pix1, L_G4_ENCODE, 0, "/tmp/pdffile11.pdf", 0, 0, 80, &lpd,
00134                     L_LAST_IMAGE, NULL);
00135     l_pdfSetG4ImageMask(1);
00136     pixDestroy(&pix1);
00137     pixDestroy(&pix2);
00138 #endif
00139 
00140 #if 1
00141     /* -------- pdf convert segmented with no image regions -------- */
00142     fprintf(stderr, "\n*** Writing segmented images without image regions\n");
00143 
00144     pix1 = pixRead("rabi.png");
00145     pix2 = pixScaleToGray2(pix1);
00146     pixWrite("/tmp/rabi8.jpg", pix2, IFF_JFIF_JPEG);
00147     pix3 = pixThresholdTo4bpp(pix2, 16, 1);
00148     pixWrite("/tmp/rabi4.png", pix3, IFF_PNG);
00149     pixDestroy(&pix1);
00150     pixDestroy(&pix2);
00151     pixDestroy(&pix3);
00152 
00153         /* 1 bpp input */
00154     convertToPdfSegmented("rabi.png", 300, L_G4_ENCODE, 128, NULL, 0, 0,
00155                           "/tmp/pdffile12.pdf");
00156     convertToPdfSegmented("rabi.png", 300, L_JPEG_ENCODE, 128, NULL, 0, 0,
00157                           "/tmp/pdffile13.pdf");
00158     convertToPdfSegmented("rabi.png", 300, L_FLATE_ENCODE, 128, NULL, 0, 0,
00159                           "/tmp/pdffile14.pdf");
00160 
00161         /* 8 bpp input, no cmap */
00162     convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_G4_ENCODE, 128,
00163                           NULL, 0, 0, "/tmp/pdffile15.pdf");
00164     convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_JPEG_ENCODE, 128,
00165                           NULL, 0, 0, "/tmp/pdffile16.pdf");
00166     convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_FLATE_ENCODE, 128,
00167                           NULL, 0, 0, "/tmp/pdffile17.pdf");
00168 
00169         /* 4 bpp input, cmap */
00170     convertToPdfSegmented("/tmp/rabi4.png", 150, L_G4_ENCODE, 128,
00171                           NULL, 0, 0, "/tmp/pdffile18.pdf");
00172     convertToPdfSegmented("/tmp/rabi4.png", 150, L_JPEG_ENCODE, 128,
00173                           NULL, 0, 0, "/tmp/pdffile19.pdf");
00174     convertToPdfSegmented("/tmp/rabi4.png", 150, L_FLATE_ENCODE, 128,
00175                           NULL, 0, 0, "/tmp/pdffile20.pdf");
00176 
00177 #endif
00178 
00179 #if 1
00180     /* ---------- pdf convert segmented with image regions ---------- */
00181     fprintf(stderr, "\n*** Writing segmented images with image regions\n");
00182 
00183         /* Get the image region(s) for rabi.png.  There are two
00184          * small bogus regions at the top, but we'll keep them for
00185          * the demonstration. */
00186     pix1 = pixRead("rabi.png");
00187     pixSetResolution(pix1, 300, 300);
00188     pixGetDimensions(pix1, &w, &h, NULL);
00189     pix2 = pixGenHalftoneMask(pix1, NULL, NULL, 0);
00190     pix3 = pixMorphSequence(pix2, "c20.1 + c1.20", 0);
00191     boxa1 = pixConnComp(pix3, NULL, 8);
00192     boxa2 = boxaTransform(boxa1, 0, 0, 0.5, 0.5);
00193     pixDestroy(&pix1);
00194     pixDestroy(&pix2);
00195     pixDestroy(&pix3);
00196 
00197         /* 1 bpp input */
00198     convertToPdfSegmented("rabi.png", 300, L_G4_ENCODE, 128, boxa1,
00199                           0, 0.25, "/tmp/pdffile21.pdf");
00200     convertToPdfSegmented("rabi.png", 300, L_JPEG_ENCODE, 128, boxa1,
00201                           0, 0.25, "/tmp/pdffile22.pdf");
00202     convertToPdfSegmented("rabi.png", 300, L_FLATE_ENCODE, 128, boxa1,
00203                           0, 0.25, "/tmp/pdffile23.pdf");
00204 
00205         /* 8 bpp input, no cmap */
00206     convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_G4_ENCODE, 128, boxa2,
00207                           0, 0.5, "/tmp/pdffile24.pdf");
00208     convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_JPEG_ENCODE, 128, boxa2,
00209                           0, 0.5, "/tmp/pdffile25.pdf");
00210     convertToPdfSegmented("/tmp/rabi8.jpg", 150, L_FLATE_ENCODE, 128, boxa2,
00211                           0, 0.5, "/tmp/pdffile26.pdf");
00212 
00213         /* 4 bpp input, cmap */
00214     convertToPdfSegmented("/tmp/rabi4.png", 150, L_G4_ENCODE, 128, boxa2,
00215                           0, 0.5, "/tmp/pdffile27.pdf");
00216     convertToPdfSegmented("/tmp/rabi4.png", 150, L_JPEG_ENCODE, 128, boxa2,
00217                           0, 0.5, "/tmp/pdffile28.pdf");
00218     convertToPdfSegmented("/tmp/rabi4.png", 150, L_FLATE_ENCODE, 128, boxa2,
00219                           0, 0.5, "/tmp/pdffile29.pdf");
00220 
00221         /* 4 bpp input, cmap, data output */
00222     data = NULL;
00223     convertToPdfDataSegmented("/tmp/rabi4.png", 150, L_G4_ENCODE, 128, boxa2,
00224                               0, 0.5, &data, &nbytes);
00225     l_binaryWrite("/tmp/pdffile30.pdf", "w", data, nbytes);
00226     lept_free(data);
00227     convertToPdfDataSegmented("/tmp/rabi4.png", 150, L_JPEG_ENCODE, 128, boxa2,
00228                               0, 0.5, &data, &nbytes);
00229     l_binaryWrite("/tmp/pdffile31.pdf", "w", data, nbytes);
00230     lept_free(data);
00231     convertToPdfDataSegmented("/tmp/rabi4.png", 150, L_FLATE_ENCODE, 128, boxa2,
00232                               0, 0.5, &data, &nbytes);
00233     l_binaryWrite("/tmp/pdffile32.pdf", "w", data, nbytes);
00234     lept_free(data);
00235 
00236     boxaDestroy(&boxa1);
00237     boxaDestroy(&boxa2);
00238 #endif
00239 
00240         
00241 #if 1
00242     /* -------- pdf convert segmented from color image -------- */
00243     fprintf(stderr, "\n*** Writing color segmented images\n");
00244 
00245     pix1 = pixRead("candelabrum-11.jpg");
00246     pix2 = pixScale(pix1, 3.0, 3.0);
00247     pixWrite("/tmp/candelabrum3.jpg", pix2, IFF_JFIF_JPEG);
00248     GetImageMask(pix2, 200, &boxa1, "/tmp/seg1.jpg");
00249     convertToPdfSegmented("/tmp/candelabrum3.jpg", 200, L_G4_ENCODE,
00250                           100, boxa1, 0, 0.25, "/tmp/pdffile33.pdf");
00251     convertToPdfSegmented("/tmp/candelabrum3.jpg", 200, L_JPEG_ENCODE,
00252                           100, boxa1, 0, 0.25, "/tmp/pdffile34.pdf");
00253     convertToPdfSegmented("/tmp/candelabrum3.jpg", 200, L_FLATE_ENCODE,
00254                           100, boxa1, 0, 0.25, "/tmp/pdffile35.pdf");
00255 
00256     pixDestroy(&pix1);
00257     pixDestroy(&pix2);
00258     boxaDestroy(&boxa1);
00259 
00260     pix1 = pixRead("lion-page.00016.jpg");
00261     pix2 = pixScale(pix1, 3.0, 3.0);
00262     pixWrite("/tmp/lion16.jpg", pix2, IFF_JFIF_JPEG);
00263     pix3 = pixRead("lion-mask.00016.tif");
00264     boxa1 = pixConnComp(pix3, NULL, 8);
00265     boxa2 = boxaTransform(boxa1, 0, 0, 3.0, 3.0);
00266     convertToPdfSegmented("/tmp/lion16.jpg", 200, L_G4_ENCODE,
00267                           190, boxa2, 0, 0.5, "/tmp/pdffile36.pdf");
00268     convertToPdfSegmented("/tmp/lion16.jpg", 200, L_JPEG_ENCODE,
00269                           190, boxa2, 0, 0.5, "/tmp/pdffile37.pdf");
00270     convertToPdfSegmented("/tmp/lion16.jpg", 200, L_FLATE_ENCODE,
00271                           190, boxa2, 0, 0.5, "/tmp/pdffile38.pdf");
00272 
00273         /* Quantize the non-image part and flate encode.
00274          * This is useful because it results in a smaller file than
00275          * when you flate-encode the un-quantized non-image regions. */
00276     pix4 = pixScale(pix3, 3.0, 3.0);  /* higher res mask, for combining */
00277     pix5 = QuantizeNonImageRegion(pix2, pix4, 12);
00278     pixWrite("/tmp/lion16-quant.png", pix5, IFF_PNG);
00279     convertToPdfSegmented("/tmp/lion16-quant.png", 200, L_FLATE_ENCODE,
00280                           190, boxa2, 0, 0.5, "/tmp/pdffile39.pdf");
00281 
00282     pixDestroy(&pix1);
00283     pixDestroy(&pix2);
00284     pixDestroy(&pix3);
00285     pixDestroy(&pix4);
00286     pixDestroy(&pix5);
00287     boxaDestroy(&boxa1);
00288     boxaDestroy(&boxa2);
00289 #endif
00290 
00291 #if 1
00292     /* ------------------ Test multipage pdf generation ----------------- */
00293     fprintf(stderr, "\n*** Writing multipage pdfs from single page pdfs\n");
00294 
00295         /* Generate a multi-page pdf from all these files */
00296     startTimer();
00297     concatenatePdf("/tmp", "pdffile", "/tmp/cat_lept.pdf");
00298     fprintf(stderr, "Time: %7.3f\n", stopTimer());
00299 
00300         /* Put two good pdf files in a directory */
00301     lept_mkdir("good");
00302     lept_cp("testfile1.pdf", "/tmp/good");
00303     lept_cp("testfile2.pdf", "/tmp/good");
00304     concatenatePdf("/tmp/good", "file", "/tmp/good.pdf");
00305 
00306         /* Make a version with the pdf id removed, so that it is not
00307          * recognized as a pdf */
00308     ba = l_byteaInitFromFile("testfile2.pdf");
00309     data = l_byteaGetData(ba, &nbytes);
00310     l_binaryWrite("testfile0.notpdf.pdf", "w", data + 10, nbytes - 10);
00311 
00312         /* Make a version with a corrupted trailer */
00313     data[2297] = '2';  /* munge trailer object 6: change 458 --> 428 */
00314     l_binaryWrite("testfile2.bad.pdf", "w", data, nbytes);
00315 
00316         /* Put these two bad files, along with a good file, in a directory */
00317     lept_mkdir("bad");
00318     lept_mv("testfile0.notpdf.pdf", "/tmp/bad");
00319     lept_cp("testfile1.pdf", "/tmp/bad");
00320     lept_mv("testfile2.bad.pdf", "/tmp/bad");
00321     l_byteaDestroy(&ba);
00322 
00323         /* Run concat on the bad files.   In the /tmp/bad/ directory,
00324          * the "not pdf" file should be ignored, and the corrupted pdf
00325          * file should be properly parsed, so the resulting
00326          * concatenated files should be identical.  */
00327     fprintf(stderr, "\nWe attempt to build from the bad directory\n");
00328     concatenatePdf("/tmp/bad", "file", "/tmp/bad.pdf");
00329     filesAreIdentical("/tmp/good.pdf", "/tmp/bad.pdf", &same);
00330     if (same)
00331         fprintf(stderr, "Fixed: files are the same\n"
00332                         "Attempt succeeded\n\n");
00333     else
00334         fprintf(stderr, "Busted: files are different\n");
00335 
00336         /* pdftk fails because the first file is not a pdf */
00337     fprintf(stderr, "pdftk attempts to build from the bad directory\n");
00338     tempfile1 = genPathname("/tmp/bad", "*.pdf");
00339     tempfile2 = genPathname("/tmp", "pdftk.bad.pdf");
00340     snprintf(buffer, sizeof(buffer), "pdftk %s output %s",
00341              tempfile1, tempfile2);
00342     ret = system(buffer);
00343     lept_free(tempfile1);
00344     lept_free(tempfile2);
00345     fprintf(stderr, "Attempt failed\n\n");
00346 
00347 #endif
00348 
00349 #if 1
00350     fprintf(stderr, "\n*** pdftk writes multipage pdfs from images\n");
00351     tempfile1 = genPathname("/tmp", "pdffile*.pdf");
00352     tempfile2 = genPathname("/tmp", "cat_pdftk.pdf");
00353     snprintf(buffer, sizeof(buffer), "pdftk %s output %s",
00354              tempfile1, tempfile2);
00355     ret = system(buffer);
00356     lept_free(tempfile1);
00357     lept_free(tempfile2);
00358 #endif
00359 
00360 #if 1
00361     /* -- Test simple interface for generating multi-page pdf from images -- */
00362     fprintf(stderr, "\n*** Writing multipage pdfs from images\n");
00363 
00364         /* Put four image files in a directory.  They will be encoded thus:
00365          *     file1.png:  flate (8 bpp, only 10 colors)
00366          *     file2.jpg:  dct (8 bpp, 256 colors because of the jpeg encoding)
00367          *     file3.tif:  g4 (1 bpp)
00368          *     file4.jpg:  dct (32 bpp)    */
00369     lept_mkdir("image");
00370     pix1 = pixRead("feyn.tif");
00371     pix2 = pixRead("rabi.png");
00372     pix3 = pixScaleToGray3(pix1);
00373     pix4 = pixScaleToGray3(pix2);
00374     pix5 = pixScale(pix1, 0.33, 0.33);
00375     pix6 = pixRead("test24.jpg");
00376     pixWrite("/tmp/image/file1.png", pix3, IFF_PNG);  /* 10 colors */
00377     pixWrite("/tmp/image/file2.jpg", pix4, IFF_JFIF_JPEG);  /* 256 colors */
00378     pixWrite("/tmp/image/file3.tif", pix5, IFF_TIFF_G4);
00379     pixWrite("/tmp/image/file4.jpg", pix6, IFF_JFIF_JPEG);
00380 
00381     startTimer();
00382     convertFilesToPdf("/tmp/image", "file", 100, 0.8, 75, "4 file test",
00383                       "/tmp/fourimages.pdf");
00384     fprintf(stderr, "Time: %7.3f\n", stopTimer());
00385     pixDestroy(&pix1);
00386     pixDestroy(&pix2);
00387     pixDestroy(&pix3);
00388     pixDestroy(&pix4);
00389     pixDestroy(&pix5);
00390     pixDestroy(&pix6);
00391 #endif
00392 
00393     return 0;
00394 }
00395 
00396 
00397 static void
00398 GetImageMask(PIX         *pixs,
00399              l_int32      res,
00400              BOXA       **pboxa,
00401              const char  *debugfile)
00402 {
00403 PIX   *pix1, *pix2, *pix3, *pix4;
00404 PIXA  *pixa;
00405 
00406     pixSetResolution(pixs, 200, 200);
00407     pix1 = pixConvertTo1(pixs, 100);
00408     pix2 = pixGenHalftoneMask(pix1, NULL, NULL, 0);
00409     pix3 = pixMorphSequence(pix2, "c20.1 + c1.20", 0);
00410     *pboxa = pixConnComp(pix3, NULL, 8);
00411     if (debugfile) {
00412         pixa = pixaCreate(0);
00413         pixaAddPix(pixa, pixs, L_COPY);
00414         pixaAddPix(pixa, pix1, L_INSERT);
00415         pixaAddPix(pixa, pix2, L_INSERT);
00416         pixaAddPix(pixa, pix3, L_INSERT);
00417         pix4 = pixaDisplayTiledInRows(pixa, 32, 1800, 0.25, 0, 25, 2);
00418         pixWrite(debugfile, pix4, IFF_JFIF_JPEG);
00419         pixDisplay(pix4, 100, 100);
00420         pixDestroy(&pix4);
00421         pixaDestroy(&pixa);
00422     } else {
00423         pixDestroy(&pix1);
00424         pixDestroy(&pix2);
00425         pixDestroy(&pix3);
00426     }
00427 
00428     return;
00429 }
00430 
00431 static PIX *
00432 QuantizeNonImageRegion(PIX     *pixs,
00433                        PIX     *pixm,
00434                        l_int32  levels)
00435 {
00436 PIX  *pix1, *pix2, *pixd;
00437 
00438     pix1 = pixConvertTo8(pixs, 0);
00439     pix2 = pixThresholdOn8bpp(pix1, levels, 1);
00440     pixd = pixConvertTo32(pix2);  /* save in rgb */
00441     pixCombineMasked(pixd, pixs, pixm);  /* rgb result */
00442     pixDestroy(&pix1);
00443     pixDestroy(&pix2);
00444     return pixd;
00445 }
00446 
00447 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines