Leptonica 1.68
C Image Processing Library

convertsegfilestopdf.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  * convertsegfilestopdf.c
00018  *
00019  *    Converts all image files in the given directory with matching substring
00020  *    to a pdf, with the specified scaling factor <= 1.0.  To convert
00021  *    all files in the directory, use 'allfiles' for the substring.
00022  *
00023  *    See below for syntax and usage.
00024  *
00025  *    The image regions are displayed at a resolution that depends on the
00026  *    input resolution (res) and the scaling factor (scalefact) that
00027  *    is applied to the images before conversion to pdf.  Internally
00028  *    we multiply these, so that the generated pdf will render at the
00029  *    same resolution as if it hadn't been scaled.  Downscaling:
00030  *       (1) reduces the size of the images.  For jpeg, the 'image'
00031  *           segmented part shrinks by the square of the scale factor.
00032  *       (2) regenerates the jpeg with quality = 75 after downscaling.
00033  *
00034  *    If boxaafile doesn't exist, the images are converted without
00035  *    scaling and with the best compression for each image.
00036  *
00037  *    To see how this works:
00038  *       (1) run pdfseg_reg
00039  *           This generates image and boxaa files in /tmp/segtest/
00040  *       (2) run convertsegfilestopdf:
00041  *           convertsegfilestopdf /tmp/segtest ".jpg" 100 2 140
00042  *              /tmp/segtest/seg.baa 1.0 segtest /tmp/segtest.pdf
00043  */
00044 
00045 #include <string.h>
00046 #include "allheaders.h"
00047 
00048 main(int    argc,
00049      char **argv)
00050 {
00051 char        *dirin, *substr, *title, *fileout, *boxaafile, *boxaapath;
00052 l_int32      ret, res, type, thresh;
00053 l_float32    scalefactor;
00054 BOXAA       *baa;
00055 static char  mainName[] = "convertsegfilestopdf";
00056 
00057     if (argc != 10) {
00058         fprintf(stderr,
00059             " Syntax: convertsegfilestopdf dirin substr res type thresh \\ \n"
00060             "                       boxaafile scalefactor title fileout\n"
00061             "     where\n"
00062             "         dirin:  input directory for image files\n"
00063             "         substr:  Use 'allfiles' to convert all files\n"
00064             "                  in the directory\n"
00065             "         res:  Input resolution of each image;\n"
00066             "               assumed to all be the same\n"
00067             "         type: compression used for non-image regions:\n"
00068             "               0: default (G4 encoding)\n"
00069             "               1: JPEG encoding\n"
00070             "               2: G4 encoding\n"
00071             "               3: PNG encoding\n"
00072             "         thresh:  threshold for binarization; use 0 for default\n"
00073             "         boxaafile: File of 'image' regions within each page\n"
00074             "                    This contains a boxa for each page,\n"
00075             "                    consisting of a set of regions\n"
00076             "         scalefactor:  Use to scale down the image regions\n"
00077             "         title:  Use 'none' to omit\n"
00078             "         fileout:  Output pdf file\n");
00079         return 1;
00080     }
00081 
00082     dirin = argv[1];
00083     substr = argv[2];
00084     res = atoi(argv[3]);
00085     type = atoi(argv[4]);
00086     thresh = atoi(argv[5]);
00087     boxaafile = argv[6];
00088     scalefactor = atof(argv[7]);
00089     title = argv[8];
00090     fileout = argv[9];
00091 
00092     if (!strcmp(substr, "allfiles"))
00093         substr = NULL;
00094     if (scalefactor <= 0.0 || scalefactor > 1.0) {
00095         L_WARNING("invalid scalefactor: setting to 1.0", mainName);
00096         scalefactor = 1.0;
00097     }
00098     if (type != 1 && type != 2 && type != 3)
00099         type = L_G4_ENCODE;
00100     if (thresh <= 0)
00101         thresh = 150;
00102     if (!strcmp(title, "none"))
00103         title = NULL;
00104 
00105     boxaapath = genPathname(boxaafile, NULL);
00106     if ((baa = boxaaRead(boxaapath)) == NULL) {
00107         L_WARNING(
00108             "boxaa file not found; converting unsegmented and unscaled",
00109             mainName);
00110         ret = convertFilesToPdf(dirin, substr, res, 1.0, 75, title,
00111                                 fileout);
00112         FREE(boxaapath);
00113         return ret;
00114     }
00115 
00116     ret = convertSegmentedFilesToPdf(dirin, substr, res, type, thresh, baa,
00117                                      75, scalefactor, title, fileout);
00118     FREE(boxaapath);
00119     boxaaDestroy(&baa);
00120     return ret;
00121 }
00122 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines