Leptonica 1.68
C Image Processing Library

convertsegfilestops.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  * convertsegfilestops.c
00018  *
00019  *    Converts all image files in a 'page' directory, using optional
00020  *    corresponding segmentation mask files in a 'mask' directory,
00021  *    to a level 2 compressed PostScript file.  This is done
00022  *    automatically at a resolution that fits to a letter-sized
00023  *    (8.5 x 11) inch page.  The 'page' and 'mask' files are paired
00024  *    by having the same number embedded in their name.
00025  *    The 'numpre' and 'numpost' args specify the number of
00026  *    characters at the beginning and end of the filename (not
00027  *    counting any extension) that are NOT part of the page number.
00028  *    For example, if the page numbers are 00000.jpg, 00001.jpg, ...
00029  *    then numpre = numpost = 0.
00030  *
00031  *    The mask directory must exist, but it does not need to have
00032  *    any image mask files.
00033  *
00034  *    The pages are taken in lexical order of the filenames.  Therefore,
00035  *    the embedded numbers should be 0-padded on the left up to
00036  *    a fixed number of digits.
00037  *
00038  *    PostScript (and pdf) allow regions of the image to be encoded
00039  *    differently.  Regions can be over-written, with the last writing
00040  *    determining the final output.  Black "ink" can also be written
00041  *    through a mask that is given by a 1 bpp image.
00042  *
00043  *    The page images are typically grayscale or color.  To take advantage
00044  *    of this depth, one typically upscales the text by 2.0.  Likewise,
00045  *    the image regions, denoted by foreground in the corresponding
00046  *    segmentation mask, can be rendered at lower resolution, and
00047  *    it is often useful to downscale the image parts by 0.5.
00048  *
00049  *    If the mask does not exist, the entire page is interpreted as
00050  *    text; it is converted to 1 bpp and written to file with
00051  *    ccitt-g4 compression at the requested "textscale" relative
00052  *    to the page image.   If the mask exists and the foreground
00053  *    covers the entire page, the entire page is saved with jpeg
00054  *    ("dct") compression at the requested "imagescale".
00055  *    If the mask exists and partially covers the page image, the
00056  *    page is saved as a mixture of grayscale or rgb dct and 1 bpp g4.
00057  *
00058  *    This uses a single global threshold for binarizing the text
00059  *    (i.e., non-image) regions of every page.
00060  */
00061 
00062 #include <string.h>
00063 #include "allheaders.h"
00064 
00065 main(int    argc,
00066      char **argv)
00067 {
00068 char      *pagedir, *pagestr, *maskdir, *maskstr, *fileout;
00069 l_int32    threshold, numpre, numpost, maxnum;
00070 l_float32  textscale, imagescale;    
00071 
00072     if (argc != 12) {
00073         fprintf(stderr,
00074             " Syntax: convertsegfilestops pagedir pagestr maskdir maskstr \\ \n"
00075             "                             numpre numpost maxnum \\ \n"
00076             "                             textscale imagescale thresh fileout\n"
00077             "     where\n"
00078             "         pagedir:  Input directory for page image files\n"
00079             "         pagestr:  Substring for matching; use 'allfiles' to\n"
00080             "                   convert all files in the page directory\n"
00081             "         maskdir:  Input directory for mask image files\n"
00082             "         maskstr:  Substring for matching; use 'allfiles' to\n"
00083             "                   convert all files in the mask directory\n"
00084             "         numpre:  Number of characters in name before number\n"
00085             "         numpost:  Number of characters in name after number\n"
00086             "         maxnum:  Only consider page numbers up to this value\n"
00087             "         textscale:  Scale of text output relative to pixs\n"
00088             "         imagescale:  Scale of image output relative to pixs\n"
00089             "         thresh:  threshold for binarization; typically about\n"
00090             "                  180; use 0 for default\n"
00091             "         fileout:  Output p file\n");
00092         return 1;
00093     }
00094 
00095     pagedir = argv[1];
00096     pagestr = argv[2];
00097     maskdir = argv[3];
00098     maskstr = argv[4];
00099     numpre = atoi(argv[5]);
00100     numpost = atoi(argv[6]);
00101     maxnum = atoi(argv[7]);
00102     textscale = atof(argv[8]);
00103     imagescale = atof(argv[9]);
00104     threshold = atoi(argv[10]);
00105     fileout = argv[11];
00106 
00107     if (!strcmp(pagestr, "allfiles"))
00108         pagestr = NULL;
00109     if (!strcmp(maskstr, "allfiles"))
00110         maskstr = NULL;
00111 
00112     return convertSegmentedPagesToPS(pagedir, pagestr, maskdir, maskstr,
00113                                      numpre, numpost, maxnum, textscale,
00114                                      imagescale, threshold, fileout);
00115 }
00116 
00117 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines