Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * convertsegfilestops.c 00018 * 00019 * Converts all image files in a 'page' directory, using optional 00020 * corresponding segmentation mask files in a 'mask' directory, 00021 * to a level 2 compressed PostScript file. This is done 00022 * automatically at a resolution that fits to a letter-sized 00023 * (8.5 x 11) inch page. The 'page' and 'mask' files are paired 00024 * by having the same number embedded in their name. 00025 * The 'numpre' and 'numpost' args specify the number of 00026 * characters at the beginning and end of the filename (not 00027 * counting any extension) that are NOT part of the page number. 00028 * For example, if the page numbers are 00000.jpg, 00001.jpg, ... 00029 * then numpre = numpost = 0. 00030 * 00031 * The mask directory must exist, but it does not need to have 00032 * any image mask files. 00033 * 00034 * The pages are taken in lexical order of the filenames. Therefore, 00035 * the embedded numbers should be 0-padded on the left up to 00036 * a fixed number of digits. 00037 * 00038 * PostScript (and pdf) allow regions of the image to be encoded 00039 * differently. Regions can be over-written, with the last writing 00040 * determining the final output. Black "ink" can also be written 00041 * through a mask that is given by a 1 bpp image. 00042 * 00043 * The page images are typically grayscale or color. To take advantage 00044 * of this depth, one typically upscales the text by 2.0. Likewise, 00045 * the images regions, denoted by foreground in the corresponding 00046 * segmentation mask, can be rendered at lower resolution, and 00047 * it is often useful to downscale the image parts by 0.5. 00048 * 00049 * If the mask does not exist, the entire page is interpreted as 00050 * text; it is converted to 1 bpp and written to file with 00051 * ccitt-g4 compression at the requested "textscale" relative 00052 * to the page image. If the mask exists and the foreground 00053 * covers the entire page, the entire page is saved with jpeg 00054 * ("dct") compression at the requested "imagescale". 00055 * If the mask exists and partially covers the page image, the 00056 * page is saved as a mixture of grayscale or rgb dct and 1 bpp g4. 00057 * 00058 * This uses a single global threshold for binarizing the text 00059 * (i.e., non-image) regions of every page. 00060 */ 00061 00062 #include <string.h> 00063 #include "allheaders.h" 00064 00065 main(int argc, 00066 char **argv) 00067 { 00068 char *pagedir, *pagestr, *maskdir, *maskstr, *fileout; 00069 l_int32 threshold, numpre, numpost, maxnum; 00070 l_float32 textscale, imagescale; 00071 00072 if (argc != 12) { 00073 fprintf(stderr, 00074 " Syntax: convertsegfilestops pagedir pagestr maskdir maskstr \\ \n" 00075 " numpre numpost maxnum \\ \n" 00076 " textscale imagescale thresh fileout\n" 00077 " where\n" 00078 " pagedir: Input directory for page image files\n" 00079 " pagestr: Substring for matching; use 'allfiles' to\n" 00080 " convert all files in the page directory\n" 00081 " maskdir: Input directory for mask image files\n" 00082 " maskstr: Substring for matching; use 'allfiles' to\n" 00083 " convert all files in the mask directory\n" 00084 " numpre: Number of characters in name before number\n" 00085 " numpost: Number of characters in name after number\n" 00086 " maxnum: Only consider page numbers up to this value\n" 00087 " textscale: Scale of text output relative to pixs\n" 00088 " imagescale: Scale of image output relative to pixs\n" 00089 " thresh: threshold for binarization; typically about\n" 00090 " 180; use 0 for default\n" 00091 " fileout: Output p file\n"); 00092 return 1; 00093 } 00094 00095 pagedir = argv[1]; 00096 pagestr = argv[2]; 00097 maskdir = argv[3]; 00098 maskstr = argv[4]; 00099 numpre = atoi(argv[5]); 00100 numpost = atoi(argv[6]); 00101 maxnum = atoi(argv[7]); 00102 textscale = atof(argv[8]); 00103 imagescale = atof(argv[9]); 00104 threshold = atoi(argv[10]); 00105 fileout = argv[11]; 00106 00107 if (!strcmp(pagestr, "allfiles")) 00108 pagestr = NULL; 00109 if (!strcmp(maskstr, "allfiles")) 00110 maskstr = NULL; 00111 00112 return convertSegmentedPagesToPS(pagedir, pagestr, maskdir, maskstr, 00113 numpre, numpost, maxnum, textscale, 00114 imagescale, threshold, fileout); 00115 } 00116 00117