Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * wordsinorder.c 00018 * 00019 * wordsinorder dirin rootname [firstpage npages] 00020 * 00021 * dirin: directory of input pages 00022 * rootname: used for naming the two output files (templates 00023 * and c.c. data) 00024 * firstpage: <optional> 0-based; default is 0 00025 * npages: <optional> use 0 for all pages; default is 0 00026 * 00027 */ 00028 00029 #include "allheaders.h" 00030 00031 /* Input variables */ 00032 static const l_int32 MIN_WORD_WIDTH = 6; 00033 static const l_int32 MIN_WORD_HEIGHT = 4; 00034 static const l_int32 MAX_WORD_WIDTH = 500; 00035 static const l_int32 MAX_WORD_HEIGHT = 100; 00036 00037 00038 #define BUF_SIZE 512 00039 #define RENDER_PAGES 1 00040 00041 00042 main(int argc, 00043 char **argv) 00044 { 00045 char filename[BUF_SIZE]; 00046 char *dirin, *rootname, *fname; 00047 l_int32 i, j, w, h, firstpage, npages, nfiles, ncomp; 00048 l_int32 index, ival, rval, gval, bval; 00049 BOX *box; 00050 BOXA *boxa; 00051 BOXAA *baa; 00052 JBDATA *data; 00053 JBCLASSER *classer; 00054 NUMA *nai; 00055 NUMAA *naa; 00056 SARRAY *safiles; 00057 PIX *pixs, *pixt1, *pixt2, *pixd; 00058 PIXCMAP *cmap; 00059 static char mainName[] = "wordsinorder"; 00060 00061 if (argc != 3 && argc != 5) 00062 exit(ERROR_INT( 00063 " Syntax: wordsinorder dirin rootname [firstpage, npages]", 00064 mainName, 1)); 00065 00066 dirin = argv[1]; 00067 rootname = argv[2]; 00068 00069 if (argc == 3) { 00070 firstpage = 0; 00071 npages = 0; 00072 } 00073 else { 00074 firstpage = atoi(argv[3]); 00075 npages = atoi(argv[4]); 00076 } 00077 00078 /* Compute the word bounding boxes at 2x reduction, along with 00079 * the textlines that they are in. */ 00080 safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); 00081 nfiles = sarrayGetCount(safiles); 00082 baa = boxaaCreate(nfiles); 00083 naa = numaaCreate(nfiles); 00084 for (i = 0; i < nfiles; i++) { 00085 fname = sarrayGetString(safiles, i, 0); 00086 if ((pixs = pixRead(fname)) == NULL) { 00087 L_WARNING_INT("image file %d not read", mainName, i); 00088 continue; 00089 } 00090 pixGetWordBoxesInTextlines(pixs, 2, MIN_WORD_WIDTH, MIN_WORD_HEIGHT, 00091 MAX_WORD_WIDTH, MAX_WORD_HEIGHT, 00092 &boxa, &nai); 00093 boxaaAddBoxa(baa, boxa, L_INSERT); 00094 numaaAddNuma(naa, nai, L_INSERT); 00095 00096 #if RENDER_PAGES 00097 /* Show the results on a 2x reduced image, where each 00098 * word is outlined and the color of the box depends on the 00099 * computed textline. */ 00100 pixt1 = pixReduceRankBinary2(pixs, 2, NULL); 00101 pixGetDimensions(pixt1, &w, &h, NULL); 00102 pixd = pixCreate(w, h, 8); 00103 cmap = pixcmapCreateRandom(8, 1, 1); /* first color is black */ 00104 pixSetColormap(pixd, cmap); 00105 00106 pixt2 = pixUnpackBinary(pixt1, 8, 1); 00107 pixRasterop(pixd, 0, 0, w, h, PIX_SRC | PIX_DST, pixt2, 0, 0); 00108 ncomp = boxaGetCount(boxa); 00109 for (j = 0; j < ncomp; j++) { 00110 box = boxaGetBox(boxa, j, L_CLONE); 00111 numaGetIValue(nai, j, &ival); 00112 index = 1 + (ival % 254); /* omit black and white */ 00113 pixcmapGetColor(cmap, index, &rval, &gval, &bval); 00114 pixRenderBoxArb(pixd, box, 2, rval, gval, bval); 00115 boxDestroy(&box); 00116 } 00117 00118 snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i); 00119 fprintf(stderr, "filename: %s\n", filename); 00120 pixWrite(filename, pixd, IFF_PNG); 00121 pixDestroy(&pixt1); 00122 pixDestroy(&pixt2); 00123 pixDestroy(&pixs); 00124 pixDestroy(&pixd); 00125 #endif /* RENDER_PAGES */ 00126 } 00127 00128 boxaaDestroy(&baa); 00129 numaaDestroy(&naa); 00130 sarrayDestroy(&safiles); 00131 return 0; 00132 } 00133