Leptonica 1.68
C Image Processing Library

wordsinorder.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  * wordsinorder.c
00018  *
00019  *     wordsinorder dirin rootname [firstpage npages]
00020  *
00021  *         dirin:  directory of input pages
00022  *         rootname: root of the output filenames; each rendered page
00023  *                   is written as a png to <rootname>.<5-digit index>
00024  *         firstpage: <optional> 0-based; default is 0
00025  *         npages: <optional> use 0 for all pages; default is 0
00026  *
00027  */
00028 
00029 #include "allheaders.h"
00030 
00031     /* Input variables */
00032 static const l_int32  MIN_WORD_WIDTH = 6;
00033 static const l_int32  MIN_WORD_HEIGHT = 4;
00034 static const l_int32  MAX_WORD_WIDTH = 500;
00035 static const l_int32  MAX_WORD_HEIGHT = 100;
00036 
00037 
00038 #define   BUF_SIZE                  512
00039 #define   RENDER_PAGES              1
00040 
00041 
00042 main(int    argc,
00043      char **argv)
00044 {
00045 char         filename[BUF_SIZE];
00046 char        *dirin, *rootname, *fname;
00047 l_int32      i, j, w, h, firstpage, npages, nfiles, ncomp;
00048 l_int32      index, ival, rval, gval, bval;
00049 BOX         *box;
00050 BOXA        *boxa;
00051 BOXAA       *baa;
00052 JBDATA      *data;
00053 JBCLASSER   *classer;
00054 NUMA        *nai;
00055 NUMAA       *naa;
00056 SARRAY      *safiles;
00057 PIX         *pixs, *pixt1, *pixt2, *pixd;
00058 PIXCMAP     *cmap;
00059 static char  mainName[] = "wordsinorder";
00060 
00061     if (argc != 3 && argc != 5)
00062         exit(ERROR_INT(
00063              " Syntax: wordsinorder dirin rootname [firstpage, npages]",
00064              mainName, 1));
00065 
00066     dirin = argv[1];
00067     rootname = argv[2];
00068 
00069     if (argc == 3) {
00070         firstpage = 0;
00071         npages = 0;
00072     }
00073     else {
00074         firstpage = atoi(argv[3]);
00075         npages = atoi(argv[4]);
00076     }
00077 
00078         /* Compute the word bounding boxes at 2x reduction, along with
00079          * the textlines that they are in. */
00080     safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
00081     nfiles = sarrayGetCount(safiles);
00082     baa = boxaaCreate(nfiles);
00083     naa = numaaCreate(nfiles);
00084     for (i = 0; i < nfiles; i++) {
00085         fname = sarrayGetString(safiles, i, 0);
00086         if ((pixs = pixRead(fname)) == NULL) {
00087             L_WARNING_INT("image file %d not read", mainName, i);
00088             continue;
00089         }
00090         pixGetWordBoxesInTextlines(pixs, 2, MIN_WORD_WIDTH, MIN_WORD_HEIGHT,
00091                                    MAX_WORD_WIDTH, MAX_WORD_HEIGHT,
00092                                    &boxa, &nai);
00093         boxaaAddBoxa(baa, boxa, L_INSERT);
00094         numaaAddNuma(naa, nai, L_INSERT);
00095         
00096 #if  RENDER_PAGES
00097             /* Show the results on a 2x reduced image, where each
00098              * word is outlined and the color of the box depends on the
00099              * computed textline. */
00100         pixt1 = pixReduceRankBinary2(pixs, 2, NULL);
00101         pixGetDimensions(pixt1, &w, &h, NULL);
00102         pixd = pixCreate(w, h, 8);
00103         cmap = pixcmapCreateRandom(8, 1, 1);  /* first color is black */
00104         pixSetColormap(pixd, cmap);
00105 
00106         pixt2 = pixUnpackBinary(pixt1, 8, 1);
00107         pixRasterop(pixd, 0, 0, w, h, PIX_SRC | PIX_DST, pixt2, 0, 0);
00108         ncomp = boxaGetCount(boxa);
00109         for (j = 0; j < ncomp; j++) {
00110             box = boxaGetBox(boxa, j, L_CLONE);
00111             numaGetIValue(nai, j, &ival);
00112             index = 1 + (ival % 254);  /* omit black and white */
00113             pixcmapGetColor(cmap, index, &rval, &gval, &bval);
00114             pixRenderBoxArb(pixd, box, 2, rval, gval, bval);
00115             boxDestroy(&box);
00116         }
00117 
00118         snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i);
00119         fprintf(stderr, "filename: %s\n", filename);
00120         pixWrite(filename, pixd, IFF_PNG);
00121         pixDestroy(&pixt1);
00122         pixDestroy(&pixt2);
00123         pixDestroy(&pixs);
00124         pixDestroy(&pixd);
00125 #endif  /* RENDER_PAGES */
00126     }
00127 
00128     boxaaDestroy(&baa);
00129     numaaDestroy(&naa);
00130     sarrayDestroy(&safiles);
00131     return 0;
00132 }
00133 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines