Leptonica 1.68
C Image Processing Library

jbwords.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  * jbwords.c
00018  *
00019  *     jbwords dirin reduction thresh weight rootname [firstpage npages]
00020  *
00021  *         dirin:  directory of input pages
00022  *         reduction: 1 (full res) or 2 (half-res)
00023  *         thresh: 0.80 is a reasonable compromise between accuracy
00024  *                 and number of classes, for characters
00025  *         weight: 0.6 seems to work reasonably with thresh = 0.8.
00026  *         rootname: used for naming the two output files (templates
00027  *                   and c.c. data)
00028  *         firstpage: <optional> 0-based; default is 0
00029  *         npages: <optional> use 0 for all pages; default is 0
00030  *
00031  */
00032 
00033 #include <stdio.h>
00034 #include <stdlib.h>
00035 #include "allheaders.h"
00036 
00037     /* Eliminate very large "words": these two limits are passed to
           * jbWordsInTextlines() as its maximum word width and height
           * arguments (presumably in pixels -- TODO confirm units) */
00038 static const l_int32  MAX_WORD_WIDTH = 500;
00039 static const l_int32  MAX_WORD_HEIGHT = 200;
00040 
00041 #define   BUF_SIZE                  512  /* size of the output filename buffer */
00042 
00043     /* select additional debug output; both must remain preprocessor
           * macros because they are tested with #if:
           *   RENDER_PAGES: render pages without component outlines
           *   RENDER_DEBUG: render pages with component outlines */
00044 #define   RENDER_PAGES              1
00045 #define   RENDER_DEBUG              1
00046 
00047 
00048 main(int    argc,
00049      char **argv)
00050 {
00051 char         filename[BUF_SIZE];
00052 char        *dirin, *rootname, *fname;
00053 l_int32      reduction, i, firstpage, npages, nfiles;
00054 l_float32    thresh, weight;
00055 JBDATA      *data;
00056 JBCLASSER   *classer;
00057 NUMA        *natl;
00058 SARRAY      *safiles;
00059 PIX         *pix;
00060 PIXA        *pixa, *pixadb;
00061 static char  mainName[] = "jbwords";
00062 
00063     if (argc != 6 && argc != 8)
00064         exit(ERROR_INT(
00065  " Syntax: jbwords dirin reduction thresh weight rootname [firstpage, npages]",
00066              mainName, 1));
00067 
00068     dirin = argv[1];
00069     reduction = atoi(argv[2]);
00070     thresh = atof(argv[3]);
00071     weight = atof(argv[4]);
00072     rootname = argv[5];
00073 
00074     if (argc == 6) {
00075         firstpage = 0;
00076         npages = 0;
00077     }
00078     else {
00079         firstpage = atoi(argv[6]);
00080         npages = atoi(argv[7]);
00081     }
00082 
00083     classer = jbWordsInTextlines(dirin, reduction, MAX_WORD_WIDTH,
00084                                  MAX_WORD_HEIGHT, thresh, weight,
00085                                  &natl, firstpage, npages);
00086 
00087         /* Save and write out the result */
00088     data = jbDataSave(classer);
00089     jbDataWrite(rootname, data);
00090 
00091 #if  RENDER_PAGES
00092         /* Render the pages from the classifier data, and write to file.
00093          * Use debugflag == FALSE to omit outlines of each component. */
00094     pixa = jbDataRender(data, FALSE);
00095     npages = pixaGetCount(pixa);
00096     for (i = 0; i < npages; i++) {
00097         pix = pixaGetPix(pixa, i, L_CLONE);
00098         snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i);
00099         fprintf(stderr, "filename: %s\n", filename);
00100         pixWrite(filename, pix, IFF_PNG);
00101         pixDestroy(&pix);
00102     }
00103     pixaDestroy(&pixa);
00104 #endif  /* RENDER_PAGES */
00105 
00106 #if  RENDER_DEBUG
00107         /* Use debugflag == TRUE to see outlines of each component. */
00108     pixadb = jbDataRender(data, TRUE);
00109         /* Write the debug pages out */
00110     npages = pixaGetCount(pixadb);
00111     for (i = 0; i < npages; i++) {
00112         pix = pixaGetPix(pixadb, i, L_CLONE);
00113         snprintf(filename, BUF_SIZE, "%s.db.%05d", rootname, i);
00114         fprintf(stderr, "filename: %s\n", filename);
00115         pixWrite(filename, pix, IFF_PNG);
00116         pixDestroy(&pix);
00117     }
00118     pixaDestroy(&pixadb);
00119 #endif  /* RENDER_DEBUG */
00120 
00121     jbClasserDestroy(&classer);
00122     jbDataDestroy(&data);
00123     numaDestroy(&natl);
00124 
00125     exit(0);
00126 }
00127 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines