Leptonica 1.68
C Image Processing Library

classapp.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 
00017 /*
00018  *  classapp.c
00019  *
00020  *      Top-level jb2 correlation and rank-hausdorff
00021  *
00022  *         l_int32         jbCorrelation()
00023  *         l_int32         jbRankHaus()
00024  *
00025  *      Extract and classify words in textline order
00026  *
00027  *         JBCLASSER      *jbWordsInTextlines()
00028  *         l_int32         pixGetWordsInTextlines()
00029  *         l_int32         pixGetWordBoxesInTextlines()
00030  *
00031  *      Use word bounding boxes to compare page images
00032  *
00033  *         NUMAA          *boxaExtractSortedPattern()
00034  *         l_int32         numaaCompareImagesByBoxes()
00035  *         static l_int32  testLineAlignmentX()
00036  *         static l_int32  countAlignedMatches()
00037  *         static void     printRowIndices()
00038  */
00039 
00040 #include <string.h>
00041 #include "allheaders.h"
00042 
00043 static const l_int32  JB_WORDS_MIN_WIDTH = 5;  /* min width of saved word components, in pixels */
00044 static const l_int32  JB_WORDS_MIN_HEIGHT = 3;  /* min height of saved word components, in pixels */
00045 
00046     /* Filename buffer size; MSVC can't handle arrays dimensioned by static const integers */
00047 #define  L_BUF_SIZE  512
00048 
00049     /* Static helpers for comparing page images by word bounding boxes */
00050 static l_int32 testLineAlignmentX(NUMA *na1, NUMA *na2, l_int32 shiftx,
00051                                   l_int32 delx, l_int32 nperline);
00052 static l_int32 countAlignedMatches(NUMA *nai1, NUMA *nai2, NUMA *nasx,
00053                                    NUMA *nasy, l_int32 n1, l_int32 n2,
00054                                    l_int32 delx, l_int32 dely,
00055                                    l_int32 nreq, l_int32 *psame,
00056                                    l_int32 debugflag);
00057 static void printRowIndices(l_int32 *index1, l_int32 n1,
00058                             l_int32 *index2, l_int32 n2);
00059 
00060 
00061 /*------------------------------------------------------------------*
00062  *          Top-level jb2 correlation and rank-hausdorff            *
00063  *------------------------------------------------------------------*/
00064 /*!
00065  *  jbCorrelation()
00066  *
00067  *       Input:  dirin (directory of input images)
00068  *               thresh (typically ~0.8)
00069  *               weight (typically ~0.6)
00070  *               components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
00071  *               rootname (for output files)
00072  *               firstpage (0-based)
00073  *               npages (use 0 for all pages in dirin)
00074  *               renderflag (1 to render from templates; 0 to skip)
00075  *       Return: 0 if OK, 1 on error
00076  *
00077  *  Notes:
00078  *      (1) The images must be 1 bpp.  If they are not, you can convert
00079  *          them using convertFilesTo1bpp().
00080  *      (2) See prog/jbcorrelation for generating more output (e.g.,
00081  *          for debugging)
00082  */
00083 l_int32
00084 jbCorrelation(const char  *dirin,
00085               l_float32    thresh,
00086               l_float32    weight,
00087               l_int32      components,
00088               const char  *rootname,
00089               l_int32      firstpage,
00090               l_int32      npages,
00091               l_int32      renderflag)
00092 {
00093 char        filename[L_BUF_SIZE];
00094 l_int32     nfiles, i, numpages;
00095 JBDATA     *data;
00096 JBCLASSER  *classer;
00097 PIX        *pix;
00098 PIXA       *pixa;
00099 SARRAY     *safiles;
00100 
00101     PROCNAME("jbCorrelation");
00102 
00103     if (!dirin)
00104         return ERROR_INT("dirin not defined", procName, 1);
00105     if (!rootname)
00106         return ERROR_INT("rootname not defined", procName, 1);
00107     if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
00108         components != JB_WORDS)
00109         return ERROR_INT("components invalid", procName, 1);
00110 
00111     safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
00112     nfiles = sarrayGetCount(safiles);
00113 
00114         /* Classify components */
00115     classer = jbCorrelationInit(components, 0, 0, thresh, weight);
00116     jbAddPages(classer, safiles);
00117 
00118         /* Save data */
00119     data = jbDataSave(classer);
00120     jbDataWrite(rootname, data);
00121 
00122         /* Optionally, render pages using class templates */
00123     if (renderflag) {
00124         pixa = jbDataRender(data, FALSE);
00125         numpages = pixaGetCount(pixa);
00126         if (numpages != nfiles)
00127             fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n",
00128                     numpages, nfiles);
00129         for (i = 0; i < numpages; i++) {
00130             pix = pixaGetPix(pixa, i, L_CLONE);
00131             snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i);
00132             fprintf(stderr, "filename: %s\n", filename);
00133             pixWrite(filename, pix, IFF_PNG);
00134             pixDestroy(&pix);
00135         }
00136         pixaDestroy(&pixa);
00137     }
00138 
00139     sarrayDestroy(&safiles);
00140     jbClasserDestroy(&classer);
00141     jbDataDestroy(&data);
00142     return 0;
00143 }
00144 
00145 
00146 /*!
00147  *  jbRankHaus()
00148  *
00149  *       Input:  dirin (directory of input images)
00150  *               size (of Sel used for dilation; typ. 2)
00151  *               rank (rank value of match; typ. 0.97)
00152  *               components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
00153  *               rootname (for output files)
00154  *               firstpage (0-based)
00155  *               npages (use 0 for all pages in dirin)
00156  *               renderflag (1 to render from templates; 0 to skip)
00157  *       Return: 0 if OK, 1 on error
00158  *
00159  *  Notes:
00160  *      (1) See prog/jbrankhaus for generating more output (e.g.,
00161  *          for debugging)
00162  */
00163 l_int32
00164 jbRankHaus(const char  *dirin,
00165            l_int32      size,
00166            l_float32    rank,
00167            l_int32      components,
00168            const char  *rootname,
00169            l_int32      firstpage,
00170            l_int32      npages,
00171            l_int32      renderflag)
00172 {
00173 char        filename[L_BUF_SIZE];
00174 l_int32     nfiles, i, numpages;
00175 JBDATA     *data;
00176 JBCLASSER  *classer;
00177 PIX        *pix;
00178 PIXA       *pixa;
00179 SARRAY     *safiles;
00180 
00181     PROCNAME("jbRankHaus");
00182 
00183     if (!dirin)
00184         return ERROR_INT("dirin not defined", procName, 1);
00185     if (!rootname)
00186         return ERROR_INT("rootname not defined", procName, 1);
00187     if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
00188         components != JB_WORDS)
00189         return ERROR_INT("components invalid", procName, 1);
00190 
00191     safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
00192     nfiles = sarrayGetCount(safiles);
00193 
00194         /* Classify components */
00195     classer = jbRankHausInit(components, 0, 0, size, rank);
00196     jbAddPages(classer, safiles);
00197 
00198         /* Save data */
00199     data = jbDataSave(classer);
00200     jbDataWrite(rootname, data);
00201 
00202         /* Optionally, render pages using class templates */
00203     if (renderflag) {
00204         pixa = jbDataRender(data, FALSE);
00205         numpages = pixaGetCount(pixa);
00206         if (numpages != nfiles)
00207             fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n",
00208                     numpages, nfiles);
00209         for (i = 0; i < numpages; i++) {
00210             pix = pixaGetPix(pixa, i, L_CLONE);
00211             snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i);
00212             fprintf(stderr, "filename: %s\n", filename);
00213             pixWrite(filename, pix, IFF_PNG);
00214             pixDestroy(&pix);
00215         }
00216         pixaDestroy(&pixa);
00217     }
00218 
00219     sarrayDestroy(&safiles);
00220     jbClasserDestroy(&classer);
00221     jbDataDestroy(&data);
00222     return 0;
00223 }
00224 
00225 
00226 
00227 /*------------------------------------------------------------------*
00228  *           Extract and classify words in textline order           *
00229  *------------------------------------------------------------------*/
00230 /*!
00231  *  jbWordsInTextlines()
00232  *
00233  *      Input:  dirin (directory of input pages)
00234  *              reduction (1 for full res; 2 for half-res)
00235  *              maxwidth (of word mask components, to be kept)
00236  *              maxheight (of word mask components, to be kept)
00237  *              thresh (on correlation; 0.80 is reasonable)
00238  *              weight (for handling thick text; 0.6 is reasonable)
00239  *              natl (<return> numa with textline index for each component)
00240  *              firstpage (0-based)
00241  *              npages (use 0 for all pages in dirin)
00242  *      Return: classer (for the set of pages)
00243  *
00244  *  Notes:
00245  *      (1) This is a high-level function.  See prog/jbwords for example
00246  *          of usage.
00247  *      (2) Typically, words can be found reasonably well at a resolution
00248  *          of about 150 ppi.  For highest accuracy, you should use 300 ppi.
00249  *          Assuming that the input images are 300 ppi, use reduction = 1
00250  *          for finding words at full res, and reduction = 2 for finding
00251  *          them at 150 ppi.
00252  */
00253 JBCLASSER *
00254 jbWordsInTextlines(const char  *dirin,
00255                    l_int32      reduction,
00256                    l_int32      maxwidth,
00257                    l_int32      maxheight,
00258                    l_float32    thresh,
00259                    l_float32    weight,
00260                    NUMA       **pnatl,
00261                    l_int32      firstpage,
00262                    l_int32      npages)
00263 {
00264 char       *fname;
00265 l_int32     nfiles, i, w, h;
00266 BOXA       *boxa;
00267 JBCLASSER  *classer;
00268 NUMA       *nai, *natl;
00269 PIX        *pix;
00270 PIXA       *pixa;
00271 SARRAY     *safiles;
00272 
00273     PROCNAME("jbWordsInTextlines");
00274 
00275     if (!pnatl)
00276         return (JBCLASSER *)ERROR_PTR("&natl not defined", procName, NULL);
00277     *pnatl = NULL;
00278     if (!dirin)
00279         return (JBCLASSER *)ERROR_PTR("dirin not defined", procName, NULL);
00280     if (reduction != 1 && reduction != 2)
00281         return (JBCLASSER *)ERROR_PTR("reduction not in {1,2}", procName, NULL);
00282 
00283     safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
00284     nfiles = sarrayGetCount(safiles);
00285 
00286         /* Classify components */
00287     classer = jbCorrelationInit(JB_WORDS, maxwidth, maxheight, thresh, weight);
00288     classer->safiles = sarrayCopy(safiles);
00289     natl = numaCreate(0);
00290     *pnatl = natl;
00291     for (i = 0; i < nfiles; i++) {
00292         fname = sarrayGetString(safiles, i, 0);
00293         if ((pix = pixRead(fname)) == NULL) {
00294             L_WARNING_INT("image file %d not read", procName, i);
00295             continue;
00296         }
00297         pixGetDimensions(pix, &w, &h, NULL);
00298         if (reduction == 1) {
00299             classer->w = w;
00300             classer->h = h;
00301         }
00302         else {  /* reduction == 2 */
00303             classer->w = w / 2;
00304             classer->h = h / 2;
00305         }
00306         pixGetWordsInTextlines(pix, reduction, JB_WORDS_MIN_WIDTH,
00307                                JB_WORDS_MIN_HEIGHT, maxwidth, maxheight,
00308                                &boxa, &pixa, &nai);
00309         jbAddPageComponents(classer, pix, boxa, pixa);
00310         numaJoin(natl, nai, 0, 0);
00311         pixDestroy(&pix);
00312         numaDestroy(&nai);
00313         boxaDestroy(&boxa);
00314         pixaDestroy(&pixa);
00315     }
00316 
00317     sarrayDestroy(&safiles);
00318     return classer;
00319 }
00320 
00321 
00322 /*!
00323  *  pixGetWordsInTextlines()
00324  *
00325  *      Input:  pixs (1 bpp, 300 ppi)
00326  *              reduction (1 for full res; 2 for half-res)
00327  *              minwidth, minheight (of saved components; smaller are discarded)
00328  *              maxwidth, maxheight (of saved components; larger are discarded)
00329  *              &boxad (<return> word boxes sorted in textline line order)
00330  *              &pixad (<return> word images sorted in textline line order)
00331  *              &naindex (<return> index of textline for each word)
00332  *      Return: 0 if OK, 1 on error
00333  *
00334  *  Notes:
00335  *      (1) The input should be at a resolution of about 300 ppi.
00336  *          The word masks can be computed at either 150 ppi or 300 ppi.
00337  *          For the former, set reduction = 2.
00338  *      (2) The four size constraints on saved components are all
00339  *          used at 2x reduction.
00340  *      (3) The result are word images (and their b.b.), extracted in
00341  *          textline order, all at 2x reduction, and with a numa giving
00342  *          the textline index for each word.
00343  *      (4) The pixa and boxa interfaces should make this type of
00344  *          application simple to put together.  The steps are:
00345  *           - generate first estimate of word masks
00346  *           - get b.b. of these, and remove the small and big ones
00347  *           - extract pixa of the word mask from these boxes
00348  *           - extract pixa of the actual word images, using word masks
00349  *           - sort actual word images in textline order (2d)
00350  *           - flatten them to a pixa (1d), saving the textline index
00351  *             for each pix
00352  *      (5) In an actual application, it may be desirable to pre-filter
00353  *          the input image to remove large components, to extract
00354  *          single columns of text, and to deskew them.  For example,
00355  *          to remove both large components and small noisy components
00356  *          that can interfere with the statistics used to estimate
00357  *          parameters for segmenting by words, but still retain text lines,
00358  *          the following image preprocessing can be done:
00359  *                Pix *pixt = pixMorphSequence(pixs, "c40.1", 0);
00360  *                Pix *pixf = pixSelectBySize(pixt, 0, 60, 8,
00361  *                                     L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL);
00362  *                pixAnd(pixf, pixf, pixs);  // the filtered image
00363  *          The closing turns text lines into long blobs, but does not
00364  *          significantly increase their height.  But if there are many
00365  *          small connected components in a dense texture, this is likely
00366  *          to generate tall components that will be eliminated in pixf.
00367  */
00368 l_int32
00369 pixGetWordsInTextlines(PIX     *pixs,
00370                        l_int32  reduction,
00371                        l_int32  minwidth,
00372                        l_int32  minheight,
00373                        l_int32  maxwidth,
00374                        l_int32  maxheight,
00375                        BOXA   **pboxad,
00376                        PIXA   **ppixad,
00377                        NUMA   **pnai)
00378 {
00379 l_int32  maxsize;
00380 BOXA    *boxa1, *boxa2, *boxa3, *boxad;
00381 BOXAA   *baa;
00382 NUMA    *nai;
00383 NUMAA   *naa;
00384 PIXA    *pixa1, *pixa2, *pixad;
00385 PIX     *pixt1, *pixt2;
00386 PIXAA   *paa;
00387 
00388     PROCNAME("pixGetWordsInTextlines");
00389 
00390     if (!pboxad || !ppixad || !pnai)
00391         return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1);
00392     *pboxad = NULL;
00393     *ppixad = NULL;
00394     *pnai = NULL;
00395     if (!pixs)
00396         return ERROR_INT("pixs not defined", procName, 1);
00397     if (reduction != 1 && reduction != 2)
00398         return ERROR_INT("reduction not in {1,2}", procName, 1);
00399 
00400     if (reduction == 1) {
00401         pixt1 = pixClone(pixs);
00402         maxsize = 14;
00403     }
00404     else {  /* reduction == 2 */
00405         pixt1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
00406         maxsize = 7;
00407     }
00408 
00409         /* First estimate of the word masks */
00410     pixt2 = pixWordMaskByDilation(pixt1, maxsize, NULL);
00411 
00412         /* Get the bounding boxes of the words. First remove the
00413          * small ones, which can be due to punctuation that was
00414          * not joined to a word.  Then remove the large ones, which are
00415          * also not likely to be words.  Here, pixa1 contains
00416          * the masks over each word.  */
00417     boxa1 = pixConnComp(pixt2, NULL, 8);
00418     boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, L_SELECT_IF_BOTH,
00419                              L_SELECT_IF_GTE, NULL);
00420     boxa3 = boxaSelectBySize(boxa2, maxwidth, maxheight, L_SELECT_IF_BOTH,
00421                              L_SELECT_IF_LTE, NULL);
00422     pixa1 = pixaCreateFromBoxa(pixt2, boxa3, NULL);
00423 
00424         /* Generate a pixa of the actual word images, not the mask images. */
00425     pixa2 = pixaClipToPix(pixa1, pixt1);
00426 
00427         /* Sort the bounding boxes of these words, saving the
00428          * index mapping that will allow us to sort the pixa identically. */
00429     baa = boxaSort2d(boxa3, &naa, -1, -1, 4);
00430     paa = pixaSort2dByIndex(pixa2, naa, L_CLONE);
00431 
00432         /* Flatten the word pixa */
00433     pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE);
00434     boxad = pixaGetBoxa(pixad, L_COPY);
00435 
00436     *pnai = nai;
00437     *pboxad = boxad;
00438     *ppixad = pixad;
00439 
00440     pixDestroy(&pixt1);
00441     pixDestroy(&pixt2);
00442     pixaDestroy(&pixa1);
00443     pixaDestroy(&pixa2);
00444     boxaDestroy(&boxa1);
00445     boxaDestroy(&boxa2);
00446     boxaDestroy(&boxa3);
00447     boxaaDestroy(&baa);
00448     pixaaDestroy(&paa);
00449     numaaDestroy(&naa);
00450     return 0;
00451 }
00452 
00453 
00454 /*!
00455  *  pixGetWordBoxesInTextlines()
00456  *
00457  *      Input:  pixs (1 bpp, 300 ppi)
00458  *              reduction (1 for full res; 2 for half-res)
00459  *              minwidth, minheight (of saved components; smaller are discarded)
00460  *              maxwidth, maxheight (of saved components; larger are discarded)
00461  *              &boxad (<return> word boxes sorted in textline line order)
00462  *              &naindex (<return> index of textline for each word)
00463  *      Return: 0 if OK, 1 on error
00464  *
00465  *  Notes:
00466  *      (1) The input should be at a resolution of about 300 ppi.
00467  *          The word masks can be computed at either 150 ppi or 300 ppi.
00468  *          For the former, set reduction = 2.
00469  *      (2) In an actual application, it may be desirable to pre-filter
00470  *          the input image to remove large components, to extract
00471  *          single columns of text, and to deskew them.
00472  *      (3) This is a special version that just finds the word boxes
00473  *          in line order, with a numa giving the textline index for
00474  *          each word.  See pixGetWordsInTextlines() for more details.
00475  */
00476 l_int32
00477 pixGetWordBoxesInTextlines(PIX     *pixs,
00478                            l_int32  reduction,
00479                            l_int32  minwidth,
00480                            l_int32  minheight,
00481                            l_int32  maxwidth,
00482                            l_int32  maxheight,
00483                            BOXA   **pboxad,
00484                            NUMA   **pnai)
00485 {
00486 l_int32  maxsize;
00487 BOXA    *boxa1, *boxa2, *boxa3, *boxad;
00488 BOXAA   *baa;
00489 NUMA    *nai;
00490 PIX     *pixt1, *pixt2;
00491 
00492     PROCNAME("pixGetWordBoxesInTextlines");
00493 
00494     if (!pboxad || !pnai)
00495         return ERROR_INT("&boxad and &nai not both defined", procName, 1);
00496     *pboxad = NULL;
00497     *pnai = NULL;
00498     if (!pixs)
00499         return ERROR_INT("pixs not defined", procName, 1);
00500     if (reduction != 1 && reduction != 2)
00501         return ERROR_INT("reduction not in {1,2}", procName, 1);
00502 
00503     if (reduction == 1) {
00504         pixt1 = pixClone(pixs);
00505         maxsize = 14;
00506     }
00507     else {  /* reduction == 2 */
00508         pixt1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
00509         maxsize = 7;
00510     }
00511 
00512         /* First estimate of the word masks */
00513     pixt2 = pixWordMaskByDilation(pixt1, maxsize, NULL);
00514 
00515         /* Get the bounding boxes of the words, and remove the
00516          * small ones, which can be due to punctuation that was
00517          * not joined to a word, and the large ones, which are
00518          * also not likely to be words. */
00519     boxa1 = pixConnComp(pixt2, NULL, 8);
00520     boxa2 = boxaSelectBySize(boxa1, minwidth, minheight,
00521                              L_SELECT_IF_BOTH, L_SELECT_IF_GTE, NULL);
00522     boxa3 = boxaSelectBySize(boxa2, maxwidth, maxheight,
00523                              L_SELECT_IF_BOTH, L_SELECT_IF_LTE, NULL);
00524 
00525         /* 2D sort the bounding boxes of these words. */
00526     baa = boxaSort2d(boxa3, NULL, 3, -5, 5);
00527 
00528         /* Flatten the boxaa, saving the boxa index for each box */
00529     boxad = boxaaFlattenToBoxa(baa, &nai, L_CLONE);
00530 
00531     *pnai = nai;
00532     *pboxad = boxad;
00533 
00534     pixDestroy(&pixt1);
00535     pixDestroy(&pixt2);
00536     boxaDestroy(&boxa1);
00537     boxaDestroy(&boxa2);
00538     boxaDestroy(&boxa3);
00539     boxaaDestroy(&baa);
00540     return 0;
00541 }
00542 
00543 
00544 /*------------------------------------------------------------------*
00545  *           Use word bounding boxes to compare page images         *
00546  *------------------------------------------------------------------*/
00547 /*!
00548  *  boxaExtractSortedPattern()
00549  *
00550  *      Input:  boxa (typ. of word bounding boxes, in textline order)
00551  *              numa (index of textline for each box in boxa)
00552  *      Return: naa (numaa, where each numa represents one textline),
00553  *                   or null on error
00554  *
00555  *  Notes:
00556  *      (1) The input is expected to come from pixGetWordBoxesInTextlines().
00557  *      (2) Each numa in the output consists of an average y coordinate
00558  *          of the first box in the textline, followed by pairs of 
00559  *          x coordinates representing the left and right edges of each
00560  *          of the boxes in the textline.
00561  */
00562 NUMAA *
00563 boxaExtractSortedPattern(BOXA  *boxa,
00564                          NUMA  *na)
00565 {
00566 l_int32  index, nbox, row, prevrow, x, y, w, h;
00567 BOX     *box;
00568 NUMA    *nad;
00569 NUMAA   *naa;
00570 
00571     PROCNAME("boxaExtractSortedPattern");
00572 
00573     if (!boxa)
00574         return (NUMAA *)ERROR_PTR("boxa not defined", procName, NULL);
00575     if (!na)
00576         return (NUMAA *)ERROR_PTR("na not defined", procName, NULL);
00577 
00578     naa = numaaCreate(0);
00579     nbox = boxaGetCount(boxa);
00580     if (nbox == 0)
00581         return naa;
00582 
00583     prevrow = -1;
00584     for (index = 0; index < nbox; index++) {
00585         box = boxaGetBox(boxa, index, L_CLONE);
00586         numaGetIValue(na, index, &row);
00587         if (row > prevrow) {
00588             if (index > 0)
00589                 numaaAddNuma(naa, nad, L_INSERT);
00590             nad = numaCreate(0);
00591             prevrow = row;
00592             boxGetGeometry(box, NULL, &y, NULL, &h);
00593             numaAddNumber(nad, y + h / 2);
00594         }
00595         boxGetGeometry(box, &x, NULL, &w, NULL);
00596         numaAddNumber(nad, x);
00597         numaAddNumber(nad, x + w - 1);
00598         boxDestroy(&box);
00599     }
00600     numaaAddNuma(naa, nad, L_INSERT);
00601 
00602     return naa;
00603 }
00604 
00605 
00606 /*!
00607  *  numaaCompareImagesByBoxes()
00608  *
00609  *      Input:  naa1 (for image 1, formatted by boxaExtractSortedPattern())
00610  *              naa2 (ditto; for image 2)
00611  *              nperline (number of box regions to be used in each textline)
00612  *              nreq (number of complete row matches required)
00613  *              maxshiftx (max allowed x shift between two patterns, in pixels)
00614  *              maxshifty (max allowed y shift between two patterns, in pixels)
00615  *              delx (max allowed difference in x data, after alignment)
00616  *              dely (max allowed difference in y data, after alignment)
00617  *              &same (<return> 1 if @nreq row matches are found; 0 otherwise)
00618  *              debugflag (1 for debug output)
00619  *      Return: 0 if OK, 1 on error
00620  *
00621  *  Notes:
00622  *      (1) Each input numaa describes a set of sorted bounding boxes
00623  *          (sorted by textline and, within each textline, from
00624  *          left to right) in the images from which they are derived.
00625  *          See boxaExtractSortedPattern() for a description of the data
00626  *          format in each of the input numaa.
00627  *      (2) This function does an alignment between the input
00628  *          descriptions of bounding boxes for two images. The
00629  *          input parameter @nperline specifies the number of boxes
00630  *          to consider in each line when testing for a match, and
00631  *          @nreq is the required number of lines that must be well-aligned
00632  *          to get a match.
00633  *      (3) Testing by alignment has 3 steps:
00634  *          (a) Generating the location of word bounding boxes from the
00635  *              images (prior to calling this function).
00636  *          (b) Listing all possible pairs of aligned rows, based on
00637  *              tolerances in horizontal and vertical positions of
00638  *              the boxes.  Specifically, all pairs of rows are enumerated
00639  *              whose first @nperline boxes can be brought into close
00640  *              alignment, based on the delx parameter for boxes in the
00641  *              line and within the overall the @maxshiftx and @maxshifty
00642  *              constraints.
00643  *          (c) Each pair, starting with the first, is used to search
00644  *              for a set of @nreq - 1 other pairs that can all be aligned
00645  *              with a difference in global translation of not more
00646  *              than (@delx, @dely).
00647  */
00648 l_int32
00649 numaaCompareImagesByBoxes(NUMAA    *naa1,
00650                           NUMAA    *naa2,
00651                           l_int32   nperline,
00652                           l_int32   nreq,
00653                           l_int32   maxshiftx,
00654                           l_int32   maxshifty,
00655                           l_int32   delx,
00656                           l_int32   dely,
00657                           l_int32  *psame,
00658                           l_int32   debugflag)
00659 {
00660 l_int32   n1, n2, i, j, nbox, y1, y2, xl1, xl2;
00661 l_int32   shiftx, shifty, match;
00662 l_int32  *line1, *line2;  /* indicator for sufficient boxes in a line */
00663 l_int32  *yloc1, *yloc2;  /* arrays of y value for first box in a line */
00664 l_int32  *xleft1, *xleft2;  /* arrays of x value for left side of first box */
00665 NUMA     *na1, *na2, *nai1, *nai2, *nasx, *nasy;
00666 
00667     PROCNAME("numaaCompareImagesByBoxes");
00668 
00669     if (!psame)
00670         return ERROR_INT("&same not defined", procName, 1);
00671     *psame = 0;
00672     if (!naa1)
00673         return ERROR_INT("naa1 not defined", procName, 1);
00674     if (!naa2)
00675         return ERROR_INT("naa2 not defined", procName, 1);
00676     if (nperline < 1)
00677         return ERROR_INT("nperline < 1", procName, 1);
00678     if (nreq < 1)
00679         return ERROR_INT("nreq < 1", procName, 1);
00680 
00681     n1 = numaaGetCount(naa1);
00682     n2 = numaaGetCount(naa2);
00683     if (n1 < nreq || n2 < nreq)
00684         return 0;
00685 
00686         /* Find the lines in naa1 and naa2 with sufficient boxes.
00687          * Also, find the y-values for each of the lines, and the
00688          * LH x-values of the first box in each line. */
00689     line1 = (l_int32 *)CALLOC(n1, sizeof(l_int32));
00690     line2 = (l_int32 *)CALLOC(n2, sizeof(l_int32));
00691     yloc1 = (l_int32 *)CALLOC(n1, sizeof(l_int32));
00692     yloc2 = (l_int32 *)CALLOC(n2, sizeof(l_int32));
00693     xleft1 = (l_int32 *)CALLOC(n1, sizeof(l_int32));
00694     xleft2 = (l_int32 *)CALLOC(n2, sizeof(l_int32));
00695     for (i = 0; i < n1; i++) {
00696         na1 = numaaGetNuma(naa1, i, L_CLONE);
00697         numaGetIValue(na1, 0, yloc1 + i);
00698         numaGetIValue(na1, 1, xleft1 + i);
00699         nbox = (numaGetCount(na1) - 1) / 2;
00700         if (nbox >= nperline)
00701             line1[i] = 1;
00702         numaDestroy(&na1);
00703     }
00704     for (i = 0; i < n2; i++) {
00705         na2 = numaaGetNuma(naa2, i, L_CLONE);
00706         numaGetIValue(na2, 0, yloc2 + i);
00707         numaGetIValue(na2, 1, xleft2 + i);
00708         nbox = (numaGetCount(na2) - 1) / 2;
00709         if (nbox >= nperline)
00710             line2[i] = 1;
00711         numaDestroy(&na2);
00712     }
00713 
00714         /* Enumerate all possible line matches.  A 'possible' line
00715          * match is one where the x and y shifts for the first box
00716          * in each line are within the maxshiftx and maxshifty
00717          * constraints, and the left and right sides of the remaining
00718          * (nperline - 1) successive boxes are within delx of each other.
00719          * The result is a set of four numas giving parameters of
00720          * each set of matching lines. */
00721     nai1 = numaCreate(0);  /* line index 1 of match */
00722     nai2 = numaCreate(0);  /* line index 2 of match */
00723     nasx = numaCreate(0);  /* shiftx for match */
00724     nasy = numaCreate(0);  /* shifty for match */
00725     for (i = 0; i < n1; i++) {
00726         if (line1[i] == 0) continue;
00727         y1 = yloc1[i];
00728         xl1 = xleft1[i];
00729         na1 = numaaGetNuma(naa1, i, L_CLONE);
00730         for (j = 0; j < n2; j++) {
00731             if (line2[j] == 0) continue;
00732             y2 = yloc2[j];
00733             if (L_ABS(y1 - y2) > maxshifty) continue;
00734             xl2 = xleft2[j];
00735             if (L_ABS(xl1 - xl2) > maxshiftx) continue;
00736             shiftx = xl1 - xl2;  /* shift to add to x2 values */
00737             shifty = y1 - y2;  /* shift to add to y2 values */
00738             na2 = numaaGetNuma(naa2, j, L_CLONE);
00739 
00740                 /* Now check if 'nperline' boxes in the two lines match */
00741             match = testLineAlignmentX(na1, na2, shiftx, delx, nperline);
00742             if (match) {
00743                 numaAddNumber(nai1, i);
00744                 numaAddNumber(nai2, j);
00745                 numaAddNumber(nasx, shiftx);
00746                 numaAddNumber(nasy, shifty);
00747             }
00748             numaDestroy(&na2);
00749         }
00750         numaDestroy(&na1);
00751     }
00752 
00753         /* Determine if there are a sufficient number of mutually
00754          * aligned matches.  Mutually aligned matches place an additional
00755          * constraint on the 'possible' matches, where the relative
00756          * shifts must not exceed the (delx, dely) distances. */
00757     countAlignedMatches(nai1, nai2, nasx, nasy, n1, n2, delx, dely,
00758                         nreq, psame, debugflag);
00759 
00760     FREE(line1);
00761     FREE(line2);
00762     FREE(yloc1);
00763     FREE(yloc2);
00764     FREE(xleft1);
00765     FREE(xleft2);
00766     numaDestroy(&nai1);
00767     numaDestroy(&nai2);
00768     numaDestroy(&nasx);
00769     numaDestroy(&nasy);
00770     return 0;
00771 }
00772 
00773 
00774 static l_int32
00775 testLineAlignmentX(NUMA    *na1,
00776                    NUMA    *na2,
00777                    l_int32  shiftx,
00778                    l_int32  delx,
00779                    l_int32  nperline)
00780 {
00781 l_int32  i, xl1, xr1, xl2, xr2, diffl, diffr;
00782 
00783     PROCNAME("testLineAlignmentX");
00784 
00785     if (!na1)
00786         return ERROR_INT("na1 not defined", procName, 1);
00787     if (!na2)
00788         return ERROR_INT("na2 not defined", procName, 1);
00789 
00790     for (i = 0; i < nperline; i++) {
00791         numaGetIValue(na1, i + 1, &xl1);
00792         numaGetIValue(na1, i + 2, &xr1);
00793         numaGetIValue(na2, i + 1, &xl2);
00794         numaGetIValue(na2, i + 2, &xr2);
00795         diffl = L_ABS(xl1 - xl2 - shiftx);
00796         diffr = L_ABS(xr1 - xr2 - shiftx);
00797         if (diffl > delx || diffr > delx)
00798             return 0;
00799     }
00800 
00801     return 1;
00802 }
00803 
00804 
00805 /*
00806  *  countAlignedMatches()
00807  *      Input:  nai1, nai2 (numas of row pairs for matches)
00808  *              nasx, nasy (numas of x and y shifts for the matches)
00809  *              n1, n2 (number of rows in images 1 and 2)
00810  *              delx, dely (allowed difference in shifts of the match,
00811  *                          compared to the reference match)
00812  *              nreq (number of required aligned matches)
00813  *              &same (<return> 1 if @nreq row matches are found; 0 otherwise)
00814  *      Return: 0 if OK, 1 on error
00815  *
00816  *  Notes:
00817  *      (1) This takes 4 input arrays giving parameters of all the
00818  *          line matches.  It looks for the maximum set of aligned
00819  *          matches (matches with approximately the same overall shifts)
00820  *          that do not use rows from either image more than once.
00821  */
00822 static l_int32
00823 countAlignedMatches(NUMA     *nai1,
00824                     NUMA     *nai2,
00825                     NUMA     *nasx,
00826                     NUMA     *nasy,
00827                     l_int32   n1,
00828                     l_int32   n2,
00829                     l_int32   delx,
00830                     l_int32   dely,
00831                     l_int32   nreq,
00832                     l_int32  *psame,
00833                     l_int32   debugflag)
00834 {
00835 l_int32   i, j, nm, shiftx, shifty, nmatch, diffx, diffy;
00836 l_int32  *ia1, *ia2, *iasx, *iasy, *index1, *index2;
00837 
00838     PROCNAME("countAlignedMatches");
00839 
00840     if (!nai1 || !nai2 || !nasx || !nasy)
00841         return ERROR_INT("4 input numas not defined", procName, 1);
00842     if (!psame)
00843         return ERROR_INT("&same not defined", procName, 1);
00844     *psame = 0;
00845 
00846         /* Check for sufficient aligned matches, doing a double iteration
00847          * over the set of raw matches.  The row index arrays
00848          * are used to verify that the same rows in either image
00849          * are not used in more than one match.  Whenever there
00850          * is a match that is properly aligned, those rows are
00851          * marked in the index arrays.  */
00852     nm = numaGetCount(nai1);  /* number of matches */
00853     if (nm < nreq)
00854         return 0;
00855 
00856     ia1 = numaGetIArray(nai1);
00857     ia2 = numaGetIArray(nai2);
00858     iasx = numaGetIArray(nasx);
00859     iasy = numaGetIArray(nasy);
00860     index1 = (l_int32 *)CALLOC(n1, sizeof(l_int32));  /* keep track of rows */
00861     index2 = (l_int32 *)CALLOC(n2, sizeof(l_int32));
00862     for (i = 0; i < nm; i++) {
00863         if (*psame == 1)
00864             break;
00865 
00866             /* Reset row index arrays */
00867         memset(index1, 0, 4 * n1);
00868         memset(index2, 0, 4 * n2);
00869         nmatch = 1;
00870         index1[ia1[i]] = nmatch;  /* mark these rows as taken */
00871         index2[ia2[i]] = nmatch;
00872         shiftx = iasx[i];  /* reference shift between two rows */
00873         shifty = iasy[i];  /* ditto */
00874         if (nreq == 1) {
00875             *psame = 1;
00876             break;
00877         }
00878         for (j = 0; j < nm; j++) {
00879             if (j == i) continue;
00880                 /* Rows must both be different from any previously seen */
00881             if (index1[ia1[j]] > 0 || index2[ia2[j]] > 0) continue;
00882                 /* Check the shift for this match */
00883             diffx = L_ABS(shiftx - iasx[j]);
00884             diffy = L_ABS(shifty - iasy[j]);
00885             if (diffx > delx || diffy > dely) continue;
00886                 /* We have a match */   
00887             nmatch++;
00888             index1[ia1[j]] = nmatch;  /* mark the rows */
00889             index2[ia2[j]] = nmatch;
00890             if (nmatch >= nreq) {
00891                 *psame = 1;
00892                 if (debugflag)
00893                     printRowIndices(index1, n1, index2, n2);
00894                 break;
00895             }
00896         }
00897     }
00898         
00899     FREE(ia1);
00900     FREE(ia2);
00901     FREE(iasx);
00902     FREE(iasy);
00903     FREE(index1);
00904     FREE(index2);
00905     return 0;
00906 }
00907 
00908 
00909 static void
00910 printRowIndices(l_int32  *index1,
00911                 l_int32   n1,
00912                 l_int32  *index2,
00913                 l_int32   n2)
00914 {
00915 l_int32  i;
00916 
00917     fprintf(stderr, "Index1: ");
00918     for (i = 0; i < n1; i++) {
00919         if (i && (i % 20 == 0))
00920             fprintf(stderr, "\n        ");
00921         fprintf(stderr, "%3d", index1[i]);
00922     }
00923     fprintf(stderr, "\n");
00924 
00925     fprintf(stderr, "Index2: ");
00926     for (i = 0; i < n2; i++) {
00927         if (i && (i % 20 == 0))
00928             fprintf(stderr, "\n        ");
00929         fprintf(stderr, "%3d", index2[i]);
00930     }
00931     fprintf(stderr, "\n");
00932     return;
00933 }
00934 
00935 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines