Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 00017 /* 00018 * classapp.c 00019 * 00020 * Top-level jb2 correlation and rank-hausdorff 00021 * 00022 * l_int32 jbCorrelation() 00023 * l_int32 jbRankHaus() 00024 * 00025 * Extract and classify words in textline order 00026 * 00027 * JBCLASSER *jbWordsInTextlines() 00028 * l_int32 pixGetWordsInTextlines() 00029 * l_int32 pixGetWordBoxesInTextlines() 00030 * 00031 * Use word bounding boxes to compare page images 00032 * 00033 * NUMAA *boxaExtractSortedPattern() 00034 * l_int32 numaaCompareImagesByBoxes() 00035 * static l_int32 testLineAlignmentX() 00036 * static l_int32 countAlignedMatches() 00037 * static void printRowIndices() 00038 */ 00039 00040 #include <string.h> 00041 #include "allheaders.h" 00042 00043 static const l_int32 JB_WORDS_MIN_WIDTH = 5; /* pixels */ 00044 static const l_int32 JB_WORDS_MIN_HEIGHT = 3; /* pixels */ 00045 00046 /* MSVC can't handle arrays dimensioned by static const integers */ 00047 #define L_BUF_SIZE 512 00048 00049 /* Static comparison functions */ 00050 static l_int32 testLineAlignmentX(NUMA *na1, NUMA *na2, l_int32 shiftx, 00051 l_int32 delx, l_int32 nperline); 00052 static l_int32 countAlignedMatches(NUMA *nai1, NUMA *nai2, NUMA *nasx, 00053 NUMA *nasy, l_int32 n1, l_int32 n2, 00054 l_int32 delx, l_int32 dely, 00055 l_int32 nreq, l_int32 *psame, 00056 l_int32 debugflag); 00057 static void printRowIndices(l_int32 *index1, l_int32 n1, 00058 l_int32 *index2, l_int32 n2); 00059 00060 00061 /*------------------------------------------------------------------* 00062 * Top-level jb2 correlation and rank-hausdorff * 00063 *------------------------------------------------------------------*/ 00064 /*! 00065 * jbCorrelation() 00066 * 00067 * Input: dirin (directory of input images) 00068 * thresh (typically ~0.8) 00069 * weight (typically ~0.6) 00070 * components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) 00071 * rootname (for output files) 00072 * firstpage (0-based) 00073 * npages (use 0 for all pages in dirin) 00074 * renderflag (1 to render from templates; 0 to skip) 00075 * Return: 0 if OK, 1 on error 00076 * 00077 * Notes: 00078 * (1) The images must be 1 bpp. If they are not, you can convert 00079 * them using convertFilesTo1bpp(). 00080 * (2) See prog/jbcorrelation for generating more output (e.g., 00081 * for debugging) 00082 */ 00083 l_int32 00084 jbCorrelation(const char *dirin, 00085 l_float32 thresh, 00086 l_float32 weight, 00087 l_int32 components, 00088 const char *rootname, 00089 l_int32 firstpage, 00090 l_int32 npages, 00091 l_int32 renderflag) 00092 { 00093 char filename[L_BUF_SIZE]; 00094 l_int32 nfiles, i, numpages; 00095 JBDATA *data; 00096 JBCLASSER *classer; 00097 PIX *pix; 00098 PIXA *pixa; 00099 SARRAY *safiles; 00100 00101 PROCNAME("jbCorrelation"); 00102 00103 if (!dirin) 00104 return ERROR_INT("dirin not defined", procName, 1); 00105 if (!rootname) 00106 return ERROR_INT("rootname not defined", procName, 1); 00107 if (components != JB_CONN_COMPS && components != JB_CHARACTERS && 00108 components != JB_WORDS) 00109 return ERROR_INT("components invalid", procName, 1); 00110 00111 safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); 00112 nfiles = sarrayGetCount(safiles); 00113 00114 /* Classify components */ 00115 classer = jbCorrelationInit(components, 0, 0, thresh, weight); 00116 jbAddPages(classer, safiles); 00117 00118 /* Save data */ 00119 data = jbDataSave(classer); 00120 jbDataWrite(rootname, data); 00121 00122 /* Optionally, render pages using class templates */ 00123 if (renderflag) { 00124 pixa = jbDataRender(data, FALSE); 00125 numpages = pixaGetCount(pixa); 00126 if (numpages != nfiles) 00127 fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n", 00128 numpages, nfiles); 00129 for (i = 0; i < numpages; i++) { 00130 pix = pixaGetPix(pixa, i, L_CLONE); 00131 snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i); 00132 fprintf(stderr, "filename: %s\n", filename); 00133 pixWrite(filename, pix, IFF_PNG); 00134 pixDestroy(&pix); 00135 } 00136 pixaDestroy(&pixa); 00137 } 00138 00139 sarrayDestroy(&safiles); 00140 jbClasserDestroy(&classer); 00141 jbDataDestroy(&data); 00142 return 0; 00143 } 00144 00145 00146 /*! 00147 * jbRankHaus() 00148 * 00149 * Input: dirin (directory of input images) 00150 * size (of Sel used for dilation; typ. 2) 00151 * rank (rank value of match; typ. 0.97) 00152 * components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) 00153 * rootname (for output files) 00154 * firstpage (0-based) 00155 * npages (use 0 for all pages in dirin) 00156 * renderflag (1 to render from templates; 0 to skip) 00157 * Return: 0 if OK, 1 on error 00158 * 00159 * Notes: 00160 * (1) See prog/jbrankhaus for generating more output (e.g., 00161 * for debugging) 00162 */ 00163 l_int32 00164 jbRankHaus(const char *dirin, 00165 l_int32 size, 00166 l_float32 rank, 00167 l_int32 components, 00168 const char *rootname, 00169 l_int32 firstpage, 00170 l_int32 npages, 00171 l_int32 renderflag) 00172 { 00173 char filename[L_BUF_SIZE]; 00174 l_int32 nfiles, i, numpages; 00175 JBDATA *data; 00176 JBCLASSER *classer; 00177 PIX *pix; 00178 PIXA *pixa; 00179 SARRAY *safiles; 00180 00181 PROCNAME("jbRankHaus"); 00182 00183 if (!dirin) 00184 return ERROR_INT("dirin not defined", procName, 1); 00185 if (!rootname) 00186 return ERROR_INT("rootname not defined", procName, 1); 00187 if (components != JB_CONN_COMPS && components != JB_CHARACTERS && 00188 components != JB_WORDS) 00189 return ERROR_INT("components invalid", procName, 1); 00190 00191 safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); 00192 nfiles = sarrayGetCount(safiles); 00193 00194 /* Classify components */ 00195 classer = jbRankHausInit(components, 0, 0, size, rank); 00196 jbAddPages(classer, safiles); 00197 00198 /* Save data */ 00199 data = jbDataSave(classer); 00200 jbDataWrite(rootname, data); 00201 00202 /* Optionally, render pages using class templates */ 00203 if (renderflag) { 00204 pixa = jbDataRender(data, FALSE); 00205 numpages = pixaGetCount(pixa); 00206 if (numpages != nfiles) 00207 fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n", 00208 numpages, nfiles); 00209 for (i = 0; i < numpages; i++) { 00210 pix = pixaGetPix(pixa, i, L_CLONE); 00211 snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i); 00212 fprintf(stderr, "filename: %s\n", filename); 00213 pixWrite(filename, pix, IFF_PNG); 00214 pixDestroy(&pix); 00215 } 00216 pixaDestroy(&pixa); 00217 } 00218 00219 sarrayDestroy(&safiles); 00220 jbClasserDestroy(&classer); 00221 jbDataDestroy(&data); 00222 return 0; 00223 } 00224 00225 00226 00227 /*------------------------------------------------------------------* 00228 * Extract and classify words in textline order * 00229 *------------------------------------------------------------------*/ 00230 /*! 00231 * jbWordsInTextlines() 00232 * 00233 * Input: dirin (directory of input pages) 00234 * reduction (1 for full res; 2 for half-res) 00235 * maxwidth (of word mask components, to be kept) 00236 * maxheight (of word mask components, to be kept) 00237 * thresh (on correlation; 0.80 is reasonable) 00238 * weight (for handling thick text; 0.6 is reasonable) 00239 * natl (<return> numa with textline index for each component) 00240 * firstpage (0-based) 00241 * npages (use 0 for all pages in dirin) 00242 * Return: classer (for the set of pages) 00243 * 00244 * Notes: 00245 * (1) This is a high-level function. See prog/jbwords for example 00246 * of usage. 00247 * (2) Typically, words can be found reasonably well at a resolution 00248 * of about 150 ppi. For highest accuracy, you should use 300 ppi. 00249 * Assuming that the input images are 300 ppi, use reduction = 1 00250 * for finding words at full res, and reduction = 2 for finding 00251 * them at 150 ppi. 00252 */ 00253 JBCLASSER * 00254 jbWordsInTextlines(const char *dirin, 00255 l_int32 reduction, 00256 l_int32 maxwidth, 00257 l_int32 maxheight, 00258 l_float32 thresh, 00259 l_float32 weight, 00260 NUMA **pnatl, 00261 l_int32 firstpage, 00262 l_int32 npages) 00263 { 00264 char *fname; 00265 l_int32 nfiles, i, w, h; 00266 BOXA *boxa; 00267 JBCLASSER *classer; 00268 NUMA *nai, *natl; 00269 PIX *pix; 00270 PIXA *pixa; 00271 SARRAY *safiles; 00272 00273 PROCNAME("jbWordsInTextlines"); 00274 00275 if (!pnatl) 00276 return (JBCLASSER *)ERROR_PTR("&natl not defined", procName, NULL); 00277 *pnatl = NULL; 00278 if (!dirin) 00279 return (JBCLASSER *)ERROR_PTR("dirin not defined", procName, NULL); 00280 if (reduction != 1 && reduction != 2) 00281 return (JBCLASSER *)ERROR_PTR("reduction not in {1,2}", procName, NULL); 00282 00283 safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); 00284 nfiles = sarrayGetCount(safiles); 00285 00286 /* Classify components */ 00287 classer = jbCorrelationInit(JB_WORDS, maxwidth, maxheight, thresh, weight); 00288 classer->safiles = sarrayCopy(safiles); 00289 natl = numaCreate(0); 00290 *pnatl = natl; 00291 for (i = 0; i < nfiles; i++) { 00292 fname = sarrayGetString(safiles, i, 0); 00293 if ((pix = pixRead(fname)) == NULL) { 00294 L_WARNING_INT("image file %d not read", procName, i); 00295 continue; 00296 } 00297 pixGetDimensions(pix, &w, &h, NULL); 00298 if (reduction == 1) { 00299 classer->w = w; 00300 classer->h = h; 00301 } 00302 else { /* reduction == 2 */ 00303 classer->w = w / 2; 00304 classer->h = h / 2; 00305 } 00306 pixGetWordsInTextlines(pix, reduction, JB_WORDS_MIN_WIDTH, 00307 JB_WORDS_MIN_HEIGHT, maxwidth, maxheight, 00308 &boxa, &pixa, &nai); 00309 jbAddPageComponents(classer, pix, boxa, pixa); 00310 numaJoin(natl, nai, 0, 0); 00311 pixDestroy(&pix); 00312 numaDestroy(&nai); 00313 boxaDestroy(&boxa); 00314 pixaDestroy(&pixa); 00315 } 00316 00317 sarrayDestroy(&safiles); 00318 return classer; 00319 } 00320 00321 00322 /*! 00323 * pixGetWordsInTextlines() 00324 * 00325 * Input: pixs (1 bpp, 300 ppi) 00326 * reduction (1 for full res; 2 for half-res) 00327 * minwidth, minheight (of saved components; smaller are discarded) 00328 * maxwidth, maxheight (of saved components; larger are discarded) 00329 * &boxad (<return> word boxes sorted in textline line order) 00330 * &pixad (<return> word images sorted in textline line order) 00331 * &naindex (<return> index of textline for each word) 00332 * Return: 0 if OK, 1 on error 00333 * 00334 * Notes: 00335 * (1) The input should be at a resolution of about 300 ppi. 00336 * The word masks can be computed at either 150 ppi or 300 ppi. 00337 * For the former, set reduction = 2. 00338 * (2) The four size constraints on saved components are all 00339 * used at 2x reduction. 00340 * (3) The result are word images (and their b.b.), extracted in 00341 * textline order, all at 2x reduction, and with a numa giving 00342 * the textline index for each word. 00343 * (4) The pixa and boxa interfaces should make this type of 00344 * application simple to put together. The steps are: 00345 * - generate first estimate of word masks 00346 * - get b.b. of these, and remove the small and big ones 00347 * - extract pixa of the word mask from these boxes 00348 * - extract pixa of the actual word images, using word masks 00349 * - sort actual word images in textline order (2d) 00350 * - flatten them to a pixa (1d), saving the textline index 00351 * for each pix 00352 * (5) In an actual application, it may be desirable to pre-filter 00353 * the input image to remove large components, to extract 00354 * single columns of text, and to deskew them. For example, 00355 * to remove both large components and small noisy components 00356 * that can interfere with the statistics used to estimate 00357 * parameters for segmenting by words, but still retain text lines, 00358 * the following image preprocessing can be done: 00359 * Pix *pixt = pixMorphSequence(pixs, "c40.1", 0); 00360 * Pix *pixf = pixSelectBySize(pixt, 0, 60, 8, 00361 * L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL); 00362 * pixAnd(pixf, pixf, pixs); // the filtered image 00363 * The closing turns text lines into long blobs, but does not 00364 * significantly increase their height. But if there are many 00365 * small connected components in a dense texture, this is likely 00366 * to generate tall components that will be eliminated in pixf. 00367 */ 00368 l_int32 00369 pixGetWordsInTextlines(PIX *pixs, 00370 l_int32 reduction, 00371 l_int32 minwidth, 00372 l_int32 minheight, 00373 l_int32 maxwidth, 00374 l_int32 maxheight, 00375 BOXA **pboxad, 00376 PIXA **ppixad, 00377 NUMA **pnai) 00378 { 00379 l_int32 maxsize; 00380 BOXA *boxa1, *boxa2, *boxa3, *boxad; 00381 BOXAA *baa; 00382 NUMA *nai; 00383 NUMAA *naa; 00384 PIXA *pixa1, *pixa2, *pixad; 00385 PIX *pixt1, *pixt2; 00386 PIXAA *paa; 00387 00388 PROCNAME("pixGetWordsInTextlines"); 00389 00390 if (!pboxad || !ppixad || !pnai) 00391 return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1); 00392 *pboxad = NULL; 00393 *ppixad = NULL; 00394 *pnai = NULL; 00395 if (!pixs) 00396 return ERROR_INT("pixs not defined", procName, 1); 00397 if (reduction != 1 && reduction != 2) 00398 return ERROR_INT("reduction not in {1,2}", procName, 1); 00399 00400 if (reduction == 1) { 00401 pixt1 = pixClone(pixs); 00402 maxsize = 14; 00403 } 00404 else { /* reduction == 2 */ 00405 pixt1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); 00406 maxsize = 7; 00407 } 00408 00409 /* First estimate of the word masks */ 00410 pixt2 = pixWordMaskByDilation(pixt1, maxsize, NULL); 00411 00412 /* Get the bounding boxes of the words. First remove the 00413 * small ones, which can be due to punctuation that was 00414 * not joined to a word. Then remove the large ones, which are 00415 * also not likely to be words. Here, pixa1 contains 00416 * the masks over each word. */ 00417 boxa1 = pixConnComp(pixt2, NULL, 8); 00418 boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, L_SELECT_IF_BOTH, 00419 L_SELECT_IF_GTE, NULL); 00420 boxa3 = boxaSelectBySize(boxa2, maxwidth, maxheight, L_SELECT_IF_BOTH, 00421 L_SELECT_IF_LTE, NULL); 00422 pixa1 = pixaCreateFromBoxa(pixt2, boxa3, NULL); 00423 00424 /* Generate a pixa of the actual word images, not the mask images. */ 00425 pixa2 = pixaClipToPix(pixa1, pixt1); 00426 00427 /* Sort the bounding boxes of these words, saving the 00428 * index mapping that will allow us to sort the pixa identically. */ 00429 baa = boxaSort2d(boxa3, &naa, -1, -1, 4); 00430 paa = pixaSort2dByIndex(pixa2, naa, L_CLONE); 00431 00432 /* Flatten the word pixa */ 00433 pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE); 00434 boxad = pixaGetBoxa(pixad, L_COPY); 00435 00436 *pnai = nai; 00437 *pboxad = boxad; 00438 *ppixad = pixad; 00439 00440 pixDestroy(&pixt1); 00441 pixDestroy(&pixt2); 00442 pixaDestroy(&pixa1); 00443 pixaDestroy(&pixa2); 00444 boxaDestroy(&boxa1); 00445 boxaDestroy(&boxa2); 00446 boxaDestroy(&boxa3); 00447 boxaaDestroy(&baa); 00448 pixaaDestroy(&paa); 00449 numaaDestroy(&naa); 00450 return 0; 00451 } 00452 00453 00454 /*! 00455 * pixGetWordBoxesInTextlines() 00456 * 00457 * Input: pixs (1 bpp, 300 ppi) 00458 * reduction (1 for full res; 2 for half-res) 00459 * minwidth, minheight (of saved components; smaller are discarded) 00460 * maxwidth, maxheight (of saved components; larger are discarded) 00461 * &boxad (<return> word boxes sorted in textline line order) 00462 * &naindex (<return> index of textline for each word) 00463 * Return: 0 if OK, 1 on error 00464 * 00465 * Notes: 00466 * (1) The input should be at a resolution of about 300 ppi. 00467 * The word masks can be computed at either 150 ppi or 300 ppi. 00468 * For the former, set reduction = 2. 00469 * (2) In an actual application, it may be desirable to pre-filter 00470 * the input image to remove large components, to extract 00471 * single columns of text, and to deskew them. 00472 * (3) This is a special version that just finds the word boxes 00473 * in line order, with a numa giving the textline index for 00474 * each word. See pixGetWordsInTextlines() for more details. 00475 */ 00476 l_int32 00477 pixGetWordBoxesInTextlines(PIX *pixs, 00478 l_int32 reduction, 00479 l_int32 minwidth, 00480 l_int32 minheight, 00481 l_int32 maxwidth, 00482 l_int32 maxheight, 00483 BOXA **pboxad, 00484 NUMA **pnai) 00485 { 00486 l_int32 maxsize; 00487 BOXA *boxa1, *boxa2, *boxa3, *boxad; 00488 BOXAA *baa; 00489 NUMA *nai; 00490 PIX *pixt1, *pixt2; 00491 00492 PROCNAME("pixGetWordBoxesInTextlines"); 00493 00494 if (!pboxad || !pnai) 00495 return ERROR_INT("&boxad and &nai not both defined", procName, 1); 00496 *pboxad = NULL; 00497 *pnai = NULL; 00498 if (!pixs) 00499 return ERROR_INT("pixs not defined", procName, 1); 00500 if (reduction != 1 && reduction != 2) 00501 return ERROR_INT("reduction not in {1,2}", procName, 1); 00502 00503 if (reduction == 1) { 00504 pixt1 = pixClone(pixs); 00505 maxsize = 14; 00506 } 00507 else { /* reduction == 2 */ 00508 pixt1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); 00509 maxsize = 7; 00510 } 00511 00512 /* First estimate of the word masks */ 00513 pixt2 = pixWordMaskByDilation(pixt1, maxsize, NULL); 00514 00515 /* Get the bounding boxes of the words, and remove the 00516 * small ones, which can be due to punctuation that was 00517 * not joined to a word, and the large ones, which are 00518 * also not likely to be words. */ 00519 boxa1 = pixConnComp(pixt2, NULL, 8); 00520 boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, 00521 L_SELECT_IF_BOTH, L_SELECT_IF_GTE, NULL); 00522 boxa3 = boxaSelectBySize(boxa2, maxwidth, maxheight, 00523 L_SELECT_IF_BOTH, L_SELECT_IF_LTE, NULL); 00524 00525 /* 2D sort the bounding boxes of these words. */ 00526 baa = boxaSort2d(boxa3, NULL, 3, -5, 5); 00527 00528 /* Flatten the boxaa, saving the boxa index for each box */ 00529 boxad = boxaaFlattenToBoxa(baa, &nai, L_CLONE); 00530 00531 *pnai = nai; 00532 *pboxad = boxad; 00533 00534 pixDestroy(&pixt1); 00535 pixDestroy(&pixt2); 00536 boxaDestroy(&boxa1); 00537 boxaDestroy(&boxa2); 00538 boxaDestroy(&boxa3); 00539 boxaaDestroy(&baa); 00540 return 0; 00541 } 00542 00543 00544 /*------------------------------------------------------------------* 00545 * Use word bounding boxes to compare page images * 00546 *------------------------------------------------------------------*/ 00547 /*! 00548 * boxaExtractSortedPattern() 00549 * 00550 * Input: boxa (typ. of word bounding boxes, in textline order) 00551 * numa (index of textline for each box in boxa) 00552 * Return: naa (numaa, where each numa represents one textline), 00553 * or null on error 00554 * 00555 * Notes: 00556 * (1) The input is expected to come from pixGetWordBoxesInTextlines(). 00557 * (2) Each numa in the output consists of an average y coordinate 00558 * of the first box in the textline, followed by pairs of 00559 * x coordinates representing the left and right edges of each 00560 * of the boxes in the textline. 00561 */ 00562 NUMAA * 00563 boxaExtractSortedPattern(BOXA *boxa, 00564 NUMA *na) 00565 { 00566 l_int32 index, nbox, row, prevrow, x, y, w, h; 00567 BOX *box; 00568 NUMA *nad; 00569 NUMAA *naa; 00570 00571 PROCNAME("boxaExtractSortedPattern"); 00572 00573 if (!boxa) 00574 return (NUMAA *)ERROR_PTR("boxa not defined", procName, NULL); 00575 if (!na) 00576 return (NUMAA *)ERROR_PTR("na not defined", procName, NULL); 00577 00578 naa = numaaCreate(0); 00579 nbox = boxaGetCount(boxa); 00580 if (nbox == 0) 00581 return naa; 00582 00583 prevrow = -1; 00584 for (index = 0; index < nbox; index++) { 00585 box = boxaGetBox(boxa, index, L_CLONE); 00586 numaGetIValue(na, index, &row); 00587 if (row > prevrow) { 00588 if (index > 0) 00589 numaaAddNuma(naa, nad, L_INSERT); 00590 nad = numaCreate(0); 00591 prevrow = row; 00592 boxGetGeometry(box, NULL, &y, NULL, &h); 00593 numaAddNumber(nad, y + h / 2); 00594 } 00595 boxGetGeometry(box, &x, NULL, &w, NULL); 00596 numaAddNumber(nad, x); 00597 numaAddNumber(nad, x + w - 1); 00598 boxDestroy(&box); 00599 } 00600 numaaAddNuma(naa, nad, L_INSERT); 00601 00602 return naa; 00603 } 00604 00605 00606 /*! 00607 * numaaCompareImagesByBoxes() 00608 * 00609 * Input: naa1 (for image 1, formatted by boxaExtractSortedPattern()) 00610 * naa2 (ditto; for image 2) 00611 * nperline (number of box regions to be used in each textline) 00612 * nreq (number of complete row matches required) 00613 * maxshiftx (max allowed x shift between two patterns, in pixels) 00614 * maxshifty (max allowed y shift between two patterns, in pixels) 00615 * delx (max allowed difference in x data, after alignment) 00616 * dely (max allowed difference in y data, after alignment) 00617 * &same (<return> 1 if @nreq row matches are found; 0 otherwise) 00618 * debugflag (1 for debug output) 00619 * Return: 0 if OK, 1 on error 00620 * 00621 * Notes: 00622 * (1) Each input numaa describes a set of sorted bounding boxes 00623 * (sorted by textline and, within each textline, from 00624 * left to right) in the images from which they are derived. 00625 * See boxaExtractSortedPattern() for a description of the data 00626 * format in each of the input numaa. 00627 * (2) This function does an alignment between the input 00628 * descriptions of bounding boxes for two images. The 00629 * input parameter @nperline specifies the number of boxes 00630 * to consider in each line when testing for a match, and 00631 * @nreq is the required number of lines that must be well-aligned 00632 * to get a match. 00633 * (3) Testing by alignment has 3 steps: 00634 * (a) Generating the location of word bounding boxes from the 00635 * images (prior to calling this function). 00636 * (b) Listing all possible pairs of aligned rows, based on 00637 * tolerances in horizontal and vertical positions of 00638 * the boxes. Specifically, all pairs of rows are enumerated 00639 * whose first @nperline boxes can be brought into close 00640 * alignment, based on the delx parameter for boxes in the 00641 * line and within the overall the @maxshiftx and @maxshifty 00642 * constraints. 00643 * (c) Each pair, starting with the first, is used to search 00644 * for a set of @nreq - 1 other pairs that can all be aligned 00645 * with a difference in global translation of not more 00646 * than (@delx, @dely). 00647 */ 00648 l_int32 00649 numaaCompareImagesByBoxes(NUMAA *naa1, 00650 NUMAA *naa2, 00651 l_int32 nperline, 00652 l_int32 nreq, 00653 l_int32 maxshiftx, 00654 l_int32 maxshifty, 00655 l_int32 delx, 00656 l_int32 dely, 00657 l_int32 *psame, 00658 l_int32 debugflag) 00659 { 00660 l_int32 n1, n2, i, j, nbox, y1, y2, xl1, xl2; 00661 l_int32 shiftx, shifty, match; 00662 l_int32 *line1, *line2; /* indicator for sufficient boxes in a line */ 00663 l_int32 *yloc1, *yloc2; /* arrays of y value for first box in a line */ 00664 l_int32 *xleft1, *xleft2; /* arrays of x value for left side of first box */ 00665 NUMA *na1, *na2, *nai1, *nai2, *nasx, *nasy; 00666 00667 PROCNAME("numaaCompareImagesByBoxes"); 00668 00669 if (!psame) 00670 return ERROR_INT("&same not defined", procName, 1); 00671 *psame = 0; 00672 if (!naa1) 00673 return ERROR_INT("naa1 not defined", procName, 1); 00674 if (!naa2) 00675 return ERROR_INT("naa2 not defined", procName, 1); 00676 if (nperline < 1) 00677 return ERROR_INT("nperline < 1", procName, 1); 00678 if (nreq < 1) 00679 return ERROR_INT("nreq < 1", procName, 1); 00680 00681 n1 = numaaGetCount(naa1); 00682 n2 = numaaGetCount(naa2); 00683 if (n1 < nreq || n2 < nreq) 00684 return 0; 00685 00686 /* Find the lines in naa1 and naa2 with sufficient boxes. 00687 * Also, find the y-values for each of the lines, and the 00688 * LH x-values of the first box in each line. */ 00689 line1 = (l_int32 *)CALLOC(n1, sizeof(l_int32)); 00690 line2 = (l_int32 *)CALLOC(n2, sizeof(l_int32)); 00691 yloc1 = (l_int32 *)CALLOC(n1, sizeof(l_int32)); 00692 yloc2 = (l_int32 *)CALLOC(n2, sizeof(l_int32)); 00693 xleft1 = (l_int32 *)CALLOC(n1, sizeof(l_int32)); 00694 xleft2 = (l_int32 *)CALLOC(n2, sizeof(l_int32)); 00695 for (i = 0; i < n1; i++) { 00696 na1 = numaaGetNuma(naa1, i, L_CLONE); 00697 numaGetIValue(na1, 0, yloc1 + i); 00698 numaGetIValue(na1, 1, xleft1 + i); 00699 nbox = (numaGetCount(na1) - 1) / 2; 00700 if (nbox >= nperline) 00701 line1[i] = 1; 00702 numaDestroy(&na1); 00703 } 00704 for (i = 0; i < n2; i++) { 00705 na2 = numaaGetNuma(naa2, i, L_CLONE); 00706 numaGetIValue(na2, 0, yloc2 + i); 00707 numaGetIValue(na2, 1, xleft2 + i); 00708 nbox = (numaGetCount(na2) - 1) / 2; 00709 if (nbox >= nperline) 00710 line2[i] = 1; 00711 numaDestroy(&na2); 00712 } 00713 00714 /* Enumerate all possible line matches. A 'possible' line 00715 * match is one where the x and y shifts for the first box 00716 * in each line are within the maxshiftx and maxshifty 00717 * constraints, and the left and right sides of the remaining 00718 * (nperline - 1) successive boxes are within delx of each other. 00719 * The result is a set of four numas giving parameters of 00720 * each set of matching lines. */ 00721 nai1 = numaCreate(0); /* line index 1 of match */ 00722 nai2 = numaCreate(0); /* line index 2 of match */ 00723 nasx = numaCreate(0); /* shiftx for match */ 00724 nasy = numaCreate(0); /* shifty for match */ 00725 for (i = 0; i < n1; i++) { 00726 if (line1[i] == 0) continue; 00727 y1 = yloc1[i]; 00728 xl1 = xleft1[i]; 00729 na1 = numaaGetNuma(naa1, i, L_CLONE); 00730 for (j = 0; j < n2; j++) { 00731 if (line2[j] == 0) continue; 00732 y2 = yloc2[j]; 00733 if (L_ABS(y1 - y2) > maxshifty) continue; 00734 xl2 = xleft2[j]; 00735 if (L_ABS(xl1 - xl2) > maxshiftx) continue; 00736 shiftx = xl1 - xl2; /* shift to add to x2 values */ 00737 shifty = y1 - y2; /* shift to add to y2 values */ 00738 na2 = numaaGetNuma(naa2, j, L_CLONE); 00739 00740 /* Now check if 'nperline' boxes in the two lines match */ 00741 match = testLineAlignmentX(na1, na2, shiftx, delx, nperline); 00742 if (match) { 00743 numaAddNumber(nai1, i); 00744 numaAddNumber(nai2, j); 00745 numaAddNumber(nasx, shiftx); 00746 numaAddNumber(nasy, shifty); 00747 } 00748 numaDestroy(&na2); 00749 } 00750 numaDestroy(&na1); 00751 } 00752 00753 /* Determine if there are a sufficient number of mutually 00754 * aligned matches. Mutually aligned matches place an additional 00755 * constraint on the 'possible' matches, where the relative 00756 * shifts must not exceed the (delx, dely) distances. */ 00757 countAlignedMatches(nai1, nai2, nasx, nasy, n1, n2, delx, dely, 00758 nreq, psame, debugflag); 00759 00760 FREE(line1); 00761 FREE(line2); 00762 FREE(yloc1); 00763 FREE(yloc2); 00764 FREE(xleft1); 00765 FREE(xleft2); 00766 numaDestroy(&nai1); 00767 numaDestroy(&nai2); 00768 numaDestroy(&nasx); 00769 numaDestroy(&nasy); 00770 return 0; 00771 } 00772 00773 00774 static l_int32 00775 testLineAlignmentX(NUMA *na1, 00776 NUMA *na2, 00777 l_int32 shiftx, 00778 l_int32 delx, 00779 l_int32 nperline) 00780 { 00781 l_int32 i, xl1, xr1, xl2, xr2, diffl, diffr; 00782 00783 PROCNAME("testLineAlignmentX"); 00784 00785 if (!na1) 00786 return ERROR_INT("na1 not defined", procName, 1); 00787 if (!na2) 00788 return ERROR_INT("na2 not defined", procName, 1); 00789 00790 for (i = 0; i < nperline; i++) { 00791 numaGetIValue(na1, i + 1, &xl1); 00792 numaGetIValue(na1, i + 2, &xr1); 00793 numaGetIValue(na2, i + 1, &xl2); 00794 numaGetIValue(na2, i + 2, &xr2); 00795 diffl = L_ABS(xl1 - xl2 - shiftx); 00796 diffr = L_ABS(xr1 - xr2 - shiftx); 00797 if (diffl > delx || diffr > delx) 00798 return 0; 00799 } 00800 00801 return 1; 00802 } 00803 00804 00805 /* 00806 * countAlignedMatches() 00807 * Input: nai1, nai2 (numas of row pairs for matches) 00808 * nasx, nasy (numas of x and y shifts for the matches) 00809 * n1, n2 (number of rows in images 1 and 2) 00810 * delx, dely (allowed difference in shifts of the match, 00811 * compared to the reference match) 00812 * nreq (number of required aligned matches) 00813 * &same (<return> 1 if @nreq row matches are found; 0 otherwise) 00814 * Return: 0 if OK, 1 on error 00815 * 00816 * Notes: 00817 * (1) This takes 4 input arrays giving parameters of all the 00818 * line matches. It looks for the maximum set of aligned 00819 * matches (matches with approximately the same overall shifts) 00820 * that do not use rows from either image more than once. 00821 */ 00822 static l_int32 00823 countAlignedMatches(NUMA *nai1, 00824 NUMA *nai2, 00825 NUMA *nasx, 00826 NUMA *nasy, 00827 l_int32 n1, 00828 l_int32 n2, 00829 l_int32 delx, 00830 l_int32 dely, 00831 l_int32 nreq, 00832 l_int32 *psame, 00833 l_int32 debugflag) 00834 { 00835 l_int32 i, j, nm, shiftx, shifty, nmatch, diffx, diffy; 00836 l_int32 *ia1, *ia2, *iasx, *iasy, *index1, *index2; 00837 00838 PROCNAME("countAlignedMatches"); 00839 00840 if (!nai1 || !nai2 || !nasx || !nasy) 00841 return ERROR_INT("4 input numas not defined", procName, 1); 00842 if (!psame) 00843 return ERROR_INT("&same not defined", procName, 1); 00844 *psame = 0; 00845 00846 /* Check for sufficient aligned matches, doing a double iteration 00847 * over the set of raw matches. The row index arrays 00848 * are used to verify that the same rows in either image 00849 * are not used in more than one match. Whenever there 00850 * is a match that is properly aligned, those rows are 00851 * marked in the index arrays. */ 00852 nm = numaGetCount(nai1); /* number of matches */ 00853 if (nm < nreq) 00854 return 0; 00855 00856 ia1 = numaGetIArray(nai1); 00857 ia2 = numaGetIArray(nai2); 00858 iasx = numaGetIArray(nasx); 00859 iasy = numaGetIArray(nasy); 00860 index1 = (l_int32 *)CALLOC(n1, sizeof(l_int32)); /* keep track of rows */ 00861 index2 = (l_int32 *)CALLOC(n2, sizeof(l_int32)); 00862 for (i = 0; i < nm; i++) { 00863 if (*psame == 1) 00864 break; 00865 00866 /* Reset row index arrays */ 00867 memset(index1, 0, 4 * n1); 00868 memset(index2, 0, 4 * n2); 00869 nmatch = 1; 00870 index1[ia1[i]] = nmatch; /* mark these rows as taken */ 00871 index2[ia2[i]] = nmatch; 00872 shiftx = iasx[i]; /* reference shift between two rows */ 00873 shifty = iasy[i]; /* ditto */ 00874 if (nreq == 1) { 00875 *psame = 1; 00876 break; 00877 } 00878 for (j = 0; j < nm; j++) { 00879 if (j == i) continue; 00880 /* Rows must both be different from any previously seen */ 00881 if (index1[ia1[j]] > 0 || index2[ia2[j]] > 0) continue; 00882 /* Check the shift for this match */ 00883 diffx = L_ABS(shiftx - iasx[j]); 00884 diffy = L_ABS(shifty - iasy[j]); 00885 if (diffx > delx || diffy > dely) continue; 00886 /* We have a match */ 00887 nmatch++; 00888 index1[ia1[j]] = nmatch; /* mark the rows */ 00889 index2[ia2[j]] = nmatch; 00890 if (nmatch >= nreq) { 00891 *psame = 1; 00892 if (debugflag) 00893 printRowIndices(index1, n1, index2, n2); 00894 break; 00895 } 00896 } 00897 } 00898 00899 FREE(ia1); 00900 FREE(ia2); 00901 FREE(iasx); 00902 FREE(iasy); 00903 FREE(index1); 00904 FREE(index2); 00905 return 0; 00906 } 00907 00908 00909 static void 00910 printRowIndices(l_int32 *index1, 00911 l_int32 n1, 00912 l_int32 *index2, 00913 l_int32 n2) 00914 { 00915 l_int32 i; 00916 00917 fprintf(stderr, "Index1: "); 00918 for (i = 0; i < n1; i++) { 00919 if (i && (i % 20 == 0)) 00920 fprintf(stderr, "\n "); 00921 fprintf(stderr, "%3d", index1[i]); 00922 } 00923 fprintf(stderr, "\n"); 00924 00925 fprintf(stderr, "Index2: "); 00926 for (i = 0; i < n2; i++) { 00927 if (i && (i % 20 == 0)) 00928 fprintf(stderr, "\n "); 00929 fprintf(stderr, "%3d", index2[i]); 00930 } 00931 fprintf(stderr, "\n"); 00932 return; 00933 } 00934 00935