Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * partition.c 00018 * 00019 * Whitespace block extraction 00020 * BOXA *boxaGetWhiteblocks() 00021 * 00022 * Helpers 00023 * static PARTEL *partelCreate() 00024 * static void partelDestroy() 00025 * static l_int32 partelSetSize() 00026 * static BOXA *boxaGenerateSubboxes() 00027 * static BOX *boxaSelectPivotBox() 00028 * static l_int32 boxaCheckIfOverlapIsSmall() 00029 * BOXA *boxaPruneSortedOnOverlap() 00030 */ 00031 00032 #include <stdio.h> 00033 #include <stdlib.h> 00034 #include "allheaders.h" 00035 00036 struct PartitionElement { 00037 l_float32 size; /* sorting key */ 00038 BOX *box; /* region of the element */ 00039 BOXA *boxa; /* set of intersecting boxes */ 00040 }; 00041 typedef struct PartitionElement PARTEL; 00042 00043 static PARTEL * partelCreate(BOX *box); 00044 static void partelDestroy(PARTEL **ppartel); 00045 static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag); 00046 static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim, 00047 l_float32 fract); 00048 static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim, 00049 l_float32 fract); 00050 static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa, 00051 l_float32 maxoverlap); 00052 00053 static const l_int32 DEFAULT_MAX_POPS = 20000; /* a big number! */ 00054 00055 00056 #ifndef NO_CONSOLE_IO 00057 #define OUTPUT_HEAP_STATS 0 00058 #endif /* ~NO_CONSOLE_IO */ 00059 00060 00061 /*------------------------------------------------------------------* 00062 * Whitespace block extraction * 00063 *------------------------------------------------------------------*/ 00064 /*! 00065 * boxaGetWhiteblocks() 00066 * 00067 * Input: boxas (typically, a set of bounding boxes of fg components) 00068 * box (initial region; typically including all boxes in boxas; 00069 * if null, it computes the region to include all boxes 00070 * in boxas) 00071 * sortflag (L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT, 00072 * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION, 00073 * L_SORT_BY_PERIMETER, L_SORT_BY_AREA) 00074 * maxboxes (maximum number of output whitespace boxes; e.g., 100) 00075 * maxoverlap (maximum fractional overlap of a box by any 00076 * of the larger boxes; e.g., 0.2) 00077 * maxperim (maximum half-perimeter, in pixels, for which 00078 * pivot is selected by proximity to box centroid; 00079 * e.g., 200) 00080 * fract (fraction of box diagonal that is an acceptable 00081 * distance from the box centroid to select the pivot; 00082 * e.g., 0.2) 00083 * maxpops (maximum number of pops from the heap; use 0 as default) 00084 * Return: boxa (of sorted whitespace boxes), or null on error 00085 * 00086 * Notes: 00087 * (1) This uses the elegant Breuel algorithm, found in "Two 00088 * Geometric Algorithms for Layout Analysis", 2002, 00089 * url: "citeseer.ist.psu.edu/breuel02two.html". 00090 * It starts with the bounding boxes (b.b.) of the connected 00091 * components (c.c.) in a region, along with the rectangle 00092 * representing that region. It repeatedly divides the 00093 * rectangle into four maximal rectangles that exclude a 00094 * pivot rectangle, sorting them in a priority queue 00095 * according to one of the six sort flags. It returns a boxa 00096 * of the "largest" set that have no intersection with boxes 00097 * from the input boxas. 00098 * (2) If box == NULL, the initial region is the minimal region 00099 * that includes the origin and every box in boxas. 00100 * (3) maxboxes is the maximum number of whitespace boxes that will 00101 * be returned. The actual number will depend on the image 00102 * and the values chosen for maxoverlap and maxpops. In many 00103 * cases, the actual number will be 'maxboxes'. 00104 * (4) maxoverlap allows pruning of whitespace boxes depending on 00105 * the overlap. To avoid all pruning, use maxoverlap = 1.0. 00106 * To select only boxes that have no overlap with each other 00107 * (maximal pruning), choose maxoverlap = 0.0. 00108 * Otherwise, no box can have more than the 'maxoverlap' fraction 00109 * of its area overlapped by any larger (in the sense of the 00110 * sortflag) box. 00111 * (5) Choose maxperim (actually, maximum half-perimeter) to 00112 * represent a c.c. that is small enough so that you don't care 00113 * about the white space that could be inside of it. For all such 00114 * c.c., the pivot for 'quadfurcation' of a rectangle is selected 00115 * as having a reasonable proximity to the rectangle centroid. 00116 * (6) Use fract in the range [0.0 ... 1.0]. Set fract = 0.0 00117 * to choose the small box nearest the centroid as the pivot. 00118 * If you choose fract > 0.0, it is suggested that you call 00119 * boxaPermuteRandom() first, to permute the boxes (see usage below). 00120 * This should reduce the search time for each of the pivot boxes. 00121 * (7) Choose maxpops to be the maximum number of rectangles that 00122 * are popped from the heap. This is an indirect way to limit the 00123 * execution time. Use 0 for default (a fairly large number). 00124 * At any time, you can expect the heap to contain about 00125 * 2.5 times as many boxes as have been popped off. 00126 * (8) The output result is a sorted set of overlapping 00127 * boxes, constrained by 'maxboxes', 'maxoverlap' and 'maxpops'. 00128 * (9) The main defect of the method is that it abstracts out the 00129 * actual components, retaining only the b.b. for analysis. 00130 * Consider a component with a large b.b. If this is chosen 00131 * as a pivot, all white space inside is immediately taken 00132 * out of consideration. Furthermore, even if it is never chosen 00133 * as a pivot, as the partitioning continues, at no time will 00134 * any of the whitespace inside this component be part of a 00135 * rectangle with zero overlapping boxes. Thus, the interiors 00136 * of all boxes are necessarily excluded from the union of 00137 * the returned whitespace boxes. 00138 * (10) USAGE: One way to accommodate to this weakness is to remove such 00139 * large b.b. before starting the computation. For example, 00140 * if 'box' is an input image region containing 'boxa' b.b. of c.c.: 00141 * 00142 * // Faster pivot choosing 00143 * boxaPermuteRandom(boxa, boxa); 00144 * 00145 * // Remove anything either large width or height 00146 * boxat = boxaSelectBySize(boxa, maxwidth, maxheight, 00147 * L_SELECT_IF_BOTH, L_SELECT_IF_LT, 00148 * NULL); 00149 * 00150 * boxad = boxaGetWhiteblocks(boxat, box, type, maxboxes, 00151 * maxoverlap, maxperim, fract, 00152 * maxpops); 00153 * 00154 * The result will be rectangular regions of "white space" that 00155 * extend into (and often through) the excluded components. 00156 * (11) As a simple example, suppose you wish to find the columns on a page. 00157 * First exclude large c.c. that may block the columns, and then call: 00158 * 00159 * boxad = boxaGetWhiteblocks(boxa, box, L_SORT_BY_HEIGHT, 00160 * 20, 0.15, 200, 0.2, 2000); 00161 * 00162 * to get the 20 tallest boxes with no more than 0.15 overlap 00163 * between a box and any of the taller ones, and avoiding the 00164 * use of any c.c. with a b.b. half perimeter greater than 200 00165 * as a pivot. 00166 */ 00167 BOXA * 00168 boxaGetWhiteblocks(BOXA *boxas, 00169 BOX *box, 00170 l_int32 sortflag, 00171 l_int32 maxboxes, 00172 l_float32 maxoverlap, 00173 l_int32 maxperim, 00174 l_float32 fract, 00175 l_int32 maxpops) 00176 { 00177 l_int32 i, w, h, n, nsub, npush, npop; 00178 BOX *boxsub; 00179 BOXA *boxa, *boxa4, *boxasub, *boxad; 00180 PARTEL *partel; 00181 L_HEAP *lh; 00182 00183 PROCNAME("boxaGetWhiteblocks"); 00184 00185 if (!boxas) 00186 return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); 00187 if (sortflag != L_SORT_BY_WIDTH && sortflag != L_SORT_BY_HEIGHT && 00188 sortflag != L_SORT_BY_MIN_DIMENSION && 00189 sortflag != L_SORT_BY_MAX_DIMENSION && 00190 sortflag != L_SORT_BY_PERIMETER && sortflag != L_SORT_BY_AREA) 00191 return (BOXA *)ERROR_PTR("invalid sort flag", procName, NULL); 00192 if (maxboxes < 1) { 00193 maxboxes = 1; 00194 L_WARNING("setting maxboxes = 1", procName); 00195 } 00196 if (maxoverlap < 0.0 || maxoverlap > 1.0) 00197 return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL); 00198 if (maxpops == 0) 00199 maxpops = DEFAULT_MAX_POPS; 00200 00201 if (!box) { 00202 boxaGetExtent(boxas, &w, &h, NULL); 00203 box = boxCreate(0, 0, w, h); 00204 } 00205 00206 /* Prime the heap */ 00207 lh = lheapCreate(20, L_SORT_DECREASING); 00208 partel = partelCreate(box); 00209 partel->boxa = boxaCopy(boxas, L_CLONE); 00210 partelSetSize(partel, sortflag); 00211 lheapAdd(lh, partel); 00212 00213 boxad = boxaCreate(0); 00214 00215 npush = npop = 0; 00216 while (1) { 00217 if ((partel = (PARTEL *)lheapRemove(lh)) == NULL) /* we're done */ 00218 break; 00219 00220 npop++; /* How many boxes have we retrieved from the queue? */ 00221 if (npop > maxpops) 00222 break; 00223 00224 /* Extract the contents */ 00225 boxa = boxaCopy(partel->boxa, L_CLONE); 00226 box = boxClone(partel->box); 00227 partelDestroy(&partel); 00228 00229 /* Can we output this one? */ 00230 n = boxaGetCount(boxa); 00231 if (n == 0) { 00232 if (boxCheckIfOverlapIsBig(box, boxad, maxoverlap) == 0) 00233 boxaAddBox(boxad, box, L_INSERT); 00234 else 00235 boxDestroy(&box); 00236 boxaDestroy(&boxa); 00237 if (boxaGetCount(boxad) >= maxboxes) /* we're done */ 00238 break; 00239 continue; 00240 } 00241 00242 00243 /* Generate up to 4 subboxes and put them on the heap */ 00244 boxa4 = boxaGenerateSubboxes(box, boxa, maxperim, fract); 00245 boxDestroy(&box); 00246 nsub = boxaGetCount(boxa4); 00247 for (i = 0; i < nsub; i++) { 00248 boxsub = boxaGetBox(boxa4, i, L_CLONE); 00249 boxasub = boxaIntersectsBox(boxa, boxsub); 00250 partel = partelCreate(boxsub); 00251 partel->boxa = boxasub; 00252 partelSetSize(partel, sortflag); 00253 lheapAdd(lh, partel); 00254 boxDestroy(&boxsub); 00255 } 00256 npush += nsub; /* How many boxes have we put on the queue? */ 00257 00258 /* boxaWriteStream(stderr, boxa4); */ 00259 00260 boxaDestroy(&boxa4); 00261 boxaDestroy(&boxa); 00262 } 00263 00264 #if OUTPUT_HEAP_STATS 00265 fprintf(stderr, "Heap statistics:\n"); 00266 fprintf(stderr, " Number of boxes pushed: %d\n", npush); 00267 fprintf(stderr, " Number of boxes popped: %d\n", npop); 00268 #endif /* OUTPUT_HEAP_STATS */ 00269 00270 /* Clean up the heap */ 00271 while ((partel = (PARTEL *)lheapRemove(lh)) != NULL) 00272 partelDestroy(&partel); 00273 lheapDestroy(&lh, FALSE); 00274 00275 return boxad; 00276 } 00277 00278 00279 /*------------------------------------------------------------------* 00280 * Helpers * 00281 *------------------------------------------------------------------*/ 00282 /*! 00283 * partelCreate() 00284 * 00285 * Input: box (region; inserts a copy) 00286 * Return: partel, or null on error 00287 */ 00288 static PARTEL * 00289 partelCreate(BOX *box) 00290 { 00291 PARTEL *partel; 00292 00293 PROCNAME("partelCreate"); 00294 00295 if ((partel = (PARTEL *)CALLOC(1, sizeof(PARTEL))) == NULL) 00296 return (PARTEL *)ERROR_PTR("partel not made", procName, NULL); 00297 00298 partel->box = boxCopy(box); 00299 return partel; 00300 } 00301 00302 00303 /*! 00304 * partelDestroy() 00305 * 00306 * Input: &partel (<will be set to null before returning>) 00307 * Return: void 00308 */ 00309 static void 00310 partelDestroy(PARTEL **ppartel) 00311 { 00312 PARTEL *partel; 00313 00314 PROCNAME("partelDestroy"); 00315 00316 if (ppartel == NULL) { 00317 L_WARNING("ptr address is null!", procName); 00318 return; 00319 } 00320 00321 if ((partel = *ppartel) == NULL) 00322 return; 00323 00324 boxDestroy(&partel->box); 00325 boxaDestroy(&partel->boxa); 00326 FREE(partel); 00327 *ppartel = NULL; 00328 return; 00329 } 00330 00331 00332 /*! 00333 * partelSetSize() 00334 * 00335 * Input: partel 00336 * sortflag (L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT, 00337 * L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION, 00338 * L_SORT_BY_PERIMETER, L_SORT_BY_AREA) 00339 * Return: 0 if OK, 1 on error 00340 */ 00341 static l_int32 00342 partelSetSize(PARTEL *partel, 00343 l_int32 sortflag) 00344 { 00345 l_int32 w, h; 00346 00347 PROCNAME("partelSetSize"); 00348 00349 if (!partel) 00350 return ERROR_INT("partel not defined", procName, 1); 00351 00352 boxGetGeometry(partel->box, NULL, NULL, &w, &h); 00353 if (sortflag == L_SORT_BY_WIDTH) 00354 partel->size = (l_float32)w; 00355 else if (sortflag == L_SORT_BY_HEIGHT) 00356 partel->size = (l_float32)h; 00357 else if (sortflag == L_SORT_BY_MIN_DIMENSION) 00358 partel->size = (l_float32)L_MIN(w, h); 00359 else if (sortflag == L_SORT_BY_MAX_DIMENSION) 00360 partel->size = (l_float32)L_MAX(w, h); 00361 else if (sortflag == L_SORT_BY_PERIMETER) 00362 partel->size = (l_float32)(w + h); 00363 else if (sortflag == L_SORT_BY_AREA) 00364 partel->size = (l_float32)(w * h); 00365 else 00366 return ERROR_INT("invalid sortflag", procName, 1); 00367 return 0; 00368 } 00369 00370 00371 /*! 00372 * boxaGenerateSubboxes() 00373 * 00374 * Input: box (region to be split into up to four overlapping subregions) 00375 * boxa (boxes of rectangles intersecting the box) 00376 * maxperim (maximum half-perimeter for which pivot 00377 * is selected by proximity to box centroid) 00378 * fract (fraction of box diagonal that is an acceptable 00379 * distance from the box centroid to select the pivot) 00380 * Return: boxa (of four or less overlapping subrectangles of the box), 00381 * or null on error 00382 */ 00383 static BOXA * 00384 boxaGenerateSubboxes(BOX *box, 00385 BOXA *boxa, 00386 l_int32 maxperim, 00387 l_float32 fract) 00388 { 00389 l_int32 x, y, w, h, xp, yp, wp, hp; 00390 BOX *boxp; /* pivot box */ 00391 BOX *boxsub; 00392 BOXA *boxa4; 00393 00394 PROCNAME("boxaGenerateSubboxes"); 00395 00396 if (!box) 00397 return (BOXA *)ERROR_PTR("box not defined", procName, NULL); 00398 if (!boxa) 00399 return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL); 00400 00401 boxa4 = boxaCreate(4); 00402 boxp = boxaSelectPivotBox(box, boxa, maxperim, fract); 00403 boxGetGeometry(box, &x, &y, &w, &h); 00404 boxGetGeometry(boxp, &xp, &yp, &wp, &hp); 00405 boxDestroy(&boxp); 00406 if (xp > x) { /* left sub-box */ 00407 boxsub = boxCreate(x, y, xp - x, h); 00408 boxaAddBox(boxa4, boxsub, L_INSERT); 00409 } 00410 if (yp > y) { /* top sub-box */ 00411 boxsub = boxCreate(x, y, w, yp - y); 00412 boxaAddBox(boxa4, boxsub, L_INSERT); 00413 } 00414 if (xp + wp < x + w) { /* right sub-box */ 00415 boxsub = boxCreate(xp + wp, y, x + w - xp - wp, h); 00416 boxaAddBox(boxa4, boxsub, L_INSERT); 00417 } 00418 if (yp + hp < y + h) { /* bottom sub-box */ 00419 boxsub = boxCreate(x, yp + hp, w, y + h - yp - hp); 00420 boxaAddBox(boxa4, boxsub, L_INSERT); 00421 } 00422 00423 return boxa4; 00424 } 00425 00426 00427 /*! 00428 * boxaSelectPivotBox() 00429 * 00430 * Input: box (containing box; to be split by the pivot box) 00431 * boxa (boxes of rectangles, from which 1 is to be chosen) 00432 * maxperim (maximum half-perimeter for which pivot 00433 * is selected by proximity to box centroid) 00434 * fract (fraction of box diagonal that is an acceptable 00435 * distance from the box centroid to select the pivot) 00436 * Return: box (pivot box for subdivision into 4 rectangles), or 00437 * null on error 00438 * 00439 * Notes: 00440 * (1) This is a tricky piece that wasn't discussed in the 00441 * Breuel's 2002 paper. 00442 * (2) Selects a box from boxa whose centroid is reasonably close to 00443 * the centroid of the containing box (xc, yc) and whose 00444 * half-perimeter does not exceed the maxperim value. 00445 * (3) If there are no boxes in the boxa that are small enough, 00446 * then it selects the smallest of the larger boxes, 00447 * without reference to its location in the containing box. 00448 * (4) If a small box has a centroid at a distance from the 00449 * centroid of the containing box that is not more than 00450 * the fraction 'fract' of the diagonal of the containing 00451 * box, that box is chosen as the pivot, terminating the 00452 * search for the nearest small box. 00453 * (5) Use fract in the range [0.0 ... 1.0]. Set fract = 0.0 00454 * to choose the small box nearest the centroid. 00455 * (6) Choose maxperim to represent a connected component that is 00456 * small enough so that you don't care about the white space 00457 * that could be inside of it. 00458 */ 00459 static BOX * 00460 boxaSelectPivotBox(BOX *box, 00461 BOXA *boxa, 00462 l_int32 maxperim, 00463 l_float32 fract) 00464 { 00465 l_int32 i, n, bw, bh, w, h; 00466 l_int32 smallfound, minindex, perim, minsize; 00467 l_float32 delx, dely, mindist, threshdist, dist, x, y, cx, cy; 00468 BOX *boxt; 00469 00470 PROCNAME("boxaSelectPivotBox"); 00471 00472 if (!box) 00473 return (BOX *)ERROR_PTR("box not defined", procName, NULL); 00474 if (!boxa) 00475 return (BOX *)ERROR_PTR("boxa not defined", procName, NULL); 00476 n = boxaGetCount(boxa); 00477 if (n == 0) 00478 return (BOX *)ERROR_PTR("no boxes in boxa", procName, NULL); 00479 if (fract < 0.0 || fract > 1.0) { 00480 L_WARNING("fract out of bounds; using 0.0", procName); 00481 fract = 0.0; 00482 } 00483 00484 boxGetGeometry(box, NULL, NULL, &w, &h); 00485 boxGetCenter(box, &x, &y); 00486 threshdist = fract * (w * w + h * h); 00487 mindist = 1000000000.; 00488 minindex = 0; 00489 smallfound = FALSE; 00490 for (i = 0; i < n; i++) { 00491 boxt = boxaGetBox(boxa, i, L_CLONE); 00492 boxGetGeometry(boxt, NULL, NULL, &bw, &bh); 00493 boxGetCenter(boxt, &cx, &cy); 00494 boxDestroy(&boxt); 00495 if (bw + bh > maxperim) 00496 continue; 00497 smallfound = TRUE; 00498 delx = cx - x; 00499 dely = cy - y; 00500 dist = delx * delx + dely * dely; 00501 if (dist <= threshdist) 00502 return boxaGetBox(boxa, i, L_COPY); 00503 if (dist < mindist) { 00504 minindex = i; 00505 mindist = dist; 00506 } 00507 } 00508 00509 /* If there are small boxes but none are within 'fract' of the 00510 * centroid, return the nearest one. */ 00511 if (smallfound == TRUE) 00512 return boxaGetBox(boxa, minindex, L_COPY); 00513 00514 /* No small boxes; return the smallest of the large boxes */ 00515 minsize = 1000000000; 00516 minindex = 0; 00517 for (i = 0; i < n; i++) { 00518 boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh); 00519 perim = bw + bh; 00520 if (perim < minsize) { 00521 minsize = perim; 00522 minindex = i; 00523 } 00524 } 00525 return boxaGetBox(boxa, minindex, L_COPY); 00526 } 00527 00528 00529 /*! 00530 * boxCheckIfOverlapIsBig() 00531 * 00532 * Input: box (to be tested) 00533 * boxa (of boxes already stored) 00534 * maxoverlap (maximum fractional overlap of the input box 00535 * by any of the boxes in boxa) 00536 * Return: 0 if box has small overlap with every box in boxa; 00537 * 1 otherwise or on error 00538 */ 00539 static l_int32 00540 boxCheckIfOverlapIsBig(BOX *box, 00541 BOXA *boxa, 00542 l_float32 maxoverlap) 00543 { 00544 l_int32 i, n, bigoverlap; 00545 l_float32 fract; 00546 BOX *boxt; 00547 00548 PROCNAME("boxCheckIfOverlapIsBig"); 00549 00550 if (!box) 00551 return ERROR_INT("box not defined", procName, 1); 00552 if (!boxa) 00553 return ERROR_INT("boxa not defined", procName, 1); 00554 if (maxoverlap < 0.0 || maxoverlap > 1.0) 00555 return ERROR_INT("invalid maxoverlap", procName, 1); 00556 00557 n = boxaGetCount(boxa); 00558 if (n == 0 || maxoverlap == 1.0) 00559 return 0; 00560 00561 bigoverlap = 0; 00562 for (i = 0; i < n; i++) { 00563 boxt = boxaGetBox(boxa, i, L_CLONE); 00564 boxOverlapFraction(boxt, box, &fract); 00565 boxDestroy(&boxt); 00566 if (fract > maxoverlap) { 00567 bigoverlap = 1; 00568 break; 00569 } 00570 } 00571 00572 return bigoverlap; 00573 } 00574 00575 00576 /*! 00577 * boxaPruneSortedOnOverlap() 00578 * 00579 * Input: boxas (sorted by size in decreasing order) 00580 * maxoverlap (maximum fractional overlap of a box by any 00581 * of the larger boxes) 00582 * Return: boxad (pruned), or null on error 00583 * 00584 * Notes: 00585 * (1) This selectively removes smaller boxes when they are overlapped 00586 * by any larger box by more than the input 'maxoverlap' fraction. 00587 * (2) To avoid all pruning, use maxoverlap = 1.0. To select only 00588 * boxes that have no overlap with each other (maximal pruning), 00589 * set maxoverlap = 0.0. 00590 * (3) If there are no boxes in boxas, returns an empty boxa. 00591 */ 00592 BOXA * 00593 boxaPruneSortedOnOverlap(BOXA *boxas, 00594 l_float32 maxoverlap) 00595 { 00596 l_int32 i, j, n, remove; 00597 l_float32 fract; 00598 BOX *box1, *box2; 00599 BOXA *boxad; 00600 00601 PROCNAME("boxaPruneSortedOnOverlap"); 00602 00603 if (!boxas) 00604 return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL); 00605 if (maxoverlap < 0.0 || maxoverlap > 1.0) 00606 return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL); 00607 00608 n = boxaGetCount(boxas); 00609 if (n == 0 || maxoverlap == 1.0) 00610 return boxaCopy(boxas, L_COPY); 00611 00612 boxad = boxaCreate(0); 00613 box2 = boxaGetBox(boxas, 0, L_COPY); 00614 boxaAddBox(boxad, box2, L_INSERT); 00615 for (j = 1; j < n; j++) { /* prune on j */ 00616 box2 = boxaGetBox(boxas, j, L_COPY); 00617 remove = FALSE; 00618 for (i = 0; i < j; i++) { /* test on i */ 00619 box1 = boxaGetBox(boxas, i, L_CLONE); 00620 boxOverlapFraction(box1, box2, &fract); 00621 boxDestroy(&box1); 00622 if (fract > maxoverlap) { 00623 remove = TRUE; 00624 break; 00625 } 00626 } 00627 if (remove == TRUE) 00628 boxDestroy(&box2); 00629 else 00630 boxaAddBox(boxad, box2, L_INSERT); 00631 } 00632 00633 return boxad; 00634 } 00635 00636 00637 00638 00639