Leptonica 1.68
C Image Processing Library

partition.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  *   partition.c
00018  *
00019  *      Whitespace block extraction
00020  *          BOXA            *boxaGetWhiteblocks()
00021  *
00022  *      Helpers
00023  *          static PARTEL   *partelCreate()
00024  *          static void      partelDestroy()
00025  *          static l_int32   partelSetSize()
00026  *          static BOXA     *boxaGenerateSubboxes()
00027  *          static BOX      *boxaSelectPivotBox()
00028  *          static l_int32   boxaCheckIfOverlapIsSmall()
00029  *          BOXA            *boxaPruneSortedOnOverlap()
00030  */
00031 
00032 #include <stdio.h>
00033 #include <stdlib.h>
00034 #include "allheaders.h"
00035 
00036 struct PartitionElement {
00037     l_float32  size;  /* sorting key */
00038     BOX       *box;   /* region of the element */
00039     BOXA      *boxa;  /* set of intersecting boxes */
00040 };
00041 typedef struct PartitionElement PARTEL;
00042 
00043 static PARTEL * partelCreate(BOX *box);
00044 static void partelDestroy(PARTEL **ppartel);
00045 static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag);
00046 static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim,
00047                                    l_float32  fract);
00048 static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim,
00049                                 l_float32 fract);
00050 static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa,
00051                                       l_float32 maxoverlap);
00052 
00053 static const l_int32  DEFAULT_MAX_POPS = 20000;  /* a big number! */
00054 
00055 
00056 #ifndef  NO_CONSOLE_IO
00057 #define  OUTPUT_HEAP_STATS   0
00058 #endif  /* ~NO_CONSOLE_IO */
00059 
00060 
00061 /*------------------------------------------------------------------*
00062  *                    Whitespace block extraction                   *
00063  *------------------------------------------------------------------*/
00064 /*!
00065  *  boxaGetWhiteblocks()
00066  *
00067  *      Input:  boxas (typically, a set of bounding boxes of fg components)
00068  *              box (initial region; typically including all boxes in boxas;
00069  *                   if null, it computes the region to include all boxes
00070  *                   in boxas)
00071  *              sortflag (L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT,
00072  *                        L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION,
00073  *                        L_SORT_BY_PERIMETER, L_SORT_BY_AREA)
00074  *              maxboxes (maximum number of output whitespace boxes; e.g., 100)
00075  *              maxoverlap (maximum fractional overlap of a box by any
00076  *                          of the larger boxes; e.g., 0.2)
00077  *              maxperim (maximum half-perimeter, in pixels, for which
00078  *                        pivot is selected by proximity to box centroid;
00079  *                        e.g., 200)
00080  *              fract (fraction of box diagonal that is an acceptable
00081  *                     distance from the box centroid to select the pivot;
00082  *                     e.g., 0.2)
00083  *              maxpops (maximum number of pops from the heap; use 0 as default)
00084  *      Return: boxa (of sorted whitespace boxes), or null on error
00085  *
00086  *  Notes:
00087  *      (1) This uses the elegant Breuel algorithm, found in "Two
00088  *          Geometric Algorithms for Layout Analysis", 2002,
00089  *          url: "citeseer.ist.psu.edu/breuel02two.html".
00090  *          It starts with the bounding boxes (b.b.) of the connected
00091  *          components (c.c.) in a region, along with the rectangle
00092  *          representing that region.  It repeatedly divides the
00093  *          rectangle into four maximal rectangles that exclude a
00094  *          pivot rectangle, sorting them in a priority queue
00095  *          according to one of the six sort flags.  It returns a boxa
00096  *          of the "largest" set that have no intersection with boxes
00097  *          from the input boxas.
00098  *      (2) If box == NULL, the initial region is the minimal region
00099  *          that includes the origin and every box in boxas.
00100  *      (3) maxboxes is the maximum number of whitespace boxes that will
00101  *          be returned.  The actual number will depend on the image
00102  *          and the values chosen for maxoverlap and maxpops.  In many
00103  *          cases, the actual number will be 'maxboxes'.
00104  *      (4) maxoverlap allows pruning of whitespace boxes depending on
00105  *          the overlap.  To avoid all pruning, use maxoverlap = 1.0.
00106  *          To select only boxes that have no overlap with each other
00107  *          (maximal pruning), choose maxoverlap = 0.0.
00108  *          Otherwise, no box can have more than the 'maxoverlap' fraction
00109  *          of its area overlapped by any larger (in the sense of the
00110  *          sortflag) box.
00111  *      (5) Choose maxperim (actually, maximum half-perimeter) to
00112  *          represent a c.c. that is small enough so that you don't care
00113  *          about the white space that could be inside of it.  For all such
00114  *          c.c., the pivot for 'quadfurcation' of a rectangle is selected
00115  *          as having a reasonable proximity to the rectangle centroid.
00116  *      (6) Use fract in the range [0.0 ... 1.0].  Set fract = 0.0
00117  *          to choose the small box nearest the centroid as the pivot.
00118  *          If you choose fract > 0.0, it is suggested that you call
00119  *          boxaPermuteRandom() first, to permute the boxes (see usage below).
00120  *          This should reduce the search time for each of the pivot boxes.
00121  *      (7) Choose maxpops to be the maximum number of rectangles that
00122  *          are popped from the heap.  This is an indirect way to limit the
00123  *          execution time.  Use 0 for default (a fairly large number).
00124  *          At any time, you can expect the heap to contain about
00125  *          2.5 times as many boxes as have been popped off.  
00126  *      (8) The output result is a sorted set of overlapping
00127  *          boxes, constrained by 'maxboxes', 'maxoverlap' and 'maxpops'.
00128  *      (9) The main defect of the method is that it abstracts out the
00129  *          actual components, retaining only the b.b. for analysis.
00130  *          Consider a component with a large b.b.  If this is chosen
00131  *          as a pivot, all white space inside is immediately taken
00132  *          out of consideration.  Furthermore, even if it is never chosen
00133  *          as a pivot, as the partitioning continues, at no time will
00134  *          any of the whitespace inside this component be part of a
00135  *          rectangle with zero overlapping boxes.  Thus, the interiors
00136 *           of all boxes are necessarily excluded from the union of
00137 *           the returned whitespace boxes.
00138  *     (10) USAGE: One way to accommodate to this weakness is to remove such
00139  *          large b.b. before starting the computation.  For example,
00140  *          if 'box' is an input image region containing 'boxa' b.b. of c.c.:
00141  *
00142  *                   // Faster pivot choosing
00143  *               boxaPermuteRandom(boxa, boxa);
00144  *
00145  *                   // Remove anything either large width or height
00146  *               boxat = boxaSelectBySize(boxa, maxwidth, maxheight,
00147  *                                        L_SELECT_IF_BOTH, L_SELECT_IF_LT,
00148  *                                        NULL);          
00149  *
00150  *               boxad = boxaGetWhiteblocks(boxat, box, type, maxboxes,
00151  *                                          maxoverlap, maxperim, fract,
00152  *                                          maxpops);
00153  *
00154  *          The result will be rectangular regions of "white space" that
00155  *          extend into (and often through) the excluded components.
00156  *     (11) As a simple example, suppose you wish to find the columns on a page.
00157  *          First exclude large c.c. that may block the columns, and then call:
00158  *
00159  *               boxad = boxaGetWhiteblocks(boxa, box, L_SORT_BY_HEIGHT,
00160  *                                          20, 0.15, 200, 0.2, 2000);
00161  *
00162  *          to get the 20 tallest boxes with no more than 0.15 overlap
00163  *          between a box and any of the taller ones, and avoiding the
00164  *          use of any c.c. with a b.b. half perimeter greater than 200
00165  *          as a pivot.
00166  */
00167 BOXA *
00168 boxaGetWhiteblocks(BOXA      *boxas,
00169                    BOX       *box,
00170                    l_int32    sortflag,
00171                    l_int32    maxboxes,
00172                    l_float32  maxoverlap,
00173                    l_int32    maxperim,
00174                    l_float32  fract,
00175                    l_int32    maxpops)
00176 {
00177 l_int32  i, w, h, n, nsub, npush, npop;
00178 BOX     *boxsub;
00179 BOXA    *boxa, *boxa4, *boxasub, *boxad;
00180 PARTEL  *partel;
00181 L_HEAP  *lh;
00182 
00183     PROCNAME("boxaGetWhiteblocks");
00184 
00185     if (!boxas)
00186         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
00187     if (sortflag != L_SORT_BY_WIDTH && sortflag != L_SORT_BY_HEIGHT &&
00188         sortflag != L_SORT_BY_MIN_DIMENSION &&
00189         sortflag != L_SORT_BY_MAX_DIMENSION &&
00190         sortflag != L_SORT_BY_PERIMETER && sortflag != L_SORT_BY_AREA)
00191         return (BOXA *)ERROR_PTR("invalid sort flag", procName, NULL);
00192     if (maxboxes < 1) {
00193         maxboxes = 1;
00194         L_WARNING("setting maxboxes = 1", procName);
00195     }
00196     if (maxoverlap < 0.0 || maxoverlap > 1.0)
00197         return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL);
00198     if (maxpops == 0)
00199         maxpops = DEFAULT_MAX_POPS;
00200 
00201     if (!box) {
00202         boxaGetExtent(boxas, &w, &h, NULL);
00203         box = boxCreate(0, 0, w, h);
00204     }
00205 
00206         /* Prime the heap */
00207     lh = lheapCreate(20, L_SORT_DECREASING);
00208     partel = partelCreate(box);
00209     partel->boxa = boxaCopy(boxas, L_CLONE);
00210     partelSetSize(partel, sortflag);
00211     lheapAdd(lh, partel);
00212 
00213     boxad = boxaCreate(0);
00214 
00215     npush = npop = 0;
00216     while (1) {
00217         if ((partel = (PARTEL *)lheapRemove(lh)) == NULL)  /* we're done */
00218             break;
00219 
00220         npop++;  /* How many boxes have we retrieved from the queue? */
00221         if (npop > maxpops)
00222             break;
00223 
00224             /* Extract the contents */
00225         boxa = boxaCopy(partel->boxa, L_CLONE);
00226         box = boxClone(partel->box);
00227         partelDestroy(&partel);
00228 
00229             /* Can we output this one? */
00230         n = boxaGetCount(boxa);
00231         if (n == 0) {
00232             if (boxCheckIfOverlapIsBig(box, boxad, maxoverlap) == 0)
00233                 boxaAddBox(boxad, box, L_INSERT);
00234             else
00235                 boxDestroy(&box);
00236             boxaDestroy(&boxa);
00237             if (boxaGetCount(boxad) >= maxboxes)  /* we're done */
00238                 break;
00239             continue;
00240         }
00241 
00242 
00243             /* Generate up to 4 subboxes and put them on the heap */
00244         boxa4 = boxaGenerateSubboxes(box, boxa, maxperim, fract);
00245         boxDestroy(&box);
00246         nsub = boxaGetCount(boxa4);
00247         for (i = 0; i < nsub; i++) {
00248             boxsub = boxaGetBox(boxa4, i, L_CLONE);
00249             boxasub = boxaIntersectsBox(boxa, boxsub);
00250             partel = partelCreate(boxsub);
00251             partel->boxa = boxasub;
00252             partelSetSize(partel, sortflag);
00253             lheapAdd(lh, partel);
00254             boxDestroy(&boxsub);
00255         }
00256         npush += nsub;  /* How many boxes have we put on the queue? */
00257 
00258 /*      boxaWriteStream(stderr, boxa4); */
00259 
00260         boxaDestroy(&boxa4);
00261         boxaDestroy(&boxa);
00262     }
00263 
00264 #if  OUTPUT_HEAP_STATS
00265     fprintf(stderr, "Heap statistics:\n");
00266     fprintf(stderr, "  Number of boxes pushed: %d\n", npush);
00267     fprintf(stderr, "  Number of boxes popped: %d\n", npop);
00268 #endif  /* OUTPUT_HEAP_STATS */
00269 
00270         /* Clean up the heap */
00271     while ((partel = (PARTEL *)lheapRemove(lh)) != NULL)
00272         partelDestroy(&partel);
00273     lheapDestroy(&lh, FALSE);
00274 
00275     return boxad;
00276 }
00277 
00278 
00279 /*------------------------------------------------------------------*
00280  *                               Helpers                            *
00281  *------------------------------------------------------------------*/
00282 /*!
00283  *  partelCreate()
00284  *
00285  *      Input:  box (region; inserts a copy)
00286  *      Return: partel, or null on error
00287  */
00288 static PARTEL *
00289 partelCreate(BOX  *box)
00290 {
00291 PARTEL  *partel;
00292 
00293     PROCNAME("partelCreate");
00294 
00295     if ((partel = (PARTEL *)CALLOC(1, sizeof(PARTEL))) == NULL)
00296         return (PARTEL *)ERROR_PTR("partel not made", procName, NULL);
00297 
00298     partel->box = boxCopy(box);
00299     return partel;
00300 }
00301 
00302 
00303 /*!
00304  *  partelDestroy()
00305  *
00306  *      Input:  &partel (<will be set to null before returning>)
00307  *      Return: void
00308  */
00309 static void
00310 partelDestroy(PARTEL  **ppartel)
00311 {
00312 PARTEL  *partel;
00313 
00314     PROCNAME("partelDestroy");
00315 
00316     if (ppartel == NULL) {
00317         L_WARNING("ptr address is null!", procName);
00318         return;
00319     }
00320 
00321     if ((partel = *ppartel) == NULL)
00322         return;
00323 
00324     boxDestroy(&partel->box);
00325     boxaDestroy(&partel->boxa);
00326     FREE(partel);
00327     *ppartel = NULL;
00328     return;
00329 }
00330 
00331 
00332 /*!
00333  *  partelSetSize()
00334  *
00335  *      Input:  partel
00336  *              sortflag (L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT,
00337  *                        L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION,
00338  *                        L_SORT_BY_PERIMETER, L_SORT_BY_AREA)
00339  *      Return: 0 if OK, 1 on error
00340  */
00341 static l_int32
00342 partelSetSize(PARTEL  *partel,
00343               l_int32  sortflag)
00344 {
00345 l_int32  w, h;
00346 
00347     PROCNAME("partelSetSize");
00348 
00349     if (!partel)
00350         return ERROR_INT("partel not defined", procName, 1);
00351 
00352     boxGetGeometry(partel->box, NULL, NULL, &w, &h);
00353     if (sortflag == L_SORT_BY_WIDTH)
00354         partel->size = (l_float32)w;
00355     else if (sortflag == L_SORT_BY_HEIGHT)
00356         partel->size = (l_float32)h;
00357     else if (sortflag == L_SORT_BY_MIN_DIMENSION)
00358         partel->size = (l_float32)L_MIN(w, h);
00359     else if (sortflag == L_SORT_BY_MAX_DIMENSION)
00360         partel->size = (l_float32)L_MAX(w, h);
00361     else if (sortflag == L_SORT_BY_PERIMETER)
00362         partel->size = (l_float32)(w + h);
00363     else if (sortflag == L_SORT_BY_AREA)
00364         partel->size = (l_float32)(w * h);
00365     else
00366         return ERROR_INT("invalid sortflag", procName, 1);
00367     return 0;
00368 }
00369 
00370 
00371 /*!
00372  *  boxaGenerateSubboxes()
00373  *
00374  *      Input:  box (region to be split into up to four overlapping subregions)
00375  *              boxa (boxes of rectangles intersecting the box)
00376  *              maxperim (maximum half-perimeter for which pivot
00377  *                        is selected by proximity to box centroid)
00378  *              fract (fraction of box diagonal that is an acceptable
00379  *                     distance from the box centroid to select the pivot)
00380  *      Return: boxa (of four or less overlapping subrectangles of the box),
00381  *              or null on error
00382  */
00383 static BOXA *
00384 boxaGenerateSubboxes(BOX       *box,
00385                      BOXA      *boxa,
00386                      l_int32    maxperim,
00387                      l_float32  fract)
00388 {
00389 l_int32  x, y, w, h, xp, yp, wp, hp;
00390 BOX     *boxp;  /* pivot box */
00391 BOX     *boxsub;
00392 BOXA    *boxa4;
00393 
00394     PROCNAME("boxaGenerateSubboxes");
00395 
00396     if (!box)
00397         return (BOXA *)ERROR_PTR("box not defined", procName, NULL);
00398     if (!boxa)
00399         return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL);
00400 
00401     boxa4 = boxaCreate(4);
00402     boxp = boxaSelectPivotBox(box, boxa, maxperim, fract);
00403     boxGetGeometry(box, &x, &y, &w, &h);
00404     boxGetGeometry(boxp, &xp, &yp, &wp, &hp);
00405     boxDestroy(&boxp);
00406     if (xp > x) {   /* left sub-box */
00407         boxsub = boxCreate(x, y, xp - x, h);
00408         boxaAddBox(boxa4, boxsub, L_INSERT);
00409     }
00410     if (yp > y) {   /* top sub-box */
00411         boxsub = boxCreate(x, y, w, yp - y);
00412         boxaAddBox(boxa4, boxsub, L_INSERT);
00413     }
00414     if (xp + wp < x + w) {   /* right sub-box */
00415         boxsub = boxCreate(xp + wp, y, x + w - xp - wp, h);
00416         boxaAddBox(boxa4, boxsub, L_INSERT);
00417     }
00418     if (yp + hp < y + h) {   /* bottom sub-box */
00419         boxsub = boxCreate(x, yp + hp, w, y + h - yp - hp);
00420         boxaAddBox(boxa4, boxsub, L_INSERT);
00421     }
00422 
00423     return boxa4;
00424 }
00425 
00426 
00427 /*!
00428  *  boxaSelectPivotBox()
00429  *
00430  *      Input:  box (containing box; to be split by the pivot box)
00431  *              boxa (boxes of rectangles, from which 1 is to be chosen)
00432  *              maxperim (maximum half-perimeter for which pivot
00433  *                        is selected by proximity to box centroid)
00434  *              fract (fraction of box diagonal that is an acceptable
00435  *                     distance from the box centroid to select the pivot)
00436  *      Return: box (pivot box for subdivision into 4 rectangles), or
00437  *                   null on error
00438  *
00439  *  Notes:
00440  *      (1) This is a tricky piece that wasn't discussed in the
00441  *          Breuel's 2002 paper.
00442  *      (2) Selects a box from boxa whose centroid is reasonably close to
00443  *          the centroid of the containing box (xc, yc) and whose
00444  *          half-perimeter does not exceed the maxperim value.
00445  *      (3) If there are no boxes in the boxa that are small enough,
00446  *          then it selects the smallest of the larger boxes,
00447  *          without reference to its location in the containing box.
00448  *      (4) If a small box has a centroid at a distance from the
00449  *          centroid of the containing box that is not more than
00450  *          the fraction 'fract' of the diagonal of the containing
00451  *          box, that box is chosen as the pivot, terminating the
00452  *          search for the nearest small box.
00453  *      (5) Use fract in the range [0.0 ... 1.0].  Set fract = 0.0
00454  *          to choose the small box nearest the centroid.
00455  *      (6) Choose maxperim to represent a connected component that is
00456  *          small enough so that you don't care about the white space
00457  *          that could be inside of it.
00458  */
00459 static BOX *
00460 boxaSelectPivotBox(BOX       *box,
00461                    BOXA      *boxa,
00462                    l_int32    maxperim,
00463                    l_float32  fract)
00464 {
00465 l_int32    i, n, bw, bh, w, h;
00466 l_int32    smallfound, minindex, perim, minsize;
00467 l_float32  delx, dely, mindist, threshdist, dist, x, y, cx, cy;
00468 BOX       *boxt;
00469 
00470     PROCNAME("boxaSelectPivotBox");
00471 
00472     if (!box)
00473         return (BOX *)ERROR_PTR("box not defined", procName, NULL);
00474     if (!boxa)
00475         return (BOX *)ERROR_PTR("boxa not defined", procName, NULL);
00476     n = boxaGetCount(boxa);
00477     if (n == 0)
00478         return (BOX *)ERROR_PTR("no boxes in boxa", procName, NULL);
00479     if (fract < 0.0 || fract > 1.0) {
00480         L_WARNING("fract out of bounds; using 0.0", procName);
00481         fract = 0.0;
00482     }
00483 
00484     boxGetGeometry(box, NULL, NULL, &w, &h);
00485     boxGetCenter(box, &x, &y);
00486     threshdist = fract * (w * w + h * h);
00487     mindist = 1000000000.;
00488     minindex = 0;
00489     smallfound = FALSE;
00490     for (i = 0; i < n; i++) {
00491         boxt = boxaGetBox(boxa, i, L_CLONE);
00492         boxGetGeometry(boxt, NULL, NULL, &bw, &bh);
00493         boxGetCenter(boxt, &cx, &cy);
00494         boxDestroy(&boxt);
00495         if (bw + bh > maxperim)
00496             continue;
00497         smallfound = TRUE;
00498         delx = cx - x;
00499         dely = cy - y;
00500         dist = delx * delx + dely * dely;
00501         if (dist <= threshdist)
00502             return boxaGetBox(boxa, i, L_COPY);
00503         if (dist < mindist) {
00504             minindex = i;
00505             mindist = dist;
00506         }
00507     }
00508 
00509         /* If there are small boxes but none are within 'fract' of the
00510          * centroid, return the nearest one. */
00511     if (smallfound == TRUE)
00512         return boxaGetBox(boxa, minindex, L_COPY);
00513 
00514         /* No small boxes; return the smallest of the large boxes */
00515     minsize = 1000000000;
00516     minindex = 0;
00517     for (i = 0; i < n; i++) {
00518         boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh);
00519         perim = bw + bh;
00520         if (perim < minsize) {
00521             minsize = perim;
00522             minindex = i;
00523         }
00524     }
00525     return boxaGetBox(boxa, minindex, L_COPY);
00526 }
00527 
00528 
00529 /*!
00530  *  boxCheckIfOverlapIsBig()
00531  *
00532  *      Input:  box (to be tested)
00533  *              boxa (of boxes already stored)
00534  *              maxoverlap (maximum fractional overlap of the input box
00535  *                          by any of the boxes in boxa)
00536  *      Return: 0 if box has small overlap with every box in boxa;
00537  *              1 otherwise or on error
00538  */
00539 static l_int32
00540 boxCheckIfOverlapIsBig(BOX       *box,
00541                        BOXA      *boxa,
00542                        l_float32  maxoverlap)
00543 {
00544 l_int32    i, n, bigoverlap;
00545 l_float32  fract;
00546 BOX       *boxt;
00547 
00548     PROCNAME("boxCheckIfOverlapIsBig");
00549 
00550     if (!box)
00551         return ERROR_INT("box not defined", procName, 1);
00552     if (!boxa)
00553         return ERROR_INT("boxa not defined", procName, 1);
00554     if (maxoverlap < 0.0 || maxoverlap > 1.0)
00555         return ERROR_INT("invalid maxoverlap", procName, 1);
00556 
00557     n = boxaGetCount(boxa);
00558     if (n == 0 || maxoverlap == 1.0)
00559         return 0;
00560 
00561     bigoverlap = 0;
00562     for (i = 0; i < n; i++) {
00563         boxt = boxaGetBox(boxa, i, L_CLONE);
00564         boxOverlapFraction(boxt, box, &fract);
00565         boxDestroy(&boxt);
00566         if (fract > maxoverlap) {
00567             bigoverlap = 1;
00568             break;
00569         }
00570     }
00571 
00572     return bigoverlap;
00573 }
00574 
00575 
00576 /*!
00577  *  boxaPruneSortedOnOverlap()
00578  *
00579  *      Input:  boxas (sorted by size in decreasing order)
00580  *              maxoverlap (maximum fractional overlap of a box by any
00581  *                          of the larger boxes)
00582  *      Return: boxad (pruned), or null on error
00583  *
00584  *  Notes:
00585  *      (1) This selectively removes smaller boxes when they are overlapped
00586  *          by any larger box by more than the input 'maxoverlap' fraction.
00587  *      (2) To avoid all pruning, use maxoverlap = 1.0.  To select only
00588  *          boxes that have no overlap with each other (maximal pruning),
00589  *          set maxoverlap = 0.0.
00590  *      (3) If there are no boxes in boxas, returns an empty boxa.
00591  */
00592 BOXA *
00593 boxaPruneSortedOnOverlap(BOXA      *boxas,
00594                          l_float32  maxoverlap)
00595 {
00596 l_int32    i, j, n, remove;
00597 l_float32  fract;
00598 BOX       *box1, *box2;
00599 BOXA      *boxad;
00600 
00601     PROCNAME("boxaPruneSortedOnOverlap");
00602 
00603     if (!boxas)
00604         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
00605     if (maxoverlap < 0.0 || maxoverlap > 1.0)
00606         return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL);
00607 
00608     n = boxaGetCount(boxas);
00609     if (n == 0 || maxoverlap == 1.0)
00610         return boxaCopy(boxas, L_COPY);
00611 
00612     boxad = boxaCreate(0);
00613     box2 = boxaGetBox(boxas, 0, L_COPY);
00614     boxaAddBox(boxad, box2, L_INSERT);
00615     for (j = 1; j < n; j++) {   /* prune on j */
00616         box2 = boxaGetBox(boxas, j, L_COPY);
00617         remove = FALSE;
00618         for (i = 0; i < j; i++) {   /* test on i */
00619             box1 = boxaGetBox(boxas, i, L_CLONE);
00620             boxOverlapFraction(box1, box2, &fract);
00621             boxDestroy(&box1);
00622             if (fract > maxoverlap) {
00623                 remove = TRUE;
00624                 break;
00625             }
00626         }
00627         if (remove == TRUE)
00628             boxDestroy(&box2);
00629         else
00630             boxaAddBox(boxad, box2, L_INSERT);
00631     }
00632 
00633     return boxad;
00634 }
00635 
00636 
00637 
00638 
00639 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines