Leptonica 1.68
C Image Processing Library

pageseg.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  *   pageseg.c
00018  *
00019  *      Top level page segmentation
00020  *          l_int32   pixGetRegionsBinary()
00021  *
00022  *      Halftone region extraction
00023  *          PIX      *pixGenHalftoneMask()
00024  *
00025  *      Textline extraction
00026  *          PIX      *pixGenTextlineMask()
00027  *
00028  *      Textblock extraction
00029  *          PIX      *pixGenTextblockMask()
00030  */
00031 
00032 #include <stdio.h>
00033 #include <stdlib.h>
00034 #include "allheaders.h"
00035 
00036 
00037 /*------------------------------------------------------------------*
00038  *                     Top level page segmentation                  *
00039  *------------------------------------------------------------------*/
00040 /*!
00041  *  pixGetRegionsBinary()
00042  *
00043  *      Input:  pixs (1 bpp, assumed to be 300 to 400 ppi)
00044  *              &pixhm (<optional return> halftone mask)
00045  *              &pixtm (<optional return> textline mask)
00046  *              &pixtb (<optional return> textblock mask)
00047  *              debug (flag: set to 1 for debug output)
00048  *      Return: 0 if OK, 1 on error
00049  *
00050  *  Notes:
00051  *      (1) It is best to deskew the image before segmenting.
00052  *      (2) The debug flag enables a number of outputs.  These
00053  *          are included to show how to generate and save/display
00054  *          these results.
00055  */
00056 l_int32
00057 pixGetRegionsBinary(PIX     *pixs,
00058                     PIX    **ppixhm,
00059                     PIX    **ppixtm,
00060                     PIX    **ppixtb,
00061                     l_int32  debug)
00062 {
00063 char    *tempname;
00064 l_int32  htfound, tlfound;
00065 PIX     *pixr, *pixt1, *pixt2;
00066 PIX     *pixtext;  /* text pixels only */
00067 PIX     *pixhm2;   /* halftone mask; 2x reduction */
00068 PIX     *pixhm;    /* halftone mask;  */
00069 PIX     *pixtm2;   /* textline mask; 2x reduction */
00070 PIX     *pixtm;    /* textline mask */
00071 PIX     *pixvws;   /* vertical white space mask */
00072 PIX     *pixtb2;   /* textblock mask; 2x reduction */
00073 PIX     *pixtbf2;  /* textblock mask; 2x reduction; small comps filtered */
00074 PIX     *pixtb;    /* textblock mask */
00075 
00076     PROCNAME("pixGetRegionsBinary");
00077 
00078     if (ppixhm) *ppixhm = NULL;
00079     if (ppixtm) *ppixtm = NULL;
00080     if (ppixtb) *ppixtb = NULL;
00081     if (!pixs)
00082         return ERROR_INT("pixs not defined", procName, 1);
00083     if (pixGetDepth(pixs) != 1)
00084         return ERROR_INT("pixs not 1 bpp", procName, 1);
00085 
00086         /* 2x reduce, to 150 -200 ppi */
00087     pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
00088     pixDisplayWrite(pixr, debug);
00089 
00090         /* Get the halftone mask */
00091     pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug);
00092 
00093         /* Get the textline mask from the text pixels */
00094     pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug);
00095 
00096         /* Get the textblock mask from the textline mask */
00097     pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug);
00098     pixDestroy(&pixr);
00099     pixDestroy(&pixtext);
00100     pixDestroy(&pixvws);
00101 
00102         /* Remove small components from the mask, where a small
00103          * component is defined as one with both width and height < 60 */
00104     pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
00105                               L_SELECT_IF_GTE, NULL);
00106     pixDestroy(&pixtb2);
00107     pixDisplayWriteFormat(pixtbf2, debug, IFF_PNG);
00108 
00109         /* Expand all masks to full resolution, and do filling or
00110          * small dilations for better coverage. */
00111     pixhm = pixExpandReplicate(pixhm2, 2);
00112     pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
00113     pixOr(pixhm, pixhm, pixt1);
00114     pixDestroy(&pixt1);
00115     pixDisplayWriteFormat(pixhm, debug, IFF_PNG);
00116 
00117     pixt1 = pixExpandReplicate(pixtm2, 2);
00118     pixtm = pixDilateBrick(NULL, pixt1, 3, 3);
00119     pixDestroy(&pixt1);
00120     pixDisplayWriteFormat(pixtm, debug, IFF_PNG);
00121 
00122     pixt1 = pixExpandReplicate(pixtbf2, 2);
00123     pixtb = pixDilateBrick(NULL, pixt1, 3, 3);
00124     pixDestroy(&pixt1);
00125     pixDisplayWriteFormat(pixtb, debug, IFF_PNG);
00126 
00127     pixDestroy(&pixhm2);
00128     pixDestroy(&pixtm2);
00129     pixDestroy(&pixtbf2);
00130 
00131         /* Debug: identify objects that are neither text nor halftone image */
00132     if (debug) {
00133         pixt1 = pixSubtract(NULL, pixs, pixtm);  /* remove text pixels */
00134         pixt2 = pixSubtract(NULL, pixt1, pixhm);  /* remove halftone pixels */
00135         pixDisplayWriteFormat(pixt2, 1, IFF_PNG);
00136         pixDestroy(&pixt1);
00137         pixDestroy(&pixt2);
00138     }
00139 
00140         /* Debug: display textline components with random colors */
00141     if (debug) {
00142         l_int32  w, h;
00143         BOXA    *boxa;
00144         PIXA    *pixa;
00145         boxa = pixConnComp(pixtm, &pixa, 8);
00146         pixGetDimensions(pixtm, &w, &h, NULL);
00147         pixt1 = pixaDisplayRandomCmap(pixa, w, h);
00148         pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255);
00149         pixDisplay(pixt1, 100, 100);
00150         pixDisplayWriteFormat(pixt1, 1, IFF_PNG);
00151         pixaDestroy(&pixa);
00152         boxaDestroy(&boxa);
00153         pixDestroy(&pixt1);
00154     }
00155 
00156         /* Debug: identify the outlines of each textblock */
00157     if (debug) {
00158         PIXCMAP  *cmap;
00159         PTAA     *ptaa;
00160         ptaa = pixGetOuterBordersPtaa(pixtb);
00161         tempname = genTempFilename("/tmp", "tb_outlines.ptaa", 0, 0);
00162         ptaaWrite(tempname, ptaa, 1);
00163         FREE(tempname);
00164         pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
00165         cmap = pixGetColormap(pixt1);
00166         pixcmapResetColor(cmap, 0, 130, 130, 130);
00167         pixDisplay(pixt1, 500, 100);
00168         pixDisplayWriteFormat(pixt1, 1, IFF_PNG);
00169         pixDestroy(&pixt1);
00170         ptaaDestroy(&ptaa);
00171     }
00172 
00173         /* Debug: get b.b. for all mask components */
00174     if (debug) {
00175         BOXA  *bahm, *batm, *batb;
00176         bahm = pixConnComp(pixhm, NULL, 4);
00177         batm = pixConnComp(pixtm, NULL, 4);
00178         batb = pixConnComp(pixtb, NULL, 4);
00179         tempname = genTempFilename("/tmp", "htmask.boxa", 0, 0);
00180         boxaWrite(tempname, bahm);
00181         FREE(tempname);
00182         tempname = genTempFilename("/tmp", "textmask.boxa", 0, 0);
00183         boxaWrite(tempname, batm);
00184         FREE(tempname);
00185         tempname = genTempFilename("/tmp", "textblock.boxa", 0, 0);
00186         boxaWrite(tempname, batb);
00187         FREE(tempname);
00188         boxaDestroy(&bahm);
00189         boxaDestroy(&batm);
00190         boxaDestroy(&batb);
00191     }
00192 
00193     if (ppixhm)
00194         *ppixhm = pixhm;
00195     else
00196         pixDestroy(&pixhm);
00197     if (ppixtm)
00198         *ppixtm = pixtm;
00199     else
00200         pixDestroy(&pixtm);
00201     if (ppixtb)
00202         *ppixtb = pixtb;
00203     else
00204         pixDestroy(&pixtb);
00205 
00206     return 0;
00207 }
00208 
00209 
00210 /*------------------------------------------------------------------*
00211  *                    Halftone region extraction                    *
00212  *------------------------------------------------------------------*/
00213 /*!
00214  *  pixGenHalftoneMask()
00215  *
00216  *      Input:  pixs (1 bpp, assumed to be 150 to 200 ppi)
00217  *              &pixtext (<optional return> text part of pixs)
00218  *              &htfound (<optional return> 1 if the mask is not empty)
00219  *              debug (flag: 1 for debug output)
00220  *      Return: pixd (halftone mask), or null on error
00221  */
00222 PIX *
00223 pixGenHalftoneMask(PIX      *pixs,
00224                    PIX     **ppixtext,
00225                    l_int32  *phtfound,
00226                    l_int32   debug)
00227 {
00228 l_int32  empty;
00229 PIX     *pixt1, *pixt2, *pixhs, *pixhm, *pixd;
00230 
00231     PROCNAME("pixGenHalftoneMask");
00232 
00233     if (ppixtext) *ppixtext = NULL;
00234     if (!pixs)
00235         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
00236     if (pixGetDepth(pixs) != 1)
00237         return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
00238 
00239         /* Compute seed for halftone parts at 8x reduction */
00240     pixt1 = pixReduceRankBinaryCascade(pixs, 4, 4, 3, 0);
00241     pixt2 = pixOpenBrick(NULL, pixt1, 5, 5);
00242     pixhs = pixExpandReplicate(pixt2, 8);  /* back to 2x reduction */
00243     pixDestroy(&pixt1);
00244     pixDestroy(&pixt2);
00245     pixDisplayWriteFormat(pixhs, debug, IFF_PNG);
00246 
00247         /* Compute mask for connected regions */
00248     pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4);
00249     pixDisplayWriteFormat(pixhm, debug, IFF_PNG);
00250 
00251         /* Fill seed into mask to get halftone mask */
00252     pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);
00253 
00254 #if 0
00255         /* Moderate opening to remove thin lines, etc. */
00256     pixOpenBrick(pixd, pixd, 10, 10);
00257     pixDisplayWrite(pixd, debug);
00258 #endif
00259 
00260         /* Check if mask is empty */
00261     pixZero(pixd, &empty);
00262     if (phtfound) {
00263         *phtfound = 0;
00264         if (!empty)
00265             *phtfound = 1;
00266     }
00267 
00268         /* Optionally, get all pixels that are not under the halftone mask */
00269     if (ppixtext) {
00270         if (empty)
00271             *ppixtext = pixCopy(NULL, pixs);
00272         else
00273             *ppixtext = pixSubtract(NULL, pixs, pixd);
00274         pixDisplayWriteFormat(*ppixtext, debug, IFF_PNG);
00275     }
00276 
00277     pixDestroy(&pixhs);
00278     pixDestroy(&pixhm);
00279     return pixd;
00280 }
00281 
00282 
00283 /*------------------------------------------------------------------*
00284  *                         Textline extraction                      *
00285  *------------------------------------------------------------------*/
00286 /*!
00287  *  pixGenTextlineMask()
00288  *
00289  *      Input:  pixs (1 bpp, assumed to be 150 to 200 ppi)
00290  *              &pixvws (<return> vertical whitespace mask)
00291  *              &tlfound (<optional return> 1 if the mask is not empty)
00292  *              debug (flag: 1 for debug output)
00293  *      Return: pixd (textline mask), or null on error
00294  *
00295  *  Notes:
00296  *      (1) The input pixs should be deskewed.
00297  *      (2) pixs should have no halftone pixels.
00298  *      (3) Both the input image and the returned textline mask
00299  *          are at the same resolution.
00300  */
00301 PIX *
00302 pixGenTextlineMask(PIX      *pixs,
00303                    PIX     **ppixvws,
00304                    l_int32  *ptlfound,
00305                    l_int32   debug)
00306 {
00307 l_int32  empty;
00308 PIX     *pixt1, *pixt2, *pixvws, *pixd;
00309 
00310     PROCNAME("pixGenTextlineMask");
00311 
00312     if (!pixs)
00313         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
00314     if (!ppixvws)
00315         return (PIX *)ERROR_PTR("&pixvws not defined", procName, NULL);
00316     if (pixGetDepth(pixs) != 1)
00317         return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
00318 
00319         /* First we need a vertical whitespace mask.  Invert the image. */
00320     pixt1 = pixInvert(NULL, pixs);
00321 
00322         /* The whitespace mask will break textlines where there
00323          * is a large amount of white space below or above.
00324          * This can be prevented by identifying regions of the
00325          * inverted image that have large horizontal extent (bigger than
00326          * the separation between columns) and significant
00327          * vertical extent (bigger than the separation between
00328          * textlines), and subtracting this from the bg. */
00329     pixt2 = pixMorphCompSequence(pixt1, "o80.60", 0);
00330     pixSubtract(pixt1, pixt1, pixt2);
00331     pixDisplayWriteFormat(pixt1, debug, IFF_PNG);
00332     pixDestroy(&pixt2);
00333 
00334         /* Identify vertical whitespace by opening the remaining bg.
00335          * o5.1 removes thin vertical bg lines and o1.200 extracts
00336          * long vertical bg lines. */
00337     pixvws = pixMorphCompSequence(pixt1, "o5.1 + o1.200", 0);
00338     *ppixvws = pixvws;
00339     pixDisplayWriteFormat(pixvws, debug, IFF_PNG);
00340     pixDestroy(&pixt1);
00341 
00342         /* Three steps to getting text line mask:
00343          *   (1) close the characters and words in the textlines
00344          *   (2) open the vertical whitespace corridors back up
00345          *   (3) small opening to remove noise    */
00346     pixt1 = pixCloseSafeBrick(NULL, pixs, 30, 1);
00347     pixDisplayWrite(pixt1, debug);
00348     pixd = pixSubtract(NULL, pixt1, pixvws);
00349     pixOpenBrick(pixd, pixd, 3, 3);
00350     pixDisplayWriteFormat(pixd, debug, IFF_PNG);
00351     pixDestroy(&pixt1);
00352 
00353         /* Check if text line mask is empty */
00354     if (ptlfound) {
00355         *ptlfound = 0;
00356         pixZero(pixd, &empty);
00357         if (!empty)
00358             *ptlfound = 1;
00359     }
00360 
00361     return pixd;
00362 }
00363 
00364 
00365 /*------------------------------------------------------------------*
00366  *                       Textblock extraction                       *
00367  *------------------------------------------------------------------*/
00368 /*!
00369  *  pixGenTextblockMask()
00370  *
00371  *      Input:  pixs (1 bpp, textline mask, assumed to be 150 to 200 ppi)
00372  *              pixvws (vertical white space mask) 
00373  *              debug (flag: 1 for debug output)
00374  *      Return: pixd (textblock mask), or null on error
00375  *
00376  *  Notes:
00377  *      (1) Both the input masks (textline and vertical white space) and
00378  *          the returned textblock mask are at the same resolution.
00379  *      (2) The result is somewhat noisy, in that small "blocks" of
00380  *          text may be included.  These can be removed by post-processing,
00381  *          using, e.g.,
00382  *             pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
00383  *                             L_SELECT_IF_GTE, NULL);
00384  */
00385 PIX *
00386 pixGenTextblockMask(PIX     *pixs,
00387                     PIX     *pixvws,
00388                     l_int32  debug)
00389 {
00390 PIX  *pixt1, *pixt2, *pixt3, *pixd;
00391 
00392     PROCNAME("pixGenTextblockMask");
00393 
00394     if (!pixs)
00395         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
00396     if (!pixvws)
00397         return (PIX *)ERROR_PTR("pixvws not defined", procName, NULL);
00398     if (pixGetDepth(pixs) != 1)
00399         return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
00400 
00401         /* Join pixels vertically to make a textblock mask */
00402     pixt1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
00403     pixDisplayWriteFormat(pixt1, debug, IFF_PNG);
00404 
00405         /* Solidify the textblock mask and remove noise: 
00406          *   (1) For each cc, close the blocks and dilate slightly
00407          *       to form a solid mask.
00408          *   (2) Small horizontal closing between components.
00409          *   (3) Open the white space between columns, again.
00410          *   (4) Remove small components. */
00411     pixt2 = pixMorphSequenceByComponent(pixt1, "c30.30 + d3.3", 8, 0, 0, NULL);
00412     pixCloseSafeBrick(pixt2, pixt2, 10, 1);
00413     pixDisplayWriteFormat(pixt2, debug, IFF_PNG);
00414     pixt3 = pixSubtract(NULL, pixt2, pixvws);
00415     pixDisplayWriteFormat(pixt3, debug, IFF_PNG);
00416     pixd = pixSelectBySize(pixt3, 25, 5, 8, L_SELECT_IF_BOTH,
00417                             L_SELECT_IF_GTE, NULL);
00418     pixDisplayWriteFormat(pixd, debug, IFF_PNG);
00419 
00420     pixDestroy(&pixt1);
00421     pixDestroy(&pixt2);
00422     pixDestroy(&pixt3);
00423     return pixd;
00424 }
00425 
00426 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines