Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * pageseg.c 00018 * 00019 * Top level page segmentation 00020 * l_int32 pixGetRegionsBinary() 00021 * 00022 * Halftone region extraction 00023 * PIX *pixGenHalftoneMask() 00024 * 00025 * Textline extraction 00026 * PIX *pixGenTextlineMask() 00027 * 00028 * Textblock extraction 00029 * PIX *pixGenTextblockMask() 00030 */ 00031 00032 #include <stdio.h> 00033 #include <stdlib.h> 00034 #include "allheaders.h" 00035 00036 00037 /*------------------------------------------------------------------* 00038 * Top level page segmentation * 00039 *------------------------------------------------------------------*/ 00040 /*! 00041 * pixGetRegionsBinary() 00042 * 00043 * Input: pixs (1 bpp, assumed to be 300 to 400 ppi) 00044 * &pixhm (<optional return> halftone mask) 00045 * &pixtm (<optional return> textline mask) 00046 * &pixtb (<optional return> textblock mask) 00047 * debug (flag: set to 1 for debug output) 00048 * Return: 0 if OK, 1 on error 00049 * 00050 * Notes: 00051 * (1) It is best to deskew the image before segmenting. 00052 * (2) The debug flag enables a number of outputs. These 00053 * are included to show how to generate and save/display 00054 * these results. 00055 */ 00056 l_int32 00057 pixGetRegionsBinary(PIX *pixs, 00058 PIX **ppixhm, 00059 PIX **ppixtm, 00060 PIX **ppixtb, 00061 l_int32 debug) 00062 { 00063 char *tempname; 00064 l_int32 htfound, tlfound; 00065 PIX *pixr, *pixt1, *pixt2; 00066 PIX *pixtext; /* text pixels only */ 00067 PIX *pixhm2; /* halftone mask; 2x reduction */ 00068 PIX *pixhm; /* halftone mask; */ 00069 PIX *pixtm2; /* textline mask; 2x reduction */ 00070 PIX *pixtm; /* textline mask */ 00071 PIX *pixvws; /* vertical white space mask */ 00072 PIX *pixtb2; /* textblock mask; 2x reduction */ 00073 PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */ 00074 PIX *pixtb; /* textblock mask */ 00075 00076 PROCNAME("pixGetRegionsBinary"); 00077 00078 if (ppixhm) *ppixhm = NULL; 00079 if (ppixtm) *ppixtm = NULL; 00080 if (ppixtb) *ppixtb = NULL; 00081 if (!pixs) 00082 return ERROR_INT("pixs not defined", procName, 1); 00083 if (pixGetDepth(pixs) != 1) 00084 return ERROR_INT("pixs not 1 bpp", procName, 1); 00085 00086 /* 2x reduce, to 150 -200 ppi */ 00087 pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); 00088 pixDisplayWrite(pixr, debug); 00089 00090 /* Get the halftone mask */ 00091 pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug); 00092 00093 /* Get the textline mask from the text pixels */ 00094 pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug); 00095 00096 /* Get the textblock mask from the textline mask */ 00097 pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug); 00098 pixDestroy(&pixr); 00099 pixDestroy(&pixtext); 00100 pixDestroy(&pixvws); 00101 00102 /* Remove small components from the mask, where a small 00103 * component is defined as one with both width and height < 60 */ 00104 pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER, 00105 L_SELECT_IF_GTE, NULL); 00106 pixDestroy(&pixtb2); 00107 pixDisplayWriteFormat(pixtbf2, debug, IFF_PNG); 00108 00109 /* Expand all masks to full resolution, and do filling or 00110 * small dilations for better coverage. */ 00111 pixhm = pixExpandReplicate(pixhm2, 2); 00112 pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8); 00113 pixOr(pixhm, pixhm, pixt1); 00114 pixDestroy(&pixt1); 00115 pixDisplayWriteFormat(pixhm, debug, IFF_PNG); 00116 00117 pixt1 = pixExpandReplicate(pixtm2, 2); 00118 pixtm = pixDilateBrick(NULL, pixt1, 3, 3); 00119 pixDestroy(&pixt1); 00120 pixDisplayWriteFormat(pixtm, debug, IFF_PNG); 00121 00122 pixt1 = pixExpandReplicate(pixtbf2, 2); 00123 pixtb = pixDilateBrick(NULL, pixt1, 3, 3); 00124 pixDestroy(&pixt1); 00125 pixDisplayWriteFormat(pixtb, debug, IFF_PNG); 00126 00127 pixDestroy(&pixhm2); 00128 pixDestroy(&pixtm2); 00129 pixDestroy(&pixtbf2); 00130 00131 /* Debug: identify objects that are neither text nor halftone image */ 00132 if (debug) { 00133 pixt1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */ 00134 pixt2 = pixSubtract(NULL, pixt1, pixhm); /* remove halftone pixels */ 00135 pixDisplayWriteFormat(pixt2, 1, IFF_PNG); 00136 pixDestroy(&pixt1); 00137 pixDestroy(&pixt2); 00138 } 00139 00140 /* Debug: display textline components with random colors */ 00141 if (debug) { 00142 l_int32 w, h; 00143 BOXA *boxa; 00144 PIXA *pixa; 00145 boxa = pixConnComp(pixtm, &pixa, 8); 00146 pixGetDimensions(pixtm, &w, &h, NULL); 00147 pixt1 = pixaDisplayRandomCmap(pixa, w, h); 00148 pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255); 00149 pixDisplay(pixt1, 100, 100); 00150 pixDisplayWriteFormat(pixt1, 1, IFF_PNG); 00151 pixaDestroy(&pixa); 00152 boxaDestroy(&boxa); 00153 pixDestroy(&pixt1); 00154 } 00155 00156 /* Debug: identify the outlines of each textblock */ 00157 if (debug) { 00158 PIXCMAP *cmap; 00159 PTAA *ptaa; 00160 ptaa = pixGetOuterBordersPtaa(pixtb); 00161 tempname = genTempFilename("/tmp", "tb_outlines.ptaa", 0, 0); 00162 ptaaWrite(tempname, ptaa, 1); 00163 FREE(tempname); 00164 pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1); 00165 cmap = pixGetColormap(pixt1); 00166 pixcmapResetColor(cmap, 0, 130, 130, 130); 00167 pixDisplay(pixt1, 500, 100); 00168 pixDisplayWriteFormat(pixt1, 1, IFF_PNG); 00169 pixDestroy(&pixt1); 00170 ptaaDestroy(&ptaa); 00171 } 00172 00173 /* Debug: get b.b. for all mask components */ 00174 if (debug) { 00175 BOXA *bahm, *batm, *batb; 00176 bahm = pixConnComp(pixhm, NULL, 4); 00177 batm = pixConnComp(pixtm, NULL, 4); 00178 batb = pixConnComp(pixtb, NULL, 4); 00179 tempname = genTempFilename("/tmp", "htmask.boxa", 0, 0); 00180 boxaWrite(tempname, bahm); 00181 FREE(tempname); 00182 tempname = genTempFilename("/tmp", "textmask.boxa", 0, 0); 00183 boxaWrite(tempname, batm); 00184 FREE(tempname); 00185 tempname = genTempFilename("/tmp", "textblock.boxa", 0, 0); 00186 boxaWrite(tempname, batb); 00187 FREE(tempname); 00188 boxaDestroy(&bahm); 00189 boxaDestroy(&batm); 00190 boxaDestroy(&batb); 00191 } 00192 00193 if (ppixhm) 00194 *ppixhm = pixhm; 00195 else 00196 pixDestroy(&pixhm); 00197 if (ppixtm) 00198 *ppixtm = pixtm; 00199 else 00200 pixDestroy(&pixtm); 00201 if (ppixtb) 00202 *ppixtb = pixtb; 00203 else 00204 pixDestroy(&pixtb); 00205 00206 return 0; 00207 } 00208 00209 00210 /*------------------------------------------------------------------* 00211 * Halftone region extraction * 00212 *------------------------------------------------------------------*/ 00213 /*! 00214 * pixGenHalftoneMask() 00215 * 00216 * Input: pixs (1 bpp, assumed to be 150 to 200 ppi) 00217 * &pixtext (<optional return> text part of pixs) 00218 * &htfound (<optional return> 1 if the mask is not empty) 00219 * debug (flag: 1 for debug output) 00220 * Return: pixd (halftone mask), or null on error 00221 */ 00222 PIX * 00223 pixGenHalftoneMask(PIX *pixs, 00224 PIX **ppixtext, 00225 l_int32 *phtfound, 00226 l_int32 debug) 00227 { 00228 l_int32 empty; 00229 PIX *pixt1, *pixt2, *pixhs, *pixhm, *pixd; 00230 00231 PROCNAME("pixGenHalftoneMask"); 00232 00233 if (ppixtext) *ppixtext = NULL; 00234 if (!pixs) 00235 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); 00236 if (pixGetDepth(pixs) != 1) 00237 return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); 00238 00239 /* Compute seed for halftone parts at 8x reduction */ 00240 pixt1 = pixReduceRankBinaryCascade(pixs, 4, 4, 3, 0); 00241 pixt2 = pixOpenBrick(NULL, pixt1, 5, 5); 00242 pixhs = pixExpandReplicate(pixt2, 8); /* back to 2x reduction */ 00243 pixDestroy(&pixt1); 00244 pixDestroy(&pixt2); 00245 pixDisplayWriteFormat(pixhs, debug, IFF_PNG); 00246 00247 /* Compute mask for connected regions */ 00248 pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4); 00249 pixDisplayWriteFormat(pixhm, debug, IFF_PNG); 00250 00251 /* Fill seed into mask to get halftone mask */ 00252 pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4); 00253 00254 #if 0 00255 /* Moderate opening to remove thin lines, etc. */ 00256 pixOpenBrick(pixd, pixd, 10, 10); 00257 pixDisplayWrite(pixd, debug); 00258 #endif 00259 00260 /* Check if mask is empty */ 00261 pixZero(pixd, &empty); 00262 if (phtfound) { 00263 *phtfound = 0; 00264 if (!empty) 00265 *phtfound = 1; 00266 } 00267 00268 /* Optionally, get all pixels that are not under the halftone mask */ 00269 if (ppixtext) { 00270 if (empty) 00271 *ppixtext = pixCopy(NULL, pixs); 00272 else 00273 *ppixtext = pixSubtract(NULL, pixs, pixd); 00274 pixDisplayWriteFormat(*ppixtext, debug, IFF_PNG); 00275 } 00276 00277 pixDestroy(&pixhs); 00278 pixDestroy(&pixhm); 00279 return pixd; 00280 } 00281 00282 00283 /*------------------------------------------------------------------* 00284 * Textline extraction * 00285 *------------------------------------------------------------------*/ 00286 /*! 00287 * pixGenTextlineMask() 00288 * 00289 * Input: pixs (1 bpp, assumed to be 150 to 200 ppi) 00290 * &pixvws (<return> vertical whitespace mask) 00291 * &tlfound (<optional return> 1 if the mask is not empty) 00292 * debug (flag: 1 for debug output) 00293 * Return: pixd (textline mask), or null on error 00294 * 00295 * Notes: 00296 * (1) The input pixs should be deskewed. 00297 * (2) pixs should have no halftone pixels. 00298 * (3) Both the input image and the returned textline mask 00299 * are at the same resolution. 00300 */ 00301 PIX * 00302 pixGenTextlineMask(PIX *pixs, 00303 PIX **ppixvws, 00304 l_int32 *ptlfound, 00305 l_int32 debug) 00306 { 00307 l_int32 empty; 00308 PIX *pixt1, *pixt2, *pixvws, *pixd; 00309 00310 PROCNAME("pixGenTextlineMask"); 00311 00312 if (!pixs) 00313 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); 00314 if (!ppixvws) 00315 return (PIX *)ERROR_PTR("&pixvws not defined", procName, NULL); 00316 if (pixGetDepth(pixs) != 1) 00317 return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); 00318 00319 /* First we need a vertical whitespace mask. Invert the image. */ 00320 pixt1 = pixInvert(NULL, pixs); 00321 00322 /* The whitespace mask will break textlines where there 00323 * is a large amount of white space below or above. 00324 * This can be prevented by identifying regions of the 00325 * inverted image that have large horizontal extent (bigger than 00326 * the separation between columns) and significant 00327 * vertical extent (bigger than the separation between 00328 * textlines), and subtracting this from the bg. */ 00329 pixt2 = pixMorphCompSequence(pixt1, "o80.60", 0); 00330 pixSubtract(pixt1, pixt1, pixt2); 00331 pixDisplayWriteFormat(pixt1, debug, IFF_PNG); 00332 pixDestroy(&pixt2); 00333 00334 /* Identify vertical whitespace by opening the remaining bg. 00335 * o5.1 removes thin vertical bg lines and o1.200 extracts 00336 * long vertical bg lines. */ 00337 pixvws = pixMorphCompSequence(pixt1, "o5.1 + o1.200", 0); 00338 *ppixvws = pixvws; 00339 pixDisplayWriteFormat(pixvws, debug, IFF_PNG); 00340 pixDestroy(&pixt1); 00341 00342 /* Three steps to getting text line mask: 00343 * (1) close the characters and words in the textlines 00344 * (2) open the vertical whitespace corridors back up 00345 * (3) small opening to remove noise */ 00346 pixt1 = pixCloseSafeBrick(NULL, pixs, 30, 1); 00347 pixDisplayWrite(pixt1, debug); 00348 pixd = pixSubtract(NULL, pixt1, pixvws); 00349 pixOpenBrick(pixd, pixd, 3, 3); 00350 pixDisplayWriteFormat(pixd, debug, IFF_PNG); 00351 pixDestroy(&pixt1); 00352 00353 /* Check if text line mask is empty */ 00354 if (ptlfound) { 00355 *ptlfound = 0; 00356 pixZero(pixd, &empty); 00357 if (!empty) 00358 *ptlfound = 1; 00359 } 00360 00361 return pixd; 00362 } 00363 00364 00365 /*------------------------------------------------------------------* 00366 * Textblock extraction * 00367 *------------------------------------------------------------------*/ 00368 /*! 00369 * pixGenTextblockMask() 00370 * 00371 * Input: pixs (1 bpp, textline mask, assumed to be 150 to 200 ppi) 00372 * pixvws (vertical white space mask) 00373 * debug (flag: 1 for debug output) 00374 * Return: pixd (textblock mask), or null on error 00375 * 00376 * Notes: 00377 * (1) Both the input masks (textline and vertical white space) and 00378 * the returned textblock mask are at the same resolution. 00379 * (2) The result is somewhat noisy, in that small "blocks" of 00380 * text may be included. These can be removed by post-processing, 00381 * using, e.g., 00382 * pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER, 00383 * L_SELECT_IF_GTE, NULL); 00384 */ 00385 PIX * 00386 pixGenTextblockMask(PIX *pixs, 00387 PIX *pixvws, 00388 l_int32 debug) 00389 { 00390 PIX *pixt1, *pixt2, *pixt3, *pixd; 00391 00392 PROCNAME("pixGenTextblockMask"); 00393 00394 if (!pixs) 00395 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); 00396 if (!pixvws) 00397 return (PIX *)ERROR_PTR("pixvws not defined", procName, NULL); 00398 if (pixGetDepth(pixs) != 1) 00399 return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL); 00400 00401 /* Join pixels vertically to make a textblock mask */ 00402 pixt1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0); 00403 pixDisplayWriteFormat(pixt1, debug, IFF_PNG); 00404 00405 /* Solidify the textblock mask and remove noise: 00406 * (1) For each cc, close the blocks and dilate slightly 00407 * to form a solid mask. 00408 * (2) Small horizontal closing between components. 00409 * (3) Open the white space between columns, again. 00410 * (4) Remove small components. */ 00411 pixt2 = pixMorphSequenceByComponent(pixt1, "c30.30 + d3.3", 8, 0, 0, NULL); 00412 pixCloseSafeBrick(pixt2, pixt2, 10, 1); 00413 pixDisplayWriteFormat(pixt2, debug, IFF_PNG); 00414 pixt3 = pixSubtract(NULL, pixt2, pixvws); 00415 pixDisplayWriteFormat(pixt3, debug, IFF_PNG); 00416 pixd = pixSelectBySize(pixt3, 25, 5, 8, L_SELECT_IF_BOTH, 00417 L_SELECT_IF_GTE, NULL); 00418 pixDisplayWriteFormat(pixd, debug, IFF_PNG); 00419 00420 pixDestroy(&pixt1); 00421 pixDestroy(&pixt2); 00422 pixDestroy(&pixt3); 00423 return pixd; 00424 } 00425 00426