Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * livre_pageseg.c 00018 * 00019 * This gives examples of the use of binary morphology for 00020 * some simple and fast document segmentation operations. 00021 * 00022 * The operations are carried out at 2x reduction. 00023 * For images scanned at 300 ppi, this is typically 00024 * high enough resolution for accurate results. 00025 * 00026 * This generates several of the figures used in Chapter 18 of 00027 * "Mathematical morphology: from theory to applications", 00028 * edited by Laurent Najman and Hugues Talbot. Published by 00029 * Hermes Scientific Publishing, Ltd, 2010. 00030 */ 00031 00032 #include <stdio.h> 00033 #include <stdlib.h> 00034 #include "allheaders.h" 00035 00036 /* Control the display output */ 00037 #define DFLAG 0 00038 00039 00040 l_int32 DoPageSegmentation(PIX *pixs, l_int32 which); 00041 00042 main(int argc, 00043 char **argv) 00044 { 00045 char *filein; 00046 l_int32 i; 00047 PIX *pixs; /* input image sould be at least 300 ppi */ 00048 static char mainName[] = "livre_pageseg"; 00049 00050 if (argc != 2) 00051 exit(ERROR_INT(" Syntax: livre_pageseg filein", mainName, 1)); 00052 00053 filein = argv[1]; 00054 if ((pixs = pixRead(filein)) == NULL) 00055 exit(ERROR_INT("pix not made", mainName, 1)); 00056 00057 for (i = 1; i <= 4; i++) 00058 DoPageSegmentation(pixs, i); 00059 pixDestroy(&pixs); 00060 return 0; 00061 } 00062 00063 00064 l_int32 00065 DoPageSegmentation(PIX *pixs, /* should be at least 300 ppi */ 00066 l_int32 which) /* 1, 2, 3, 4 */ 00067 { 00068 char buf[256]; 00069 l_int32 index, zero; 00070 BOXA *boxatm, *boxahm; 00071 PIX *pixr; /* image reduced to 150 ppi */ 00072 PIX *pixhs; /* image of halftone seed, 150 ppi */ 00073 PIX *pixm; /* image of mask of components, 150 ppi */ 00074 PIX *pixhm1; /* image of halftone mask, 150 ppi */ 00075 PIX *pixhm2; /* image of halftone mask, 300 ppi */ 00076 PIX *pixht; /* image of halftone components, 150 ppi */ 00077 PIX *pixnht; /* image without halftone components, 150 ppi */ 00078 PIX *pixi; /* inverted image, 150 ppi */ 00079 PIX *pixvws; /* image of vertical whitespace, 150 ppi */ 00080 PIX *pixtm1; /* image of closed textlines, 150 ppi */ 00081 PIX *pixtm2; /* image of refined text line mask, 150 ppi */ 00082 PIX *pixtm3; /* image of refined text line mask, 300 ppi */ 00083 PIX *pixtb1; /* image of text block mask, 150 ppi */ 00084 PIX *pixtb2; /* image of text block mask, 300 ppi */ 00085 PIX *pixnon; /* image of non-text or halftone, 150 ppi */ 00086 PIX *pixt1, *pixt2, *pixt3, *pixt4, *pixt5; 00087 PIXA *pixa; 00088 PIXCMAP *cmap; 00089 PTAA *ptaa; 00090 l_int32 ht_flag = 0; 00091 l_int32 ws_flag = 0; 00092 l_int32 text_flag = 0; 00093 l_int32 block_flag = 0; 00094 00095 PROCNAME("DoPageSegmentation"); 00096 00097 if (which == 1) 00098 ht_flag = 1; 00099 else if (which == 2) 00100 ws_flag = 1; 00101 else if (which == 3) 00102 text_flag = 1; 00103 else if (which == 4) 00104 block_flag = 1; 00105 else 00106 return ERROR_INT("invalid parameter: not in [1...4]", procName, 1); 00107 pixDisplayWrite(NULL, -1); 00108 00109 /* Reduce to 150 ppi */ 00110 pixt1 = pixScaleToGray2(pixs); 00111 pixDisplayWriteFormat(pixt1, L_MAX(ws_flag, L_MAX(ht_flag, block_flag)), 00112 IFF_PNG); 00113 if (which == 1) pixWrite("/tmp/orig.gray.150.png", pixt1, IFF_PNG); 00114 pixDestroy(&pixt1); 00115 pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); 00116 00117 /* Get seed for halftone parts */ 00118 pixt1 = pixReduceRankBinaryCascade(pixr, 4, 4, 3, 0); 00119 pixt2 = pixOpenBrick(NULL, pixt1, 5, 5); 00120 pixhs = pixExpandBinaryPower2(pixt2, 8); 00121 pixDisplayWriteFormat(pixhs, ht_flag, IFF_PNG); 00122 if (which == 1) pixWrite("/tmp/htseed.150.png", pixhs, IFF_PNG); 00123 pixDestroy(&pixt1); 00124 pixDestroy(&pixt2); 00125 00126 /* Get mask for connected regions */ 00127 pixm = pixCloseSafeBrick(NULL, pixr, 4, 4); 00128 pixDisplayWriteFormat(pixm, ht_flag, IFF_PNG); 00129 if (which == 1) pixWrite("/tmp/ccmask.150.png", pixm, IFF_PNG); 00130 00131 /* Fill seed into mask to get halftone mask */ 00132 pixhm1 = pixSeedfillBinary(NULL, pixhs, pixm, 4); 00133 pixDisplayWriteFormat(pixhm1, ht_flag, IFF_PNG); 00134 if (which == 1) pixWrite("/tmp/htmask.150.png", pixhm1, IFF_PNG); 00135 pixhm2 = pixExpandBinaryPower2(pixhm1, 2); 00136 00137 /* Extract halftone stuff */ 00138 pixht = pixAnd(NULL, pixhm1, pixr); 00139 if (which == 1) pixWrite("/tmp/ht.150.png", pixht, IFF_PNG); 00140 00141 /* Extract non-halftone stuff */ 00142 pixnht = pixXor(NULL, pixht, pixr); 00143 pixDisplayWriteFormat(pixnht, text_flag, IFF_PNG); 00144 if (which == 1) pixWrite("/tmp/text.150.png", pixnht, IFF_PNG); 00145 pixZero(pixht, &zero); 00146 if (zero) 00147 fprintf(stderr, "No halftone parts found\n"); 00148 else 00149 fprintf(stderr, "Halftone parts found\n"); 00150 00151 /* Get bit-inverted image */ 00152 pixi = pixInvert(NULL, pixnht); 00153 if (which == 1) pixWrite("/tmp/invert.150.png", pixi, IFF_PNG); 00154 pixDisplayWriteFormat(pixi, ws_flag, IFF_PNG); 00155 00156 /* The whitespace mask will break textlines where there 00157 * is a large amount of white space below or above. 00158 * We can prevent this by identifying regions of the 00159 * inverted image that have large horizontal (bigger than 00160 * the separation between columns) and significant 00161 * vertical extent (bigger than the separation between 00162 * textlines), and subtracting this from the whitespace mask. */ 00163 pixt1 = pixMorphCompSequence(pixi, "o80.60", 0); 00164 pixt2 = pixSubtract(NULL, pixi, pixt1); 00165 pixDisplayWriteFormat(pixt2, ws_flag, IFF_PNG); 00166 pixDestroy(&pixt1); 00167 00168 /* Identify vertical whitespace by opening inverted image */ 00169 pixt3 = pixOpenBrick(NULL, pixt2, 5, 1); /* removes thin vertical lines */ 00170 pixvws = pixOpenBrick(NULL, pixt3, 1, 200); /* gets long vertical lines */ 00171 pixDisplayWriteFormat(pixvws, L_MAX(text_flag, ws_flag), IFF_PNG); 00172 if (which == 1) pixWrite("/tmp/vertws.150.png", pixvws, IFF_PNG); 00173 pixDestroy(&pixt2); 00174 pixDestroy(&pixt3); 00175 00176 /* Get proto (early processed) text line mask. */ 00177 /* First close the characters and words in the textlines */ 00178 pixtm1 = pixCloseSafeBrick(NULL, pixnht, 30, 1); 00179 pixDisplayWriteFormat(pixtm1, text_flag, IFF_PNG); 00180 if (which == 1) pixWrite("/tmp/textmask1.150.png", pixtm1, IFF_PNG); 00181 00182 /* Next open back up the vertical whitespace corridors */ 00183 pixtm2 = pixSubtract(NULL, pixtm1, pixvws); 00184 if (which == 1) pixWrite("/tmp/textmask2.150.png", pixtm2, IFF_PNG); 00185 00186 /* Do a small opening to remove noise */ 00187 pixOpenBrick(pixtm2, pixtm2, 3, 3); 00188 pixDisplayWriteFormat(pixtm2, text_flag, IFF_PNG); 00189 if (which == 1) pixWrite("/tmp/textmask3.150.png", pixtm2, IFF_PNG); 00190 pixtm3 = pixExpandBinaryPower2(pixtm2, 2); 00191 00192 /* Join pixels vertically to make text block mask */ 00193 pixtb1 = pixMorphSequence(pixtm2, "c1.10 + o4.1", 0); 00194 pixDisplayWriteFormat(pixtb1, block_flag, IFF_PNG); 00195 if (which == 1) pixWrite("/tmp/textblock1.150.png", pixtb1, IFF_PNG); 00196 00197 /* Solidify the textblock mask and remove noise: 00198 * (1) For each c.c., close the blocks and dilate slightly 00199 * to form a solid mask. 00200 * (2) Small horizontal closing between components 00201 * (3) Open the white space between columns, again 00202 * (4) Remove small components */ 00203 pixt1 = pixMorphSequenceByComponent(pixtb1, "c30.30 + d3.3", 8, 0, 0, NULL); 00204 pixCloseSafeBrick(pixt1, pixt1, 10, 1); 00205 pixDisplayWriteFormat(pixt1, block_flag, IFF_PNG); 00206 pixt2 = pixSubtract(NULL, pixt1, pixvws); 00207 pixt3 = pixSelectBySize(pixt2, 25, 5, 8, L_SELECT_IF_BOTH, 00208 L_SELECT_IF_GTE, NULL); 00209 pixDisplayWriteFormat(pixt3, block_flag, IFF_PNG); 00210 if (which == 1) pixWrite("/tmp/textblock2.150.png", pixt3, IFF_PNG); 00211 pixtb2 = pixExpandBinaryPower2(pixt3, 2); 00212 pixDestroy(&pixt1); 00213 pixDestroy(&pixt2); 00214 pixDestroy(&pixt3); 00215 00216 /* Identify the outlines of each textblock */ 00217 ptaa = pixGetOuterBordersPtaa(pixtb2); 00218 pixt1 = pixRenderRandomCmapPtaa(pixtb2, ptaa, 1, 8, 1); 00219 cmap = pixGetColormap(pixt1); 00220 pixcmapResetColor(cmap, 0, 130, 130, 130); /* set interior to gray */ 00221 if (which == 1) pixWrite("/tmp/textblock3.300.png", pixt1, IFF_PNG); 00222 pixDisplayWithTitle(pixt1, 480, 360, "textblock mask with outlines", DFLAG); 00223 ptaaDestroy(&ptaa); 00224 pixDestroy(&pixt1); 00225 00226 /* Fill line mask (as seed) into the original */ 00227 pixt1 = pixSeedfillBinary(NULL, pixtm3, pixs, 8); 00228 pixOr(pixtm3, pixtm3, pixt1); 00229 pixDestroy(&pixt1); 00230 if (which == 1) pixWrite("/tmp/textmask.300.png", pixtm3, IFF_PNG); 00231 pixDisplayWithTitle(pixtm3, 480, 360, "textline mask 4", DFLAG); 00232 00233 /* Fill halftone mask (as seed) into the original */ 00234 pixt1 = pixSeedfillBinary(NULL, pixhm2, pixs, 8); 00235 pixOr(pixhm2, pixhm2, pixt1); 00236 pixDestroy(&pixt1); 00237 if (which == 1) pixWrite("/tmp/htmask.300.png", pixhm2, IFF_PNG); 00238 pixDisplayWithTitle(pixhm2, 520, 390, "halftonemask 2", DFLAG); 00239 00240 /* Find objects that are neither text nor halftones */ 00241 pixt1 = pixSubtract(NULL, pixs, pixtm3); /* remove text pixels */ 00242 pixnon = pixSubtract(NULL, pixt1, pixhm2); /* remove halftone pixels */ 00243 if (which == 1) pixWrite("/tmp/other.300.png", pixnon, IFF_PNG); 00244 pixDisplayWithTitle(pixnon, 540, 420, "other stuff", DFLAG); 00245 pixDestroy(&pixt1); 00246 00247 /* Write out b.b. for text line mask and halftone mask components */ 00248 boxatm = pixConnComp(pixtm3, NULL, 4); 00249 boxahm = pixConnComp(pixhm2, NULL, 8); 00250 if (which == 1) boxaWrite("/tmp/textmask.boxa", boxatm); 00251 if (which == 1) boxaWrite("/tmp/htmask.boxa", boxahm); 00252 00253 pixa = pixaReadFiles("/tmp", "junk_write_display"); 00254 pixt1 = pixaDisplayTiledAndScaled(pixa, 8, 250, 4, 0, 25, 2); 00255 snprintf(buf, sizeof(buf), "/tmp/segout.%d.png", which); 00256 pixWrite(buf, pixt1, IFF_PNG); 00257 pixDestroy(&pixt1); 00258 pixaDestroy(&pixa); 00259 00260 /* clean up to test with valgrind */ 00261 pixDestroy(&pixr); 00262 pixDestroy(&pixhs); 00263 pixDestroy(&pixm); 00264 pixDestroy(&pixhm1); 00265 pixDestroy(&pixhm2); 00266 pixDestroy(&pixht); 00267 pixDestroy(&pixnht); 00268 pixDestroy(&pixi); 00269 pixDestroy(&pixvws); 00270 pixDestroy(&pixtm1); 00271 pixDestroy(&pixtm2); 00272 pixDestroy(&pixtm3); 00273 pixDestroy(&pixtb1); 00274 pixDestroy(&pixtb2); 00275 pixDestroy(&pixnon); 00276 boxaDestroy(&boxatm); 00277 boxaDestroy(&boxahm); 00278 return 0; 00279 } 00280