Leptonica 1.68
C Image Processing Library

livre_pageseg.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  * livre_pageseg.c
00018  *
00019  *    This gives examples of the use of binary morphology for
00020  *    some simple and fast document segmentation operations.
00021  *
00022  *    The operations are carried out at 2x reduction.
00023  *    For images scanned at 300 ppi, this is typically
00024  *    high enough resolution for accurate results.
00025  *
00026  *    This generates several of the figures used in Chapter 18 of
00027  *    "Mathematical morphology: from theory to applications",
00028  *    edited by Laurent Najman and Hugues Talbot.  Published by
00029  *    Hermes Scientific Publishing, Ltd, 2010.
00030  */
00031 
00032 #include <stdio.h>
00033 #include <stdlib.h>
00034 #include "allheaders.h"
00035 
00036     /* Control the display output */
00037 #define   DFLAG        0
00038 
00039 
00040 l_int32 DoPageSegmentation(PIX *pixs, l_int32 which);
00041 
00042 main(int    argc,
00043      char **argv)
00044 {
00045 char        *filein;
00046 l_int32      i;
00047 PIX         *pixs;   /* input image sould be at least 300 ppi */
00048 static char  mainName[] = "livre_pageseg";
00049 
00050     if (argc != 2)
00051         exit(ERROR_INT(" Syntax:  livre_pageseg filein", mainName, 1));
00052 
00053     filein = argv[1];
00054     if ((pixs = pixRead(filein)) == NULL)
00055         exit(ERROR_INT("pix not made", mainName, 1));
00056 
00057     for (i = 1; i <= 4; i++)
00058         DoPageSegmentation(pixs, i);
00059     pixDestroy(&pixs);
00060     return 0;
00061 }
00062 
00063 
00064 l_int32
00065 DoPageSegmentation(PIX     *pixs,   /* should be at least 300 ppi */
00066                    l_int32  which)  /* 1, 2, 3, 4 */
00067 {
00068 char         buf[256];
00069 l_int32      index, zero;
00070 BOXA        *boxatm, *boxahm;
00071 PIX         *pixr;   /* image reduced to 150 ppi */
00072 PIX         *pixhs;  /* image of halftone seed, 150 ppi */
00073 PIX         *pixm;   /* image of mask of components, 150 ppi */
00074 PIX         *pixhm1; /* image of halftone mask, 150 ppi */
00075 PIX         *pixhm2; /* image of halftone mask, 300 ppi */
00076 PIX         *pixht;  /* image of halftone components, 150 ppi */
00077 PIX         *pixnht; /* image without halftone components, 150 ppi */
00078 PIX         *pixi;   /* inverted image, 150 ppi */
00079 PIX         *pixvws; /* image of vertical whitespace, 150 ppi */
00080 PIX         *pixtm1; /* image of closed textlines, 150 ppi */
00081 PIX         *pixtm2; /* image of refined text line mask, 150 ppi */
00082 PIX         *pixtm3; /* image of refined text line mask, 300 ppi */
00083 PIX         *pixtb1; /* image of text block mask, 150 ppi */
00084 PIX         *pixtb2; /* image of text block mask, 300 ppi */
00085 PIX         *pixnon; /* image of non-text or halftone, 150 ppi */
00086 PIX         *pixt1, *pixt2, *pixt3, *pixt4, *pixt5;
00087 PIXA        *pixa;
00088 PIXCMAP     *cmap;
00089 PTAA        *ptaa;
00090 l_int32      ht_flag = 0;
00091 l_int32      ws_flag = 0;
00092 l_int32      text_flag = 0;
00093 l_int32      block_flag = 0;
00094 
00095     PROCNAME("DoPageSegmentation");
00096 
00097     if (which == 1)
00098         ht_flag = 1;
00099     else if (which == 2)
00100         ws_flag = 1;
00101     else if (which == 3)
00102         text_flag = 1;
00103     else if (which == 4)
00104         block_flag = 1;
00105     else 
00106         return ERROR_INT("invalid parameter: not in [1...4]", procName, 1);
00107     pixDisplayWrite(NULL, -1);
00108 
00109         /* Reduce to 150 ppi */
00110     pixt1 = pixScaleToGray2(pixs);
00111     pixDisplayWriteFormat(pixt1, L_MAX(ws_flag, L_MAX(ht_flag, block_flag)),
00112                           IFF_PNG);
00113     if (which == 1) pixWrite("/tmp/orig.gray.150.png", pixt1, IFF_PNG);
00114     pixDestroy(&pixt1);
00115     pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
00116 
00117         /* Get seed for halftone parts */
00118     pixt1 = pixReduceRankBinaryCascade(pixr, 4, 4, 3, 0);
00119     pixt2 = pixOpenBrick(NULL, pixt1, 5, 5);
00120     pixhs = pixExpandBinaryPower2(pixt2, 8);
00121     pixDisplayWriteFormat(pixhs, ht_flag, IFF_PNG);
00122     if (which == 1) pixWrite("/tmp/htseed.150.png", pixhs, IFF_PNG);
00123     pixDestroy(&pixt1);
00124     pixDestroy(&pixt2);
00125 
00126         /* Get mask for connected regions */
00127     pixm = pixCloseSafeBrick(NULL, pixr, 4, 4);
00128     pixDisplayWriteFormat(pixm, ht_flag, IFF_PNG);
00129     if (which == 1) pixWrite("/tmp/ccmask.150.png", pixm, IFF_PNG);
00130 
00131         /* Fill seed into mask to get halftone mask */
00132     pixhm1 = pixSeedfillBinary(NULL, pixhs, pixm, 4);
00133     pixDisplayWriteFormat(pixhm1, ht_flag, IFF_PNG);
00134     if (which == 1) pixWrite("/tmp/htmask.150.png", pixhm1, IFF_PNG);
00135     pixhm2 = pixExpandBinaryPower2(pixhm1, 2);
00136     
00137         /* Extract halftone stuff */
00138     pixht = pixAnd(NULL, pixhm1, pixr);
00139     if (which == 1) pixWrite("/tmp/ht.150.png", pixht, IFF_PNG);
00140 
00141         /* Extract non-halftone stuff */
00142     pixnht = pixXor(NULL, pixht, pixr);
00143     pixDisplayWriteFormat(pixnht, text_flag, IFF_PNG);
00144     if (which == 1) pixWrite("/tmp/text.150.png", pixnht, IFF_PNG);
00145     pixZero(pixht, &zero);
00146     if (zero)
00147         fprintf(stderr, "No halftone parts found\n");
00148     else
00149         fprintf(stderr, "Halftone parts found\n");
00150 
00151         /* Get bit-inverted image */
00152     pixi = pixInvert(NULL, pixnht);
00153     if (which == 1) pixWrite("/tmp/invert.150.png", pixi, IFF_PNG);
00154     pixDisplayWriteFormat(pixi, ws_flag, IFF_PNG);
00155 
00156         /* The whitespace mask will break textlines where there
00157          * is a large amount of white space below or above.
00158          * We can prevent this by identifying regions of the
00159          * inverted image that have large horizontal (bigger than
00160          * the separation between columns) and significant
00161          * vertical extent (bigger than the separation between
00162          * textlines), and subtracting this from the whitespace mask. */
00163     pixt1 = pixMorphCompSequence(pixi, "o80.60", 0);
00164     pixt2 = pixSubtract(NULL, pixi, pixt1);
00165     pixDisplayWriteFormat(pixt2, ws_flag, IFF_PNG);
00166     pixDestroy(&pixt1);
00167 
00168         /* Identify vertical whitespace by opening inverted image */
00169     pixt3 = pixOpenBrick(NULL, pixt2, 5, 1);  /* removes thin vertical lines */
00170     pixvws = pixOpenBrick(NULL, pixt3, 1, 200);  /* gets long vertical lines */
00171     pixDisplayWriteFormat(pixvws, L_MAX(text_flag, ws_flag), IFF_PNG);
00172     if (which == 1) pixWrite("/tmp/vertws.150.png", pixvws, IFF_PNG);
00173     pixDestroy(&pixt2);
00174     pixDestroy(&pixt3);
00175 
00176         /* Get proto (early processed) text line mask. */
00177         /* First close the characters and words in the textlines */
00178     pixtm1 = pixCloseSafeBrick(NULL, pixnht, 30, 1);
00179     pixDisplayWriteFormat(pixtm1, text_flag, IFF_PNG);
00180     if (which == 1) pixWrite("/tmp/textmask1.150.png", pixtm1, IFF_PNG);
00181 
00182         /* Next open back up the vertical whitespace corridors */
00183     pixtm2 = pixSubtract(NULL, pixtm1, pixvws);
00184     if (which == 1) pixWrite("/tmp/textmask2.150.png", pixtm2, IFF_PNG);
00185 
00186         /* Do a small opening to remove noise */
00187     pixOpenBrick(pixtm2, pixtm2, 3, 3);
00188     pixDisplayWriteFormat(pixtm2, text_flag, IFF_PNG);
00189     if (which == 1) pixWrite("/tmp/textmask3.150.png", pixtm2, IFF_PNG);
00190     pixtm3 = pixExpandBinaryPower2(pixtm2, 2);
00191 
00192         /* Join pixels vertically to make text block mask */
00193     pixtb1 = pixMorphSequence(pixtm2, "c1.10 + o4.1", 0);
00194     pixDisplayWriteFormat(pixtb1, block_flag, IFF_PNG);
00195     if (which == 1) pixWrite("/tmp/textblock1.150.png", pixtb1, IFF_PNG);
00196 
00197         /* Solidify the textblock mask and remove noise:
00198          *  (1) For each c.c., close the blocks and dilate slightly
00199          *      to form a solid mask.
00200          *  (2) Small horizontal closing between components
00201          *  (3) Open the white space between columns, again
00202          *  (4) Remove small components */
00203     pixt1 = pixMorphSequenceByComponent(pixtb1, "c30.30 + d3.3", 8, 0, 0, NULL);
00204     pixCloseSafeBrick(pixt1, pixt1, 10, 1);
00205     pixDisplayWriteFormat(pixt1, block_flag, IFF_PNG);
00206     pixt2 = pixSubtract(NULL, pixt1, pixvws);
00207     pixt3 = pixSelectBySize(pixt2, 25, 5, 8, L_SELECT_IF_BOTH,
00208                             L_SELECT_IF_GTE, NULL);
00209     pixDisplayWriteFormat(pixt3, block_flag, IFF_PNG);
00210     if (which == 1) pixWrite("/tmp/textblock2.150.png", pixt3, IFF_PNG);
00211     pixtb2 = pixExpandBinaryPower2(pixt3, 2);
00212     pixDestroy(&pixt1);
00213     pixDestroy(&pixt2);
00214     pixDestroy(&pixt3);
00215 
00216         /* Identify the outlines of each textblock */
00217     ptaa = pixGetOuterBordersPtaa(pixtb2);
00218     pixt1 = pixRenderRandomCmapPtaa(pixtb2, ptaa, 1, 8, 1);
00219     cmap = pixGetColormap(pixt1);
00220     pixcmapResetColor(cmap, 0, 130, 130, 130);  /* set interior to gray */
00221     if (which == 1) pixWrite("/tmp/textblock3.300.png", pixt1, IFF_PNG);
00222     pixDisplayWithTitle(pixt1, 480, 360, "textblock mask with outlines", DFLAG);
00223     ptaaDestroy(&ptaa);
00224     pixDestroy(&pixt1);
00225 
00226         /* Fill line mask (as seed) into the original */
00227     pixt1 = pixSeedfillBinary(NULL, pixtm3, pixs, 8);
00228     pixOr(pixtm3, pixtm3, pixt1);
00229     pixDestroy(&pixt1);
00230     if (which == 1) pixWrite("/tmp/textmask.300.png", pixtm3, IFF_PNG);
00231     pixDisplayWithTitle(pixtm3, 480, 360, "textline mask 4", DFLAG);
00232 
00233         /* Fill halftone mask (as seed) into the original */
00234     pixt1 = pixSeedfillBinary(NULL, pixhm2, pixs, 8);
00235     pixOr(pixhm2, pixhm2, pixt1);
00236     pixDestroy(&pixt1);
00237     if (which == 1) pixWrite("/tmp/htmask.300.png", pixhm2, IFF_PNG);
00238     pixDisplayWithTitle(pixhm2, 520, 390, "halftonemask 2", DFLAG);
00239 
00240         /* Find objects that are neither text nor halftones */
00241     pixt1 = pixSubtract(NULL, pixs, pixtm3);  /* remove text pixels */
00242     pixnon = pixSubtract(NULL, pixt1, pixhm2);  /* remove halftone pixels */
00243     if (which == 1) pixWrite("/tmp/other.300.png", pixnon, IFF_PNG);
00244     pixDisplayWithTitle(pixnon, 540, 420, "other stuff", DFLAG);
00245     pixDestroy(&pixt1);
00246 
00247         /* Write out b.b. for text line mask and halftone mask components */
00248     boxatm = pixConnComp(pixtm3, NULL, 4);
00249     boxahm = pixConnComp(pixhm2, NULL, 8);
00250     if (which == 1) boxaWrite("/tmp/textmask.boxa", boxatm);
00251     if (which == 1) boxaWrite("/tmp/htmask.boxa", boxahm);
00252 
00253     pixa = pixaReadFiles("/tmp", "junk_write_display");
00254     pixt1 = pixaDisplayTiledAndScaled(pixa, 8, 250, 4, 0, 25, 2);
00255     snprintf(buf, sizeof(buf), "/tmp/segout.%d.png", which);
00256     pixWrite(buf, pixt1, IFF_PNG);
00257     pixDestroy(&pixt1);
00258     pixaDestroy(&pixa);
00259 
00260         /* clean up to test with valgrind */
00261     pixDestroy(&pixr);
00262     pixDestroy(&pixhs);
00263     pixDestroy(&pixm);
00264     pixDestroy(&pixhm1);
00265     pixDestroy(&pixhm2);
00266     pixDestroy(&pixht);
00267     pixDestroy(&pixnht);
00268     pixDestroy(&pixi);
00269     pixDestroy(&pixvws);
00270     pixDestroy(&pixtm1);
00271     pixDestroy(&pixtm2);
00272     pixDestroy(&pixtm3);
00273     pixDestroy(&pixtb1);
00274     pixDestroy(&pixtb2);
00275     pixDestroy(&pixnon);
00276     boxaDestroy(&boxatm);
00277     boxaDestroy(&boxahm);
00278     return 0;
00279 }
00280 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines