Leptonica 1.68
C Image Processing Library

finditalic.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 
00017 /*
00018  * finditalic.c
00019  *
00020  *    Locate italic words.  This is an example of the use of
00021  *    hit-miss binary morphology with binary reconstruction (filling
00022  *    from a seed into a mask).
00023  *    Example: use with prog/italics.png
00024  *
00025  *      l_int32   pixItalicWords()
00026  */
00027 
00028 #include <stdio.h>
00029 #include <stdlib.h>
00030 #include "allheaders.h"
00031 
00032     /* ---------------------------------------------------------------  *
00033      * These hit-miss sels match the slanted edge of italic characters  *
00034      * ---------------------------------------------------------------  */
00035 static const char *str_ital1 = "   o x"
00036                                "      "
00037                                "      "
00038                                "      "
00039                                "  o x "
00040                                "      "
00041                                "  C   "
00042                                "      "
00043                                " o x  "
00044                                "      "
00045                                "      "
00046                                "      "
00047                                "o x   ";
00048 
00049 static const char *str_ital2 = "   o x"
00050                                "      "
00051                                "      "
00052                                "  o x "
00053                                "  C   "
00054                                "      "
00055                                " o x  "
00056                                "      "
00057                                "      "
00058                                "o x   ";
00059 
00060     /* ------------------------------------------------------------- *
00061      * This sel removes noise that is not oriented as a slanted edge *
00062      * ------------------------------------------------------------- */
00063 static const char *str_ital3 = " x"
00064                                "Cx"
00065                                "x "
00066                                "x ";
00067 
00068 
00069 /*!
00070  *  pixItalicWords()
00071  *
00072  *      Input:  pixs (1 bpp)
00073  *              boxaw (<optional> word bounding boxes; can be NULL)
00074  *              pixw (<optional> word box mask; can be NULL)
00075  *              &boxa (<return> boxa of italian words) 
00076  *              debugflag (1 for debug output; 0 otherwise)
00077  *      Return: 0 if OK, 1 on error
00078  *
00079  *  Notes:
00080  *      (1) You can input the bounding boxes for the words in one of
00081  *          two forms: as bounding boxes (@boxaw) or as a word mask with
00082  *          the word bounding boxes filled (@pixw).  For example,
00083  *          to compute @pixw, you can use pixWordMaskByDilation().
00084  *      (2) Alternatively, you can set both of these inputs to NULL,
00085  *          in which case the word mask is generated here.  This is
00086  *          done by dilating and closing the input image to connect
00087  *          letters within a word, while leaving the words separated.
00088  *          The parameters are chosen under the assumption that the
00089  *          input is 10 to 12 pt text, scanned at about 300 ppi.
00090  */
00091 l_int32
00092 pixItalicWords(PIX     *pixs,
00093                BOXA    *boxaw,
00094                PIX     *pixw,
00095                BOXA   **pboxa,
00096                l_int32  debugflag)
00097 {
00098 BOXA  *boxa;
00099 PIX   *pixsd, *pixm, *pixd;
00100 SEL   *sel_ital1, *sel_ital2, *sel_ital3;
00101 
00102     PROCNAME("pixItalicWords");
00103 
00104     if (!pixs)
00105         return ERROR_INT("pixs not defined", procName, 1);
00106     if (!pboxa)
00107         return ERROR_INT("&boxa not defined", procName, 1);
00108     if (boxaw && pixw)
00109         return ERROR_INT("both boxaw and pixw are defined", procName, 1);
00110 
00111     sel_ital1 = selCreateFromString(str_ital1, 13, 6, NULL);
00112     sel_ital2 = selCreateFromString(str_ital2, 10, 6, NULL);
00113     sel_ital3 = selCreateFromString(str_ital3, 4, 2, NULL);
00114 
00115         /* Make the italic seed: extract with HMT; remove noise */
00116     pixsd = pixHMT(NULL, pixs, sel_ital1);
00117     pixClose(pixsd, pixsd, sel_ital3);
00118     pixOpen(pixsd, pixsd, sel_ital3);
00119 
00120         /* Make the word mask.  Use input boxes or mask if given. */
00121     if (boxaw) {
00122         pixm = pixCreateTemplate(pixs);
00123         pixMaskBoxa(pixm, pixm, boxaw, L_SET_PIXELS);
00124     }
00125     else if (pixw) {
00126         pixm = pixClone(pixw);
00127     }
00128     else 
00129         pixm = pixMorphSequence(pixs, "d1.5 + c6.1", 0);
00130 
00131         /* Binary reconstruction */
00132     pixd = pixSeedfillBinary(NULL, pixsd, pixm, 8);
00133     boxa = pixConnComp(pixd, NULL, 8);
00134     *pboxa = boxa;
00135 
00136     if (debugflag) {
00137         BOXA  *boxat;
00138         PIXA  *pad;
00139         PIX   *pixt1, *pixt2;
00140         pad = pixaCreate(0);
00141         pixSaveTiledOutline(pixs, pad, 2, 1, 20, 2, 32);
00142         pixSaveTiledOutline(pixsd, pad, 2, 1, 20, 2, 0);
00143         boxat = pixConnComp(pixm, NULL, 8);
00144         boxaWrite("/tmp/junkboxa.ba", boxat);
00145         pixt1 = pixConvertTo32(pixm);
00146         pixRenderBoxaArb(pixt1, boxat, 3, 255, 0, 0);
00147         pixSaveTiledOutline(pixt1, pad, 2, 1, 20, 2, 0);
00148         pixDestroy(&pixt1);
00149         pixSaveTiledOutline(pixd, pad, 2, 1, 20, 2, 0);
00150         pixt1 = pixConvertTo32(pixs);
00151         pixRenderBoxaArb(pixt1, boxa, 3, 255, 0, 0);
00152         pixSaveTiledOutline(pixt1, pad, 2, 1, 20, 2, 0);
00153         pixt2 = pixaDisplay(pad, 0, 0);
00154         pixWrite("/tmp/junkdebug.png", pixt2, IFF_PNG);
00155         pixaDestroy(&pad);
00156         pixDestroy(&pixt1);
00157         pixDestroy(&pixt2);
00158         boxaDestroy(&boxat);
00159     }
00160 
00161     selDestroy(&sel_ital1);
00162     selDestroy(&sel_ital2);
00163     selDestroy(&sel_ital3);
00164     pixDestroy(&pixsd);
00165     pixDestroy(&pixm);
00166     pixDestroy(&pixd);
00167     return 0;
00168 }
00169 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines