Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 00017 /* 00018 * finditalic.c 00019 * 00020 * Locate italic words. This is an example of the use of 00021 * hit-miss binary morphology with binary reconstruction (filling 00022 * from a seed into a mask). 00023 * Example: use with prog/italics.png 00024 * 00025 * l_int32 pixItalicWords() 00026 */ 00027 00028 #include <stdio.h> 00029 #include <stdlib.h> 00030 #include "allheaders.h" 00031 00032 /* --------------------------------------------------------------- * 00033 * These hit-miss sels match the slanted edge of italic characters * 00034 * --------------------------------------------------------------- */ 00035 static const char *str_ital1 = " o x" 00036 " " 00037 " " 00038 " " 00039 " o x " 00040 " " 00041 " C " 00042 " " 00043 " o x " 00044 " " 00045 " " 00046 " " 00047 "o x "; 00048 00049 static const char *str_ital2 = " o x" 00050 " " 00051 " " 00052 " o x " 00053 " C " 00054 " " 00055 " o x " 00056 " " 00057 " " 00058 "o x "; 00059 00060 /* ------------------------------------------------------------- * 00061 * This sel removes noise that is not oriented as a slanted edge * 00062 * ------------------------------------------------------------- */ 00063 static const char *str_ital3 = " x" 00064 "Cx" 00065 "x " 00066 "x "; 00067 00068 00069 /*! 00070 * pixItalicWords() 00071 * 00072 * Input: pixs (1 bpp) 00073 * boxaw (<optional> word bounding boxes; can be NULL) 00074 * pixw (<optional> word box mask; can be NULL) 00075 * &boxa (<return> boxa of italian words) 00076 * debugflag (1 for debug output; 0 otherwise) 00077 * Return: 0 if OK, 1 on error 00078 * 00079 * Notes: 00080 * (1) You can input the bounding boxes for the words in one of 00081 * two forms: as bounding boxes (@boxaw) or as a word mask with 00082 * the word bounding boxes filled (@pixw). For example, 00083 * to compute @pixw, you can use pixWordMaskByDilation(). 00084 * (2) Alternatively, you can set both of these inputs to NULL, 00085 * in which case the word mask is generated here. This is 00086 * done by dilating and closing the input image to connect 00087 * letters within a word, while leaving the words separated. 00088 * The parameters are chosen under the assumption that the 00089 * input is 10 to 12 pt text, scanned at about 300 ppi. 00090 */ 00091 l_int32 00092 pixItalicWords(PIX *pixs, 00093 BOXA *boxaw, 00094 PIX *pixw, 00095 BOXA **pboxa, 00096 l_int32 debugflag) 00097 { 00098 BOXA *boxa; 00099 PIX *pixsd, *pixm, *pixd; 00100 SEL *sel_ital1, *sel_ital2, *sel_ital3; 00101 00102 PROCNAME("pixItalicWords"); 00103 00104 if (!pixs) 00105 return ERROR_INT("pixs not defined", procName, 1); 00106 if (!pboxa) 00107 return ERROR_INT("&boxa not defined", procName, 1); 00108 if (boxaw && pixw) 00109 return ERROR_INT("both boxaw and pixw are defined", procName, 1); 00110 00111 sel_ital1 = selCreateFromString(str_ital1, 13, 6, NULL); 00112 sel_ital2 = selCreateFromString(str_ital2, 10, 6, NULL); 00113 sel_ital3 = selCreateFromString(str_ital3, 4, 2, NULL); 00114 00115 /* Make the italic seed: extract with HMT; remove noise */ 00116 pixsd = pixHMT(NULL, pixs, sel_ital1); 00117 pixClose(pixsd, pixsd, sel_ital3); 00118 pixOpen(pixsd, pixsd, sel_ital3); 00119 00120 /* Make the word mask. Use input boxes or mask if given. */ 00121 if (boxaw) { 00122 pixm = pixCreateTemplate(pixs); 00123 pixMaskBoxa(pixm, pixm, boxaw, L_SET_PIXELS); 00124 } 00125 else if (pixw) { 00126 pixm = pixClone(pixw); 00127 } 00128 else 00129 pixm = pixMorphSequence(pixs, "d1.5 + c6.1", 0); 00130 00131 /* Binary reconstruction */ 00132 pixd = pixSeedfillBinary(NULL, pixsd, pixm, 8); 00133 boxa = pixConnComp(pixd, NULL, 8); 00134 *pboxa = boxa; 00135 00136 if (debugflag) { 00137 BOXA *boxat; 00138 PIXA *pad; 00139 PIX *pixt1, *pixt2; 00140 pad = pixaCreate(0); 00141 pixSaveTiledOutline(pixs, pad, 2, 1, 20, 2, 32); 00142 pixSaveTiledOutline(pixsd, pad, 2, 1, 20, 2, 0); 00143 boxat = pixConnComp(pixm, NULL, 8); 00144 boxaWrite("/tmp/junkboxa.ba", boxat); 00145 pixt1 = pixConvertTo32(pixm); 00146 pixRenderBoxaArb(pixt1, boxat, 3, 255, 0, 0); 00147 pixSaveTiledOutline(pixt1, pad, 2, 1, 20, 2, 0); 00148 pixDestroy(&pixt1); 00149 pixSaveTiledOutline(pixd, pad, 2, 1, 20, 2, 0); 00150 pixt1 = pixConvertTo32(pixs); 00151 pixRenderBoxaArb(pixt1, boxa, 3, 255, 0, 0); 00152 pixSaveTiledOutline(pixt1, pad, 2, 1, 20, 2, 0); 00153 pixt2 = pixaDisplay(pad, 0, 0); 00154 pixWrite("/tmp/junkdebug.png", pixt2, IFF_PNG); 00155 pixaDestroy(&pad); 00156 pixDestroy(&pixt1); 00157 pixDestroy(&pixt2); 00158 boxaDestroy(&boxat); 00159 } 00160 00161 selDestroy(&sel_ital1); 00162 selDestroy(&sel_ital2); 00163 selDestroy(&sel_ital3); 00164 pixDestroy(&pixsd); 00165 pixDestroy(&pixm); 00166 pixDestroy(&pixd); 00167 return 0; 00168 } 00169