Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * flipdetect.c 00018 * 00019 * Page orientation detection (pure rotation by 90 degree increments): 00020 * l_int32 pixOrientDetect() 00021 * l_int32 makeOrientDecision() 00022 * l_int32 pixUpDownDetect() 00023 * l_int32 pixUpDownDetectGeneral() 00024 * l_int32 pixOrientDetectDwa() 00025 * l_int32 pixUpDownDetectDwa() 00026 * l_int32 pixUpDownDetectGeneralDwa() 00027 * 00028 * Page mirror detection (flip 180 degrees about line in plane of image): 00029 * l_int32 pixMirrorDetect() 00030 * l_int32 pixMirrorDetectDwa() 00031 * 00032 * Static debug helper 00033 * void pixDebugFlipDetect() 00034 * 00035 * =================================================================== 00036 * 00037 * Page transformation detection: 00038 * 00039 * Once a page is deskewed, there are 8 possible states that it 00040 * can be in, shown symbolically below. Suppose state 0 is correct. 
00041 * 00042 * 0: correct 1 2 3 00043 * +------+ +------+ +------+ +------+ 00044 * | **** | | * | | **** | | * | 00045 * | * | | * | | * | | * | 00046 * | * | | **** | | * | | **** | 00047 * +------+ +------+ +------+ +------+ 00048 * 00049 * 4 5 6 7 00050 * +-----+ +-----+ +-----+ +-----+ 00051 * | *** | | * | | *** | | * | 00052 * | * | | * | | * | | * | 00053 * | * | | * | | * | | * | 00054 * | * | | *** | | * | | *** | 00055 * +-----+ +-----+ +-----+ +-----+ 00056 * 00057 * Each of the other seven can be derived from state 0 by applying some 00058 * combination of a 90 degree clockwise rotation, a flip about 00059 * a horizontal line, and a flip about a vertical line, 00060 * all abbreviated as: 00061 * R = Rotation (about a line perpendicular to the image) 00062 * H = Horizontal flip (about a vertical line in the plane of the image) 00063 * V = Vertical flip (about a horizontal line in the plane of the image) 00064 * 00065 * We get these transformations: 00066 * RHV 00067 * 000 -> 0 00068 * 001 -> 1 00069 * 010 -> 2 00070 * 011 -> 3 00071 * 100 -> 4 00072 * 101 -> 5 00073 * 110 -> 6 00074 * 111 -> 7 00075 * 00076 * Note that in four of these, the sum of H and V is 1 (odd). 00077 * For these four, we have a change in parity (handedness) of 00078 * the image, and the transformation cannot be performed by 00079 * rotation about a vertical line out of the page. Under 00080 * rotation R, the set of 8 transformations decomposes into 00081 * two subgroups linking {0, 3, 4, 7} and {1, 2, 5, 6} independently. 00082 * 00083 * pixOrientDetect*() tests for a pure rotation (0, 90, 180, 270 degrees). 00084 * It doesn't change parity. 00085 * 00086 * pixMirrorDetect*() tests for a horizontal flip about the vertical axis. 00087 * It changes parity. 00088 * 00089 * The landscape/portrait rotation can be detected in two ways: 00090 * 00091 * (1) Compute the deskew confidence for an image segment, 00092 * both as is and rotated 90 degrees (see skew.c). 
00093 * 00094 * (2) Compute the ascender/descender signal for the image, 00095 * both as is and rotated 90 degrees (implemented here). 00096 * 00097 * The ascender/descender signal is useful for determining text 00098 * orientation in Roman alphabets because the incidence of letters 00099 * with straight-line ascenders (b, d, h, k, l, <t>) outnumber 00100 * those with descenders (<g>, p, q). The letters <t> and <g> 00101 * will respond variably to the filter, depending on the type face. 00102 * 00103 * What about the mirror image situations? These aren't common 00104 * unless you're dealing with film, for example. 00105 * But you can reliably test if the image has undergone a 00106 * parity-changing flip once about some axis in the plane 00107 * of the image, using pixMirrorDetect*(). This works ostensibly by 00108 * counting the number of characters with ascenders that 00109 * stick out to the left and right of the ascender. Characters 00110 * that are not mirror flipped are more likely to extend to the 00111 * right (b, h, k) than to the left (d). Of course, that is for 00112 * text that is rightside-up. So before you apply the mirror 00113 * test, it is necessary to insure that the text has the ascenders 00114 * going up, and not down or to the left or right. But here's 00115 * what *really* happens. It turns out that the pre-filtering before 00116 * the hit-miss transform (HMT) is crucial, and surprisingly, when 00117 * the pre-filtering is chosen to generate a large signal, the majority 00118 * of the signal comes from open regions of common lower-case 00119 * letters such as 'e', 'c' and 'f'. 00120 * 00121 * All operations are given in two implementations whose results are 00122 * identical: rasterop morphology and dwa morphology. The dwa 00123 * implementations are between 2x and 3x faster. 
 *
 *  The set of operations you actually use depends on your prior knowledge:
 *
 *  (1) If the page is known to be either rightside-up or upside-down, use
 *      either pixOrientDetect*() with pleftconf = NULL, or
 *      pixUpDownDetect*().  [The '*' refers to either the rasterop
 *      or dwa versions.]
 *
 *  (2) If any of the four orientations are possible, use pixOrientDetect*().
 *
 *  (3) If the text is horizontal and rightside-up, the only remaining
 *      degree of freedom is a left-right mirror flip: use
 *      pixMirrorDetect*().
 *
 *  (4) If you have a relatively large amount of numbers on the page,
 *      use the slower pixUpDownDetectGeneral().
 *
 *  We summarize the full orientation and mirror flip detection process:
 *
 *  (1) First determine which of the four 90 degree rotations
 *      causes the text to be rightside-up.  This can be done
 *      with either skew confidence or the pixOrientDetect*()
 *      signals.  For the latter, see the table for pixOrientDetect().
 *
 *  (2) Then, with ascenders pointing up, apply pixMirrorDetect*().
 *      In the normal situation the confidence will be
 *      large and positive.  However, if mirror flipped, the
 *      confidence will be large and negative.
00152 */ 00153 00154 #include <stdio.h> 00155 #include <stdlib.h> 00156 #include <math.h> 00157 #include "allheaders.h" 00158 00159 /* Sels for pixOrientDetect() and pixMirrorDetect() */ 00160 static const char *textsel1 = "x oo " 00161 "x oOo " 00162 "x o " 00163 "x " 00164 "xxxxxx"; 00165 00166 static const char *textsel2 = " oo x" 00167 " oOo x" 00168 " o x" 00169 " x" 00170 "xxxxxx"; 00171 00172 static const char *textsel3 = "xxxxxx" 00173 "x " 00174 "x o " 00175 "x oOo " 00176 "x oo "; 00177 00178 static const char *textsel4 = "xxxxxx" 00179 " x" 00180 " o x" 00181 " oOo x" 00182 " oo x"; 00183 00184 /* Parameters for determining orientation */ 00185 static const l_int32 DEFAULT_MIN_UP_DOWN_COUNT = 70; 00186 static const l_float32 DEFAULT_MIN_UP_DOWN_CONF = 7.0; 00187 static const l_float32 DEFAULT_MIN_UP_DOWN_RATIO = 2.5; 00188 00189 /* Parameters for determining mirror flip */ 00190 static const l_int32 DEFAULT_MIN_MIRROR_FLIP_COUNT = 100; 00191 static const l_float32 DEFAULT_MIN_MIRROR_FLIP_CONF = 5.0; 00192 00193 /* Static debug function */ 00194 static void pixDebugFlipDetect(const char *filename, PIX *pixs, 00195 PIX *pixhm, l_int32 enable); 00196 00197 00198 /*----------------------------------------------------------------* 00199 * Orientation detection (four 90 degree angles) * 00200 * Rasterop implementation * 00201 *----------------------------------------------------------------*/ 00202 /*! 00203 * pixOrientDetect() 00204 * 00205 * Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi) 00206 * &upconf (<optional return> ; may be null) 00207 * &leftconf (<optional return> ; may be null) 00208 * mincount (min number of up + down; use 0 for default) 00209 * debug (1 for debug output; 0 otherwise) 00210 * Return: 0 if OK, 1 on error 00211 * 00212 * Notes: 00213 * (1) See "Measuring document image skew and orientation" 00214 * Dan S. Bloomberg, Gary E. 
Kopec and Lakshmi Dasari 00215 * IS&T/SPIE EI'95, Conference 2422: Document Recognition II 00216 * pp 302-316, Feb 6-7, 1995, San Jose, CA 00217 * (2) upconf is the normalized difference between up ascenders 00218 * and down ascenders. The image is analyzed without rotation 00219 * for being rightside-up or upside-down. Set &upconf to null 00220 * to skip this operation. 00221 * (3) leftconf is the normalized difference between up ascenders 00222 * and down ascenders in the image after it has been 00223 * rotated 90 degrees clockwise. With that rotation, ascenders 00224 * projecting to the left in the source image will project up 00225 * in the rotated image. We compute this by rotating 90 degrees 00226 * clockwise and testing for up and down ascenders. Set 00227 * &leftconf to null to skip this operation. 00228 * (4) Note that upconf and leftconf are not linear measures of 00229 * confidence, e.g., in a range between 0 and 100. They 00230 * measure how far you are out on the tail of a (presumably) 00231 * normal distribution. For example, a confidence of 10 means 00232 * that it is nearly certain that the difference did not 00233 * happen at random. However, these values must be interpreted 00234 * cautiously, taking into consideration the estimated prior 00235 * for a particular orientation or mirror flip. The up-down 00236 * signal is very strong if applied to text with ascenders 00237 * up and down, and relatively weak for text at 90 degrees, 00238 * but even at 90 degrees, the difference can look significant. 00239 * For example, suppose the ascenders are oriented horizontally, 00240 * but the test is done vertically. Then upconf can 00241 * be < -MIN_CONF_FOR_UP_DOWN, suggesting the text may be 00242 * upside-down. However, if instead the test were done 00243 * horizontally, leftconf will be very much larger 00244 * (in absolute value), giving the correct orientation. 
00245 * (5) If you compute both upconf and leftconf, and there is 00246 * sufficient signal, the following table determines the 00247 * cw angle necessary to rotate pixs so that the text is 00248 * rightside-up: 00249 * 0 deg : upconf >> 1, abs(upconf) >> abs(leftconf) 00250 * 90 deg : leftconf >> 1, abs(leftconf) >> abs(upconf) 00251 * 180 deg : upconf << -1, abs(upconf) >> abs(leftconf) 00252 * 270 deg : leftconf << -1, abs(leftconf) >> abs(upconf) 00253 * (6) One should probably not interpret the direction unless 00254 * there are a sufficient number of counts for both orientations, 00255 * in which case neither upconf nor leftconf will be 0.0. 00256 * (7) Uses rasterop implementation of HMT. 00257 */ 00258 l_int32 00259 pixOrientDetect(PIX *pixs, 00260 l_float32 *pupconf, 00261 l_float32 *pleftconf, 00262 l_int32 mincount, 00263 l_int32 debug) 00264 { 00265 PIX *pixt; 00266 00267 PROCNAME("pixOrientDetect"); 00268 00269 if (!pixs) 00270 return ERROR_INT("pixs not defined", procName, 1); 00271 if (pixGetDepth(pixs) != 1) 00272 return ERROR_INT("pixs not 1 bpp", procName, 1); 00273 if (!pupconf && !pleftconf) 00274 return ERROR_INT("nothing to do", procName, 1); 00275 if (mincount == 0) 00276 mincount = DEFAULT_MIN_UP_DOWN_COUNT; 00277 00278 if (pupconf) 00279 pixUpDownDetect(pixs, pupconf, mincount, debug); 00280 if (pleftconf) { 00281 pixt = pixRotate90(pixs, 1); 00282 pixUpDownDetect(pixt, pleftconf, mincount, debug); 00283 pixDestroy(&pixt); 00284 } 00285 00286 return 0; 00287 } 00288 00289 00290 /*! 
00291 * makeOrientDecision() 00292 * 00293 * Input: upconf (nonzero) 00294 * leftconf (nonzero) 00295 * minupconf (minimum value for which a decision can be made) 00296 * minratio (minimum conf ratio required for a decision) 00297 * &orient (<return> text orientation enum {0,1,2,3,4}) 00298 * debug (1 for debug output; 0 otherwise) 00299 * Return: 0 if OK, 1 on error 00300 * 00301 * Notes: 00302 * (1) This can be run after pixOrientDetect() 00303 * (2) Both upconf and leftconf must be nonzero; otherwise the 00304 * orientation cannot be determined. 00305 * (3) The abs values of the input confidences are compared to 00306 * minupconf. 00307 * (4) The abs value of the largest of (upconf/leftconf) and 00308 * (leftconf/upconf) is compared with minratio. 00309 * (5) Input 0.0 for the default values for minupconf and minratio. 00310 * (6) The return value of orient is interpreted thus: 00311 * L_TEXT_ORIENT_UNKNOWN: not enough evidence to determine 00312 * L_TEXT_ORIENT_UP: text rightside-up 00313 * L_TEXT_ORIENT_LEFT: landscape, text up facing left 00314 * L_TEXT_ORIENT_DOWN: text upside-down 00315 * L_TEXT_ORIENT_RIGHT: landscape, text up facing right 00316 */ 00317 l_int32 00318 makeOrientDecision(l_float32 upconf, 00319 l_float32 leftconf, 00320 l_float32 minupconf, 00321 l_float32 minratio, 00322 l_int32 *porient, 00323 l_int32 debug) 00324 { 00325 l_float32 absupconf, absleftconf; 00326 00327 PROCNAME("makeOrientDecision"); 00328 00329 if (!porient) 00330 return ERROR_INT("&orient not defined", procName, 1); 00331 *porient = L_TEXT_ORIENT_UNKNOWN; /* default: no decision */ 00332 if (upconf == 0.0 || leftconf == 0.0) 00333 return ERROR_INT("not enough conf to get orientation", procName, 1); 00334 00335 if (minupconf == 0.0) 00336 minupconf = DEFAULT_MIN_UP_DOWN_CONF; 00337 if (minratio == 0.0) 00338 minratio = DEFAULT_MIN_UP_DOWN_RATIO; 00339 absupconf = L_ABS(upconf); 00340 absleftconf = L_ABS(leftconf); 00341 00342 /* Here are the four possible orientation 
decisions, based 00343 * on satisfaction of two threshold constraints. */ 00344 if (upconf > minupconf && absupconf > minratio * absleftconf) 00345 *porient = L_TEXT_ORIENT_UP; 00346 else if (leftconf > minupconf && absleftconf > minratio * absupconf) 00347 *porient = L_TEXT_ORIENT_LEFT; 00348 else if (upconf < -minupconf && absupconf > minratio * absleftconf) 00349 *porient = L_TEXT_ORIENT_DOWN; 00350 else if (leftconf < -minupconf && absleftconf > minratio * absupconf) 00351 *porient = L_TEXT_ORIENT_RIGHT; 00352 00353 if (debug) { 00354 fprintf(stderr, "upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf); 00355 if (*porient == L_TEXT_ORIENT_UNKNOWN) 00356 fprintf(stderr, "Confidence is low; no determination is made\n"); 00357 else if (*porient == L_TEXT_ORIENT_UP) 00358 fprintf(stderr, "Text is rightside-up\n"); 00359 else if (*porient == L_TEXT_ORIENT_LEFT) 00360 fprintf(stderr, "Text is rotated 90 deg ccw\n"); 00361 else if (*porient == L_TEXT_ORIENT_DOWN) 00362 fprintf(stderr, "Text is upside-down\n"); 00363 else /* *porient == L_TEXT_ORIENT_RIGHT */ 00364 fprintf(stderr, "Text is rotated 90 deg cw\n"); 00365 } 00366 00367 return 0; 00368 } 00369 00370 00371 /*! 00372 * pixUpDownDetect() 00373 * 00374 * Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi) 00375 * &conf (<return> confidence that text is rightside-up) 00376 * mincount (min number of up + down; use 0 for default) 00377 * debug (1 for debug output; 0 otherwise) 00378 * Return: 0 if OK, 1 on error 00379 * 00380 * Notes: 00381 * (1) Special (typical, slightly faster) case, where the pixels 00382 * identified through the HMT (hit-miss transform) are not 00383 * clipped by a truncated word mask pixm. See pixOrientDetect() 00384 * and pixUpDownDetectGeneral() for details. 
00385 * (2) The returned confidence is the normalized difference 00386 * between the number of detected up and down ascenders, 00387 * assuming that the text is either rightside-up or upside-down 00388 * and not rotated at a 90 degree angle. 00389 */ 00390 l_int32 00391 pixUpDownDetect(PIX *pixs, 00392 l_float32 *pconf, 00393 l_int32 mincount, 00394 l_int32 debug) 00395 { 00396 return pixUpDownDetectGeneral(pixs, pconf, mincount, 0, debug); 00397 } 00398 00399 00400 /*! 00401 * pixUpDownDetectGeneral() 00402 * 00403 * Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi) 00404 * &conf (<return> confidence that text is rightside-up) 00405 * mincount (min number of up + down; use 0 for default) 00406 * npixels (number of pixels removed from each side of word box) 00407 * debug (1 for debug output; 0 otherwise) 00408 * Return: 0 if OK, 1 on error 00409 * 00410 * Notes: 00411 * (1) See pixOrientDetect() for other details. 00412 * (2) @conf is the normalized difference between the number of 00413 * detected up and down ascenders, assuming that the text 00414 * is either rightside-up or upside-down and not rotated 00415 * at a 90 degree angle. 00416 * (3) The typical mode of operation is @npixels == 0. 00417 * If @npixels > 0, this removes HMT matches at the 00418 * beginning and ending of "words." This is useful for 00419 * pages that may have mostly digits, because if npixels == 0, 00420 * leading "1" and "3" digits can register as having 00421 * ascenders or descenders, and "7" digits can match descenders. 00422 * Consequently, a page image of only digits may register 00423 * as being upside-down. 00424 * (4) We want to count the number of instances found using the HMT. 00425 * An expensive way to do this would be to count the 00426 * number of connected components. 
A cheap way is to do a rank 00427 * reduction cascade that reduces each component to a single 00428 * pixel, and results (after two or three 2x reductions) 00429 * in one pixel for each of the original components. 00430 * After the reduction, you have a much smaller pix over 00431 * which to count pixels. We do only 2 reductions, because 00432 * this function is designed to work for input pix between 00433 * 150 and 300 ppi, and an 8x reduction on a 150 ppi image 00434 * is going too far -- components will get merged. 00435 */ 00436 l_int32 00437 pixUpDownDetectGeneral(PIX *pixs, 00438 l_float32 *pconf, 00439 l_int32 mincount, 00440 l_int32 npixels, 00441 l_int32 debug) 00442 { 00443 l_int32 countup, countdown, nmax; 00444 l_float32 nup, ndown; 00445 PIX *pixt0, *pixt1, *pixt2, *pixt3, *pixm; 00446 SEL *sel1, *sel2, *sel3, *sel4; 00447 00448 PROCNAME("pixUpDownDetectGeneral"); 00449 00450 if (!pconf) 00451 return ERROR_INT("&conf not defined", procName, 1); 00452 *pconf = 0.0; 00453 if (!pixs) 00454 return ERROR_INT("pixs not defined", procName, 1); 00455 if (mincount == 0) 00456 mincount = DEFAULT_MIN_UP_DOWN_COUNT; 00457 if (npixels < 0) 00458 npixels = 0; 00459 00460 sel1 = selCreateFromString(textsel1, 5, 6, NULL); 00461 sel2 = selCreateFromString(textsel2, 5, 6, NULL); 00462 sel3 = selCreateFromString(textsel3, 5, 6, NULL); 00463 sel4 = selCreateFromString(textsel4, 5, 6, NULL); 00464 00465 /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1). 00466 * This closes holes in x-height characters and joins them at 00467 * the x-height. There is more noise in the descender detection 00468 * from this, but it works fairly well. */ 00469 pixt0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0); 00470 00471 /* Optionally, make a mask of the word bounding boxes, shortening 00472 * each of them by a fixed amount at each end. 
*/ 00473 pixm = NULL; 00474 if (npixels > 0) { 00475 l_int32 i, nbox, x, y, w, h; 00476 BOX *box; 00477 BOXA *boxa; 00478 pixt1 = pixMorphSequence(pixt0, "o10.1", 0); 00479 boxa = pixConnComp(pixt1, NULL, 8); 00480 pixm = pixCreateTemplate(pixt1); 00481 pixDestroy(&pixt1); 00482 nbox = boxaGetCount(boxa); 00483 for (i = 0; i < nbox; i++) { 00484 box = boxaGetBox(boxa, i, L_CLONE); 00485 boxGetGeometry(box, &x, &y, &w, &h); 00486 if (w > 2 * npixels) 00487 pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13, 00488 PIX_SET, NULL, 0, 0); 00489 boxDestroy(&box); 00490 } 00491 boxaDestroy(&boxa); 00492 } 00493 00494 /* Find the ascenders and optionally filter with pixm. 00495 * For an explanation of the procedure used for counting the result 00496 * of the HMT, see comments at the beginning of this function. */ 00497 pixt1 = pixHMT(NULL, pixt0, sel1); 00498 pixt2 = pixHMT(NULL, pixt0, sel2); 00499 pixOr(pixt1, pixt1, pixt2); 00500 if (pixm) 00501 pixAnd(pixt1, pixt1, pixm); 00502 pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0); 00503 pixCountPixels(pixt3, &countup, NULL); 00504 pixDebugFlipDetect("junkpixup", pixs, pixt1, debug); 00505 pixDestroy(&pixt1); 00506 pixDestroy(&pixt2); 00507 pixDestroy(&pixt3); 00508 00509 /* Find the ascenders and optionally filter with pixm. */ 00510 pixt1 = pixHMT(NULL, pixt0, sel3); 00511 pixt2 = pixHMT(NULL, pixt0, sel4); 00512 pixOr(pixt1, pixt1, pixt2); 00513 if (pixm) 00514 pixAnd(pixt1, pixt1, pixm); 00515 pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0); 00516 pixCountPixels(pixt3, &countdown, NULL); 00517 pixDebugFlipDetect("junkpixdown", pixs, pixt1, debug); 00518 pixDestroy(&pixt1); 00519 pixDestroy(&pixt2); 00520 pixDestroy(&pixt3); 00521 00522 /* Evaluate statistically, generating a confidence that is 00523 * related to the probability with a gaussian distribution. 
*/ 00524 nup = (l_float32)(countup); 00525 ndown = (l_float32)(countdown); 00526 nmax = L_MAX(countup, countdown); 00527 if (nmax > mincount) 00528 *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown)); 00529 00530 if (debug) { 00531 if (pixm) pixWrite("junkpixm1", pixm, IFF_PNG); 00532 fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n", 00533 nup, ndown, *pconf); 00534 if (*pconf > DEFAULT_MIN_UP_DOWN_CONF) 00535 fprintf(stderr, "Text is rightside-up\n"); 00536 if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF) 00537 fprintf(stderr, "Text is upside-down\n"); 00538 } 00539 00540 pixDestroy(&pixt0); 00541 pixDestroy(&pixm); 00542 selDestroy(&sel1); 00543 selDestroy(&sel2); 00544 selDestroy(&sel3); 00545 selDestroy(&sel4); 00546 return 0; 00547 } 00548 00549 00550 /*----------------------------------------------------------------* 00551 * Orientation detection (four 90 degree angles) * 00552 * DWA implementation * 00553 *----------------------------------------------------------------*/ 00554 /*! 00555 * pixOrientDetectDwa() 00556 * 00557 * Input: pixs (1 bpp, deskewed, English text) 00558 * &upconf (<optional return> ; may be null) 00559 * &leftconf (<optional return> ; may be null) 00560 * mincount (min number of up + down; use 0 for default) 00561 * debug (1 for debug output; 0 otherwise) 00562 * Return: 0 if OK, 1 on error 00563 * 00564 * Notes: 00565 * (1) Same interface as for pixOrientDetect(). See notes 00566 * there for usage. 00567 * (2) Uses auto-gen'd code for the Sels defined at the 00568 * top of this file, with some renaming of functions. 00569 * The auto-gen'd code is in fliphmtgen.c, and can 00570 * be generated by a simple executable; see prog/flipselgen.c. 00571 * (3) This runs about 2.5 times faster than the pixOrientDetect(). 
00572 */ 00573 l_int32 00574 pixOrientDetectDwa(PIX *pixs, 00575 l_float32 *pupconf, 00576 l_float32 *pleftconf, 00577 l_int32 mincount, 00578 l_int32 debug) 00579 { 00580 PIX *pixt; 00581 00582 PROCNAME("pixOrientDetectDwa"); 00583 00584 if (!pixs) 00585 return ERROR_INT("pixs not defined", procName, 1); 00586 if (pixGetDepth(pixs) != 1) 00587 return ERROR_INT("pixs not 1 bpp", procName, 1); 00588 if (!pupconf && !pleftconf) 00589 return ERROR_INT("nothing to do", procName, 1); 00590 if (mincount == 0) 00591 mincount = DEFAULT_MIN_UP_DOWN_COUNT; 00592 00593 if (pupconf) 00594 pixUpDownDetectDwa(pixs, pupconf, mincount, debug); 00595 if (pleftconf) { 00596 pixt = pixRotate90(pixs, 1); 00597 pixUpDownDetectDwa(pixt, pleftconf, mincount, debug); 00598 pixDestroy(&pixt); 00599 } 00600 00601 return 0; 00602 } 00603 00604 00605 /*! 00606 * pixUpDownDetectDwa() 00607 * 00608 * Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi) 00609 * &conf (<return> confidence that text is rightside-up) 00610 * mincount (min number of up + down; use 0 for default) 00611 * debug (1 for debug output; 0 otherwise) 00612 * Return: 0 if OK, 1 on error 00613 * 00614 * Notes: 00615 * (1) Faster (DWA) version of pixUpDownDetect(). 00616 * (2) This is a special case (but typical and slightly faster) of 00617 * pixUpDownDetectGeneralDwa(), where the pixels identified 00618 * through the HMT (hit-miss transform) are not clipped by 00619 * a truncated word mask pixm. See pixUpDownDetectGeneral() 00620 * for usage and other details. 00621 * (3) The returned confidence is the normalized difference 00622 * between the number of detected up and down ascenders, 00623 * assuming that the text is either rightside-up or upside-down 00624 * and not rotated at a 90 degree angle. 
00625 */ 00626 l_int32 00627 pixUpDownDetectDwa(PIX *pixs, 00628 l_float32 *pconf, 00629 l_int32 mincount, 00630 l_int32 debug) 00631 { 00632 return pixUpDownDetectGeneralDwa(pixs, pconf, mincount, 0, debug); 00633 } 00634 00635 00636 /*! 00637 * pixUpDownDetectGeneralDwa() 00638 * 00639 * Input: pixs (1 bpp, deskewed, English text) 00640 * &conf (<return> confidence that text is rightside-up) 00641 * mincount (min number of up + down; use 0 for default) 00642 * npixels (number of pixels removed from each side of word box) 00643 * debug (1 for debug output; 0 otherwise) 00644 * Return: 0 if OK, 1 on error 00645 * 00646 * Notes: 00647 * (1) See the notes in pixUpDownDetectGeneral() for usage. 00648 */ 00649 l_int32 00650 pixUpDownDetectGeneralDwa(PIX *pixs, 00651 l_float32 *pconf, 00652 l_int32 mincount, 00653 l_int32 npixels, 00654 l_int32 debug) 00655 { 00656 char flipsel1[] = "flipsel1"; 00657 char flipsel2[] = "flipsel2"; 00658 char flipsel3[] = "flipsel3"; 00659 char flipsel4[] = "flipsel4"; 00660 l_int32 countup, countdown, nmax; 00661 l_float32 nup, ndown; 00662 PIX *pixt, *pixt0, *pixt1, *pixt2, *pixt3, *pixm; 00663 00664 PROCNAME("pixUpDownDetectGeneralDwa"); 00665 00666 if (!pconf) 00667 return ERROR_INT("&conf not defined", procName, 1); 00668 *pconf = 0.0; 00669 if (!pixs) 00670 return ERROR_INT("pixs not defined", procName, 1); 00671 if (mincount == 0) 00672 mincount = DEFAULT_MIN_UP_DOWN_COUNT; 00673 if (npixels < 0) 00674 npixels = 0; 00675 00676 /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1). 00677 * This closes holes in x-height characters and joins them at 00678 * the x-height. There is more noise in the descender detection 00679 * from this, but it works fairly well. */ 00680 pixt = pixMorphSequenceDwa(pixs, "c1.8 + c30.1", 0); 00681 00682 /* Be sure to add the border before the flip DWA operations! 
*/ 00683 pixt0 = pixAddBorderGeneral(pixt, ADDED_BORDER, ADDED_BORDER, 00684 ADDED_BORDER, ADDED_BORDER, 0); 00685 pixDestroy(&pixt); 00686 00687 /* Optionally, make a mask of the word bounding boxes, shortening 00688 * each of them by a fixed amount at each end. */ 00689 pixm = NULL; 00690 if (npixels > 0) { 00691 l_int32 i, nbox, x, y, w, h; 00692 BOX *box; 00693 BOXA *boxa; 00694 pixt1 = pixMorphSequenceDwa(pixt0, "o10.1", 0); 00695 boxa = pixConnComp(pixt1, NULL, 8); 00696 pixm = pixCreateTemplate(pixt1); 00697 pixDestroy(&pixt1); 00698 nbox = boxaGetCount(boxa); 00699 for (i = 0; i < nbox; i++) { 00700 box = boxaGetBox(boxa, i, L_CLONE); 00701 boxGetGeometry(box, &x, &y, &w, &h); 00702 if (w > 2 * npixels) 00703 pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13, 00704 PIX_SET, NULL, 0, 0); 00705 boxDestroy(&box); 00706 } 00707 boxaDestroy(&boxa); 00708 } 00709 00710 /* Find the ascenders and optionally filter with pixm. 00711 * For an explanation of the procedure used for counting the result 00712 * of the HMT, see comments in pixUpDownDetectGeneral(). */ 00713 pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel1); 00714 pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel2); 00715 pixOr(pixt1, pixt1, pixt2); 00716 if (pixm) 00717 pixAnd(pixt1, pixt1, pixm); 00718 pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0); 00719 pixCountPixels(pixt3, &countup, NULL); 00720 pixDestroy(&pixt1); 00721 pixDestroy(&pixt2); 00722 pixDestroy(&pixt3); 00723 00724 /* Find the ascenders and optionally filter with pixm. 
*/ 00725 pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel3); 00726 pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel4); 00727 pixOr(pixt1, pixt1, pixt2); 00728 if (pixm) 00729 pixAnd(pixt1, pixt1, pixm); 00730 pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0); 00731 pixCountPixels(pixt3, &countdown, NULL); 00732 pixDestroy(&pixt1); 00733 pixDestroy(&pixt2); 00734 pixDestroy(&pixt3); 00735 00736 /* Evaluate statistically, generating a confidence that is 00737 * related to the probability with a gaussian distribution. */ 00738 nup = (l_float32)(countup); 00739 ndown = (l_float32)(countdown); 00740 nmax = L_MAX(countup, countdown); 00741 if (nmax > mincount) 00742 *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown)); 00743 00744 if (debug) { 00745 if (pixm) pixWrite("junkpixm2", pixm, IFF_PNG); 00746 fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n", 00747 nup, ndown, *pconf); 00748 if (*pconf > DEFAULT_MIN_UP_DOWN_CONF) 00749 fprintf(stderr, "Text is rightside-up\n"); 00750 if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF) 00751 fprintf(stderr, "Text is upside-down\n"); 00752 } 00753 00754 pixDestroy(&pixt0); 00755 pixDestroy(&pixm); 00756 return 0; 00757 } 00758 00759 00760 00761 /*----------------------------------------------------------------* 00762 * Left-right mirror detection * 00763 * Rasterop implementation * 00764 *----------------------------------------------------------------*/ 00765 /*! 00766 * pixMirrorDetect() 00767 * 00768 * Input: pixs (1 bpp, deskewed, English text) 00769 * &conf (<return> confidence that text is not LR mirror reversed) 00770 * mincount (min number of left + right; use 0 for default) 00771 * debug (1 for debug output; 0 otherwise) 00772 * Return: 0 if OK, 1 on error 00773 * 00774 * Notes: 00775 * (1) For this test, it is necessary that the text is horizontally 00776 * oriented, with ascenders going up. 00777 * (2) conf is the normalized difference between the number of 00778 * right and left facing characters with ascenders. 
00779 * Left-facing are {d}; right-facing are {b, h, k}. 00780 * At least that was the expectation. In practice, we can 00781 * really just say that it is the normalized difference in 00782 * hits using two specific hit-miss filters, textsel1 and textsel2, 00783 * after the image has been suitably pre-filtered so that 00784 * these filters are effective. See (4) for what's really happening. 00785 * (3) A large positive conf value indicates normal text, whereas 00786 * a large negative conf value means the page is mirror reversed. 00787 * (4) The implementation is a bit tricky. The general idea is 00788 * to fill the x-height part of characters, but not the space 00789 * between them, before doing the HMT. This is done by 00790 * finding pixels added using two different operations -- a 00791 * horizontal close and a vertical dilation -- and adding 00792 * the intersection of these sets to the original. It turns 00793 * out that the original intuition about the signal was largely 00794 * in error: much of the signal for right-facing characters 00795 * comes from the lower part of common x-height characters, like 00796 * the e and c, that remain open after these operations. 00797 * So it's important that the operations to close the x-height 00798 * parts of the characters are purposely weakened sufficiently 00799 * to allow these characters to remain open. The wonders 00800 * of morphology! 
00801 */ 00802 l_int32 00803 pixMirrorDetect(PIX *pixs, 00804 l_float32 *pconf, 00805 l_int32 mincount, 00806 l_int32 debug) 00807 { 00808 l_int32 count1, count2, nmax; 00809 l_float32 nleft, nright; 00810 PIX *pixt0, *pixt1, *pixt2, *pixt3; 00811 SEL *sel1, *sel2; 00812 00813 PROCNAME("pixMirrorDetect"); 00814 00815 if (!pconf) 00816 return ERROR_INT("&conf not defined", procName, 1); 00817 *pconf = 0.0; 00818 if (!pixs) 00819 return ERROR_INT("pixs not defined", procName, 1); 00820 if (mincount == 0) 00821 mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT; 00822 00823 sel1 = selCreateFromString(textsel1, 5, 6, NULL); 00824 sel2 = selCreateFromString(textsel2, 5, 6, NULL); 00825 00826 /* Fill x-height characters but not space between them, sort of. */ 00827 pixt3 = pixMorphCompSequence(pixs, "d1.30", 0); 00828 pixXor(pixt3, pixt3, pixs); 00829 pixt0 = pixMorphCompSequence(pixs, "c15.1", 0); 00830 pixXor(pixt0, pixt0, pixs); 00831 pixAnd(pixt0, pixt0, pixt3); 00832 pixOr(pixt0, pixt0, pixs); 00833 pixDestroy(&pixt3); 00834 /* pixDisplayWrite(pixt0, 1); */ 00835 00836 /* Filter the right-facing characters. */ 00837 pixt1 = pixHMT(NULL, pixt0, sel1); 00838 pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0); 00839 pixCountPixels(pixt3, &count1, NULL); 00840 pixDebugFlipDetect("junkpixright", pixs, pixt1, debug); 00841 pixDestroy(&pixt1); 00842 pixDestroy(&pixt3); 00843 00844 /* Filter the left-facing characters. */ 00845 pixt2 = pixHMT(NULL, pixt0, sel2); 00846 pixt3 = pixReduceRankBinaryCascade(pixt2, 1, 1, 0, 0); 00847 pixCountPixels(pixt3, &count2, NULL); 00848 pixDebugFlipDetect("junkpixleft", pixs, pixt2, debug); 00849 pixDestroy(&pixt2); 00850 pixDestroy(&pixt3); 00851 00852 nright = (l_float32)count1; 00853 nleft = (l_float32)count2; 00854 nmax = L_MAX(count1, count2); 00855 pixDestroy(&pixt0); 00856 selDestroy(&sel1); 00857 selDestroy(&sel2); 00858 00859 if (nmax > mincount) 00860 *pconf = 2. 
* ((nright - nleft) / sqrt(nright + nleft)); 00861 00862 if (debug) { 00863 fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft); 00864 if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF) 00865 fprintf(stderr, "Text is not mirror reversed\n"); 00866 if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF) 00867 fprintf(stderr, "Text is mirror reversed\n"); 00868 } 00869 00870 return 0; 00871 } 00872 00873 00874 /*----------------------------------------------------------------* 00875 * Left-right mirror detection * 00876 * DWA implementation * 00877 *----------------------------------------------------------------*/ 00878 /*! 00879 * pixMirrorDetectDwa() 00880 * 00881 * Input: pixs (1 bpp, deskewed, English text) 00882 * &conf (<return> confidence that text is not LR mirror reversed) 00883 * mincount (min number of left + right; use 0 for default) 00884 * debug (1 for debug output; 0 otherwise) 00885 * Return: 0 if OK, 1 on error 00886 * 00887 * Notes: 00888 * (1) We assume the text is horizontally oriented, with 00889 * ascenders going up. 00890 * (2) See notes in pixMirrorDetect(). 00891 */ 00892 l_int32 00893 pixMirrorDetectDwa(PIX *pixs, 00894 l_float32 *pconf, 00895 l_int32 mincount, 00896 l_int32 debug) 00897 { 00898 char flipsel1[] = "flipsel1"; 00899 char flipsel2[] = "flipsel2"; 00900 l_int32 count1, count2, nmax; 00901 l_float32 nleft, nright; 00902 PIX *pixt0, *pixt1, *pixt2, *pixt3; 00903 00904 PROCNAME("pixMirrorDetectDwa"); 00905 00906 if (!pconf) 00907 return ERROR_INT("&conf not defined", procName, 1); 00908 *pconf = 0.0; 00909 if (!pixs) 00910 return ERROR_INT("pixs not defined", procName, 1); 00911 if (mincount == 0) 00912 mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT; 00913 00914 /* Fill x-height characters but not space between them, sort of. 
*/ 00915 pixt3 = pixMorphSequenceDwa(pixs, "d1.30", 0); 00916 pixXor(pixt3, pixt3, pixs); 00917 pixt0 = pixMorphSequenceDwa(pixs, "c15.1", 0); 00918 pixXor(pixt0, pixt0, pixs); 00919 pixAnd(pixt0, pixt0, pixt3); 00920 pixOr(pixt3, pixt0, pixs); 00921 pixDestroy(&pixt0); 00922 pixt0 = pixAddBorderGeneral(pixt3, ADDED_BORDER, ADDED_BORDER, 00923 ADDED_BORDER, ADDED_BORDER, 0); 00924 pixDestroy(&pixt3); 00925 00926 /* Filter the right-facing characters. */ 00927 pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel1); 00928 pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0); 00929 pixCountPixels(pixt3, &count1, NULL); 00930 pixDestroy(&pixt1); 00931 pixDestroy(&pixt3); 00932 00933 /* Filter the left-facing characters. */ 00934 pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel2); 00935 pixt3 = pixReduceRankBinaryCascade(pixt2, 1, 1, 0, 0); 00936 pixCountPixels(pixt3, &count2, NULL); 00937 pixDestroy(&pixt2); 00938 pixDestroy(&pixt3); 00939 00940 pixDestroy(&pixt0); 00941 nright = (l_float32)count1; 00942 nleft = (l_float32)count2; 00943 nmax = L_MAX(count1, count2); 00944 00945 if (nmax > mincount) 00946 *pconf = 2. 
* ((nright - nleft) / sqrt(nright + nleft)); 00947 00948 if (debug) { 00949 fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft); 00950 if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF) 00951 fprintf(stderr, "Text is not mirror reversed\n"); 00952 if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF) 00953 fprintf(stderr, "Text is mirror reversed\n"); 00954 } 00955 00956 return 0; 00957 } 00958 00959 00960 /*----------------------------------------------------------------* 00961 * Static debug helper * 00962 *----------------------------------------------------------------*/ 00963 /* 00964 * pixDebugFlipDetect() 00965 * 00966 * Input: filename (for output debug file) 00967 * pixs (input to pix*Detect) 00968 * pixhm (hit-miss result from ascenders or descenders) 00969 * enable (1 to enable this function; 0 to disable) 00970 * Return: void 00971 */ 00972 static void 00973 pixDebugFlipDetect(const char *filename, 00974 PIX *pixs, 00975 PIX *pixhm, 00976 l_int32 enable) 00977 { 00978 PIX *pixt, *pixthm; 00979 00980 if (!enable) return; 00981 00982 /* Display with red dot at counted locations */ 00983 pixt = pixConvert1To4Cmap(pixs); 00984 pixthm = pixMorphSequence(pixhm, "d5.5", 0); 00985 pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0); 00986 00987 pixWrite(filename, pixt, IFF_PNG); 00988 pixDestroy(&pixthm); 00989 pixDestroy(&pixt); 00990 return; 00991 } 00992 00993