Leptonica 1.68
C Image Processing Library

flipdetect.c

Go to the documentation of this file.
00001 /*====================================================================*
00002   -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003   -  This software is distributed in the hope that it will be
00004   -  useful, but with NO WARRANTY OF ANY KIND.
00005   -  No author or distributor accepts responsibility to anyone for the
00006   -  consequences of using this software, or for whether it serves any
00007   -  particular purpose or works at all, unless he or she says so in
00008   -  writing.  Everyone is granted permission to copy, modify and
00009   -  redistribute this source code, for commercial or non-commercial
00010   -  purposes, with the following restrictions: (1) the origin of this
00011   -  source code must not be misrepresented; (2) modified versions must
00012   -  be plainly marked as such; and (3) this notice may not be removed
00013   -  or altered from any source or modified source distribution.
00014   *====================================================================*/
00015 
00016 /*
00017  *  flipdetect.c
00018  *
00019  *      Page orientation detection (pure rotation by 90 degree increments):
00020  *          l_int32      pixOrientDetect()
00021  *          l_int32      makeOrientDecision()
00022  *          l_int32      pixUpDownDetect()
00023  *          l_int32      pixUpDownDetectGeneral()
00024  *          l_int32      pixOrientDetectDwa()
00025  *          l_int32      pixUpDownDetectDwa()
00026  *          l_int32      pixUpDownDetectGeneralDwa()
00027  *
00028  *      Page mirror detection (flip 180 degrees about line in plane of image):
00029  *          l_int32      pixMirrorDetect()
00030  *          l_int32      pixMirrorDetectDwa()
00031  *
00032  *      Static debug helper
00033  *          void         pixDebugFlipDetect()
00034  *
00035  *  ===================================================================    
00036  *
00037  *  Page transformation detection:
00038  *
00039  *  Once a page is deskewed, there are 8 possible states that it
00040  *  can be in, shown symbolically below.  Suppose state 0 is correct.
00041  *      
00042  *      0: correct     1          2          3
00043  *      +------+   +------+   +------+   +------+      
00044  *      | **** |   | *    |   | **** |   |    * |       
00045  *      | *    |   | *    |   |    * |   |    * |      
00046  *      | *    |   | **** |   |    * |   | **** |      
00047  *      +------+   +------+   +------+   +------+      
00048  *
00049  *         4          5          6          7
00050  *      +-----+    +-----+    +-----+    +-----+   
00051  *      | *** |    |   * |    | *** |    | *   |   
00052  *      |   * |    |   * |    | *   |    | *   |   
00053  *      |   * |    |   * |    | *   |    | *   |   
00054  *      |   * |    | *** |    | *   |    | *** |   
00055  *      +-----+    +-----+    +-----+    +-----+   
00056  *
00057  *  Each of the other seven can be derived from state 0 by applying some
00058  *  combination of a 90 degree clockwise rotation, a flip about
00059  *  a horizontal line, and a flip about a vertical line,
00060  *  all abbreviated as:
00061  *      R = Rotation (about a line perpendicular to the image)
00062  *      H = Horizontal flip (about a vertical line in the plane of the image)
00063  *      V = Vertical flip (about a horizontal line in the plane of the image)
00064  *
00065  *  We get these transformations:
00066  *      RHV
00067  *      000  -> 0
00068  *      001  -> 1
00069  *      010  -> 2
00070  *      011  -> 3
00071  *      100  -> 4
00072  *      101  -> 5
00073  *      110  -> 6
00074  *      111  -> 7
00075  *      
00076  *  Note that in four of these, the sum of H and V is 1 (odd).
00077  *  For these four, we have a change in parity (handedness) of
00078  *  the image, and the transformation cannot be performed by
00079  *  rotation about a vertical line out of the page.   Under
00080  *  rotation R, the set of 8 transformations decomposes into
00081  *  two subgroups linking {0, 3, 4, 7} and {1, 2, 5, 6} independently.
00082  *
00083  *  pixOrientDetect*() tests for a pure rotation (0, 90, 180, 270 degrees).
00084  *  It doesn't change parity.
00085  *
00086  *  pixMirrorDetect*() tests for a horizontal flip about the vertical axis.
00087  *  It changes parity.
00088  *
00089  *  The landscape/portrait rotation can be detected in two ways:
00090  *
00091  *    (1) Compute the deskew confidence for an image segment,
00092  *        both as is and rotated 90 degrees  (see skew.c).
00093  *
00094  *    (2) Compute the ascender/descender signal for the image,
00095  *        both as is and rotated 90 degrees  (implemented here).
00096  *
00097  *  The ascender/descender signal is useful for determining text
00098  *  orientation in Roman alphabets because the incidence of letters
00099  *  with straight-line ascenders (b, d, h, k, l, <t>) outnumber
00100  *  those with descenders (<g>, p, q).  The letters <t> and <g>
00101  *  will respond variably to the filter, depending on the type face.
00102  *
00103  *  What about the mirror image situations?  These aren't common
00104  *  unless you're dealing with film, for example.
00105  *  But you can reliably test if the image has undergone a
00106  *  parity-changing flip once about some axis in the plane
00107  *  of the image, using pixMirrorDetect*().  This works ostensibly by
00108  *  counting the number of characters with ascenders that
00109  *  stick out to the left and right of the ascender.  Characters
00110  *  that are not mirror flipped are more likely to extend to the
00111  *  right (b, h, k) than to the left (d).  Of course, that is for
00112  *  text that is rightside-up.  So before you apply the mirror
00113  *  test, it is necessary to ensure that the text has the ascenders
00114  *  going up, and not down or to the left or right.  But here's
00115  *  what *really* happens.  It turns out that the pre-filtering before
00116  *  the hit-miss transform (HMT) is crucial, and surprisingly, when
00117  *  the pre-filtering is chosen to generate a large signal, the majority
00118  *  of the signal comes from open regions of common lower-case
00119  *  letters such as 'e', 'c' and 'f'.
00120  *
00121  *  All operations are given in two implementations whose results are
00122  *  identical: rasterop morphology and dwa morphology.  The dwa
00123  *  implementations are between 2x and 3x faster.
00124  *
00125  *  The set of operations you actually use depends on your prior knowledge:
00126  *
00127  *  (1) If the page is known to be either rightside-up or upside-down, use
00128  *      either pixOrientDetect*() with pleftconf = NULL, or
00129  *      pixUpDownDetect*().   [The '*' refers to either the rasterop
00130  *      or dwa versions.]
00131  *
00132  *  (2) If any of the four orientations are possible, use pixOrientDetect*().
00133  *
00134  *  (3) If the text is horizontal and rightside-up, the only remaining
00135  *      degree of freedom is a left-right mirror flip: use
00136  *      pixMirrorDetect*().
00137  *
00138  *  (4) If you have a relatively large amount of numbers on the page,
00139  *      use the slower pixUpDownDetectGeneral().
00140  *
00141  *  We summarize the full orientation and mirror flip detection process:
00142  *
00143  *  (1) First determine which of the four 90 degree rotations
00144  *      causes the text to be rightside-up.  This can be done
00145  *      with either skew confidence or the pixOrientDetect*()
00146  *      signals.  For the latter, see the table for pixOrientDetect().
00147  *
00148  *  (2) Then, with ascenders pointing up, apply pixMirrorDetect*().
00149  *      In the normal situation the confidence will be
00150  *      large and positive.  However, if mirror flipped, the
00151  *      confidence will be large and negative.
00152  */
00153 
00154 #include <stdio.h>
00155 #include <stdlib.h>
00156 #include <math.h>
00157 #include "allheaders.h"
00158 
00159     /* Sels for pixOrientDetect() and pixMirrorDetect() */
00160 static const char *textsel1 = "x  oo "
00161                               "x oOo "
00162                               "x  o  "
00163                               "x     "
00164                               "xxxxxx";
00165 
00166 static const char *textsel2 = " oo  x"
00167                               " oOo x"
00168                               "  o  x"
00169                               "     x"
00170                               "xxxxxx";
00171 
00172 static const char *textsel3 = "xxxxxx"
00173                               "x     "
00174                               "x  o  "
00175                               "x oOo "
00176                               "x  oo ";
00177                           
00178 static const char *textsel4 = "xxxxxx"
00179                               "     x"
00180                               "  o  x"
00181                               " oOo x"
00182                               " oo  x";
00183 
00184     /* Parameters for determining orientation */
00185 static const l_int32  DEFAULT_MIN_UP_DOWN_COUNT = 70;
00186 static const l_float32  DEFAULT_MIN_UP_DOWN_CONF = 7.0;
00187 static const l_float32  DEFAULT_MIN_UP_DOWN_RATIO = 2.5;
00188 
00189     /* Parameters for determining mirror flip */
00190 static const l_int32  DEFAULT_MIN_MIRROR_FLIP_COUNT = 100;
00191 static const l_float32  DEFAULT_MIN_MIRROR_FLIP_CONF = 5.0;
00192 
00193     /* Static debug function */
00194 static void pixDebugFlipDetect(const char *filename, PIX *pixs,
00195                                PIX *pixhm, l_int32 enable);
00196 
00197 
00198 /*----------------------------------------------------------------*
00199  *         Orientation detection (four 90 degree angles)          *
00200  *                      Rasterop implementation                   *
00201  *----------------------------------------------------------------*/
00202 /*!
00203  *  pixOrientDetect()
00204  *
00205  *      Input:  pixs (1 bpp, deskewed, English text, 150 - 300 ppi)
00206  *              &upconf (<optional return> ; may be null)
00207  *              &leftconf (<optional return> ; may be null)
00208  *              mincount (min number of up + down; use 0 for default)
00209  *              debug (1 for debug output; 0 otherwise)
00210  *      Return: 0 if OK, 1 on error
00211  *
00212  *  Notes:
00213  *      (1) See "Measuring document image skew and orientation"
00214  *          Dan S. Bloomberg, Gary E. Kopec and Lakshmi Dasari
00215  *          IS&T/SPIE EI'95, Conference 2422: Document Recognition II
00216  *          pp 302-316, Feb 6-7, 1995, San Jose, CA
00217  *      (2) upconf is the normalized difference between up ascenders
00218  *          and down ascenders.  The image is analyzed without rotation
00219  *          for being rightside-up or upside-down.  Set &upconf to null
00220  *          to skip this operation.
00221  *      (3) leftconf is the normalized difference between up ascenders
00222  *          and down ascenders in the image after it has been
00223  *          rotated 90 degrees clockwise.  With that rotation, ascenders
00224  *          projecting to the left in the source image will project up
00225  *          in the rotated image.  We compute this by rotating 90 degrees
00226  *          clockwise and testing for up and down ascenders.  Set
00227  *          &leftconf to null to skip this operation.
00228  *      (4) Note that upconf and leftconf are not linear measures of
00229  *          confidence, e.g., in a range between 0 and 100.  They
00230  *          measure how far you are out on the tail of a (presumably)
00231  *          normal distribution.  For example, a confidence of 10 means
00232  *          that it is nearly certain that the difference did not
00233  *          happen at random.  However, these values must be interpreted
00234  *          cautiously, taking into consideration the estimated prior
00235  *          for a particular orientation or mirror flip.   The up-down
00236  *          signal is very strong if applied to text with ascenders
00237  *          up and down, and relatively weak for text at 90 degrees,
00238  *          but even at 90 degrees, the difference can look significant.
00239  *          For example, suppose the ascenders are oriented horizontally,
00240  *          but the test is done vertically.  Then upconf can
00241  *          be < -MIN_CONF_FOR_UP_DOWN, suggesting the text may be
00242  *          upside-down.  However, if instead the test were done
00243  *          horizontally, leftconf will be very much larger
00244  *          (in absolute value), giving the correct orientation.
00245  *      (5) If you compute both upconf and leftconf, and there is
00246  *          sufficient signal, the following table determines the
00247  *          cw angle necessary to rotate pixs so that the text is 
00248  *          rightside-up:
00249  *             0 deg :           upconf >> 1,    abs(upconf) >> abs(leftconf)
00250  *             90 deg :          leftconf >> 1,  abs(leftconf) >> abs(upconf)
00251  *             180 deg :         upconf << -1,   abs(upconf) >> abs(leftconf)
00252  *             270 deg :         leftconf << -1, abs(leftconf) >> abs(upconf)
00253  *      (6) One should probably not interpret the direction unless
00254  *          there are a sufficient number of counts for both orientations,
00255  *          in which case neither upconf nor leftconf will be 0.0.
00256  *      (7) Uses rasterop implementation of HMT.
00257  */
00258 l_int32
00259 pixOrientDetect(PIX        *pixs,
00260                 l_float32  *pupconf,
00261                 l_float32  *pleftconf,
00262                 l_int32     mincount,
00263                 l_int32     debug)
00264 {
00265 PIX  *pixt;
00266 
00267     PROCNAME("pixOrientDetect");
00268 
00269     if (!pixs)
00270         return ERROR_INT("pixs not defined", procName, 1);
00271     if (pixGetDepth(pixs) != 1)
00272         return ERROR_INT("pixs not 1 bpp", procName, 1);
00273     if (!pupconf && !pleftconf)
00274         return ERROR_INT("nothing to do", procName, 1);
00275     if (mincount == 0)
00276         mincount = DEFAULT_MIN_UP_DOWN_COUNT;
00277 
00278     if (pupconf)
00279         pixUpDownDetect(pixs, pupconf, mincount, debug);
00280     if (pleftconf) {
00281         pixt = pixRotate90(pixs, 1);
00282         pixUpDownDetect(pixt, pleftconf, mincount, debug);
00283         pixDestroy(&pixt);
00284     }
00285 
00286     return 0;
00287 }
00288 
00289 
00290 /*!
00291  *  makeOrientDecision()
00292  *
00293  *      Input:  upconf (nonzero)
00294  *              leftconf (nonzero)
00295  *              minupconf (minimum value for which a decision can be made)
00296  *              minratio (minimum conf ratio required for a decision)
00297  *              &orient (<return> text orientation enum {0,1,2,3,4})
00298  *              debug (1 for debug output; 0 otherwise)
00299  *      Return: 0 if OK, 1 on error
00300  *
00301  *  Notes:
00302  *      (1) This can be run after pixOrientDetect()
00303  *      (2) Both upconf and leftconf must be nonzero; otherwise the
00304  *          orientation cannot be determined.
00305  *      (3) The abs values of the input confidences are compared to
00306  *          minupconf.
00307  *      (4) The abs value of the largest of (upconf/leftconf) and
00308  *          (leftconf/upconf) is compared with minratio.
00309  *      (5) Input 0.0 for the default values for minupconf and minratio.
00310  *      (6) The return value of orient is interpreted thus:
00311  *            L_TEXT_ORIENT_UNKNOWN:  not enough evidence to determine
00312  *            L_TEXT_ORIENT_UP:       text rightside-up
00313  *            L_TEXT_ORIENT_LEFT:     landscape, text up facing left
00314  *            L_TEXT_ORIENT_DOWN:     text upside-down
00315  *            L_TEXT_ORIENT_RIGHT:    landscape, text up facing right
00316  */
00317 l_int32
00318 makeOrientDecision(l_float32  upconf,
00319                    l_float32  leftconf,
00320                    l_float32  minupconf,
00321                    l_float32  minratio,
00322                    l_int32   *porient,
00323                    l_int32    debug)
00324 {
00325 l_float32  absupconf, absleftconf;
00326 
00327     PROCNAME("makeOrientDecision");
00328 
00329     if (!porient)
00330         return ERROR_INT("&orient not defined", procName, 1);
00331     *porient = L_TEXT_ORIENT_UNKNOWN;  /* default: no decision */
00332     if (upconf == 0.0 || leftconf == 0.0)
00333         return ERROR_INT("not enough conf to get orientation", procName, 1);
00334 
00335     if (minupconf == 0.0)
00336         minupconf = DEFAULT_MIN_UP_DOWN_CONF;
00337     if (minratio == 0.0)
00338         minratio = DEFAULT_MIN_UP_DOWN_RATIO;
00339     absupconf = L_ABS(upconf);
00340     absleftconf = L_ABS(leftconf);
00341 
00342         /* Here are the four possible orientation decisions, based
00343          * on satisfaction of two threshold constraints. */
00344     if (upconf > minupconf && absupconf > minratio * absleftconf)
00345         *porient = L_TEXT_ORIENT_UP;
00346     else if (leftconf > minupconf && absleftconf > minratio * absupconf)
00347         *porient = L_TEXT_ORIENT_LEFT;
00348     else if (upconf < -minupconf && absupconf > minratio * absleftconf)
00349         *porient = L_TEXT_ORIENT_DOWN;
00350     else if (leftconf < -minupconf && absleftconf > minratio * absupconf)
00351         *porient = L_TEXT_ORIENT_RIGHT;
00352 
00353     if (debug) {
00354         fprintf(stderr, "upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf);
00355         if (*porient == L_TEXT_ORIENT_UNKNOWN)
00356             fprintf(stderr, "Confidence is low; no determination is made\n");
00357         else if (*porient == L_TEXT_ORIENT_UP)
00358             fprintf(stderr, "Text is rightside-up\n");
00359         else if (*porient == L_TEXT_ORIENT_LEFT)
00360             fprintf(stderr, "Text is rotated 90 deg ccw\n");
00361         else if (*porient == L_TEXT_ORIENT_DOWN)
00362             fprintf(stderr, "Text is upside-down\n");
00363         else   /* *porient == L_TEXT_ORIENT_RIGHT */
00364             fprintf(stderr, "Text is rotated 90 deg cw\n");
00365     }
00366 
00367     return 0;
00368 }
00369 
00370 
00371 /*!
00372  *  pixUpDownDetect()
00373  *
00374  *      Input:  pixs (1 bpp, deskewed, English text, 150 - 300 ppi)
00375  *              &conf (<return> confidence that text is rightside-up)
00376  *              mincount (min number of up + down; use 0 for default)
00377  *              debug (1 for debug output; 0 otherwise)
00378  *      Return: 0 if OK, 1 on error
00379  *
00380  *  Notes:
00381  *      (1) Special (typical, slightly faster) case, where the pixels
00382  *          identified through the HMT (hit-miss transform) are not
00383  *          clipped by a truncated word mask pixm.  See pixOrientDetect()
00384  *          and pixUpDownDetectGeneral() for details.
00385  *      (2) The returned confidence is the normalized difference
00386  *          between the number of detected up and down ascenders,
00387  *          assuming that the text is either rightside-up or upside-down
00388  *          and not rotated at a 90 degree angle.
00389  */
00390 l_int32
00391 pixUpDownDetect(PIX        *pixs,
00392                 l_float32  *pconf,
00393                 l_int32     mincount,
00394                 l_int32     debug)
00395 {
00396     return pixUpDownDetectGeneral(pixs, pconf, mincount, 0, debug);
00397 }
00398 
00399 
00400 /*!
00401  *  pixUpDownDetectGeneral()
00402  *
00403  *      Input:  pixs (1 bpp, deskewed, English text, 150 - 300 ppi)
00404  *              &conf (<return> confidence that text is rightside-up)
00405  *              mincount (min number of up + down; use 0 for default)
00406  *              npixels (number of pixels removed from each side of word box)
00407  *              debug (1 for debug output; 0 otherwise)
00408  *      Return: 0 if OK, 1 on error
00409  *
00410  *  Notes:
00411  *      (1) See pixOrientDetect() for other details.
00412  *      (2) @conf is the normalized difference between the number of
00413  *          detected up and down ascenders, assuming that the text
00414  *          is either rightside-up or upside-down and not rotated
00415  *          at a 90 degree angle.
00416  *      (3) The typical mode of operation is @npixels == 0.
00417  *          If @npixels > 0, this removes HMT matches at the 
00418  *          beginning and ending of "words."  This is useful for
00419  *          pages that may have mostly digits, because if npixels == 0,
00420  *          leading "1" and "3" digits can register as having
00421  *          ascenders or descenders, and "7" digits can match descenders.
00422  *          Consequently, a page image of only digits may register
00423  *          as being upside-down.
00424  *      (4) We want to count the number of instances found using the HMT.
00425  *          An expensive way to do this would be to count the
00426  *          number of connected components.  A cheap way is to do a rank
00427  *          reduction cascade that reduces each component to a single
00428  *          pixel, and results (after two or three 2x reductions)
00429  *          in one pixel for each of the original components.
00430  *          After the reduction, you have a much smaller pix over
00431  *          which to count pixels.  We do only 2 reductions, because
00432  *          this function is designed to work for input pix between
00433  *          150 and 300 ppi, and an 8x reduction on a 150 ppi image
00434  *          is going too far -- components will get merged.
00435  */
00436 l_int32
00437 pixUpDownDetectGeneral(PIX        *pixs,
00438                        l_float32  *pconf,
00439                        l_int32     mincount,
00440                        l_int32     npixels,
00441                        l_int32     debug)
00442 {
00443 l_int32    countup, countdown, nmax;
00444 l_float32  nup, ndown;
00445 PIX       *pixt0, *pixt1, *pixt2, *pixt3, *pixm;
00446 SEL       *sel1, *sel2, *sel3, *sel4;
00447 
00448     PROCNAME("pixUpDownDetectGeneral");
00449 
00450     if (!pconf)
00451         return ERROR_INT("&conf not defined", procName, 1);
00452     *pconf = 0.0;
00453     if (!pixs)
00454         return ERROR_INT("pixs not defined", procName, 1);
00455     if (mincount == 0)
00456         mincount = DEFAULT_MIN_UP_DOWN_COUNT;
00457     if (npixels < 0)
00458         npixels = 0;
00459 
00460     sel1 = selCreateFromString(textsel1, 5, 6, NULL);
00461     sel2 = selCreateFromString(textsel2, 5, 6, NULL);
00462     sel3 = selCreateFromString(textsel3, 5, 6, NULL);
00463     sel4 = selCreateFromString(textsel4, 5, 6, NULL);
00464 
00465         /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
00466          * This closes holes in x-height characters and joins them at
00467          * the x-height.  There is more noise in the descender detection
00468          * from this, but it works fairly well. */
00469     pixt0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0);
00470 
00471         /* Optionally, make a mask of the word bounding boxes, shortening
00472          * each of them by a fixed amount at each end. */
00473     pixm = NULL;
00474     if (npixels > 0) {
00475         l_int32  i, nbox, x, y, w, h;
00476         BOX   *box;
00477         BOXA  *boxa;
00478         pixt1 = pixMorphSequence(pixt0, "o10.1", 0);
00479         boxa = pixConnComp(pixt1, NULL, 8);
00480         pixm = pixCreateTemplate(pixt1);
00481         pixDestroy(&pixt1);
00482         nbox = boxaGetCount(boxa);
00483         for (i = 0; i < nbox; i++) {
00484             box = boxaGetBox(boxa, i, L_CLONE);
00485             boxGetGeometry(box, &x, &y, &w, &h);
00486             if (w > 2 * npixels)
00487                 pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
00488                             PIX_SET, NULL, 0, 0);
00489             boxDestroy(&box);
00490         }
00491         boxaDestroy(&boxa);
00492     }
00493 
00494         /* Find the ascenders and optionally filter with pixm.
00495          * For an explanation of the procedure used for counting the result
00496          * of the HMT, see comments at the beginning of this function. */
00497     pixt1 = pixHMT(NULL, pixt0, sel1);
00498     pixt2 = pixHMT(NULL, pixt0, sel2);
00499     pixOr(pixt1, pixt1, pixt2);
00500     if (pixm)
00501         pixAnd(pixt1, pixt1, pixm);
00502     pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0);
00503     pixCountPixels(pixt3, &countup, NULL);
00504     pixDebugFlipDetect("junkpixup", pixs, pixt1, debug);
00505     pixDestroy(&pixt1);
00506     pixDestroy(&pixt2);
00507     pixDestroy(&pixt3);
00508 
00509         /* Find the ascenders and optionally filter with pixm. */
00510     pixt1 = pixHMT(NULL, pixt0, sel3);
00511     pixt2 = pixHMT(NULL, pixt0, sel4);
00512     pixOr(pixt1, pixt1, pixt2);
00513     if (pixm)
00514         pixAnd(pixt1, pixt1, pixm);
00515     pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0);
00516     pixCountPixels(pixt3, &countdown, NULL);
00517     pixDebugFlipDetect("junkpixdown", pixs, pixt1, debug);
00518     pixDestroy(&pixt1);
00519     pixDestroy(&pixt2);
00520     pixDestroy(&pixt3);
00521 
00522         /* Evaluate statistically, generating a confidence that is
00523          * related to the probability with a gaussian distribution. */
00524     nup = (l_float32)(countup);
00525     ndown = (l_float32)(countdown);
00526     nmax = L_MAX(countup, countdown);
00527     if (nmax > mincount)
00528         *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));
00529 
00530     if (debug) {
00531         if (pixm) pixWrite("junkpixm1", pixm, IFF_PNG);
00532         fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
00533                 nup, ndown, *pconf);
00534         if (*pconf > DEFAULT_MIN_UP_DOWN_CONF)
00535             fprintf(stderr, "Text is rightside-up\n");
00536         if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF)
00537             fprintf(stderr, "Text is upside-down\n");
00538     }
00539 
00540     pixDestroy(&pixt0);
00541     pixDestroy(&pixm);
00542     selDestroy(&sel1);
00543     selDestroy(&sel2);
00544     selDestroy(&sel3);
00545     selDestroy(&sel4);
00546     return 0;
00547 }
00548 
00549 
00550 /*----------------------------------------------------------------*
00551  *         Orientation detection (four 90 degree angles)          *
00552  *                         DWA implementation                     *
00553  *----------------------------------------------------------------*/
00554 /*!
00555  *  pixOrientDetectDwa()
00556  *
00557  *      Input:  pixs (1 bpp, deskewed, English text)
00558  *              &upconf (<optional return> ; may be null)
00559  *              &leftconf (<optional return> ; may be null)
00560  *              mincount (min number of up + down; use 0 for default)
00561  *              debug (1 for debug output; 0 otherwise)
00562  *      Return: 0 if OK, 1 on error
00563  *
00564  *  Notes:
00565  *      (1) Same interface as for pixOrientDetect().  See notes
00566  *          there for usage.
00567  *      (2) Uses auto-gen'd code for the Sels defined at the
00568  *          top of this file, with some renaming of functions.
00569  *          The auto-gen'd code is in fliphmtgen.c, and can
00570  *          be generated by a simple executable; see prog/flipselgen.c.
00571  *      (3) This runs about 2.5 times faster than the pixOrientDetect().
00572  */
00573 l_int32
00574 pixOrientDetectDwa(PIX        *pixs,
00575                    l_float32  *pupconf,
00576                    l_float32  *pleftconf,
00577                    l_int32     mincount,
00578                    l_int32     debug)
00579 {
00580 PIX  *pixt;
00581 
00582     PROCNAME("pixOrientDetectDwa");
00583 
00584     if (!pixs)
00585         return ERROR_INT("pixs not defined", procName, 1);
00586     if (pixGetDepth(pixs) != 1)
00587         return ERROR_INT("pixs not 1 bpp", procName, 1);
00588     if (!pupconf && !pleftconf)
00589         return ERROR_INT("nothing to do", procName, 1);
00590     if (mincount == 0)
00591         mincount = DEFAULT_MIN_UP_DOWN_COUNT;
00592 
00593     if (pupconf)
00594         pixUpDownDetectDwa(pixs, pupconf, mincount, debug);
00595     if (pleftconf) {
00596         pixt = pixRotate90(pixs, 1);
00597         pixUpDownDetectDwa(pixt, pleftconf, mincount, debug);
00598         pixDestroy(&pixt);
00599     }
00600 
00601     return 0;
00602 }
00603 
00604 
00605 /*!
00606  *  pixUpDownDetectDwa()
00607  *
00608  *      Input:  pixs (1 bpp, deskewed, English text, 150 - 300 ppi)
00609  *              &conf (<return> confidence that text is rightside-up)
00610  *              mincount (min number of up + down; use 0 for default)
00611  *              debug (1 for debug output; 0 otherwise)
00612  *      Return: 0 if OK, 1 on error
00613  *
00614  *  Notes:
00615  *      (1) Faster (DWA) version of pixUpDownDetect().
00616  *      (2) This is a special case (but typical and slightly faster) of
00617  *          pixUpDownDetectGeneralDwa(), where the pixels identified
00618  *          through the HMT (hit-miss transform) are not clipped by
00619  *          a truncated word mask pixm.  See pixUpDownDetectGeneral()
00620  *          for usage and other details.
00621  *      (3) The returned confidence is the normalized difference
00622  *          between the number of detected up and down ascenders,
00623  *          assuming that the text is either rightside-up or upside-down
00624  *          and not rotated at a 90 degree angle.
00625  */
00626 l_int32
00627 pixUpDownDetectDwa(PIX        *pixs,
00628                   l_float32  *pconf,
00629                   l_int32     mincount,
00630                   l_int32     debug)
00631 {
00632     return pixUpDownDetectGeneralDwa(pixs, pconf, mincount, 0, debug);
00633 }
00634 
00635 
00636 /*!
00637  *  pixUpDownDetectGeneralDwa()
00638  *
00639  *      Input:  pixs (1 bpp, deskewed, English text)
00640  *              &conf (<return> confidence that text is rightside-up)
00641  *              mincount (min number of up + down; use 0 for default)
00642  *              npixels (number of pixels removed from each side of word box)
00643  *              debug (1 for debug output; 0 otherwise)
00644  *      Return: 0 if OK, 1 on error
00645  *
00646  *  Notes:
00647  *      (1) See the notes in pixUpDownDetectGeneral() for usage.
00648  */
00649 l_int32
00650 pixUpDownDetectGeneralDwa(PIX        *pixs,
00651                           l_float32  *pconf,
00652                           l_int32     mincount,
00653                           l_int32     npixels,
00654                           l_int32     debug)
00655 {
00656 char       flipsel1[] = "flipsel1";
00657 char       flipsel2[] = "flipsel2";
00658 char       flipsel3[] = "flipsel3";
00659 char       flipsel4[] = "flipsel4";
00660 l_int32    countup, countdown, nmax;
00661 l_float32  nup, ndown;
00662 PIX       *pixt, *pixt0, *pixt1, *pixt2, *pixt3, *pixm;
00663 
00664     PROCNAME("pixUpDownDetectGeneralDwa");
00665 
00666     if (!pconf)
00667         return ERROR_INT("&conf not defined", procName, 1);
00668     *pconf = 0.0;
00669     if (!pixs)
00670         return ERROR_INT("pixs not defined", procName, 1);
00671     if (mincount == 0)
00672         mincount = DEFAULT_MIN_UP_DOWN_COUNT;
00673     if (npixels < 0)
00674         npixels = 0;
00675 
00676         /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
00677          * This closes holes in x-height characters and joins them at
00678          * the x-height.  There is more noise in the descender detection
00679          * from this, but it works fairly well. */
00680     pixt = pixMorphSequenceDwa(pixs, "c1.8 + c30.1", 0);
00681 
00682         /* Be sure to add the border before the flip DWA operations! */
00683     pixt0 = pixAddBorderGeneral(pixt, ADDED_BORDER, ADDED_BORDER,
00684                                 ADDED_BORDER, ADDED_BORDER, 0);
00685     pixDestroy(&pixt);
00686 
00687         /* Optionally, make a mask of the word bounding boxes, shortening
00688          * each of them by a fixed amount at each end. */
00689     pixm = NULL;
00690     if (npixels > 0) {
00691         l_int32  i, nbox, x, y, w, h;
00692         BOX   *box;
00693         BOXA  *boxa;
00694         pixt1 = pixMorphSequenceDwa(pixt0, "o10.1", 0);
00695         boxa = pixConnComp(pixt1, NULL, 8);
00696         pixm = pixCreateTemplate(pixt1);
00697         pixDestroy(&pixt1);
00698         nbox = boxaGetCount(boxa);
00699         for (i = 0; i < nbox; i++) {
00700             box = boxaGetBox(boxa, i, L_CLONE);
00701             boxGetGeometry(box, &x, &y, &w, &h);
00702             if (w > 2 * npixels)
00703                 pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
00704                             PIX_SET, NULL, 0, 0);
00705             boxDestroy(&box);
00706         }
00707         boxaDestroy(&boxa);
00708     }
00709 
00710         /* Find the ascenders and optionally filter with pixm.
00711          * For an explanation of the procedure used for counting the result
00712          * of the HMT, see comments in pixUpDownDetectGeneral().  */
00713     pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel1);
00714     pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel2);
00715     pixOr(pixt1, pixt1, pixt2);
00716     if (pixm)
00717         pixAnd(pixt1, pixt1, pixm);
00718     pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0);
00719     pixCountPixels(pixt3, &countup, NULL);
00720     pixDestroy(&pixt1);
00721     pixDestroy(&pixt2);
00722     pixDestroy(&pixt3);
00723 
00724         /* Find the ascenders and optionally filter with pixm. */
00725     pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel3);
00726     pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel4);
00727     pixOr(pixt1, pixt1, pixt2);
00728     if (pixm)
00729         pixAnd(pixt1, pixt1, pixm);
00730     pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0);
00731     pixCountPixels(pixt3, &countdown, NULL);
00732     pixDestroy(&pixt1);
00733     pixDestroy(&pixt2);
00734     pixDestroy(&pixt3);
00735 
00736         /* Evaluate statistically, generating a confidence that is
00737          * related to the probability with a gaussian distribution. */
00738     nup = (l_float32)(countup);
00739     ndown = (l_float32)(countdown);
00740     nmax = L_MAX(countup, countdown);
00741     if (nmax > mincount)
00742         *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));
00743 
00744     if (debug) {
00745         if (pixm) pixWrite("junkpixm2", pixm, IFF_PNG);
00746         fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
00747                 nup, ndown, *pconf);
00748         if (*pconf > DEFAULT_MIN_UP_DOWN_CONF)
00749             fprintf(stderr, "Text is rightside-up\n");
00750         if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF)
00751             fprintf(stderr, "Text is upside-down\n");
00752     }
00753 
00754     pixDestroy(&pixt0);
00755     pixDestroy(&pixm);
00756     return 0;
00757 }
00758 
00759 
00760 
00761 /*----------------------------------------------------------------*
00762  *                     Left-right mirror detection                *
00763  *                       Rasterop implementation                  *
00764  *----------------------------------------------------------------*/
00765 /*!
00766  *  pixMirrorDetect()
00767  *
00768  *      Input:  pixs (1 bpp, deskewed, English text)
00769  *              &conf (<return> confidence that text is not LR mirror reversed)
00770  *              mincount (min number of left + right; use 0 for default)
00771  *              debug (1 for debug output; 0 otherwise)
00772  *      Return: 0 if OK, 1 on error
00773  *
00774  *  Notes:
00775  *      (1) For this test, it is necessary that the text is horizontally
00776  *          oriented, with ascenders going up.
00777  *      (2) conf is the normalized difference between the number of
00778  *          right and left facing characters with ascenders.
00779  *          Left-facing are {d}; right-facing are {b, h, k}.
00780  *          At least that was the expectation.  In practice, we can
00781  *          really just say that it is the normalized difference in
00782  *          hits using two specific hit-miss filters, textsel1 and textsel2,
00783  *          after the image has been suitably pre-filtered so that
00784  *          these filters are effective.  See (4) for what's really happening.
00785  *      (3) A large positive conf value indicates normal text, whereas
00786  *          a large negative conf value means the page is mirror reversed.
00787  *      (4) The implementation is a bit tricky.  The general idea is
00788  *          to fill the x-height part of characters, but not the space
00789  *          between them, before doing the HMT.  This is done by
00790  *          finding pixels added using two different operations -- a
00791  *          horizontal close and a vertical dilation -- and adding
00792  *          the intersection of these sets to the original.  It turns
00793  *          out that the original intuition about the signal was largely
00794  *          in error: much of the signal for right-facing characters
00795  *          comes from the lower part of common x-height characters, like
00796  *          the e and c, that remain open after these operations.
00797  *          So it's important that the operations to close the x-height
00798  *          parts of the characters are purposely weakened sufficiently
00799  *          to allow these characters to remain open.  The wonders
00800  *          of morphology!
00801  */
00802 l_int32
00803 pixMirrorDetect(PIX        *pixs,
00804                 l_float32  *pconf,
00805                 l_int32     mincount,
00806                 l_int32     debug)
00807 {
00808 l_int32    count1, count2, nmax;
00809 l_float32  nleft, nright;
00810 PIX       *pixt0, *pixt1, *pixt2, *pixt3;
00811 SEL       *sel1, *sel2;
00812 
00813     PROCNAME("pixMirrorDetect");
00814 
00815     if (!pconf)
00816         return ERROR_INT("&conf not defined", procName, 1);
00817     *pconf = 0.0;
00818     if (!pixs)
00819         return ERROR_INT("pixs not defined", procName, 1);
00820     if (mincount == 0)
00821         mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT;
00822 
00823     sel1 = selCreateFromString(textsel1, 5, 6, NULL);
00824     sel2 = selCreateFromString(textsel2, 5, 6, NULL);
00825 
00826         /* Fill x-height characters but not space between them, sort of. */
00827     pixt3 = pixMorphCompSequence(pixs, "d1.30", 0);
00828     pixXor(pixt3, pixt3, pixs);
00829     pixt0 = pixMorphCompSequence(pixs, "c15.1", 0);
00830     pixXor(pixt0, pixt0, pixs);
00831     pixAnd(pixt0, pixt0, pixt3);
00832     pixOr(pixt0, pixt0, pixs);
00833     pixDestroy(&pixt3);
00834 /*    pixDisplayWrite(pixt0, 1); */
00835 
00836         /* Filter the right-facing characters. */
00837     pixt1 = pixHMT(NULL, pixt0, sel1);
00838     pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0);
00839     pixCountPixels(pixt3, &count1, NULL);
00840     pixDebugFlipDetect("junkpixright", pixs, pixt1, debug);
00841     pixDestroy(&pixt1);
00842     pixDestroy(&pixt3);
00843 
00844         /* Filter the left-facing characters. */
00845     pixt2 = pixHMT(NULL, pixt0, sel2);
00846     pixt3 = pixReduceRankBinaryCascade(pixt2, 1, 1, 0, 0);
00847     pixCountPixels(pixt3, &count2, NULL);
00848     pixDebugFlipDetect("junkpixleft", pixs, pixt2, debug);
00849     pixDestroy(&pixt2);
00850     pixDestroy(&pixt3);
00851 
00852     nright = (l_float32)count1;
00853     nleft = (l_float32)count2;
00854     nmax = L_MAX(count1, count2);
00855     pixDestroy(&pixt0);
00856     selDestroy(&sel1);
00857     selDestroy(&sel2);
00858 
00859     if (nmax > mincount)
00860         *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));
00861 
00862     if (debug) {
00863         fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft);
00864         if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF)
00865             fprintf(stderr, "Text is not mirror reversed\n");
00866         if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF)
00867             fprintf(stderr, "Text is mirror reversed\n");
00868     }
00869 
00870     return 0;
00871 }
00872 
00873 
00874 /*----------------------------------------------------------------*
00875  *                     Left-right mirror detection                *
00876  *                          DWA implementation                    *
00877  *----------------------------------------------------------------*/
00878 /*!
00879  *  pixMirrorDetectDwa()
00880  *
00881  *      Input:  pixs (1 bpp, deskewed, English text)
00882  *              &conf (<return> confidence that text is not LR mirror reversed)
00883  *              mincount (min number of left + right; use 0 for default)
00884  *              debug (1 for debug output; 0 otherwise)
00885  *      Return: 0 if OK, 1 on error
00886  *
00887  *  Notes:
00888  *      (1) We assume the text is horizontally oriented, with 
00889  *          ascenders going up.
00890  *      (2) See notes in pixMirrorDetect().
00891  */
00892 l_int32
00893 pixMirrorDetectDwa(PIX        *pixs,
00894                    l_float32  *pconf,
00895                    l_int32     mincount,
00896                    l_int32     debug)
00897 {
00898 char       flipsel1[] = "flipsel1";
00899 char       flipsel2[] = "flipsel2";
00900 l_int32    count1, count2, nmax;
00901 l_float32  nleft, nright;
00902 PIX       *pixt0, *pixt1, *pixt2, *pixt3;
00903 
00904     PROCNAME("pixMirrorDetectDwa");
00905 
00906     if (!pconf)
00907         return ERROR_INT("&conf not defined", procName, 1);
00908     *pconf = 0.0;
00909     if (!pixs)
00910         return ERROR_INT("pixs not defined", procName, 1);
00911     if (mincount == 0)
00912         mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT;
00913 
00914         /* Fill x-height characters but not space between them, sort of. */
00915     pixt3 = pixMorphSequenceDwa(pixs, "d1.30", 0);
00916     pixXor(pixt3, pixt3, pixs);
00917     pixt0 = pixMorphSequenceDwa(pixs, "c15.1", 0);
00918     pixXor(pixt0, pixt0, pixs);
00919     pixAnd(pixt0, pixt0, pixt3);
00920     pixOr(pixt3, pixt0, pixs);
00921     pixDestroy(&pixt0);
00922     pixt0 = pixAddBorderGeneral(pixt3, ADDED_BORDER, ADDED_BORDER,
00923                                 ADDED_BORDER, ADDED_BORDER, 0);
00924     pixDestroy(&pixt3);
00925 
00926         /* Filter the right-facing characters. */
00927     pixt1 = pixFlipFHMTGen(NULL, pixt0, flipsel1);
00928     pixt3 = pixReduceRankBinaryCascade(pixt1, 1, 1, 0, 0);
00929     pixCountPixels(pixt3, &count1, NULL);
00930     pixDestroy(&pixt1);
00931     pixDestroy(&pixt3);
00932 
00933         /* Filter the left-facing characters. */
00934     pixt2 = pixFlipFHMTGen(NULL, pixt0, flipsel2);
00935     pixt3 = pixReduceRankBinaryCascade(pixt2, 1, 1, 0, 0);
00936     pixCountPixels(pixt3, &count2, NULL);
00937     pixDestroy(&pixt2);
00938     pixDestroy(&pixt3);
00939 
00940     pixDestroy(&pixt0);
00941     nright = (l_float32)count1;
00942     nleft = (l_float32)count2;
00943     nmax = L_MAX(count1, count2);
00944 
00945     if (nmax > mincount)
00946         *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));
00947 
00948     if (debug) {
00949         fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft);
00950         if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF)
00951             fprintf(stderr, "Text is not mirror reversed\n");
00952         if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF)
00953             fprintf(stderr, "Text is mirror reversed\n");
00954     }
00955 
00956     return 0;
00957 }
00958 
00959 
00960 /*----------------------------------------------------------------*
00961  *                        Static debug helper                     *
00962  *----------------------------------------------------------------*/
00963 /*
00964  *  pixDebugFlipDetect()
00965  *
00966  *      Input:  filename (for output debug file)
00967  *              pixs (input to pix*Detect)
00968  *              pixhm (hit-miss result from ascenders or descenders)
00969  *              enable (1 to enable this function; 0 to disable)
00970  *      Return: void
00971  */
00972 static void
00973 pixDebugFlipDetect(const char *filename,
00974                    PIX        *pixs,
00975                    PIX        *pixhm,
00976                    l_int32     enable)
00977 {
00978 PIX  *pixt, *pixthm;
00979 
00980    if (!enable) return;
00981 
00982         /* Display with red dot at counted locations */
00983     pixt = pixConvert1To4Cmap(pixs);
00984     pixthm = pixMorphSequence(pixhm, "d5.5", 0);
00985     pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0);
00986 
00987     pixWrite(filename, pixt, IFF_PNG);
00988     pixDestroy(&pixthm);
00989     pixDestroy(&pixt);
00990     return;
00991 }
00992 
00993 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines