Leptonica 1.68
C Image Processing Library

binarize.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  *  binarize.c
00018  *                     
00019  *  ===================================================================
00020  *  Image binarization algorithms are found in:
00021  *    grayquant.c:   standard, simple, general grayscale quantization
00022  *    adaptmap.c:    local adaptive; mostly gray-to-gray in preparation
00023  *                   for binarization
00024  *    binarize.c:    special binarization methods, locally adaptive.
00025  *  ===================================================================
00026  *
00027  *      Adaptive Otsu-based thresholding
00028  *          l_int32    pixOtsuAdaptiveThreshold()       8 bpp
00029  *
00030  *      Otsu thresholding on adaptive background normalization
00031  *          PIX       *pixOtsuThreshOnBackgroundNorm()  8 bpp
00032  *
00033  *      Masking and Otsu estimate on adaptive background normalization
00034  *          PIX       *pixMaskedThreshOnBackgroundNorm()  8 bpp
00035  *
00036  *      Sauvola local thresholding
00037  *          l_int32    pixSauvolaBinarizeTiled()
00038  *          l_int32    pixSauvolaBinarize()
00039  *          PIX       *pixSauvolaGetThreshold()
00040  *          PIX       *pixApplyLocalThreshold();
00041  *
00042  *  Notes:
00043  *      (1) pixOtsuAdaptiveThreshold() computes a global threshold over each
00044  *          tile and performs the threshold operation, resulting in a
00045  *          binary image for each tile.  These are stitched into the
00046  *          final result.
00047  *      (2) pixOtsuThreshOnBackgroundNorm() and
00048  *          pixMaskedThreshOnBackgroundNorm() are binarization functions
00049  *          that use background normalization with other techniques.
00050  *      (3) Sauvola binarization computes a local threshold based on
00051  *          the local average and square average.  It takes two constants:
00052  *          the window size for the measurment at each pixel and a
00053  *          parameter that determines the amount of normalized local
00054  *          standard deviation to subtract from the local average value.
00055  */
00056 
00057 #include <math.h>
00058 #include "allheaders.h"
00059 
00060 /*------------------------------------------------------------------*
00061  *                 Adaptive Otsu-based thresholding                 *
00062  *------------------------------------------------------------------*/
00063 /*!
00064  *  pixOtsuAdaptiveThreshold()
00065  *
00066  *      Input:  pixs (8 bpp)
00067  *              sx, sy (desired tile dimensions; actual size may vary)
00068  *              smoothx, smoothy (half-width of convolution kernel applied to
00069  *                                threshold array: use 0 for no smoothing)
00070  *              scorefract (fraction of the max Otsu score; typ. 0.1;
00071  *                          use 0.0 for standard Otsu)
00072  *              &pixth (<optional return> array of threshold values
00073  *                      found for each tile)
00074  *              &pixd (<optional return> thresholded input pixs, based on
00075  *                     the threshold array)
00076  *      Return: 0 if OK, 1 on error
00077  *
00078  *  Notes:
00079  *      (1) The Otsu method finds a single global threshold for an image.
00080  *          This function allows a locally adapted threshold to be
00081  *          found for each tile into which the image is broken up.
00082  *      (2) The array of threshold values, one for each tile, constitutes
00083  *          a highly downscaled image.  This array is optionally
00084  *          smoothed using a convolution.  The full width and height of the
00085  *          convolution kernel are (2 * @smoothx + 1) and (2 * @smoothy + 1).
00086  *      (3) The minimum tile dimension allowed is 16.  If such small
00087  *          tiles are used, it is recommended to use smoothing, because
00088  *          without smoothing, each small tile determines the splitting
00089  *          threshold independently.  A tile that is entirely in the
00090  *          image bg will then hallucinate fg, resulting in a very noisy
00091  *          binarization.  The smoothing should be large enough that no
00092  *          tile is only influenced by one type (fg or bg) of pixels,
00093  *          because it will force a split of its pixels.
00094  *      (4) To get a single global threshold for the entire image, use
00095  *          input values of @sx and @sy that are larger than the image.
00096  *          For this situation, the smoothing parameters are ignored.
00097  *      (5) The threshold values partition the image pixels into two classes:
00098  *          one whose values are less than the threshold and another
00099  *          whose values are greater than or equal to the threshold.
00100  *          This is the same use of 'threshold' as in pixThresholdToBinary().
00101  *      (6) The scorefract is the fraction of the maximum Otsu score, which
00102  *          is used to determine the range over which the histogram minimum
00103  *          is searched.  See numaSplitDistribution() for details on the
00104  *          underlying method of choosing a threshold.
00105  *      (7) This uses enables a modified version of the Otsu criterion for
00106  *          splitting the distribution of pixels in each tile into a
00107  *          fg and bg part.  The modification consists of searching for
00108  *          a minimum in the histogram over a range of pixel values where
00109  *          the Otsu score is within a defined fraction, @scorefract,
00110  *          of the max score.  To get the original Otsu algorithm, set
00111  *          @scorefract == 0.
00112  */
00113 l_int32
00114 pixOtsuAdaptiveThreshold(PIX       *pixs,
00115                          l_int32    sx,
00116                          l_int32    sy,
00117                          l_int32    smoothx,
00118                          l_int32    smoothy,
00119                          l_float32  scorefract,
00120                          PIX      **ppixth,
00121                          PIX      **ppixd)
00122 {
00123 l_int32     w, h, nx, ny, i, j, thresh;
00124 l_uint32    val;
00125 PIX        *pixt, *pixb, *pixthresh, *pixth, *pixd;
00126 PIXTILING  *pt;
00127 
00128     PROCNAME("pixOtsuAdaptiveThreshold");
00129 
00130     if (!ppixth && !ppixd)
00131         return ERROR_INT("neither &pixth nor &pixd defined", procName, 1);
00132     if (ppixth) *ppixth = NULL;
00133     if (ppixd) *ppixd = NULL;
00134     if (!pixs || pixGetDepth(pixs) != 8)
00135         return ERROR_INT("pixs not defined or not 8 bpp", procName, 1);
00136     if (sx < 16 || sy < 16)
00137         return ERROR_INT("sx and sy must be >= 16", procName, 1);
00138 
00139         /* Compute the threshold array for the tiles */
00140     pixGetDimensions(pixs, &w, &h, NULL);
00141     nx = L_MAX(1, w / sx);
00142     ny = L_MAX(1, h / sy);
00143     smoothx = L_MIN(smoothx, (nx - 1) / 2);
00144     smoothy = L_MIN(smoothy, (ny - 1) / 2);
00145     pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0);
00146     pixthresh = pixCreate(nx, ny, 8);
00147     for (i = 0; i < ny; i++) {
00148         for (j = 0; j < nx; j++) {
00149             pixt = pixTilingGetTile(pt, i, j);
00150             pixSplitDistributionFgBg(pixt, scorefract, 1, &thresh,
00151                                      NULL, NULL, 0);
00152             pixSetPixel(pixthresh, j, i, thresh);  /* see note (4) */
00153             pixDestroy(&pixt);
00154         }
00155     }
00156 
00157         /* Optionally smooth the threshold array */
00158     if (smoothx > 0 || smoothy > 0)
00159         pixth = pixBlockconv(pixthresh, smoothx, smoothy);
00160     else
00161         pixth = pixClone(pixthresh);
00162     pixDestroy(&pixthresh);
00163 
00164         /* Optionally apply the threshold array to binarize pixs */
00165     if (ppixd) {
00166         pixd = pixCreate(w, h, 1);
00167         for (i = 0; i < ny; i++) {
00168             for (j = 0; j < nx; j++) {
00169                 pixt = pixTilingGetTile(pt, i, j);
00170                 pixGetPixel(pixth, j, i, &val);
00171                 pixb = pixThresholdToBinary(pixt, val);
00172                 pixTilingPaintTile(pixd, i, j, pixb, pt);
00173                 pixDestroy(&pixt);
00174                 pixDestroy(&pixb);
00175             }
00176         }
00177         *ppixd = pixd;
00178     }
00179 
00180     if (ppixth)
00181         *ppixth = pixth;
00182     else
00183         pixDestroy(&pixth);
00184 
00185     pixTilingDestroy(&pt);
00186     return 0;
00187 }
00188 
00189 
00190 /*------------------------------------------------------------------*
00191  *      Otsu thresholding on adaptive background normalization      *
00192  *------------------------------------------------------------------*/
00193 /*!
00194  *  pixOtsuThreshOnBackgroundNorm()
00195  *
00196  *      Input:  pixs (8 bpp grayscale; not colormapped)
00197  *              pixim (<optional> 1 bpp 'image' mask; can be null)
00198  *              sx, sy (tile size in pixels)
00199  *              thresh (threshold for determining foreground)
00200  *              mincount (min threshold on counts in a tile)
00201  *              bgval (target bg val; typ. > 128)
00202  *              smoothx (half-width of block convolution kernel width)
00203  *              smoothy (half-width of block convolution kernel height)
00204  *              scorefract (fraction of the max Otsu score; typ. 0.1)
00205  *              &thresh (<optional return> threshold value that was
00206  *                       used on the normalized image)
00207  *      Return: pixd (1 bpp thresholded image), or null on error
00208  *
00209  *  Notes:
00210  *      (1) This does background normalization followed by Otsu
00211  *          thresholding.  Otsu binarization attempts to split the
00212  *          image into two roughly equal sets of pixels, and it does
00213  *          a very poor job when there are large amounts of dark
00214  *          background.  By doing a background normalization first,
00215  *          to get the background near 255, we remove this problem.
00216  *          Then we use a modified Otsu to estimate the best global
00217  *          threshold on the normalized image.
00218  *      (2) See pixBackgroundNorm() for meaning and typical values
00219  *          of input parameters.  For a start, you can try:
00220  *            sx, sy = 10, 15
00221  *            thresh = 100
00222  *            mincount = 50
00223  *            bgval = 255
00224  *            smoothx, smoothy = 2
00225  */
00226 PIX *
00227 pixOtsuThreshOnBackgroundNorm(PIX       *pixs,
00228                               PIX       *pixim,
00229                               l_int32    sx,
00230                               l_int32    sy,
00231                               l_int32    thresh,
00232                               l_int32    mincount,
00233                               l_int32    bgval,
00234                               l_int32    smoothx,
00235                               l_int32    smoothy,
00236                               l_float32  scorefract,
00237                               l_int32   *pthresh)
00238 {
00239 l_int32   w, h;
00240 l_uint32  val;
00241 PIX      *pixn, *pixt, *pixd;
00242 
00243     PROCNAME("pixOtsuThreshOnBackgroundNorm");
00244 
00245     if (pthresh) *pthresh = 0;
00246     if (!pixs || pixGetDepth(pixs) != 8)
00247         return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
00248     if (pixGetColormap(pixs))
00249         return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL);
00250     if (sx < 4 || sy < 4)
00251         return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL);
00252     if (mincount > sx * sy) {
00253         L_WARNING("mincount too large for tile size", procName);
00254         mincount = (sx * sy) / 3;
00255     }
00256 
00257     pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh,
00258                              mincount, bgval, smoothx, smoothy);
00259     if (!pixn)
00260         return (PIX *)ERROR_PTR("pixn not made", procName, NULL);
00261 
00262         /* Just use 1 tile for a global threshold, which is stored
00263          * as a single pixel in pixt. */
00264     pixGetDimensions(pixn, &w, &h, NULL);
00265     pixOtsuAdaptiveThreshold(pixn, w, h, 0, 0, scorefract, &pixt, &pixd);
00266     pixDestroy(&pixn);
00267 
00268     if (pixt && pthresh) {
00269         pixGetPixel(pixt, 0, 0, &val);
00270         *pthresh = val;
00271     }
00272     pixDestroy(&pixt);
00273 
00274     if (!pixd)
00275         return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
00276     else
00277         return pixd;
00278 }
00279     
00280 
00281 
00282 /*----------------------------------------------------------------------*
00283  *    Masking and Otsu estimate on adaptive background normalization    *
00284  *----------------------------------------------------------------------*/
00285 /*!
00286  *  pixMaskedThreshOnBackgroundNorm()
00287  *
00288  *      Input:  pixs (8 bpp grayscale; not colormapped)
00289  *              pixim (<optional> 1 bpp 'image' mask; can be null)
00290  *              sx, sy (tile size in pixels)
00291  *              thresh (threshold for determining foreground)
00292  *              mincount (min threshold on counts in a tile)
00293  *              smoothx (half-width of block convolution kernel width)
00294  *              smoothy (half-width of block convolution kernel height)
00295  *              scorefract (fraction of the max Otsu score; typ. ~ 0.1)
00296  *              &thresh (<optional return> threshold value that was
00297  *                       used on the normalized image)
00298  *      Return: pixd (1 bpp thresholded image), or null on error
00299  *
00300  *  Notes:
00301  *      (1) This begins with a standard background normalization.
00302  *          Additionally, there is a flexible background norm, that
00303  *          will adapt to a rapidly varying background, and this
00304  *          puts white pixels in the background near regions with
00305  *          significant foreground.  The white pixels are turned into
00306  *          a 1 bpp selection mask by binarization followed by dilation.
00307  *          Otsu thresholding is performed on the input image to get an
00308  *          estimate of the threshold in the non-mask regions.
00309  *          The background normalized image is thresholded with two
00310  *          different values, and the result is combined using
00311  *          the selection mask.
00312  *      (2) Note that the numbers 255 (for bgval target) and 190 (for
00313  *          thresholding on pixn) are tied together, and explicitly
00314  *          defined in this function.
00315  *      (3) See pixBackgroundNorm() for meaning and typical values
00316  *          of input parameters.  For a start, you can try:
00317  *            sx, sy = 10, 15
00318  *            thresh = 100
00319  *            mincount = 50
00320  *            smoothx, smoothy = 2
00321  */
00322 PIX *
00323 pixMaskedThreshOnBackgroundNorm(PIX       *pixs,
00324                                 PIX       *pixim,
00325                                 l_int32    sx,
00326                                 l_int32    sy,
00327                                 l_int32    thresh,
00328                                 l_int32    mincount,
00329                                 l_int32    smoothx,
00330                                 l_int32    smoothy,
00331                                 l_float32  scorefract,
00332                                 l_int32   *pthresh)
00333 {
00334 l_int32   w, h;
00335 l_uint32  val;
00336 PIX      *pixn, *pixm, *pixd, *pixt1, *pixt2, *pixt3, *pixt4;
00337 
00338     PROCNAME("pixMaskedThreshOnBackgroundNorm");
00339 
00340     if (pthresh) *pthresh = 0;
00341     if (!pixs || pixGetDepth(pixs) != 8)
00342         return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
00343     if (pixGetColormap(pixs))
00344         return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL);
00345     if (sx < 4 || sy < 4)
00346         return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL);
00347     if (mincount > sx * sy) {
00348         L_WARNING("mincount too large for tile size", procName);
00349         mincount = (sx * sy) / 3;
00350     }
00351 
00352         /* Standard background normalization */
00353     pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh,
00354                              mincount, 255, smoothx, smoothy);
00355     if (!pixn)
00356         return (PIX *)ERROR_PTR("pixn not made", procName, NULL);
00357 
00358         /* Special background normalization for adaptation to quickly
00359          * varying background.  Threshold on the very light parts,
00360          * which tend to be near significant edges, and dilate to
00361          * form a mask over regions that are typically text.  The
00362          * dilation size is chosen to cover the text completely,
00363          * except for very thick fonts. */
00364     pixt1 = pixBackgroundNormFlex(pixs, 7, 7, 1, 1, 20);
00365     pixt2 = pixThresholdToBinary(pixt1, 240);
00366     pixInvert(pixt2, pixt2);
00367     pixm = pixMorphSequence(pixt2, "d21.21", 0);
00368     pixDestroy(&pixt1);
00369     pixDestroy(&pixt2);
00370 
00371         /* Use Otsu to get a global threshold estimate for the image,
00372          * which is stored as a single pixel in pixt3. */
00373     pixGetDimensions(pixs, &w, &h, NULL);
00374     pixOtsuAdaptiveThreshold(pixs, w, h, 0, 0, scorefract, &pixt3, NULL);
00375     if (pixt3 && pthresh) {
00376         pixGetPixel(pixt3, 0, 0, &val);
00377         *pthresh = val;
00378     }
00379     pixDestroy(&pixt3);
00380 
00381         /* Threshold the background normalized images differentially,
00382          * using a high value correlated with the background normalization
00383          * for the part of the image under the mask (i.e., near the
00384          * darker, thicker foreground), and a value that depends on the Otsu
00385          * threshold for the rest of the image.  This gives a solid
00386          * (high) thresholding for the foreground parts of the image,
00387          * while allowing the background and light foreground to be
00388          * reasonably well cleaned using a threshold adapted to the
00389          * input image. */
00390     pixd = pixThresholdToBinary(pixn, val + 30);  /* for bg and light fg */
00391     pixt4 = pixThresholdToBinary(pixn, 190);  /* for heavier fg */
00392     pixCombineMasked(pixd, pixt4, pixm);
00393     pixDestroy(&pixt4);
00394     pixDestroy(&pixm);
00395     pixDestroy(&pixn);
00396 
00397     if (!pixd)
00398         return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
00399     else
00400         return pixd;
00401 }
00402     
00403 
00404 /*----------------------------------------------------------------------*
00405  *                           Sauvola binarization                       *
00406  *----------------------------------------------------------------------*/
00407 /*!
00408  *  pixSauvolaBinarizeTiled()
00409  *
00410  *      Input:  pixs (8 bpp grayscale, not colormapped)
00411  *              whsize (window half-width for measuring local statistics)
00412  *              factor (factor for reducing threshold due to variance; >= 0)
00413  *              nx, ny (subdivision into tiles; >= 1)
00414  *              &pixth (<optional return> Sauvola threshold values)
00415  *              &pixd (<optional return> thresholded image)
00416  *      Return: 0 if OK, 1 on error
00417  *
00418  *  Notes:
00419  *      (1) The window width and height are 2 * @whsize + 1.  The minimum
00420  *          value for @whsize is 2; typically it is >= 7..
00421  *      (2) For nx == ny == 1, this defaults to pixSauvolaBinarize().
00422  *      (3) Why a tiled version?
00423  *          (a) Because the mean value accumulator is a uint32, overflow
00424  *              can occur for an image with more than 16M pixels.
00425  *          (b) The mean value accumulator array for 16M pixels is 64 MB.
00426  *              The mean square accumulator array for 16M pixels is 128 MB.
00427  *              Using tiles reduces the size of these arrays.
00428  *          (c) Each tile can be processed independently, in parallel,
00429  *              on a multicore processor.
00430  *      (4) The Sauvola threshold is determined from the formula:
00431  *              t = m * (1 - k * (1 - s / 128))
00432  *          See pixSauvolaBinarize() for details.
00433  */
00434 l_int32
00435 pixSauvolaBinarizeTiled(PIX       *pixs,
00436                         l_int32    whsize,
00437                         l_float32  factor,
00438                         l_int32    nx,
00439                         l_int32    ny,
00440                         PIX      **ppixth,
00441                         PIX      **ppixd)
00442 {
00443 l_int32     i, j, w, h, xrat, yrat;
00444 PIX        *pixth, *pixd, *tileth, *tiled, *pixt;
00445 PIX       **ptileth, **ptiled;
00446 PIXTILING  *pt;
00447 
00448     PROCNAME("pixSauvolaBinarizeTiled");
00449     
00450     if (!ppixth && !ppixd)
00451         return ERROR_INT("no outputs", procName, 1);
00452     if (ppixth) *ppixth = NULL;
00453     if (ppixd) *ppixd = NULL;
00454     if (!pixs || pixGetDepth(pixs) != 8)
00455         return ERROR_INT("pixs undefined or not 8 bpp", procName, 1);
00456     if (pixGetColormap(pixs))
00457         return ERROR_INT("pixs is cmapped", procName, 1);
00458     pixGetDimensions(pixs, &w, &h, NULL);
00459     if (whsize < 2)
00460         return ERROR_INT("whsize must be >= 2", procName, 1);
00461     if (w < 2 * whsize + 3 || h < 2 * whsize + 3)
00462         return ERROR_INT("whsize too large for image", procName, 1);
00463     if (factor < 0.0)
00464         return ERROR_INT("factor must be >= 0", procName, 1);
00465 
00466     if (nx <= 1 && ny <= 1)
00467         return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL,
00468                                   ppixth, ppixd);
00469 
00470         /* Test to see if the tiles are too small.  The required
00471          * condition is that the tile dimensions must be at least
00472          * (whsize + 2) x (whsize + 2).  */
00473     xrat = w / nx;
00474     yrat = h / ny;
00475     if (xrat < whsize + 2) {
00476         nx = w / (whsize + 2);
00477         L_WARNING_INT("tile width too small; nx reduced to %d", procName, nx);
00478     }
00479     if (yrat < whsize + 2) {
00480         ny = h / (whsize + 2);
00481         L_WARNING_INT("tile height too small; ny reduced to %d", procName, ny);
00482     }
00483     if (nx <= 1 && ny <= 1)
00484         return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL,
00485                                   ppixth, ppixd);
00486 
00487         /* We can use pixtiling for painting both outputs, if requested */
00488     if (ppixth) {
00489         pixth = pixCreateNoInit(w, h, 8);
00490         *ppixth = pixth;
00491     }
00492     if (ppixd) {
00493         pixd = pixCreateNoInit(w, h, 1);
00494         *ppixd = pixd;
00495     }
00496     pt = pixTilingCreate(pixs, nx, ny, 0, 0, whsize + 1, whsize + 1);
00497     pixTilingNoStripOnPaint(pt);  /* pixSauvolaBinarize() does the stripping */
00498 
00499     for (i = 0; i < ny; i++) {
00500         for (j = 0; j < nx; j++) {
00501             pixt = pixTilingGetTile(pt, i, j);
00502             ptileth = (ppixth) ? &tileth : NULL;
00503             ptiled = (ppixd) ? &tiled : NULL;
00504             pixSauvolaBinarize(pixt, whsize, factor, 0, NULL, NULL,
00505                                ptileth, ptiled);
00506             if (ppixth) {  /* do not strip */
00507                 pixTilingPaintTile(pixth, i, j, tileth, pt);
00508                 pixDestroy(&tileth);
00509             }
00510             if (ppixd) {
00511                 pixTilingPaintTile(pixd, i, j, tiled, pt);
00512                 pixDestroy(&tiled);
00513             }
00514             pixDestroy(&pixt);
00515         }
00516     }
00517 
00518     pixTilingDestroy(&pt);
00519     return 0;
00520 }
00521 
00522 
00523 /*!
00524  *  pixSauvolaBinarize()
00525  *
00526  *      Input:  pixs (8 bpp grayscale; not colormapped)
00527  *              whsize (window half-width for measuring local statistics)
00528  *              factor (factor for reducing threshold due to variance; >= 0)
00529  *              addborder (1 to add border of width (@whsize + 1) on all sides)
00530  *              &pixm (<optional return> local mean values)
00531  *              &pixsd (<optional return> local standard deviation values)
00532  *              &pixth (<optional return> threshold values)
00533  *              &pixd (<optional return> thresholded image)
00534  *      Return: 0 if OK, 1 on error
00535  *
00536  *  Notes:
00537  *      (1) The window width and height are 2 * @whsize + 1.  The minimum
00538  *          value for @whsize is 2; typically it is >= 7..
00539  *      (2) The local statistics, measured over the window, are the
00540  *          average and standard deviation.
00541  *      (3) The measurements of the mean and standard deviation are
00542  *          performed inside a border of (@whsize + 1) pixels.  If pixs does
00543  *          not have these added border pixels, use @addborder = 1 to add
00544  *          it here; otherwise use @addborder = 0.
00545  *      (4) The Sauvola threshold is determined from the formula:
00546  *            t = m * (1 - k * (1 - s / 128))
00547  *          where:
00548  *            t = local threshold
00549  *            m = local mean
00550  *            k = @factor (>= 0)   [ typ. 0.35 ]
00551  *            s = local standard deviation, which is maximized at
00552  *                127.5 when half the samples are 0 and half are 255.
00553  *      (5) The basic idea of Niblack and Sauvola binarization is that
00554  *          the local threshold should be less than the median value,
00555  *          and the larger the variance, the closer to the median
00556  *          it should be chosen.  Typical values for k are between
00557  *          0.2 and 0.5.
00558  */
00559 l_int32
00560 pixSauvolaBinarize(PIX       *pixs,
00561                    l_int32    whsize,
00562                    l_float32  factor,
00563                    l_int32    addborder,
00564                    PIX      **ppixm,
00565                    PIX      **ppixsd,
00566                    PIX      **ppixth,
00567                    PIX      **ppixd)
00568 {
00569 l_int32  w, h;
00570 PIX     *pixg, *pixsc, *pixm, *pixms, *pixth, *pixd;
00571 
00572     PROCNAME("pixSauvolaBinarize");
00573 
00574     
00575     if (!ppixm && !ppixsd && !ppixth && !ppixd)
00576         return ERROR_INT("no outputs", procName, 1);
00577     if (ppixm) *ppixm = NULL;
00578     if (ppixsd) *ppixsd = NULL;
00579     if (ppixth) *ppixth = NULL;
00580     if (ppixd) *ppixd = NULL;
00581     if (!pixs || pixGetDepth(pixs) != 8)
00582         return ERROR_INT("pixs undefined or not 8 bpp", procName, 1);
00583     if (pixGetColormap(pixs))
00584         return ERROR_INT("pixs is cmapped", procName, 1);
00585     pixGetDimensions(pixs, &w, &h, NULL);
00586     if (whsize < 2)
00587         return ERROR_INT("whsize must be >= 2", procName, 1);
00588     if (w < 2 * whsize + 3 || h < 2 * whsize + 3)
00589         return ERROR_INT("whsize too large for image", procName, 1);
00590     if (factor < 0.0)
00591         return ERROR_INT("factor must be >= 0", procName, 1);
00592 
00593     if (addborder) {
00594         pixg = pixAddMirroredBorder(pixs, whsize + 1, whsize + 1,
00595                                     whsize + 1, whsize + 1);
00596         pixsc = pixClone(pixs);
00597     }
00598     else {
00599         pixg = pixClone(pixs);
00600         pixsc = pixRemoveBorder(pixs, whsize + 1);
00601     }
00602     if (!pixg || !pixsc)
00603         return ERROR_INT("pixg and pixsc not made", procName, 1);
00604 
00605         /* All these functions strip off the border pixels. */
00606     if (ppixm || ppixth || ppixd)
00607         pixm = pixWindowedMean(pixg, whsize, whsize, 1, 1);
00608     if (ppixsd || ppixth || ppixd)
00609         pixms = pixWindowedMeanSquare(pixg, whsize, whsize, 1);
00610     if (ppixth || ppixd)
00611         pixth = pixSauvolaGetThreshold(pixm, pixms, factor, ppixsd);
00612     if (ppixd)
00613         pixd = pixApplyLocalThreshold(pixsc, pixth, 1);
00614 
00615     if (ppixm)
00616         *ppixm = pixm;
00617     else
00618         pixDestroy(&pixm);
00619     pixDestroy(&pixms);
00620     if (ppixth)
00621         *ppixth = pixth;
00622     else
00623         pixDestroy(&pixth);
00624     if (ppixd)
00625         *ppixd = pixd;
00626     else
00627         pixDestroy(&pixd);
00628     pixDestroy(&pixg);
00629     pixDestroy(&pixsc);
00630     return 0;
00631 }
00632 
00633 
00634 /*!
00635  *  pixSauvolaGetThreshold()
00636  *
00637  *      Input:  pixm (8 bpp grayscale; not colormapped)
00638  *              pixms (32 bpp)
00639  *              factor (factor for reducing threshold due to variance; >= 0)
00640  *              &pixsd (<optional return> local standard deviation)
00641  *      Return: pixd (8 bpp, sauvola threshold values), or null on error
00642  *
00643  *  Notes:
00644  *      (1) The Sauvola threshold is determined from the formula:
00645  *            t = m * (1 - k * (1 - s / 128))
00646  *          where:
00647  *            t = local threshold
00648  *            m = local mean
00649  *            k = @factor (>= 0)   [ typ. 0.35 ]
00650  *            s = local standard deviation, which is maximized at
00651  *                127.5 when half the samples are 0 and half are 255.
00652  *      (2) See pixSauvolaBinarize() for other details.
00653  *      (3) Important definitions and relations for computing averages:
00654  *            v == pixel value
00655  *            E(p) == expected value of p == average of p over some pixel set
00656  *            S(v) == square of v == v * v
00657  *            mv == E(v) == expected pixel value == mean value
00658  *            ms == E(S(v)) == expected square of pixel values
00659  *               == mean square value
00660  *            var == variance == expected square of deviation from mean
00661  *                == E(S(v - mv)) = E(S(v) - 2 * S(v * mv) + S(mv))
00662  *                                = E(S(v)) - S(mv)
00663  *                                = ms - mv * mv
00664  *            s == standard deviation = sqrt(var)
00665  *          So for evaluating the standard deviation in the Sauvola
00666  *          threshold, we take
00667  *            s = sqrt(ms - mv * mv)
00668  */
00669 PIX *
00670 pixSauvolaGetThreshold(PIX       *pixm,
00671                        PIX       *pixms,
00672                        l_float32  factor,
00673                        PIX      **ppixsd)
00674 {
00675 l_int32     i, j, w, h, tabsize, wplm, wplms, wplsd, wpld, usetab;
00676 l_int32     mv, ms, var, thresh;
00677 l_uint32   *datam, *datams, *datasd, *datad;
00678 l_uint32   *linem, *linems, *linesd, *lined;
00679 l_float32   sd;
00680 l_float32  *tab;  /* of 2^16 square roots */
00681 PIX        *pixsd, *pixd;
00682 
00683     PROCNAME("pixSauvolaGetThreshold");
00684     
00685     if (ppixsd) *ppixsd = NULL;
00686     if (!pixm || pixGetDepth(pixm) != 8)
00687         return (PIX *)ERROR_PTR("pixm undefined or not 8 bpp", procName, NULL);
00688     if (pixGetColormap(pixm))
00689         return (PIX *)ERROR_PTR("pixm is colormapped", procName, NULL);
00690     if (!pixms || pixGetDepth(pixms) != 32)
00691         return (PIX *)ERROR_PTR("pixms undefined or not 32 bpp",
00692                                 procName, NULL);
00693     if (factor < 0.0)
00694         return (PIX *)ERROR_PTR("factor must be >= 0", procName, NULL);
00695 
00696         /* Only make a table of 2^16 square roots if there
00697          * are enough pixels to justify it. */
00698     pixGetDimensions(pixm, &w, &h, NULL);
00699     usetab = (w * h > 100000) ? 1 : 0;
00700     if (usetab) {
00701         tabsize = 1 << 16;
00702         tab = (l_float32 *)CALLOC(tabsize, sizeof(l_float32));
00703         for (i = 0; i < tabsize; i++)
00704             tab[i] = (l_float32)sqrt((l_float64)i);
00705     }
00706 
00707     pixd = pixCreate(w, h, 8);
00708     if (ppixsd) {
00709         pixsd = pixCreate(w, h, 8);
00710         *ppixsd = pixsd;
00711     }
00712     datam = pixGetData(pixm);
00713     datams = pixGetData(pixms);
00714     if (ppixsd) datasd = pixGetData(pixsd);
00715     datad = pixGetData(pixd);
00716     wplm = pixGetWpl(pixm);
00717     wplms = pixGetWpl(pixms);
00718     if (ppixsd) wplsd = pixGetWpl(pixsd);
00719     wpld = pixGetWpl(pixd);
00720     for (i = 0; i < h; i++) {
00721         linem = datam + i * wplm;
00722         linems = datams + i * wplms;
00723         if (ppixsd) linesd = datasd + i * wplsd;
00724         lined = datad + i * wpld;
00725         for (j = 0; j < w; j++) {
00726             mv = GET_DATA_BYTE(linem, j);
00727             ms = linems[j];
00728             var = ms - mv * mv;
00729             if (usetab)
00730                 sd = tab[var];
00731             else
00732                 sd = (l_float32)sqrt(var);
00733             if (ppixsd) SET_DATA_BYTE(linesd, j, (l_int32)sd);
00734             thresh = (l_int32)(mv * (1.0 - factor * (1.0 - sd / 128.)));
00735             SET_DATA_BYTE(lined, j, thresh);
00736         }
00737     }
00738 
00739     if (usetab) FREE(tab);
00740     return pixd;
00741 }
00742 
00743 
00744 /*!
00745  *  pixApplyLocalThreshold()
00746  *
00747  *      Input:  pixs (8 bpp grayscale; not colormapped)
00748  *              pixth (8 bpp array of local thresholds)
00749  *              redfactor ( ... )
00750  *      Return: pixd (1 bpp, thresholded image), or null on error
00751  */
00752 PIX *
00753 pixApplyLocalThreshold(PIX     *pixs,
00754                        PIX     *pixth,
00755                        l_int32  redfactor)
00756 {
00757 l_int32    i, j, w, h, wpls, wplt, wpld, vals, valt;
00758 l_uint32  *datas, *datat, *datad, *lines, *linet, *lined;
00759 PIX       *pixd;
00760 
00761     PROCNAME("pixApplyLocalThreshold");
00762     
00763     if (!pixs || pixGetDepth(pixs) != 8)
00764         return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
00765     if (pixGetColormap(pixs))
00766         return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL);
00767     if (!pixth || pixGetDepth(pixth) != 8)
00768         return (PIX *)ERROR_PTR("pixth undefined or not 8 bpp", procName, NULL);
00769 
00770     pixGetDimensions(pixs, &w, &h, NULL);
00771     pixd = pixCreate(w, h, 1);
00772     datas = pixGetData(pixs);
00773     datat = pixGetData(pixth);
00774     datad = pixGetData(pixd);
00775     wpls = pixGetWpl(pixs);
00776     wplt = pixGetWpl(pixth);
00777     wpld = pixGetWpl(pixd);
00778     for (i = 0; i < h; i++) {
00779         lines = datas + i * wpls;
00780         linet = datat + i * wplt;
00781         lined = datad + i * wpld;
00782         for (j = 0; j < w; j++) {
00783             vals = GET_DATA_BYTE(lines, j);
00784             valt = GET_DATA_BYTE(linet, j);
00785             if (vals < valt)
00786                 SET_DATA_BIT(lined, j);
00787         }
00788     }
00789 
00790     return pixd;
00791 }
00792 
00793 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines