Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * binarize.c 00018 * 00019 * =================================================================== 00020 * Image binarization algorithms are found in: 00021 * grayquant.c: standard, simple, general grayscale quantization 00022 * adaptmap.c: local adaptive; mostly gray-to-gray in preparation 00023 * for binarization 00024 * binarize.c: special binarization methods, locally adaptive. 00025 * =================================================================== 00026 * 00027 * Adaptive Otsu-based thresholding 00028 * l_int32 pixOtsuAdaptiveThreshold() 8 bpp 00029 * 00030 * Otsu thresholding on adaptive background normalization 00031 * PIX *pixOtsuThreshOnBackgroundNorm() 8 bpp 00032 * 00033 * Masking and Otsu estimate on adaptive background normalization 00034 * PIX *pixMaskedThreshOnBackgroundNorm() 8 bpp 00035 * 00036 * Sauvola local thresholding 00037 * l_int32 pixSauvolaBinarizeTiled() 00038 * l_int32 pixSauvolaBinarize() 00039 * PIX *pixSauvolaGetThreshold() 00040 * PIX *pixApplyLocalThreshold(); 00041 * 00042 * Notes: 00043 * (1) pixOtsuAdaptiveThreshold() computes a global threshold over each 00044 * tile and performs the threshold operation, resulting in a 00045 * binary image for each tile. These are stitched into the 00046 * final result. 00047 * (2) pixOtsuThreshOnBackgroundNorm() and 00048 * pixMaskedThreshOnBackgroundNorm() are binarization functions 00049 * that use background normalization with other techniques. 00050 * (3) Sauvola binarization computes a local threshold based on 00051 * the local average and square average. It takes two constants: 00052 * the window size for the measurment at each pixel and a 00053 * parameter that determines the amount of normalized local 00054 * standard deviation to subtract from the local average value. 00055 */ 00056 00057 #include <math.h> 00058 #include "allheaders.h" 00059 00060 /*------------------------------------------------------------------* 00061 * Adaptive Otsu-based thresholding * 00062 *------------------------------------------------------------------*/ 00063 /*! 00064 * pixOtsuAdaptiveThreshold() 00065 * 00066 * Input: pixs (8 bpp) 00067 * sx, sy (desired tile dimensions; actual size may vary) 00068 * smoothx, smoothy (half-width of convolution kernel applied to 00069 * threshold array: use 0 for no smoothing) 00070 * scorefract (fraction of the max Otsu score; typ. 0.1; 00071 * use 0.0 for standard Otsu) 00072 * &pixth (<optional return> array of threshold values 00073 * found for each tile) 00074 * &pixd (<optional return> thresholded input pixs, based on 00075 * the threshold array) 00076 * Return: 0 if OK, 1 on error 00077 * 00078 * Notes: 00079 * (1) The Otsu method finds a single global threshold for an image. 00080 * This function allows a locally adapted threshold to be 00081 * found for each tile into which the image is broken up. 00082 * (2) The array of threshold values, one for each tile, constitutes 00083 * a highly downscaled image. This array is optionally 00084 * smoothed using a convolution. The full width and height of the 00085 * convolution kernel are (2 * @smoothx + 1) and (2 * @smoothy + 1). 00086 * (3) The minimum tile dimension allowed is 16. If such small 00087 * tiles are used, it is recommended to use smoothing, because 00088 * without smoothing, each small tile determines the splitting 00089 * threshold independently. A tile that is entirely in the 00090 * image bg will then hallucinate fg, resulting in a very noisy 00091 * binarization. The smoothing should be large enough that no 00092 * tile is only influenced by one type (fg or bg) of pixels, 00093 * because it will force a split of its pixels. 00094 * (4) To get a single global threshold for the entire image, use 00095 * input values of @sx and @sy that are larger than the image. 00096 * For this situation, the smoothing parameters are ignored. 00097 * (5) The threshold values partition the image pixels into two classes: 00098 * one whose values are less than the threshold and another 00099 * whose values are greater than or equal to the threshold. 00100 * This is the same use of 'threshold' as in pixThresholdToBinary(). 00101 * (6) The scorefract is the fraction of the maximum Otsu score, which 00102 * is used to determine the range over which the histogram minimum 00103 * is searched. See numaSplitDistribution() for details on the 00104 * underlying method of choosing a threshold. 00105 * (7) This uses enables a modified version of the Otsu criterion for 00106 * splitting the distribution of pixels in each tile into a 00107 * fg and bg part. The modification consists of searching for 00108 * a minimum in the histogram over a range of pixel values where 00109 * the Otsu score is within a defined fraction, @scorefract, 00110 * of the max score. To get the original Otsu algorithm, set 00111 * @scorefract == 0. 00112 */ 00113 l_int32 00114 pixOtsuAdaptiveThreshold(PIX *pixs, 00115 l_int32 sx, 00116 l_int32 sy, 00117 l_int32 smoothx, 00118 l_int32 smoothy, 00119 l_float32 scorefract, 00120 PIX **ppixth, 00121 PIX **ppixd) 00122 { 00123 l_int32 w, h, nx, ny, i, j, thresh; 00124 l_uint32 val; 00125 PIX *pixt, *pixb, *pixthresh, *pixth, *pixd; 00126 PIXTILING *pt; 00127 00128 PROCNAME("pixOtsuAdaptiveThreshold"); 00129 00130 if (!ppixth && !ppixd) 00131 return ERROR_INT("neither &pixth nor &pixd defined", procName, 1); 00132 if (ppixth) *ppixth = NULL; 00133 if (ppixd) *ppixd = NULL; 00134 if (!pixs || pixGetDepth(pixs) != 8) 00135 return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); 00136 if (sx < 16 || sy < 16) 00137 return ERROR_INT("sx and sy must be >= 16", procName, 1); 00138 00139 /* Compute the threshold array for the tiles */ 00140 pixGetDimensions(pixs, &w, &h, NULL); 00141 nx = L_MAX(1, w / sx); 00142 ny = L_MAX(1, h / sy); 00143 smoothx = L_MIN(smoothx, (nx - 1) / 2); 00144 smoothy = L_MIN(smoothy, (ny - 1) / 2); 00145 pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0); 00146 pixthresh = pixCreate(nx, ny, 8); 00147 for (i = 0; i < ny; i++) { 00148 for (j = 0; j < nx; j++) { 00149 pixt = pixTilingGetTile(pt, i, j); 00150 pixSplitDistributionFgBg(pixt, scorefract, 1, &thresh, 00151 NULL, NULL, 0); 00152 pixSetPixel(pixthresh, j, i, thresh); /* see note (4) */ 00153 pixDestroy(&pixt); 00154 } 00155 } 00156 00157 /* Optionally smooth the threshold array */ 00158 if (smoothx > 0 || smoothy > 0) 00159 pixth = pixBlockconv(pixthresh, smoothx, smoothy); 00160 else 00161 pixth = pixClone(pixthresh); 00162 pixDestroy(&pixthresh); 00163 00164 /* Optionally apply the threshold array to binarize pixs */ 00165 if (ppixd) { 00166 pixd = pixCreate(w, h, 1); 00167 for (i = 0; i < ny; i++) { 00168 for (j = 0; j < nx; j++) { 00169 pixt = pixTilingGetTile(pt, i, j); 00170 pixGetPixel(pixth, j, i, &val); 00171 pixb = pixThresholdToBinary(pixt, val); 00172 pixTilingPaintTile(pixd, i, j, pixb, pt); 00173 pixDestroy(&pixt); 00174 pixDestroy(&pixb); 00175 } 00176 } 00177 *ppixd = pixd; 00178 } 00179 00180 if (ppixth) 00181 *ppixth = pixth; 00182 else 00183 pixDestroy(&pixth); 00184 00185 pixTilingDestroy(&pt); 00186 return 0; 00187 } 00188 00189 00190 /*------------------------------------------------------------------* 00191 * Otsu thresholding on adaptive background normalization * 00192 *------------------------------------------------------------------*/ 00193 /*! 00194 * pixOtsuThreshOnBackgroundNorm() 00195 * 00196 * Input: pixs (8 bpp grayscale; not colormapped) 00197 * pixim (<optional> 1 bpp 'image' mask; can be null) 00198 * sx, sy (tile size in pixels) 00199 * thresh (threshold for determining foreground) 00200 * mincount (min threshold on counts in a tile) 00201 * bgval (target bg val; typ. > 128) 00202 * smoothx (half-width of block convolution kernel width) 00203 * smoothy (half-width of block convolution kernel height) 00204 * scorefract (fraction of the max Otsu score; typ. 0.1) 00205 * &thresh (<optional return> threshold value that was 00206 * used on the normalized image) 00207 * Return: pixd (1 bpp thresholded image), or null on error 00208 * 00209 * Notes: 00210 * (1) This does background normalization followed by Otsu 00211 * thresholding. Otsu binarization attempts to split the 00212 * image into two roughly equal sets of pixels, and it does 00213 * a very poor job when there are large amounts of dark 00214 * background. By doing a background normalization first, 00215 * to get the background near 255, we remove this problem. 00216 * Then we use a modified Otsu to estimate the best global 00217 * threshold on the normalized image. 00218 * (2) See pixBackgroundNorm() for meaning and typical values 00219 * of input parameters. For a start, you can try: 00220 * sx, sy = 10, 15 00221 * thresh = 100 00222 * mincount = 50 00223 * bgval = 255 00224 * smoothx, smoothy = 2 00225 */ 00226 PIX * 00227 pixOtsuThreshOnBackgroundNorm(PIX *pixs, 00228 PIX *pixim, 00229 l_int32 sx, 00230 l_int32 sy, 00231 l_int32 thresh, 00232 l_int32 mincount, 00233 l_int32 bgval, 00234 l_int32 smoothx, 00235 l_int32 smoothy, 00236 l_float32 scorefract, 00237 l_int32 *pthresh) 00238 { 00239 l_int32 w, h; 00240 l_uint32 val; 00241 PIX *pixn, *pixt, *pixd; 00242 00243 PROCNAME("pixOtsuThreshOnBackgroundNorm"); 00244 00245 if (pthresh) *pthresh = 0; 00246 if (!pixs || pixGetDepth(pixs) != 8) 00247 return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); 00248 if (pixGetColormap(pixs)) 00249 return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL); 00250 if (sx < 4 || sy < 4) 00251 return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL); 00252 if (mincount > sx * sy) { 00253 L_WARNING("mincount too large for tile size", procName); 00254 mincount = (sx * sy) / 3; 00255 } 00256 00257 pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh, 00258 mincount, bgval, smoothx, smoothy); 00259 if (!pixn) 00260 return (PIX *)ERROR_PTR("pixn not made", procName, NULL); 00261 00262 /* Just use 1 tile for a global threshold, which is stored 00263 * as a single pixel in pixt. */ 00264 pixGetDimensions(pixn, &w, &h, NULL); 00265 pixOtsuAdaptiveThreshold(pixn, w, h, 0, 0, scorefract, &pixt, &pixd); 00266 pixDestroy(&pixn); 00267 00268 if (pixt && pthresh) { 00269 pixGetPixel(pixt, 0, 0, &val); 00270 *pthresh = val; 00271 } 00272 pixDestroy(&pixt); 00273 00274 if (!pixd) 00275 return (PIX *)ERROR_PTR("pixd not made", procName, NULL); 00276 else 00277 return pixd; 00278 } 00279 00280 00281 00282 /*----------------------------------------------------------------------* 00283 * Masking and Otsu estimate on adaptive background normalization * 00284 *----------------------------------------------------------------------*/ 00285 /*! 00286 * pixMaskedThreshOnBackgroundNorm() 00287 * 00288 * Input: pixs (8 bpp grayscale; not colormapped) 00289 * pixim (<optional> 1 bpp 'image' mask; can be null) 00290 * sx, sy (tile size in pixels) 00291 * thresh (threshold for determining foreground) 00292 * mincount (min threshold on counts in a tile) 00293 * smoothx (half-width of block convolution kernel width) 00294 * smoothy (half-width of block convolution kernel height) 00295 * scorefract (fraction of the max Otsu score; typ. ~ 0.1) 00296 * &thresh (<optional return> threshold value that was 00297 * used on the normalized image) 00298 * Return: pixd (1 bpp thresholded image), or null on error 00299 * 00300 * Notes: 00301 * (1) This begins with a standard background normalization. 00302 * Additionally, there is a flexible background norm, that 00303 * will adapt to a rapidly varying background, and this 00304 * puts white pixels in the background near regions with 00305 * significant foreground. The white pixels are turned into 00306 * a 1 bpp selection mask by binarization followed by dilation. 00307 * Otsu thresholding is performed on the input image to get an 00308 * estimate of the threshold in the non-mask regions. 00309 * The background normalized image is thresholded with two 00310 * different values, and the result is combined using 00311 * the selection mask. 00312 * (2) Note that the numbers 255 (for bgval target) and 190 (for 00313 * thresholding on pixn) are tied together, and explicitly 00314 * defined in this function. 00315 * (3) See pixBackgroundNorm() for meaning and typical values 00316 * of input parameters. For a start, you can try: 00317 * sx, sy = 10, 15 00318 * thresh = 100 00319 * mincount = 50 00320 * smoothx, smoothy = 2 00321 */ 00322 PIX * 00323 pixMaskedThreshOnBackgroundNorm(PIX *pixs, 00324 PIX *pixim, 00325 l_int32 sx, 00326 l_int32 sy, 00327 l_int32 thresh, 00328 l_int32 mincount, 00329 l_int32 smoothx, 00330 l_int32 smoothy, 00331 l_float32 scorefract, 00332 l_int32 *pthresh) 00333 { 00334 l_int32 w, h; 00335 l_uint32 val; 00336 PIX *pixn, *pixm, *pixd, *pixt1, *pixt2, *pixt3, *pixt4; 00337 00338 PROCNAME("pixMaskedThreshOnBackgroundNorm"); 00339 00340 if (pthresh) *pthresh = 0; 00341 if (!pixs || pixGetDepth(pixs) != 8) 00342 return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); 00343 if (pixGetColormap(pixs)) 00344 return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL); 00345 if (sx < 4 || sy < 4) 00346 return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL); 00347 if (mincount > sx * sy) { 00348 L_WARNING("mincount too large for tile size", procName); 00349 mincount = (sx * sy) / 3; 00350 } 00351 00352 /* Standard background normalization */ 00353 pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh, 00354 mincount, 255, smoothx, smoothy); 00355 if (!pixn) 00356 return (PIX *)ERROR_PTR("pixn not made", procName, NULL); 00357 00358 /* Special background normalization for adaptation to quickly 00359 * varying background. Threshold on the very light parts, 00360 * which tend to be near significant edges, and dilate to 00361 * form a mask over regions that are typically text. The 00362 * dilation size is chosen to cover the text completely, 00363 * except for very thick fonts. */ 00364 pixt1 = pixBackgroundNormFlex(pixs, 7, 7, 1, 1, 20); 00365 pixt2 = pixThresholdToBinary(pixt1, 240); 00366 pixInvert(pixt2, pixt2); 00367 pixm = pixMorphSequence(pixt2, "d21.21", 0); 00368 pixDestroy(&pixt1); 00369 pixDestroy(&pixt2); 00370 00371 /* Use Otsu to get a global threshold estimate for the image, 00372 * which is stored as a single pixel in pixt3. */ 00373 pixGetDimensions(pixs, &w, &h, NULL); 00374 pixOtsuAdaptiveThreshold(pixs, w, h, 0, 0, scorefract, &pixt3, NULL); 00375 if (pixt3 && pthresh) { 00376 pixGetPixel(pixt3, 0, 0, &val); 00377 *pthresh = val; 00378 } 00379 pixDestroy(&pixt3); 00380 00381 /* Threshold the background normalized images differentially, 00382 * using a high value correlated with the background normalization 00383 * for the part of the image under the mask (i.e., near the 00384 * darker, thicker foreground), and a value that depends on the Otsu 00385 * threshold for the rest of the image. This gives a solid 00386 * (high) thresholding for the foreground parts of the image, 00387 * while allowing the background and light foreground to be 00388 * reasonably well cleaned using a threshold adapted to the 00389 * input image. */ 00390 pixd = pixThresholdToBinary(pixn, val + 30); /* for bg and light fg */ 00391 pixt4 = pixThresholdToBinary(pixn, 190); /* for heavier fg */ 00392 pixCombineMasked(pixd, pixt4, pixm); 00393 pixDestroy(&pixt4); 00394 pixDestroy(&pixm); 00395 pixDestroy(&pixn); 00396 00397 if (!pixd) 00398 return (PIX *)ERROR_PTR("pixd not made", procName, NULL); 00399 else 00400 return pixd; 00401 } 00402 00403 00404 /*----------------------------------------------------------------------* 00405 * Sauvola binarization * 00406 *----------------------------------------------------------------------*/ 00407 /*! 00408 * pixSauvolaBinarizeTiled() 00409 * 00410 * Input: pixs (8 bpp grayscale, not colormapped) 00411 * whsize (window half-width for measuring local statistics) 00412 * factor (factor for reducing threshold due to variance; >= 0) 00413 * nx, ny (subdivision into tiles; >= 1) 00414 * &pixth (<optional return> Sauvola threshold values) 00415 * &pixd (<optional return> thresholded image) 00416 * Return: 0 if OK, 1 on error 00417 * 00418 * Notes: 00419 * (1) The window width and height are 2 * @whsize + 1. The minimum 00420 * value for @whsize is 2; typically it is >= 7.. 00421 * (2) For nx == ny == 1, this defaults to pixSauvolaBinarize(). 00422 * (3) Why a tiled version? 00423 * (a) Because the mean value accumulator is a uint32, overflow 00424 * can occur for an image with more than 16M pixels. 00425 * (b) The mean value accumulator array for 16M pixels is 64 MB. 00426 * The mean square accumulator array for 16M pixels is 128 MB. 00427 * Using tiles reduces the size of these arrays. 00428 * (c) Each tile can be processed independently, in parallel, 00429 * on a multicore processor. 00430 * (4) The Sauvola threshold is determined from the formula: 00431 * t = m * (1 - k * (1 - s / 128)) 00432 * See pixSauvolaBinarize() for details. 00433 */ 00434 l_int32 00435 pixSauvolaBinarizeTiled(PIX *pixs, 00436 l_int32 whsize, 00437 l_float32 factor, 00438 l_int32 nx, 00439 l_int32 ny, 00440 PIX **ppixth, 00441 PIX **ppixd) 00442 { 00443 l_int32 i, j, w, h, xrat, yrat; 00444 PIX *pixth, *pixd, *tileth, *tiled, *pixt; 00445 PIX **ptileth, **ptiled; 00446 PIXTILING *pt; 00447 00448 PROCNAME("pixSauvolaBinarizeTiled"); 00449 00450 if (!ppixth && !ppixd) 00451 return ERROR_INT("no outputs", procName, 1); 00452 if (ppixth) *ppixth = NULL; 00453 if (ppixd) *ppixd = NULL; 00454 if (!pixs || pixGetDepth(pixs) != 8) 00455 return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); 00456 if (pixGetColormap(pixs)) 00457 return ERROR_INT("pixs is cmapped", procName, 1); 00458 pixGetDimensions(pixs, &w, &h, NULL); 00459 if (whsize < 2) 00460 return ERROR_INT("whsize must be >= 2", procName, 1); 00461 if (w < 2 * whsize + 3 || h < 2 * whsize + 3) 00462 return ERROR_INT("whsize too large for image", procName, 1); 00463 if (factor < 0.0) 00464 return ERROR_INT("factor must be >= 0", procName, 1); 00465 00466 if (nx <= 1 && ny <= 1) 00467 return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL, 00468 ppixth, ppixd); 00469 00470 /* Test to see if the tiles are too small. The required 00471 * condition is that the tile dimensions must be at least 00472 * (whsize + 2) x (whsize + 2). */ 00473 xrat = w / nx; 00474 yrat = h / ny; 00475 if (xrat < whsize + 2) { 00476 nx = w / (whsize + 2); 00477 L_WARNING_INT("tile width too small; nx reduced to %d", procName, nx); 00478 } 00479 if (yrat < whsize + 2) { 00480 ny = h / (whsize + 2); 00481 L_WARNING_INT("tile height too small; ny reduced to %d", procName, ny); 00482 } 00483 if (nx <= 1 && ny <= 1) 00484 return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL, 00485 ppixth, ppixd); 00486 00487 /* We can use pixtiling for painting both outputs, if requested */ 00488 if (ppixth) { 00489 pixth = pixCreateNoInit(w, h, 8); 00490 *ppixth = pixth; 00491 } 00492 if (ppixd) { 00493 pixd = pixCreateNoInit(w, h, 1); 00494 *ppixd = pixd; 00495 } 00496 pt = pixTilingCreate(pixs, nx, ny, 0, 0, whsize + 1, whsize + 1); 00497 pixTilingNoStripOnPaint(pt); /* pixSauvolaBinarize() does the stripping */ 00498 00499 for (i = 0; i < ny; i++) { 00500 for (j = 0; j < nx; j++) { 00501 pixt = pixTilingGetTile(pt, i, j); 00502 ptileth = (ppixth) ? &tileth : NULL; 00503 ptiled = (ppixd) ? &tiled : NULL; 00504 pixSauvolaBinarize(pixt, whsize, factor, 0, NULL, NULL, 00505 ptileth, ptiled); 00506 if (ppixth) { /* do not strip */ 00507 pixTilingPaintTile(pixth, i, j, tileth, pt); 00508 pixDestroy(&tileth); 00509 } 00510 if (ppixd) { 00511 pixTilingPaintTile(pixd, i, j, tiled, pt); 00512 pixDestroy(&tiled); 00513 } 00514 pixDestroy(&pixt); 00515 } 00516 } 00517 00518 pixTilingDestroy(&pt); 00519 return 0; 00520 } 00521 00522 00523 /*! 00524 * pixSauvolaBinarize() 00525 * 00526 * Input: pixs (8 bpp grayscale; not colormapped) 00527 * whsize (window half-width for measuring local statistics) 00528 * factor (factor for reducing threshold due to variance; >= 0) 00529 * addborder (1 to add border of width (@whsize + 1) on all sides) 00530 * &pixm (<optional return> local mean values) 00531 * &pixsd (<optional return> local standard deviation values) 00532 * &pixth (<optional return> threshold values) 00533 * &pixd (<optional return> thresholded image) 00534 * Return: 0 if OK, 1 on error 00535 * 00536 * Notes: 00537 * (1) The window width and height are 2 * @whsize + 1. The minimum 00538 * value for @whsize is 2; typically it is >= 7.. 00539 * (2) The local statistics, measured over the window, are the 00540 * average and standard deviation. 00541 * (3) The measurements of the mean and standard deviation are 00542 * performed inside a border of (@whsize + 1) pixels. If pixs does 00543 * not have these added border pixels, use @addborder = 1 to add 00544 * it here; otherwise use @addborder = 0. 00545 * (4) The Sauvola threshold is determined from the formula: 00546 * t = m * (1 - k * (1 - s / 128)) 00547 * where: 00548 * t = local threshold 00549 * m = local mean 00550 * k = @factor (>= 0) [ typ. 0.35 ] 00551 * s = local standard deviation, which is maximized at 00552 * 127.5 when half the samples are 0 and half are 255. 00553 * (5) The basic idea of Niblack and Sauvola binarization is that 00554 * the local threshold should be less than the median value, 00555 * and the larger the variance, the closer to the median 00556 * it should be chosen. Typical values for k are between 00557 * 0.2 and 0.5. 00558 */ 00559 l_int32 00560 pixSauvolaBinarize(PIX *pixs, 00561 l_int32 whsize, 00562 l_float32 factor, 00563 l_int32 addborder, 00564 PIX **ppixm, 00565 PIX **ppixsd, 00566 PIX **ppixth, 00567 PIX **ppixd) 00568 { 00569 l_int32 w, h; 00570 PIX *pixg, *pixsc, *pixm, *pixms, *pixth, *pixd; 00571 00572 PROCNAME("pixSauvolaBinarize"); 00573 00574 00575 if (!ppixm && !ppixsd && !ppixth && !ppixd) 00576 return ERROR_INT("no outputs", procName, 1); 00577 if (ppixm) *ppixm = NULL; 00578 if (ppixsd) *ppixsd = NULL; 00579 if (ppixth) *ppixth = NULL; 00580 if (ppixd) *ppixd = NULL; 00581 if (!pixs || pixGetDepth(pixs) != 8) 00582 return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); 00583 if (pixGetColormap(pixs)) 00584 return ERROR_INT("pixs is cmapped", procName, 1); 00585 pixGetDimensions(pixs, &w, &h, NULL); 00586 if (whsize < 2) 00587 return ERROR_INT("whsize must be >= 2", procName, 1); 00588 if (w < 2 * whsize + 3 || h < 2 * whsize + 3) 00589 return ERROR_INT("whsize too large for image", procName, 1); 00590 if (factor < 0.0) 00591 return ERROR_INT("factor must be >= 0", procName, 1); 00592 00593 if (addborder) { 00594 pixg = pixAddMirroredBorder(pixs, whsize + 1, whsize + 1, 00595 whsize + 1, whsize + 1); 00596 pixsc = pixClone(pixs); 00597 } 00598 else { 00599 pixg = pixClone(pixs); 00600 pixsc = pixRemoveBorder(pixs, whsize + 1); 00601 } 00602 if (!pixg || !pixsc) 00603 return ERROR_INT("pixg and pixsc not made", procName, 1); 00604 00605 /* All these functions strip off the border pixels. */ 00606 if (ppixm || ppixth || ppixd) 00607 pixm = pixWindowedMean(pixg, whsize, whsize, 1, 1); 00608 if (ppixsd || ppixth || ppixd) 00609 pixms = pixWindowedMeanSquare(pixg, whsize, whsize, 1); 00610 if (ppixth || ppixd) 00611 pixth = pixSauvolaGetThreshold(pixm, pixms, factor, ppixsd); 00612 if (ppixd) 00613 pixd = pixApplyLocalThreshold(pixsc, pixth, 1); 00614 00615 if (ppixm) 00616 *ppixm = pixm; 00617 else 00618 pixDestroy(&pixm); 00619 pixDestroy(&pixms); 00620 if (ppixth) 00621 *ppixth = pixth; 00622 else 00623 pixDestroy(&pixth); 00624 if (ppixd) 00625 *ppixd = pixd; 00626 else 00627 pixDestroy(&pixd); 00628 pixDestroy(&pixg); 00629 pixDestroy(&pixsc); 00630 return 0; 00631 } 00632 00633 00634 /*! 00635 * pixSauvolaGetThreshold() 00636 * 00637 * Input: pixm (8 bpp grayscale; not colormapped) 00638 * pixms (32 bpp) 00639 * factor (factor for reducing threshold due to variance; >= 0) 00640 * &pixsd (<optional return> local standard deviation) 00641 * Return: pixd (8 bpp, sauvola threshold values), or null on error 00642 * 00643 * Notes: 00644 * (1) The Sauvola threshold is determined from the formula: 00645 * t = m * (1 - k * (1 - s / 128)) 00646 * where: 00647 * t = local threshold 00648 * m = local mean 00649 * k = @factor (>= 0) [ typ. 0.35 ] 00650 * s = local standard deviation, which is maximized at 00651 * 127.5 when half the samples are 0 and half are 255. 00652 * (2) See pixSauvolaBinarize() for other details. 00653 * (3) Important definitions and relations for computing averages: 00654 * v == pixel value 00655 * E(p) == expected value of p == average of p over some pixel set 00656 * S(v) == square of v == v * v 00657 * mv == E(v) == expected pixel value == mean value 00658 * ms == E(S(v)) == expected square of pixel values 00659 * == mean square value 00660 * var == variance == expected square of deviation from mean 00661 * == E(S(v - mv)) = E(S(v) - 2 * S(v * mv) + S(mv)) 00662 * = E(S(v)) - S(mv) 00663 * = ms - mv * mv 00664 * s == standard deviation = sqrt(var) 00665 * So for evaluating the standard deviation in the Sauvola 00666 * threshold, we take 00667 * s = sqrt(ms - mv * mv) 00668 */ 00669 PIX * 00670 pixSauvolaGetThreshold(PIX *pixm, 00671 PIX *pixms, 00672 l_float32 factor, 00673 PIX **ppixsd) 00674 { 00675 l_int32 i, j, w, h, tabsize, wplm, wplms, wplsd, wpld, usetab; 00676 l_int32 mv, ms, var, thresh; 00677 l_uint32 *datam, *datams, *datasd, *datad; 00678 l_uint32 *linem, *linems, *linesd, *lined; 00679 l_float32 sd; 00680 l_float32 *tab; /* of 2^16 square roots */ 00681 PIX *pixsd, *pixd; 00682 00683 PROCNAME("pixSauvolaGetThreshold"); 00684 00685 if (ppixsd) *ppixsd = NULL; 00686 if (!pixm || pixGetDepth(pixm) != 8) 00687 return (PIX *)ERROR_PTR("pixm undefined or not 8 bpp", procName, NULL); 00688 if (pixGetColormap(pixm)) 00689 return (PIX *)ERROR_PTR("pixm is colormapped", procName, NULL); 00690 if (!pixms || pixGetDepth(pixms) != 32) 00691 return (PIX *)ERROR_PTR("pixms undefined or not 32 bpp", 00692 procName, NULL); 00693 if (factor < 0.0) 00694 return (PIX *)ERROR_PTR("factor must be >= 0", procName, NULL); 00695 00696 /* Only make a table of 2^16 square roots if there 00697 * are enough pixels to justify it. */ 00698 pixGetDimensions(pixm, &w, &h, NULL); 00699 usetab = (w * h > 100000) ? 1 : 0; 00700 if (usetab) { 00701 tabsize = 1 << 16; 00702 tab = (l_float32 *)CALLOC(tabsize, sizeof(l_float32)); 00703 for (i = 0; i < tabsize; i++) 00704 tab[i] = (l_float32)sqrt((l_float64)i); 00705 } 00706 00707 pixd = pixCreate(w, h, 8); 00708 if (ppixsd) { 00709 pixsd = pixCreate(w, h, 8); 00710 *ppixsd = pixsd; 00711 } 00712 datam = pixGetData(pixm); 00713 datams = pixGetData(pixms); 00714 if (ppixsd) datasd = pixGetData(pixsd); 00715 datad = pixGetData(pixd); 00716 wplm = pixGetWpl(pixm); 00717 wplms = pixGetWpl(pixms); 00718 if (ppixsd) wplsd = pixGetWpl(pixsd); 00719 wpld = pixGetWpl(pixd); 00720 for (i = 0; i < h; i++) { 00721 linem = datam + i * wplm; 00722 linems = datams + i * wplms; 00723 if (ppixsd) linesd = datasd + i * wplsd; 00724 lined = datad + i * wpld; 00725 for (j = 0; j < w; j++) { 00726 mv = GET_DATA_BYTE(linem, j); 00727 ms = linems[j]; 00728 var = ms - mv * mv; 00729 if (usetab) 00730 sd = tab[var]; 00731 else 00732 sd = (l_float32)sqrt(var); 00733 if (ppixsd) SET_DATA_BYTE(linesd, j, (l_int32)sd); 00734 thresh = (l_int32)(mv * (1.0 - factor * (1.0 - sd / 128.))); 00735 SET_DATA_BYTE(lined, j, thresh); 00736 } 00737 } 00738 00739 if (usetab) FREE(tab); 00740 return pixd; 00741 } 00742 00743 00744 /*! 00745 * pixApplyLocalThreshold() 00746 * 00747 * Input: pixs (8 bpp grayscale; not colormapped) 00748 * pixth (8 bpp array of local thresholds) 00749 * redfactor ( ... ) 00750 * Return: pixd (1 bpp, thresholded image), or null on error 00751 */ 00752 PIX * 00753 pixApplyLocalThreshold(PIX *pixs, 00754 PIX *pixth, 00755 l_int32 redfactor) 00756 { 00757 l_int32 i, j, w, h, wpls, wplt, wpld, vals, valt; 00758 l_uint32 *datas, *datat, *datad, *lines, *linet, *lined; 00759 PIX *pixd; 00760 00761 PROCNAME("pixApplyLocalThreshold"); 00762 00763 if (!pixs || pixGetDepth(pixs) != 8) 00764 return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); 00765 if (pixGetColormap(pixs)) 00766 return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL); 00767 if (!pixth || pixGetDepth(pixth) != 8) 00768 return (PIX *)ERROR_PTR("pixth undefined or not 8 bpp", procName, NULL); 00769 00770 pixGetDimensions(pixs, &w, &h, NULL); 00771 pixd = pixCreate(w, h, 1); 00772 datas = pixGetData(pixs); 00773 datat = pixGetData(pixth); 00774 datad = pixGetData(pixd); 00775 wpls = pixGetWpl(pixs); 00776 wplt = pixGetWpl(pixth); 00777 wpld = pixGetWpl(pixd); 00778 for (i = 0; i < h; i++) { 00779 lines = datas + i * wpls; 00780 linet = datat + i * wplt; 00781 lined = datad + i * wpld; 00782 for (j = 0; j < w; j++) { 00783 vals = GET_DATA_BYTE(lines, j); 00784 valt = GET_DATA_BYTE(linet, j); 00785 if (vals < valt) 00786 SET_DATA_BIT(lined, j); 00787 } 00788 } 00789 00790 return pixd; 00791 } 00792 00793