Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * baseline.c 00018 * 00019 * Locate text baselines in an image 00020 * NUMA *pixFindBaselines() 00021 * 00022 * Projective transform to remove local skew 00023 * PIX *pixDeskewLocal() 00024 * 00025 * Determine local skew 00026 * l_int32 pixGetLocalSkewTransform() 00027 * NUMA *pixGetLocalSkewAngles() 00028 * 00029 * We have two apparently different functions here: 00030 * - finding baselines 00031 * - finding a projective transform to remove keystone warping 00032 * The function pixGetLocalSkewAngles() returns an array of angles, 00033 * one for each raster line, and the baselines of the text lines 00034 * should intersect the left edge of the image with that angle. 00035 */ 00036 00037 #include <stdio.h> 00038 #include <stdlib.h> 00039 #include <math.h> 00040 #include "allheaders.h" 00041 00042 #ifndef NO_CONSOLE_IO 00043 #define DEBUG_PLOT 0 00044 #endif /* NO_CONSOLE_IO */ 00045 00046 /* Min to travel after finding max before abandoning peak */ 00047 static const l_int32 MIN_DIST_IN_PEAK = 35; 00048 00049 /* Thresholds for peaks and zeros, relative to the max peak */ 00050 static const l_int32 PEAK_THRESHOLD_RATIO = 20; 00051 static const l_int32 ZERO_THRESHOLD_RATIO = 100; 00052 00053 /* Default values for determining local skew */ 00054 static const l_int32 DEFAULT_SLICES = 10; 00055 static const l_int32 DEFAULT_SWEEP_REDUCTION = 2; 00056 static const l_int32 DEFAULT_BS_REDUCTION = 1; 00057 static const l_float32 DEFAULT_SWEEP_RANGE = 5.; /* degrees */ 00058 static const l_float32 DEFAULT_SWEEP_DELTA = 1.; /* degrees */ 00059 static const l_float32 DEFAULT_MINBS_DELTA = 0.01; /* degrees */ 00060 00061 /* Overlap slice fraction added to top and bottom of each slice */ 00062 static const l_float32 OVERLAP_FRACTION = 0.5; 00063 00064 /* Minimum allowed confidence (ratio) for accepting a value */ 00065 static const l_float32 MIN_ALLOWED_CONFIDENCE = 3.0; 00066 00067 00068 /*---------------------------------------------------------------------* 00069 * Locate text baselines in an image * 00070 *---------------------------------------------------------------------*/ 00071 /*! 00072 * pixFindBaselines() 00073 * 00074 * Input: pixs (1 bpp) 00075 * &pta (<optional return> pairs of pts corresponding to 00076 * approx. ends of each text line) 00077 * debug (usually 0; set to 1 for debugging output) 00078 * Return: na (of baseline y values), or null on error 00079 * 00080 * Notes: 00081 * (1) Input binary image must have text lines already aligned 00082 * horizontally. This can be done by either rotating the 00083 * image with pixDeskew(), or, if a projective transform 00084 * is required, by doing pixDeskewLocal() first. 00085 * (2) Input null for &pta if you don't want this returned. 00086 * The pta will come in pairs of points (left and right end 00087 * of each baseline). 00088 * (3) Caution: this will not work properly on text with multiple 00089 * columns, where the lines are not aligned between columns. 00090 * If there are multiple columns, they should be extracted 00091 * separately before finding the baselines. 00092 * (4) This function constructs different types of output 00093 * for baselines; namely, a set of raster line values and 00094 * a set of end points of each baseline. 00095 * (5) This function was designed to handle short and long text lines 00096 * without using dangerous thresholds on the peak heights. It does 00097 * this by combining the differential signal with a morphological 00098 * analysis of the locations of the text lines. One can also 00099 * combine this data to normalize the peak heights, by weighting 00100 * the differential signal in the region of each baseline 00101 * by the inverse of the width of the text line found there. 00102 * (6) There are various debug sections that can be turned on 00103 * with the debug flag. 00104 */ 00105 NUMA * 00106 pixFindBaselines(PIX *pixs, 00107 PTA **ppta, 00108 l_int32 debug) 00109 { 00110 l_int32 w, h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh; 00111 l_int32 imaxloc, peakthresh, zerothresh, inpeak; 00112 l_int32 mintosearch, max, maxloc, nloc, locval; 00113 l_int32 *array; 00114 l_float32 maxval; 00115 BOXA *boxa1, *boxa2, *boxa3; 00116 GPLOT *gplot; 00117 NUMA *nasum, *nadiff, *naloc, *naval; 00118 PIX *pixt1, *pixt2; 00119 PTA *pta; 00120 00121 PROCNAME("pixFindBaselines"); 00122 00123 if (!pixs) 00124 return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); 00125 pta = NULL; 00126 if (ppta) { 00127 pta = ptaCreate(0); 00128 *ppta = pta; 00129 } 00130 00131 /* Close up the text characters, removing noise */ 00132 pixt1 = pixMorphSequence(pixs, "c25.1 + e3.1", 0); 00133 00134 /* Save the difference of adjacent row sums. 00135 * The high positive-going peaks are the baselines */ 00136 if ((nasum = pixCountPixelsByRow(pixt1, NULL)) == NULL) 00137 return (NUMA *)ERROR_PTR("nasum not made", procName, NULL); 00138 w = pixGetWidth(pixs); 00139 h = pixGetHeight(pixs); 00140 nadiff = numaCreate(h); 00141 numaGetIValue(nasum, 0, &val2); 00142 for (i = 0; i < h - 1; i++) { 00143 val1 = val2; 00144 numaGetIValue(nasum, i + 1, &val2); 00145 numaAddNumber(nadiff, val1 - val2); 00146 } 00147 00148 if (debug) /* show the difference signal */ 00149 gplotSimple1(nadiff, GPLOT_X11, "junkdiff", "difference"); 00150 00151 /* Use the zeroes of the profile to locate each baseline. */ 00152 array = numaGetIArray(nadiff); 00153 ndiff = numaGetCount(nadiff); 00154 numaGetMax(nadiff, &maxval, &imaxloc); 00155 /* Use this to begin locating a new peak: */ 00156 peakthresh = (l_int32)maxval / PEAK_THRESHOLD_RATIO; 00157 /* Use this to begin a region between peaks: */ 00158 zerothresh = (l_int32)maxval / ZERO_THRESHOLD_RATIO; 00159 naloc = numaCreate(0); 00160 naval = numaCreate(0); 00161 inpeak = FALSE; 00162 for (i = 0; i < ndiff; i++) { 00163 if (inpeak == FALSE) { 00164 if (array[i] > peakthresh) { /* transition to in-peak */ 00165 inpeak = TRUE; 00166 mintosearch = i + MIN_DIST_IN_PEAK; /* accept no zeros 00167 * between i and mintosearch */ 00168 max = array[i]; 00169 maxloc = i; 00170 } 00171 } 00172 else { /* inpeak == TRUE; look for max */ 00173 if (array[i] > max) { 00174 max = array[i]; 00175 maxloc = i; 00176 mintosearch = i + MIN_DIST_IN_PEAK; 00177 } 00178 else if (i > mintosearch && array[i] <= zerothresh) { /* leave */ 00179 inpeak = FALSE; 00180 numaAddNumber(naval, max); 00181 numaAddNumber(naloc, maxloc); 00182 } 00183 } 00184 } 00185 00186 /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */ 00187 if (inpeak) { 00188 numaAddNumber(naval, max); 00189 numaAddNumber(naloc, maxloc); 00190 } 00191 FREE(array); 00192 00193 if (debug) { /* show the raster locations for the peaks */ 00194 gplot = gplotCreate("junkloc", GPLOT_X11, "Peak locations", 00195 "rasterline", "height"); 00196 gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs"); 00197 gplotMakeOutput(gplot); 00198 gplotDestroy(&gplot); 00199 } 00200 00201 /* Generate an approximate profile of text line width. 00202 * First, filter the boxes of text, where there may be 00203 * more than one box for a given textline. */ 00204 pixt2 = pixMorphSequence(pixt1, "r11 + c25.1 + o7.1 +c1.3", 0); 00205 boxa1 = pixConnComp(pixt2, NULL, 4); 00206 boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.); 00207 boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); 00208 00209 /* Then find the baseline segments */ 00210 if (pta) { 00211 nloc = numaGetCount(naloc); 00212 nbox = boxaGetCount(boxa3); 00213 for (i = 0; i < nbox; i++) { 00214 boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh); 00215 for (j = 0; j < nloc; j++) { 00216 numaGetIValue(naloc, j, &locval); 00217 if (L_ABS(locval - (by + bh)) > 25) 00218 continue; 00219 ptaAddPt(pta, bx, locval); 00220 ptaAddPt(pta, bx + bw, locval); 00221 break; 00222 } 00223 } 00224 } 00225 00226 if (debug) { /* display baselines */ 00227 PIX *pixd; 00228 l_int32 npts, x1, y1, x2, y2; 00229 if (pta) { 00230 pixd = pixConvertTo32(pixs); 00231 npts = ptaGetCount(pta); 00232 for (i = 0; i < npts; i += 2) { 00233 ptaGetIPt(pta, i, &x1, &y1); 00234 ptaGetIPt(pta, i + 1, &x2, &y2); 00235 pixRenderLineArb(pixd, x1, y1, x2, y2, 1, 255, 0, 0); 00236 } 00237 pixDisplay(pixd, 200, 200); 00238 pixWrite("junkbaselines", pixd, IFF_PNG); 00239 pixDestroy(&pixd); 00240 } 00241 } 00242 00243 boxaDestroy(&boxa1); 00244 boxaDestroy(&boxa2); 00245 boxaDestroy(&boxa3); 00246 pixDestroy(&pixt1); 00247 pixDestroy(&pixt2); 00248 numaDestroy(&nasum); 00249 numaDestroy(&nadiff); 00250 numaDestroy(&naval); 00251 return naloc; 00252 } 00253 00254 00255 /*---------------------------------------------------------------------* 00256 * Projective transform to remove local skew * 00257 *---------------------------------------------------------------------*/ 00258 /*! 00259 * pixDeskewLocal() 00260 * 00261 * Input: pixs 00262 * nslices (the number of horizontal overlapping slices; must 00263 * be larger than 1 and not exceed 20; use 0 for default) 00264 * redsweep (sweep reduction factor: 1, 2, 4 or 8; 00265 * use 0 for default value) 00266 * redsearch (search reduction factor: 1, 2, 4 or 8, and 00267 * not larger than redsweep; use 0 for default value) 00268 * sweeprange (half the full range, assumed about 0; in degrees; 00269 * use 0.0 for default value) 00270 * sweepdelta (angle increment of sweep; in degrees; 00271 * use 0.0 for default value) 00272 * minbsdelta (min binary search increment angle; in degrees; 00273 * use 0.0 for default value) 00274 * Return: pixd, or null on error 00275 * 00276 * Notes: 00277 * (1) This function allows deskew of a page whose skew changes 00278 * approximately linearly with vertical position. It uses 00279 * a projective tranform that in effect does a differential 00280 * shear about the LHS of the page, and makes all text lines 00281 * horizontal. 00282 * (2) The origin of the keystoning can be either a cheap document 00283 * feeder that rotates the page as it is passed through, or a 00284 * camera image taken from either the left or right side 00285 * of the vertical. 00286 * (3) The image transformation is a projective warping, 00287 * not a rotation. Apart from this function, the text lines 00288 * must be properly aligned vertically with respect to each 00289 * other. This can be done by pre-processing the page; e.g., 00290 * by rotating or horizontally shearing it. 00291 * Typically, this can be achieved by vertically aligning 00292 * the page edge. 00293 */ 00294 PIX * 00295 pixDeskewLocal(PIX *pixs, 00296 l_int32 nslices, 00297 l_int32 redsweep, 00298 l_int32 redsearch, 00299 l_float32 sweeprange, 00300 l_float32 sweepdelta, 00301 l_float32 minbsdelta) 00302 { 00303 l_int32 ret; 00304 PIX *pixd; 00305 PTA *ptas, *ptad; 00306 00307 PROCNAME("pixDeskewLocal"); 00308 00309 if (!pixs) 00310 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); 00311 00312 /* Skew array gives skew angle (deg) as fctn of raster line 00313 * where it intersects the LHS of the image */ 00314 ret = pixGetLocalSkewTransform(pixs, nslices, redsweep, redsearch, 00315 sweeprange, sweepdelta, minbsdelta, 00316 &ptas, &ptad); 00317 if (ret != 0) 00318 return (PIX *)ERROR_PTR("transform pts not found", procName, NULL); 00319 00320 /* Use a projective transform */ 00321 pixd = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE); 00322 00323 ptaDestroy(&ptas); 00324 ptaDestroy(&ptad); 00325 return pixd; 00326 } 00327 00328 00329 /*---------------------------------------------------------------------* 00330 * Determine the local skew * 00331 *---------------------------------------------------------------------*/ 00332 /*! 00333 * pixGetLocalSkewTransform() 00334 * 00335 * Input: pixs 00336 * nslices (the number of horizontal overlapping slices; must 00337 * be larger than 1 and not exceed 20; use 0 for default) 00338 * redsweep (sweep reduction factor: 1, 2, 4 or 8; 00339 * use 0 for default value) 00340 * redsearch (search reduction factor: 1, 2, 4 or 8, and 00341 * not larger than redsweep; use 0 for default value) 00342 * sweeprange (half the full range, assumed about 0; in degrees; 00343 * use 0.0 for default value) 00344 * sweepdelta (angle increment of sweep; in degrees; 00345 * use 0.0 for default value) 00346 * minbsdelta (min binary search increment angle; in degrees; 00347 * use 0.0 for default value) 00348 * &ptas (<return> 4 points in the source) 00349 * &ptad (<return> the corresponding 4 pts in the dest) 00350 * Return: 0 if OK, 1 on error 00351 * 00352 * Notes: 00353 * (1) This generates two pairs of points in the src, each pair 00354 * corresponding to a pair of points that would lie along 00355 * the same raster line in a transformed (dewarped) image. 00356 * (2) The sets of 4 src and 4 dest points returned by this function 00357 * can then be used, in a projective or bilinear transform, 00358 * to remove keystoning in the src. 00359 */ 00360 l_int32 00361 pixGetLocalSkewTransform(PIX *pixs, 00362 l_int32 nslices, 00363 l_int32 redsweep, 00364 l_int32 redsearch, 00365 l_float32 sweeprange, 00366 l_float32 sweepdelta, 00367 l_float32 minbsdelta, 00368 PTA **pptas, 00369 PTA **pptad) 00370 { 00371 l_int32 w, h, i; 00372 l_float32 deg2rad, angr, angd, dely; 00373 NUMA *naskew; 00374 PTA *ptas, *ptad; 00375 00376 PROCNAME("pixGetLocalSkewTransform"); 00377 00378 if (!pptas || !pptad) 00379 return ERROR_INT("&ptas and &ptad not defined", procName, 1); 00380 *pptas = *pptad = NULL; 00381 if (!pixs) 00382 return ERROR_INT("pixs not defined", procName, 1); 00383 if (nslices < 2 || nslices > 20) 00384 nslices = DEFAULT_SLICES; 00385 if (redsweep < 1 || redsweep > 8) 00386 redsweep = DEFAULT_SWEEP_REDUCTION; 00387 if (redsearch < 1 || redsearch > redsweep) 00388 redsearch = DEFAULT_BS_REDUCTION; 00389 if (sweeprange == 0.0) 00390 sweeprange = DEFAULT_SWEEP_RANGE; 00391 if (sweepdelta == 0.0) 00392 sweepdelta = DEFAULT_SWEEP_DELTA; 00393 if (minbsdelta == 0.0) 00394 minbsdelta = DEFAULT_MINBS_DELTA; 00395 00396 naskew = pixGetLocalSkewAngles(pixs, nslices, redsweep, redsearch, 00397 sweeprange, sweepdelta, minbsdelta, 00398 NULL, NULL); 00399 if (!naskew) 00400 return ERROR_INT("naskew not made", procName, 1); 00401 00402 deg2rad = 3.14159265 / 180.; 00403 w = pixGetWidth(pixs); 00404 h = pixGetHeight(pixs); 00405 ptas = ptaCreate(4); 00406 ptad = ptaCreate(4); 00407 *pptas = ptas; 00408 *pptad = ptad; 00409 00410 /* Find i for skew line that intersects LHS at i and RHS at h / 20 */ 00411 for (i = 0; i < h; i++) { 00412 numaGetFValue(naskew, i, &angd); 00413 angr = angd * deg2rad; 00414 dely = w * tan(angr); 00415 if (i - dely > 0.05 * h) 00416 break; 00417 } 00418 ptaAddPt(ptas, 0, i); 00419 ptaAddPt(ptas, w - 1, i - dely); 00420 ptaAddPt(ptad, 0, i); 00421 ptaAddPt(ptad, w - 1, i); 00422 00423 /* Find i for skew line that intersects LHS at i and RHS at 19h / 20 */ 00424 for (i = h - 1; i > 0; i--) { 00425 numaGetFValue(naskew, i, &angd); 00426 angr = angd * deg2rad; 00427 dely = w * tan(angr); 00428 if (i - dely < 0.95 * h) 00429 break; 00430 } 00431 ptaAddPt(ptas, 0, i); 00432 ptaAddPt(ptas, w - 1, i - dely); 00433 ptaAddPt(ptad, 0, i); 00434 ptaAddPt(ptad, w - 1, i); 00435 00436 numaDestroy(&naskew); 00437 return 0; 00438 } 00439 00440 00441 /*! 00442 * pixGetLocalSkewAngles() 00443 * 00444 * Input: pixs 00445 * nslices (the number of horizontal overlapping slices; must 00446 * be larger than 1 and not exceed 20; use 0 for default) 00447 * redsweep (sweep reduction factor: 1, 2, 4 or 8; 00448 * use 0 for default value) 00449 * redsearch (search reduction factor: 1, 2, 4 or 8, and 00450 * not larger than redsweep; use 0 for default value) 00451 * sweeprange (half the full range, assumed about 0; in degrees; 00452 * use 0.0 for default value) 00453 * sweepdelta (angle increment of sweep; in degrees; 00454 * use 0.0 for default value) 00455 * minbsdelta (min binary search increment angle; in degrees; 00456 * use 0.0 for default value) 00457 * &a (<optional return> slope of skew as fctn of y) 00458 * &b (<optional return> intercept at y=0 of skew as fctn of y) 00459 * Return: naskew, or null on error 00460 * 00461 * Notes: 00462 * (1) The local skew is measured in a set of overlapping strips. 00463 * We then do a least square linear fit parameters to get 00464 * the slope and intercept parameters a and b in 00465 * skew-angle = a * y + b (degrees) 00466 * for the local skew as a function of raster line y. 00467 * This is then used to make naskew, which can be interpreted 00468 * as the computed skew angle (in degrees) at the left edge 00469 * of each raster line. 00470 * (2) naskew can then be used to find the baselines of text, because 00471 * each text line has a baseline that should intersect 00472 * the left edge of the image with the angle given by this 00473 * array, evaluated at the raster line of intersection. 00474 */ 00475 NUMA * 00476 pixGetLocalSkewAngles(PIX *pixs, 00477 l_int32 nslices, 00478 l_int32 redsweep, 00479 l_int32 redsearch, 00480 l_float32 sweeprange, 00481 l_float32 sweepdelta, 00482 l_float32 minbsdelta, 00483 l_float32 *pa, 00484 l_float32 *pb) 00485 { 00486 l_int32 w, h, hs, i, ystart, yend, ovlap, npts; 00487 l_float32 angle, conf, ycenter, a, b; 00488 BOX *box; 00489 NUMA *naskew; 00490 PIX *pix; 00491 PTA *pta; 00492 00493 PROCNAME("pixGetLocalSkewAngles"); 00494 00495 if (!pixs) 00496 return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); 00497 if (nslices < 2 || nslices > 20) 00498 nslices = DEFAULT_SLICES; 00499 if (redsweep < 1 || redsweep > 8) 00500 redsweep = DEFAULT_SWEEP_REDUCTION; 00501 if (redsearch < 1 || redsearch > redsweep) 00502 redsearch = DEFAULT_BS_REDUCTION; 00503 if (sweeprange == 0.0) 00504 sweeprange = DEFAULT_SWEEP_RANGE; 00505 if (sweepdelta == 0.0) 00506 sweepdelta = DEFAULT_SWEEP_DELTA; 00507 if (minbsdelta == 0.0) 00508 minbsdelta = DEFAULT_MINBS_DELTA; 00509 00510 h = pixGetHeight(pixs); 00511 w = pixGetWidth(pixs); 00512 hs = h / nslices; 00513 ovlap = (l_int32)(OVERLAP_FRACTION * hs); 00514 pta = ptaCreate(nslices); 00515 for (i = 0; i < nslices; i++) { 00516 ystart = L_MAX(0, hs * i - ovlap); 00517 yend = L_MIN(h - 1, hs * (i + 1) + ovlap); 00518 ycenter = (ystart + yend) / 2; 00519 box = boxCreate(0, ystart, w, yend - ystart + 1); 00520 pix = pixClipRectangle(pixs, box, NULL); 00521 pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch, 00522 sweeprange, sweepdelta, minbsdelta); 00523 if (conf > MIN_ALLOWED_CONFIDENCE) 00524 ptaAddPt(pta, ycenter, angle); 00525 pixDestroy(&pix); 00526 boxDestroy(&box); 00527 } 00528 /* ptaWriteStream(stderr, pta, 0); */ 00529 00530 /* Do linear least squares fit */ 00531 if ((npts = ptaGetCount(pta)) < 2) { 00532 ptaDestroy(&pta); 00533 return (NUMA *)ERROR_PTR("can't fit skew", procName, NULL); 00534 } 00535 ptaGetLinearLSF(pta, &a, &b, NULL); 00536 if (pa) *pa = a; 00537 if (pb) *pb = b; 00538 00539 /* Make skew angle array as function of raster line */ 00540 naskew = numaCreate(h); 00541 for (i = 0; i < h; i++) { 00542 angle = a * i + b; 00543 numaAddNumber(naskew, angle); 00544 } 00545 00546 #if DEBUG_PLOT 00547 { NUMA *nax, *nay; 00548 GPLOT *gplot; 00549 ptaGetArrays(pta, &nax, &nay); 00550 gplot = gplotCreate("junkskew", GPLOT_X11, "skew as fctn of y", 00551 "y (in raster lines from top)", "angle (in degrees)"); 00552 gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf"); 00553 gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts"); 00554 gplotMakeOutput(gplot); 00555 gplotDestroy(&gplot); 00556 numaDestroy(&nax); 00557 numaDestroy(&nay); 00558 } 00559 #endif /* DEBUG_PLOT */ 00560 00561 ptaDestroy(&pta); 00562 return naskew; 00563 } 00564 00565