Leptonica/numafunc2_8c_source.html

00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015
00016 /*
00017  *   numafunc2.c
00018  *
00019  *      Morphological (min/max) operations
00020  *          NUMA        *numaErode()
00021  *          NUMA        *numaDilate()
00022  *          NUMA        *numaOpen()
00023  *          NUMA        *numaClose()
00024  *
00025  *      Other transforms
00026  *          NUMA        *numaTransform()
00027  *          l_int32      numaWindowedStats()
00028  *          NUMA        *numaWindowedMean()
00029  *          NUMA        *numaWindowedMeanSquare()
00030  *          l_int32      numaWindowedVariance()
00031  *          NUMA        *numaConvertToInt()
00032  *
00033  *      Histogram generation and statistics
00034  *          NUMA        *numaMakeHistogram()
00035  *          NUMA        *numaMakeHistogramAuto()
00036  *          NUMA        *numaMakeHistogramClipped()
00037  *          NUMA        *numaRebinHistogram()
00038  *          NUMA        *numaNormalizeHistogram()
00039  *          l_int32      numaGetStatsUsingHistogram()
00040  *          l_int32      numaGetHistogramStats()
00041  *          l_int32      numaGetHistogramStatsOnInterval()
00042  *          l_int32      numaMakeRankFromHistogram()
00043  *          l_int32      numaHistogramGetRankFromVal()
00044  *          l_int32      numaHistogramGetValFromRank()
00045  *          l_int32      numaDiscretizeRankAndIntensity()
00046  *          l_int32      numaGetRankBinValues()
00047  *
00048  *      Splitting a distribution
00049  *          l_int32      numaSplitDistribution()
00050  *
00051  *      Extrema finding
00052  *          NUMA        *numaFindPeaks()
00053  *          NUMA        *numaFindExtrema()
00054  *          l_int32     *numaCountReversals()
00055  *
00056  *      Threshold crossings and frequency analysis
00057  *          l_int32      numaSelectCrossingThreshold()
00058  *          NUMA        *numaCrossingsByThreshold()
00059  *          NUMA        *numaCrossingsByPeaks()
00060  *          NUMA        *numaEvalBestHaarParameters()
00061  *          l_int32      numaEvalHaarSum()
00062  *
00063  *    Things to remember when using the Numa:
00064  *
00065  *    (1) The numa is a struct, not an array.  Always use accessors
00066  *        (see numabasic.c), never the fields directly.
00067  *
00068  *    (2) The number array holds l_float32 values.  It can also
00069  *        be used to store l_int32 values.  See numabasic.c for
00070  *        details on using the accessors.
00071  *
00072  *    (3) Occasionally, in the comments we denote the i-th element of a
00073  *        numa by na[i].  This is conceptual only -- the numa is not an array!
00074  *
00075  *    Some general comments on histograms:
00076  *
00077  *    (1) Histograms are the generic statistical representation of
00078  *        the data about some attribute.  Typically they're not
00079  *        normalized -- they simply give the number of occurrences
00080  *        within each range of values of the attribute.  This range
00081  *        of values is referred to as a 'bucket'.  For example,
00082  *        the histogram could specify how many connected components
00083  *        are found for each value of their width; in that case,
00084  *        the bucket size is 1.
00085  *
00086  *    (2) In leptonica, all buckets have the same size.  Histograms
00087  *        are therefore specified by a numa of occurrences, along
00088  *        with two other numbers: the 'value' associated with the
00089  *        occupants of the first bucket and the size (i.e., 'width')
00090  *        of each bucket.  These two numbers then allow us to calculate
00091  *        the value associated with the occupants of each bucket.
00092  *        These numbers are fields in the numa, initialized to
00093  *        a startx value of 0.0 and a binsize of 1.0.  Accessors for
00094  *        these fields are functions numa*XParameters().  All histograms
00095  *        must have these two numbers properly set.
00096  */
00097
00098 #include <math.h>
00099 #include "allheaders.h"
00100
00101     /* bin sizes in numaMakeHistogram() */
00102 static const l_int32 BinSizeArray[] = {2, 5, 10, 20, 50, 100, 200, 500, 1000,\
00103                       2000, 5000, 10000, 20000, 50000, 100000, 200000,\
00104                       500000, 1000000, 2000000, 5000000, 10000000,\
00105                       200000000, 50000000, 100000000};
00106 static const l_int32 NBinSizes = 24;
00107
00108
00109 #ifndef  NO_CONSOLE_IO
00110 #define  DEBUG_HISTO        0
00111 #define  DEBUG_CROSSINGS    0
00112 #define  DEBUG_FREQUENCY    0
00113 #endif  /* ~NO_CONSOLE_IO */
00114
00115
00116 /*----------------------------------------------------------------------*
00117  *                     Morphological operations                         *
00118  *----------------------------------------------------------------------*/
00119 /*!
00120  *  numaErode()
00121  *
00122  *      Input:  nas
00123  *              size (of sel; greater than 0, odd; origin implicitly in center)
00124  *      Return: nad (eroded), or null on error
00125  *
00126  *  Notes:
00127  *      (1) The structuring element (sel) is linear, all "hits"
00128  *      (2) If size == 1, this returns a copy
00129  *      (3) General comment.  The morphological operations are equivalent
00130  *          to those that would be performed on a 1-dimensional fpix.
00131  *          However, because we have not implemented morphological
00132  *          operations on fpix, we do this here.  Because it is only
00133  *          1 dimensional, there is no reason to use the more
00134  *          complicated van Herk/Gil-Werman algorithm, and we do it
00135  *          by brute force.
00136  */
00137 NUMA *
00138 numaErode(NUMA    *nas,
00139           l_int32  size)
00140 {
00141 l_int32     i, j, n, hsize, len;
00142 l_float32   minval;
00143 l_float32  *fa, *fas, *fad;
00144 NUMA       *nad;
00145
00146     PROCNAME("numaErode");
00147
00148     if (!nas)
00149         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
00150     if (size <= 0)
00151         return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
00152     if ((size & 1) == 0 ) {
00153         L_WARNING("sel size must be odd; increasing by 1", procName);
00154         size++;
00155     }
00156
00157     if (size == 1)
00158         return numaCopy(nas);
00159
00160         /* Make a source fa (fas) that has an added (size / 2) boundary
00161          * on left and right, contains a copy of nas in the interior region
00162          * (between 'size' and 'size + n', and has large values
00163          * inserted in the boundary (because it is an erosion). */
00164     n = numaGetCount(nas);
00165     hsize = size / 2;
00166     len = n + 2 * hsize;
00167     if ((fas = (l_float32 *)CALLOC(len, sizeof(l_float32))) == NULL)
00168         return (NUMA *)ERROR_PTR("fas not made", procName, NULL);
00169     for (i = 0; i < hsize; i++)
00170          fas[i] = 1.0e37;
00171     for (i = hsize + n; i < len; i++)
00172          fas[i] = 1.0e37;
00173     fa = numaGetFArray(nas, L_NOCOPY);
00174     for (i = 0; i < n; i++)
00175          fas[hsize + i] = fa[i];
00176
00177     nad = numaMakeConstant(0, n);
00178     numaCopyXParameters(nad, nas);
00179     fad = numaGetFArray(nad, L_NOCOPY);
00180     for (i = 0; i < n; i++) {
00181         minval = 1.0e37;  /* start big */
00182         for (j = 0; j < size; j++)
00183             minval = L_MIN(minval, fas[i + j]);
00184         fad[i] = minval;
00185     }
00186
00187     FREE(fas);
00188     return nad;
00189 }
00190
00191
00192 /*!
00193  *  numaDilate()
00194  *
00195  *      Input:  nas
00196  *              size (of sel; greater than 0, odd; origin implicitly in center)
00197  *      Return: nad (dilated), or null on error
00198  *
00199  *  Notes:
00200  *      (1) The structuring element (sel) is linear, all "hits"
00201  *      (2) If size == 1, this returns a copy
00202  */
00203 NUMA *
00204 numaDilate(NUMA    *nas,
00205            l_int32  size)
00206 {
00207 l_int32     i, j, n, hsize, len;
00208 l_float32   maxval;
00209 l_float32  *fa, *fas, *fad;
00210 NUMA       *nad;
00211
00212     PROCNAME("numaDilate");
00213
00214     if (!nas)
00215         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
00216     if (size <= 0)
00217         return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
00218     if ((size & 1) == 0 ) {
00219         L_WARNING("sel size must be odd; increasing by 1", procName);
00220         size++;
00221     }
00222
00223     if (size == 1)
00224         return numaCopy(nas);
00225
00226         /* Make a source fa (fas) that has an added (size / 2) boundary
00227          * on left and right, contains a copy of nas in the interior region
00228          * (between 'size' and 'size + n', and has small values
00229          * inserted in the boundary (because it is a dilation). */
00230     n = numaGetCount(nas);
00231     hsize = size / 2;
00232     len = n + 2 * hsize;
00233     if ((fas = (l_float32 *)CALLOC(len, sizeof(l_float32))) == NULL)
00234         return (NUMA *)ERROR_PTR("fas not made", procName, NULL);
00235     for (i = 0; i < hsize; i++)
00236          fas[i] = -1.0e37;
00237     for (i = hsize + n; i < len; i++)
00238          fas[i] = -1.0e37;
00239     fa = numaGetFArray(nas, L_NOCOPY);
00240     for (i = 0; i < n; i++)
00241          fas[hsize + i] = fa[i];
00242
00243     nad = numaMakeConstant(0, n);
00244     numaCopyXParameters(nad, nas);
00245     fad = numaGetFArray(nad, L_NOCOPY);
00246     for (i = 0; i < n; i++) {
00247         maxval = -1.0e37;  /* start small */
00248         for (j = 0; j < size; j++)
00249             maxval = L_MAX(maxval, fas[i + j]);
00250         fad[i] = maxval;
00251     }
00252
00253     FREE(fas);
00254     return nad;
00255 }
00256
00257
00258 /*!
00259  *  numaOpen()
00260  *
00261  *      Input:  nas
00262  *              size (of sel; greater than 0, odd; origin implicitly in center)
00263  *      Return: nad (opened), or null on error
00264  *
00265  *  Notes:
00266  *      (1) The structuring element (sel) is linear, all "hits"
00267  *      (2) If size == 1, this returns a copy
00268  */
00269 NUMA *
00270 numaOpen(NUMA    *nas,
00271          l_int32  size)
00272 {
00273 NUMA  *nat, *nad;
00274
00275     PROCNAME("numaOpen");
00276
00277     if (!nas)
00278         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
00279     if (size <= 0)
00280         return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
00281     if ((size & 1) == 0 ) {
00282         L_WARNING("sel size must be odd; increasing by 1", procName);
00283         size++;
00284     }
00285
00286     if (size == 1)
00287         return numaCopy(nas);
00288
00289     nat = numaErode(nas, size);
00290     nad = numaDilate(nat, size);
00291     numaDestroy(&nat);
00292     return nad;
00293 }
00294
00295
00296 /*!
00297  *  numaClose()
00298  *
00299  *      Input:  nas
00300  *              size (of sel; greater than 0, odd; origin implicitly in center)
00301  *      Return: nad (opened), or null on error
00302  *
00303  *  Notes:
00304  *      (1) The structuring element (sel) is linear, all "hits"
00305  *      (2) If size == 1, this returns a copy
00306  *      (3) We add a border before doing this operation, for the same
00307  *          reason that we add a border to a pix before doing a safe closing.
00308  *          Without the border, a small component near the border gets
00309  *          clipped at the border on dilation, and can be entirely removed
00310  *          by the following erosion, violating the basic extensivity
00311  *          property of closing.
00312  */
00313 NUMA *
00314 numaClose(NUMA    *nas,
00315           l_int32  size)
00316 {
00317 NUMA  *nab, *nat1, *nat2, *nad;
00318
00319     PROCNAME("numaClose");
00320
00321     if (!nas)
00322         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
00323     if (size <= 0)
00324         return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
00325     if ((size & 1) == 0 ) {
00326         L_WARNING("sel size must be odd; increasing by 1", procName);
00327         size++;
00328     }
00329
00330     if (size == 1)
00331         return numaCopy(nas);
00332
00333     nab = numaAddBorder(nas, size, size, 0);  /* to preserve extensivity */
00334     nat1 = numaDilate(nab, size);
00335     nat2 = numaErode(nat1, size);
00336     nad = numaRemoveBorder(nat2, size, size);
00337     numaDestroy(&nab);
00338     numaDestroy(&nat1);
00339     numaDestroy(&nat2);
00340     return nad;
00341 }
00342
00343
00344 /*----------------------------------------------------------------------*
00345  *                            Other transforms                          *
00346  *----------------------------------------------------------------------*/
00347 /*!
00348  *  numaTransform()
00349  *
00350  *      Input:  nas
00351  *              shift (add this to each number)
00352  *              scale (multiply each number by this)
00353  *      Return: nad (with all values shifted and scaled, or null on error)
00354  *
00355  *  Notes:
00356  *      (1) Each number is shifted before scaling.
00357  *      (2) The operation sequence is opposite to that for Box and Pta:
00358  *          scale first, then shift.
00359  */
00360 NUMA *
00361 numaTransform(NUMA      *nas,
00362               l_float32  shift,
00363               l_float32  scale)
00364 {
00365 l_int32    i, n;
00366 l_float32  val;
00367 NUMA      *nad;
00368
00369     PROCNAME("numaTransform");
00370
00371     if (!nas)
00372         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
00373     n = numaGetCount(nas);
00374     if ((nad = numaCreate(n)) == NULL)
00375         return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
00376     for (i = 0; i < n; i++) {
00377         numaGetFValue(nas, i, &val);
00378         val = scale * val + shift;
00379         numaAddNumber(nad, val);
00380     }
00381     return nad;
00382 }
00383
00384
00385 /*!
00386  *  numaWindowedStats()
00387  *
00388  *      Input:  nas (input numa)
00389  *              wc (half width of the window)
00390  *              &nam (<optional return> mean value in window)
00391  *              &nams (<optional return> mean square value in window)
00392  *              &pnav (<optional return> variance in window)
00393  *              &pnarv (<optional return> rms deviation from the mean)
00394  *      Return: 0 if OK, 1 on error
00395  *
00396  *  Notes:
00397  *      (1) This is a high-level convenience function for calculating
00398  *          any or all of these derived arrays.
00399  *      (2) These statistical measures over the values in the
00400  *          rectangular window are:
00401  *            - average value: <x>  (nam)
00402  *            - average squared value: <x*x> (nams)
00403  *            - variance: <(x - <x>)*(x - <x>)> = <x*x> - <x>*<x>  (nav)
00404  *            - square-root of variance: (narv)
00405  *          where the brackets < .. > indicate that the average value is
00406  *          to be taken over the window.
00407  *      (3) Note that the variance is just the mean square difference from
00408  *          the mean value; and the square root of the variance is the
00409  *          root mean square difference from the mean, sometimes also
00410  *          called the 'standard deviation'.
00411  *      (4) Internally, use mirrored borders to handle values near the
00412  *          end of each array.
00413  */
00414 l_int32
00415 numaWindowedStats(NUMA    *nas,
00416                   l_int32  wc,
00417                   NUMA   **pnam,
00418                   NUMA   **pnams,
00419                   NUMA   **pnav,
00420                   NUMA   **pnarv)
00421 {
00422 NUMA  *nam, *nams;
00423
00424     PROCNAME("numaWindowedStats");
00425
00426     if (!nas)
00427         return ERROR_INT("nas not defined", procName, 1);
00428     if (2 * wc + 1 > numaGetCount(nas))
00429         L_WARNING("filter wider than input array!", procName);
00430
00431     if (!pnav && !pnarv) {
00432         if (pnam) *pnam = numaWindowedMean(nas, wc);
00433         if (pnams) *pnams = numaWindowedMeanSquare(nas, wc);
00434         return 0;
00435     }
00436
00437     nam = numaWindowedMean(nas, wc);
00438     nams = numaWindowedMeanSquare(nas, wc);
00439     numaWindowedVariance(nam, nams, pnav, pnarv);
00440     if (pnam)
00441         *pnam = nam;
00442     else
00443         numaDestroy(&nam);
00444     if (pnams)
00445         *pnams = nams;
00446     else
00447         numaDestroy(&nams);
00448     return 0;
00449 }
00450
00451
00452 /*!
00453  *  numaWindowedMean()
00454  *
00455  *      Input:  nas
00456  *              wc (half width of the convolution window)
00457  *      Return: nad (after low-pass filtering), or null on error
00458  *
00459  *  Notes:
00460  *      (1) This is a convolution.  The window has width = 2 * @wc + 1.
00461  *      (2) We add a mirrored border of size @wc to each end of the array.
00462  */
00463 NUMA *
00464 numaWindowedMean(NUMA    *nas,
00465                  l_int32  wc)
00466 {
00467 l_int32     i, n, n1, width;
00468 l_float32   sum, norm;
00469 l_float32  *fa1, *fad, *suma;
00470 NUMA       *na1, *nad;
00471
00472     PROCNAME("numaWindowedMean");
00473
00474     if (!nas)
00475         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
00476     n = numaGetCount(nas);
00477     width = 2 * wc + 1;  /* filter width */
00478     if (width > n)
00479         L_WARNING("filter wider than input array!", procName);
00480
00481     na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER);
00482     n1 = n + 2 * wc;
00483     fa1 = numaGetFArray(na1, L_NOCOPY);
00484     nad = numaMakeConstant(0, n);
00485     fad = numaGetFArray(nad, L_NOCOPY);
00486
00487         /* Make sum array; note the indexing */
00488     if ((suma = (l_float32 *)CALLOC(n1 + 1, sizeof(l_float32))) == NULL)
00489         return (NUMA *)ERROR_PTR("suma not made", procName, NULL);
00490     sum = 0.0;
00491     suma[0] = 0.0;
00492     for (i = 0; i < n1; i++) {
00493         sum += fa1[i];
00494         suma[i + 1] = sum;
00495     }
00496
00497     norm = 1. / (2 * wc + 1);
00498     for (i = 0; i < n; i++)
00499         fad[i] = norm * (suma[width + i] - suma[i]);
00500
00501     FREE(suma);
00502     numaDestroy(&na1);
00503     return nad;
00504 }
00505
00506
00507 /*!
00508  *  numaWindowedMeanSquare()
00509  *
00510  *      Input:  nas
00511  *              wc (half width of the window)
00512  *      Return: nad (containing windowed mean square values), or null on error
00513  *
00514  *  Notes:
00515  *      (1) The window has width = 2 * @wc + 1.
00516  *      (2) We add a mirrored border of size @wc to each end of the array.
00517  */
00518 NUMA *
00519 numaWindowedMeanSquare(NUMA    *nas,
00520                        l_int32  wc)
00521 {
00522 l_int32     i, n, n1, width;
00523 l_float32   sum, norm;
00524 l_float32  *fa1, *fad, *suma;
00525 NUMA       *na1, *nad;
00526
00527     PROCNAME("numaWindowedMeanSquare");
00528
00529     if (!nas)
00530         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
00531     n = numaGetCount(nas);
00532     width = 2 * wc + 1;  /* filter width */
00533     if (width > n)
00534         L_WARNING("filter wider than input array!", procName);
00535
00536     na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER);
00537     n1 = n + 2 * wc;
00538     fa1 = numaGetFArray(na1, L_NOCOPY);
00539     nad = numaMakeConstant(0, n);
00540     fad = numaGetFArray(nad, L_NOCOPY);
00541
00542         /* Make sum array; note the indexing */
00543     if ((suma = (l_float32 *)CALLOC(n1 + 1, sizeof(l_float32))) == NULL)
00544         return (NUMA *)ERROR_PTR("suma not made", procName, NULL);
00545     sum = 0.0;
00546     suma[0] = 0.0;
00547     for (i = 0; i < n1; i++) {
00548         sum += fa1[i] * fa1[i];
00549         suma[i + 1] = sum;
00550     }
00551
00552     norm = 1. / (2 * wc + 1);
00553     for (i = 0; i < n; i++)
00554         fad[i] = norm * (suma[width + i] - suma[i]);
00555
00556     FREE(suma);
00557     numaDestroy(&na1);
00558     return nad;
00559 }
00560
00561
00562 /*!
00563  *  numaWindowedVariance()
00564  *
00565  *      Input:  nam (windowed mean values)
00566  *              nams (windowed mean square values)
00567  *              &pnav (<optional return> numa of variance -- the ms deviation
00568  *                     from the mean)
00569  *              &pnarv (<optional return> numa of rms deviation from the mean)
00570  *      Return: 0 if OK, 1 on error
00571  *
00572  *  Notes:
00573  *      (1) The numas of windowed mean and mean square are precomputed,
00574  *          using numaWindowedMean() and numaWindowedMeanSquare().
00575  *      (2) Either or both of the variance and square-root of variance
00576  *          are returned, where the variance is the average over the
00577  *          window of the mean square difference of the pixel value
00578  *          from the mean:
00579  *                <(x - <x>)*(x - <x>)> = <x*x> - <x>*<x>
00580  */
00581 l_int32
00582 numaWindowedVariance(NUMA   *nam,
00583                      NUMA   *nams,
00584                      NUMA  **pnav,
00585                      NUMA  **pnarv)
00586 {
00587 l_int32     i, nm, nms;
00588 l_float32   var;
00589 l_float32  *fam, *fams, *fav, *farv;
00590 NUMA       *nav, *narv;  /* variance and square root of variance */
00591
00592     PROCNAME("numaWindowedVariance");
00593
00594     if (!nam)
00595         return ERROR_INT("nam not defined", procName, 1);
00596     if (!nams)
00597         return ERROR_INT("nams not defined", procName, 1);
00598     if (!pnav && !pnarv)
00599         return ERROR_INT("neither &nav nor &narv are defined", procName, 1);
00600     nm = numaGetCount(nam);
00601     nms = numaGetCount(nams);
00602     if (nm != nms)
00603         return ERROR_INT("sizes of nam and nams differ", procName, 1);
00604
00605     if (pnav) {
00606         nav = numaMakeConstant(0, nm);
00607         *pnav = nav;
00608         fav = numaGetFArray(nav, L_NOCOPY);
00609     }
00610     if (pnarv) {
00611         narv = numaMakeConstant(0, nm);
00612         *pnarv = narv;
00613         farv = numaGetFArray(narv, L_NOCOPY);
00614     }
00615     fam = numaGetFArray(nam, L_NOCOPY);
00616     fams = numaGetFArray(nams, L_NOCOPY);
00617
00618     for (i = 0; i < nm; i++) {
00619         var = fams[i] - fam[i] * fam[i];
00620         if (pnav)
00621             fav[i] = var;
00622         if (pnarv)
00623             farv[i] = (l_float32)sqrt(var);
00624     }
00625
00626     return 0;
00627 }
00628
00629
00630 /*!
00631  *  numaConvertToInt()
00632  *
00633  *      Input:  na
00634  *      Return: na with all values rounded to nearest integer, or
00635  *              null on error
00636  */
00637 NUMA *
00638 numaConvertToInt(NUMA  *nas)
00639 {
00640 l_int32  i, n, ival;
00641 NUMA    *nad;
00642
00643     PROCNAME("numaConvertToInt");
00644
00645     if (!nas)
00646         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
00647
00648     n = numaGetCount(nas);
00649     if ((nad = numaCreate(n)) == NULL)
00650         return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
00651     for (i = 0; i < n; i++) {
00652         numaGetIValue(nas, i, &ival);
00653         numaAddNumber(nad, ival);
00654     }
00655     return nad;
00656 }
00657
00658
00659 /*----------------------------------------------------------------------*
00660  *                 Histogram generation and statistics                  *
00661  *----------------------------------------------------------------------*/
00662 /*!
00663  *  numaMakeHistogram()
00664  *
00665  *      Input:  na
00666  *              maxbins (max number of histogram bins)
00667  *              &binsize  (<return> size of histogram bins)
00668  *              &binstart (<optional return> start val of minimum bin;
00669  *                         input NULL to force start at 0)
00670  *      Return: na consisiting of histogram of integerized values,
00671  *              or null on error.
00672  *
00673  *  Note:
00674  *      (1) This simple interface is designed for integer data.
00675  *          The bins are of integer width and start on integer boundaries,
00676  *          so the results on float data will not have high precision.
00677  *      (2) Specify the max number of input bins.   Then @binsize,
00678  *          the size of bins necessary to accommodate the input data,
00679  *          is returned.  It is one of the sequence:
00680  *                {1, 2, 5, 10, 20, 50, ...}.
00681  *      (3) If &binstart is given, all values are accommodated,
00682  *          and the min value of the starting bin is returned.
00683  *          Otherwise, all negative values are discarded and
00684  *          the histogram bins start at 0.
00685  */
00686 NUMA *
00687 numaMakeHistogram(NUMA     *na,
00688                   l_int32   maxbins,
00689                   l_int32  *pbinsize,
00690                   l_int32  *pbinstart)
00691 {
00692 l_int32    i, n, ival, hval;
00693 l_int32    iminval, imaxval, range, binsize, nbins, ibin;
00694 l_float32  val, ratio;
00695 NUMA      *nai, *nahist;
00696
00697     PROCNAME("numaMakeHistogram");
00698
00699     if (!na)
00700         return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
00701     if (!pbinsize)
00702         return (NUMA *)ERROR_PTR("&binsize not defined", procName, NULL);
00703
00704         /* Determine input range */
00705     numaGetMin(na, &val, NULL);
00706     iminval = (l_int32)(val + 0.5);
00707     numaGetMax(na, &val, NULL);
00708     imaxval = (l_int32)(val + 0.5);
00709     if (pbinstart == NULL) {  /* clip negative vals; start from 0 */
00710         iminval = 0;
00711         if (imaxval < 0)
00712             return (NUMA *)ERROR_PTR("all values < 0", procName, NULL);
00713     }
00714
00715         /* Determine binsize */
00716     range = imaxval - iminval + 1;
00717     if (range > maxbins - 1) {
00718         ratio = (l_float64)range / (l_float64)maxbins;
00719         binsize = 0;
00720         for (i = 0; i < NBinSizes; i++) {
00721             if (ratio < BinSizeArray[i]) {
00722                 binsize = BinSizeArray[i];
00723                 break;
00724             }
00725         }
00726         if (binsize == 0)
00727             return (NUMA *)ERROR_PTR("numbers too large", procName, NULL);
00728     }
00729     else
00730         binsize = 1;
00731     *pbinsize = binsize;
00732     nbins = 1 + range / binsize;  /* +1 seems to be sufficient */
00733
00734         /* Redetermine iminval */
00735     if (pbinstart && binsize > 1) {
00736         if (iminval >= 0)
00737             iminval = binsize * (iminval / binsize);
00738         else
00739             iminval = binsize * ((iminval - binsize + 1) / binsize);
00740     }
00741     if (pbinstart)
00742         *pbinstart = iminval;
00743
00744 #if  DEBUG_HISTO
00745     fprintf(stderr, " imaxval = %d, range = %d, nbins = %d\n",
00746             imaxval, range, nbins);
00747 #endif  /* DEBUG_HISTO */
00748
00749         /* Use integerized data for input */
00750     if ((nai = numaConvertToInt(na)) == NULL)
00751         return (NUMA *)ERROR_PTR("nai not made", procName, NULL);
00752     n = numaGetCount(nai);
00753
00754         /* Make histogram, converting value in input array
00755          * into a bin number for this histogram array. */
00756     if ((nahist = numaCreate(nbins)) == NULL)
00757         return (NUMA *)ERROR_PTR("nahist not made", procName, NULL);
00758     numaSetCount(nahist, nbins);
00759     numaSetXParameters(nahist, iminval, binsize);
00760     for (i = 0; i < n; i++) {
00761         numaGetIValue(nai, i, &ival);
00762         ibin = (ival - iminval) / binsize;
00763         if (ibin >= 0 && ibin < nbins) {
00764             numaGetIValue(nahist, ibin, &hval);
00765             numaSetValue(nahist, ibin, hval + 1.0);
00766         }
00767     }
00768
00769     numaDestroy(&nai);
00770     return nahist;
00771 }
00772
00773
00774 /*!
00775  *  numaMakeHistogramAuto()
00776  *
00777  *      Input:  na (numa of floats; these may be integers)
00778  *              maxbins (max number of histogram bins; >= 1)
00779  *      Return: na consisiting of histogram of quantized float values,
00780  *              or null on error.
00781  *
00782  *  Notes:
00783  *      (1) This simple interface is designed for accurate binning
00784  *          of both integer and float data.
00785  *      (2) If the array data is integers, and the range of integers
00786  *          is smaller than @maxbins, they are binned as they fall,
00787  *          with binsize = 1.
00788  *      (3) If the range of data, (maxval - minval), is larger than
00789  *          @maxbins, or if the data is floats, they are binned into
00790  *          exactly @maxbins bins.
00791  *      (4) Unlike numaMakeHistogram(), these bins in general have
00792  *          non-integer location and width, even for integer data.
00793  */
00794 NUMA *
00795 numaMakeHistogramAuto(NUMA    *na,
00796                       l_int32  maxbins)
00797 {
00798 l_int32    i, n, imin, imax, irange, ibin, ival, allints;
00799 l_float32  minval, maxval, range, binsize, fval;
00800 NUMA      *nah;
00801
00802     PROCNAME("numaMakeHistogramAuto");
00803
00804     if (!na)
00805         return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
00806     maxbins = L_MAX(1, maxbins);
00807
00808         /* Determine input range */
00809     numaGetMin(na, &minval, NULL);
00810     numaGetMax(na, &maxval, NULL);
00811
00812         /* Determine if values are all integers */
00813     n = numaGetCount(na);
00814     numaHasOnlyIntegers(na, maxbins, &allints);
00815
00816         /* Do simple integer binning if possible */
00817     if (allints && (maxval - minval < maxbins)) {
00818         imin = (l_int32)minval;
00819         imax = (l_int32)maxval;
00820         irange = imax - imin + 1;
00821         nah = numaCreate(irange);
00822         numaSetCount(nah, irange);  /* init */
00823         numaSetXParameters(nah, minval, 1.0);
00824         for (i = 0; i < n; i++) {
00825             numaGetIValue(na, i, &ival);
00826             ibin = ival - imin;
00827             numaGetIValue(nah, ibin, &ival);
00828             numaSetValue(nah, ibin, ival + 1.0);
00829         }
00830
00831         return nah;
00832     }
00833
00834         /* Do float binning, even if the data is integers. */
00835     range = maxval - minval;
00836     binsize = range / (l_float32)maxbins;
00837     if (range == 0.0) {
00838         nah = numaCreate(1);
00839         numaSetXParameters(nah, minval, binsize);
00840         numaAddNumber(nah, n);
00841         return nah;
00842     }
00843     nah = numaCreate(maxbins);
00844     numaSetCount(nah, maxbins);
00845     numaSetXParameters(nah, minval, binsize);
00846     for (i = 0; i < n; i++) {
00847         numaGetFValue(na, i, &fval);
00848         ibin = (l_int32)((fval - minval) / binsize);
00849         ibin = L_MIN(ibin, maxbins - 1);  /* "edge" case; stay in bounds */
00850         numaGetIValue(nah, ibin, &ival);
00851         numaSetValue(nah, ibin, ival + 1.0);
00852     }
00853
00854     return nah;
00855 }
00856
00857
00858 /*!
00859  *  numaMakeHistogramClipped()
00860  *
00861  *      Input:  na
00862  *              binsize (typically 1.0)
00863  *              maxsize (of histogram ordinate)
00864  *      Return: na (histogram of bins of size @binsize, starting with
00865  *                  the na[0] (x = 0.0) and going up to a maximum of
00866  *                  x = @maxsize, by increments of @binsize), or null on error
00867  *
00868  *  Notes:
00869  *      (1) This simple function generates a histogram of values
00870  *          from na, discarding all values < 0.0 or greater than
00871  *          min(@maxsize, maxval), where maxval is the maximum value in na.
00872  *          The histogram data is put in bins of size delx = @binsize,
00873  *          starting at x = 0.0.  We use as many bins as are
00874  *          needed to hold the data.
00875  */
00876 NUMA *
00877 numaMakeHistogramClipped(NUMA      *na,
00878                          l_float32  binsize,
00879                          l_float32  maxsize)
00880 {
00881 l_int32    i, n, nbins, ival, ibin;
00882 l_float32  val, maxval;
00883 NUMA      *nad;
00884
00885     PROCNAME("numaMakeHistogramClipped");
00886
00887     if (!na)
00888         return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
00889     if (binsize <= 0.0)
00890         return (NUMA *)ERROR_PTR("binsize must be > 0.0", procName, NULL);
00891     if (binsize > maxsize)
00892         binsize = maxsize;  /* just one bin */
00893
00894     numaGetMax(na, &maxval, NULL);
00895     n = numaGetCount(na);
00896     maxsize = L_MIN(maxsize, maxval);
00897     nbins = (l_int32)(maxsize / binsize) + 1;
00898
00899 /*    fprintf(stderr, "maxsize = %7.3f, nbins = %d\n", maxsize, nbins); */
00900
00901     if ((nad = numaCreate(nbins)) == NULL)
00902         return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
00903     numaSetXParameters(nad, 0.0, binsize);
00904     numaSetCount(nad, nbins);  /* interpret zeroes in bins as data */
00905     for (i = 0; i < n; i++) {
00906         numaGetFValue(na, i, &val);
00907         ibin = (l_int32)(val / binsize);
00908         if (ibin >= 0 && ibin < nbins) {
00909             numaGetIValue(nad, ibin, &ival);
00910             numaSetValue(nad, ibin, ival + 1.0);
00911         }
00912     }
00913
00914     return nad;
00915 }
00916
00917
00918 /*!
00919  *  numaRebinHistogram()
00920  *
00921  *      Input:  nas (input histogram)
00922  *              newsize (number of old bins contained in each new bin)
00923  *      Return: nad (more coarsely re-binned histogram), or null on error
00924  */
00925 NUMA *
00926 numaRebinHistogram(NUMA    *nas,
00927                    l_int32  newsize)
00928 {
00929 l_int32    i, j, ns, nd, index, count, val;
00930 l_float32  start, oldsize;
00931 NUMA      *nad;
00932
00933     PROCNAME("numaRebinHistogram");
00934
00935     if (!nas)
00936         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
00937     if (newsize <= 1)
00938         return (NUMA *)ERROR_PTR("newsize must be > 1", procName, NULL);
00939     if ((ns = numaGetCount(nas)) == 0)
00940         return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
00941
00942     nd = (ns + newsize - 1) / newsize;
00943     if ((nad = numaCreate(nd)) == NULL)
00944         return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
00945     numaGetXParameters(nad, &start, &oldsize);
00946     numaSetXParameters(nad, start, oldsize * newsize);
00947
00948     for (i = 0; i < nd; i++) {  /* new bins */
00949         count = 0;
00950         index = i * newsize;
00951         for (j = 0; j < newsize; j++) {
00952             if (index < ns) {
00953                 numaGetIValue(nas, index, &val);
00954                 count += val;
00955                 index++;
00956             }
00957         }
00958         numaAddNumber(nad, count);
00959     }
00960
00961     return nad;
00962 }
00963
00964
00965 /*!
00966  *  numaNormalizeHistogram()
00967  *
00968  *      Input:  nas (input histogram)
00969  *              area (target sum of all numbers in dest histogram;
00970  *                    e.g., use area = 1.0 if this represents a
00971  *                    probability distribution)
00972  *      Return: nad (normalized histogram), or null on error
00973  */
00974 NUMA *
00975 numaNormalizeHistogram(NUMA      *nas,
00976                        l_float32  area)
00977 {
00978 l_int32    i, ns;
00979 l_float32  sum, factor, fval;
00980 NUMA      *nad;
00981
00982     PROCNAME("numaNormalizeHistogram");
00983
00984     if (!nas)
00985         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
00986     if (area <= 0.0)
00987         return (NUMA *)ERROR_PTR("area must be > 0.0", procName, NULL);
00988     if ((ns = numaGetCount(nas)) == 0)
00989         return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
00990
00991     numaGetSum(nas, &sum);
00992     factor = area / sum;
00993
00994     if ((nad = numaCreate(ns)) == NULL)
00995         return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
00996     numaCopyXParameters(nad, nas);
00997
00998     for (i = 0; i < ns; i++) {
00999         numaGetFValue(nas, i, &fval);
01000         fval *= factor;
01001         numaAddNumber(nad, fval);
01002     }
01003
01004     return nad;
01005 }
01006
01007
01008 /*!
01009  *  numaGetStatsUsingHistogram()
01010  *
01011  *      Input:  na (an arbitrary set of numbers; not ordered and not
01012  *                  a histogram)
01013  *              maxbins (the maximum number of bins to be allowed in
01014  *                       the histogram; use 0 for consecutive integer bins)
01015  *              &min (<optional return> min value of set)
01016  *              &max (<optional return> max value of set)
01017  *              &mean (<optional return> mean value of set)
01018  *              &variance (<optional return> variance)
01019  *              &median (<optional return> median value of set)
01020  *              rank (in [0.0 ... 1.0]; median has a rank 0.5; ignored
01021  *                    if &rval == NULL)
01022  *              &rval (<optional return> value in na corresponding to @rank)
01023  *              &histo (<optional return> Numa histogram; use NULL to prevent)
01024  *      Return: 0 if OK, 1 on error
01025  *
01026  *  Notes:
01027  *      (1) This is a simple interface for gathering statistics
01028  *          from a numa, where a histogram is used 'under the covers'
01029  *          to avoid sorting if a rank value is requested.  In that case,
01030  *          by using a histogram we are trading speed for accuracy, because
01031  *          the values in @na are quantized to the center of a set of bins.
01032  *      (2) If the median, other rank value, or histogram are not requested,
01033  *          the calculation is all performed on the input Numa.
01034  *      (3) The variance is the average of the square of the
01035  *          difference from the mean.  The median is the value in na
01036  *          with rank 0.5.
01037  *      (4) There are two situations where this gives rank results with
01038  *          accuracy comparable to computing stastics directly on the input
01039  *          data, without binning into a histogram:
01040  *           (a) the data is integers and the range of data is less than
01041  *               @maxbins, and
01042  *           (b) the data is floats and the range is small compared to
01043  *               @maxbins, so that the binsize is much less than 1.
01044  *      (5) If a histogram is used and the numbers in the Numa extend
01045  *          over a large range, you can limit the required storage by
01046  *          specifying the maximum number of bins in the histogram.
01047  *          Use @maxbins == 0 to force the bin size to be 1.
01048  *      (6) This optionally returns the median and one arbitrary rank value.
01049  *          If you need several rank values, return the histogram and use
01050  *               numaHistogramGetValFromRank(nah, rank, &rval)
01051  *          multiple times.
01052  */
01053 l_int32
01054 numaGetStatsUsingHistogram(NUMA       *na,
01055                            l_int32     maxbins,
01056                            l_float32  *pmin,
01057                            l_float32  *pmax,
01058                            l_float32  *pmean,
01059                            l_float32  *pvariance,
01060                            l_float32  *pmedian,
01061                            l_float32   rank,
01062                            l_float32  *prval,
01063                            NUMA      **phisto)
01064 {
01065 l_int32    i, n;
01066 l_float32  minval, maxval, fval, mean, sum;
01067 NUMA      *nah;
01068
01069     PROCNAME("numaGetStatsUsingHistogram");
01070
01071     if (pmin) *pmin = 0.0;
01072     if (pmax) *pmax = 0.0;
01073     if (pmean) *pmean = 0.0;
01074     if (pmedian) *pmedian = 0.0;
01075     if (pvariance) *pvariance = 0.0;
01076     if (!na)
01077         return ERROR_INT("na not defined", procName, 1);
01078     if ((n = numaGetCount(na)) == 0)
01079         return ERROR_INT("numa is empty", procName, 1);
01080
01081     numaGetMin(na, &minval, NULL);
01082     numaGetMax(na, &maxval, NULL);
01083     if (pmin) *pmin = minval;
01084     if (pmax) *pmax = maxval;
01085     if (pmean || pvariance) {
01086         sum = 0.0;
01087         for (i = 0; i < n; i++) {
01088             numaGetFValue(na, i, &fval);
01089             sum += fval;
01090         }
01091         mean = sum / (l_float32)n;
01092         if (pmean) *pmean = mean;
01093     }
01094     if (pvariance) {
01095         sum = 0.0;
01096         for (i = 0; i < n; i++) {
01097             numaGetFValue(na, i, &fval);
01098             sum += fval * fval;
01099         }
01100         *pvariance = sum / (l_float32)n - mean * mean;
01101     }
01102
01103     if (!pmedian && !prval && !phisto)
01104         return 0;
01105
01106     nah = numaMakeHistogramAuto(na, maxbins);
01107     if (pmedian)
01108         numaHistogramGetValFromRank(nah, 0.5, pmedian);
01109     if (prval)
01110         numaHistogramGetValFromRank(nah, rank, prval);
01111     if (phisto)
01112         *phisto = nah;
01113     else
01114         numaDestroy(&nah);
01115     return 0;
01116 }
01117
01118
01119 /*!
01120  *  numaGetHistogramStats()
01121  *
01122  *      Input:  nahisto (histogram: y(x(i)), i = 0 ... nbins - 1)
01123  *              startx (x value of first bin: x(0))
01124  *              deltax (x increment between bins; the bin size; x(1) - x(0))
01125  *              &xmean (<optional return> mean value of histogram)
01126  *              &xmedian (<optional return> median value of histogram)
01127  *              &xmode (<optional return> mode value of histogram:
01128  *                     xmode = x(imode), where y(xmode) >= y(x(i)) for
01129  *                     all i != imode)
01130  *              &xvariance (<optional return> variance of x)
01131  *      Return: 0 if OK, 1 on error
01132  *
01133  *  Notes:
01134  *      (1) If the histogram represents the relation y(x), the
01135  *          computed values that are returned are the x values.
01136  *          These are NOT the bucket indices i; they are related to the
01137  *          bucket indices by
01138  *                x(i) = startx + i * deltax
01139  */
01140 l_int32
01141 numaGetHistogramStats(NUMA       *nahisto,
01142                       l_float32   startx,
01143                       l_float32   deltax,
01144                       l_float32  *pxmean,
01145                       l_float32  *pxmedian,
01146                       l_float32  *pxmode,
01147                       l_float32  *pxvariance)
01148 {
01149     PROCNAME("numaGetHistogramStats");
01150
01151     if (pxmean) *pxmean = 0.0;
01152     if (pxmedian) *pxmedian = 0.0;
01153     if (pxmode) *pxmode = 0.0;
01154     if (pxvariance) *pxvariance = 0.0;
01155     if (!nahisto)
01156         return ERROR_INT("nahisto not defined", procName, 1);
01157
01158     return numaGetHistogramStatsOnInterval(nahisto, startx, deltax, 0, 0,
01159                                            pxmean, pxmedian, pxmode,
01160                                            pxvariance);
01161 }
01162
01163
01164 /*!
01165  *  numaGetHistogramStatsOnInterval()
01166  *
01167  *      Input:  nahisto (histogram: y(x(i)), i = 0 ... nbins - 1)
01168  *              startx (x value of first bin: x(0))
01169  *              deltax (x increment between bins; the bin size; x(1) - x(0))
01170  *              ifirst (first bin to use for collecting stats)
01171  *              ilast (last bin for collecting stats; use 0 to go to the end)
01172  *              &xmean (<optional return> mean value of histogram)
01173  *              &xmedian (<optional return> median value of histogram)
01174  *              &xmode (<optional return> mode value of histogram:
01175  *                     xmode = x(imode), where y(xmode) >= y(x(i)) for
01176  *                     all i != imode)
01177  *              &xvariance (<optional return> variance of x)
01178  *      Return: 0 if OK, 1 on error
01179  *
01180  *  Notes:
01181  *      (1) If the histogram represents the relation y(x), the
01182  *          computed values that are returned are the x values.
01183  *          These are NOT the bucket indices i; they are related to the
01184  *          bucket indices by
01185  *                x(i) = startx + i * deltax
01186  */
01187 l_int32
01188 numaGetHistogramStatsOnInterval(NUMA       *nahisto,
01189                                 l_float32   startx,
01190                                 l_float32   deltax,
01191                                 l_int32     ifirst,
01192                                 l_int32     ilast,
01193                                 l_float32  *pxmean,
01194                                 l_float32  *pxmedian,
01195                                 l_float32  *pxmode,
01196                                 l_float32  *pxvariance)
01197 {
01198 l_int32    i, n, imax;
01199 l_float32  sum, sumval, halfsum, moment, var, x, y, ymax;
01200
01201     PROCNAME("numaGetHistogramStats");
01202
01203     if (pxmean) *pxmean = 0.0;
01204     if (pxmedian) *pxmedian = 0.0;
01205     if (pxmode) *pxmode = 0.0;
01206     if (pxvariance) *pxvariance = 0.0;
01207     if (!nahisto)
01208         return ERROR_INT("nahisto not defined", procName, 1);
01209     if (!pxmean && !pxmedian && !pxmode && !pxvariance)
01210         return ERROR_INT("nothing to compute", procName, 1);
01211
01212     n = numaGetCount(nahisto);
01213     if (ilast <= 0) ilast = n - 1;
01214     if (ifirst < 0) ifirst = 0;
01215     if (ifirst > ilast || ifirst > n - 1)
01216         return ERROR_INT("ifirst is too large", procName, 1);
01217     for (sum = 0.0, moment = 0.0, var = 0.0, i = ifirst; i <= ilast ; i++) {
01218         x = startx + i * deltax;
01219         numaGetFValue(nahisto, i, &y);
01220         sum += y;
01221         moment += x * y;
01222         var += x * x * y;
01223     }
01224     if (sum == 0.0)
01225         return ERROR_INT("sum is 0", procName, 1);
01226
01227     if (pxmean)
01228         *pxmean = moment / sum;
01229     if (pxvariance)
01230         *pxvariance = var / sum - moment * moment / (sum * sum);
01231
01232     if (pxmedian) {
01233         halfsum = sum / 2.0;
01234         for (sumval = 0.0, i = ifirst; i <= ilast; i++) {
01235             numaGetFValue(nahisto, i, &y);
01236             sumval += y;
01237             if (sumval >= halfsum) {
01238                 *pxmedian = startx + i * deltax;
01239                 break;
01240             }
01241         }
01242     }
01243
01244     if (pxmode) {
01245         ymax = -1.0e10;
01246         for (i = ifirst; i <= ilast; i++) {
01247             numaGetFValue(nahisto, i, &y);
01248             if (y > ymax) {
01249                 ymax = y;
01250                 imax = i;
01251             }
01252         }
01253         *pxmode = startx + imax * deltax;
01254     }
01255
01256     return 0;
01257 }
01258
01259
01260 /*!
01261  *  numaMakeRankFromHistogram()
01262  *
01263  *      Input:  startx (xval corresponding to first element in nay)
01264  *              deltax (x increment between array elements in nay)
01265  *              nasy (input histogram, assumed equally spaced)
01266  *              npts (number of points to evaluate rank function)
01267  *              &nax (<optional return> array of x values in range)
01268  *              &nay (<return> rank array of specified npts)
01269  *      Return: 0 if OK, 1 on error
01270  */
01271 l_int32
01272 numaMakeRankFromHistogram(l_float32  startx,
01273                           l_float32  deltax,
01274                           NUMA      *nasy,
01275                           l_int32    npts,
01276                           NUMA     **pnax,
01277                           NUMA     **pnay)
01278 {
01279 l_int32    i, n;
01280 l_float32  sum, fval;
01281 NUMA      *nan, *nar;
01282
01283     PROCNAME("numaMakeRankFromHistogram");
01284
01285     if (pnax) *pnax = NULL;
01286     if (!pnay)
01287         return ERROR_INT("&nay not defined", procName, 1);
01288     *pnay = NULL;
01289     if (!nasy)
01290         return ERROR_INT("nasy not defined", procName, 1);
01291     if (!pnay)
01292         return ERROR_INT("&nay not defined", procName, 1);
01293     if ((n = numaGetCount(nasy)) == 0)
01294         return ERROR_INT("no bins in nas", procName, 1);
01295
01296         /* Normalize and generate the rank array corresponding to
01297          * the binned histogram. */
01298     nan = numaNormalizeHistogram(nasy, 1.0);
01299     nar = numaCreate(n + 1);  /* rank numa corresponding to nan */
01300     sum = 0.0;
01301     numaAddNumber(nar, sum);  /* first element is 0.0 */
01302     for (i = 0; i < n; i++) {
01303         numaGetFValue(nan, i, &fval);
01304         sum += fval;
01305         numaAddNumber(nar, sum);
01306     }
01307
01308         /* Compute rank array on full range with specified
01309          * number of points and correspondence to x-values. */
01310     numaInterpolateEqxInterval(startx, deltax, nar, L_LINEAR_INTERP,
01311                                startx, startx + n * deltax, npts,
01312                                pnax, pnay);
01313     numaDestroy(&nan);
01314     numaDestroy(&nar);
01315     return 0;
01316 }
01317
01318
01319 /*!
01320  *  numaHistogramGetRankFromVal()
01321  *
01322  *      Input:  na (histogram)
01323  *              rval (value of input sample for which we want the rank)
01324  *              &rank (<return> fraction of total samples below rval)
01325  *      Return: 0 if OK, 1 on error
01326  *
01327  *  Notes:
01328  *      (1) If we think of the histogram as a function y(x), normalized
01329  *          to 1, for a given input value of x, this computes the
01330  *          rank of x, which is the integral of y(x) from the start
01331  *          value of x to the input value.
01332  *      (2) This function only makes sense when applied to a Numa that
01333  *          is a histogram.  The values in the histogram can be ints and
01334  *          floats, and are computed as floats.  The rank is returned
01335  *          as a float between 0.0 and 1.0.
01336  *      (3) The numa parameters startx and binsize are used to
01337  *          compute x from the Numa index i.
01338  */
01339 l_int32
01340 numaHistogramGetRankFromVal(NUMA       *na,
01341                             l_float32   rval,
01342                             l_float32  *prank)
01343 {
01344 l_int32    i, ibinval, n;
01345 l_float32  startval, binsize, binval, maxval, fractval, total, sum, val;
01346
01347     PROCNAME("numaHistogramGetRankFromVal");
01348
01349     if (!prank)
01350         return ERROR_INT("prank not defined", procName, 1);
01351     *prank = 0.0;
01352     if (!na)
01353         return ERROR_INT("na not defined", procName, 1);
01354     numaGetXParameters(na, &startval, &binsize);
01355     n = numaGetCount(na);
01356     if (rval < startval)
01357         return 0;
01358     maxval = startval + n * binsize;
01359     if (rval > maxval) {
01360         *prank = 1.0;
01361         return 0;
01362     }
01363
01364     binval = (rval - startval) / binsize;
01365     ibinval = (l_int32)binval;
01366     if (ibinval >= n) {
01367         *prank = 1.0;
01368         return 0;
01369     }
01370     fractval = binval - (l_float32)ibinval;
01371
01372     sum = 0.0;
01373     for (i = 0; i < ibinval; i++) {
01374         numaGetFValue(na, i, &val);
01375         sum += val;
01376     }
01377     numaGetFValue(na, ibinval, &val);
01378     sum += fractval * val;
01379     numaGetSum(na, &total);
01380     *prank = sum / total;
01381
01382 /*    fprintf(stderr, "binval = %7.3f, rank = %7.3f\n", binval, *prank); */
01383
01384     return 0;
01385 }
01386
01387
01388 /*!
01389  *  numaHistogramGetValFromRank()
01390  *
01391  *      Input:  na (histogram)
01392  *              rank (fraction of total samples)
01393  *              &rval (<return> approx. to the bin value)
01394  *      Return: 0 if OK, 1 on error
01395  *
01396  *  Notes:
01397  *      (1) If we think of the histogram as a function y(x), this returns
01398  *          the value x such that the integral of y(x) from the start
01399  *          value to x gives the fraction 'rank' of the integral
01400  *          of y(x) over all bins.
01401  *      (2) This function only makes sense when applied to a Numa that
01402  *          is a histogram.  The values in the histogram can be ints and
01403  *          floats, and are computed as floats.  The val is returned
01404  *          as a float, even though the buckets are of integer width.
01405  *      (3) The numa parameters startx and binsize are used to
01406  *          compute x from the Numa index i.
01407  */
01408 l_int32
01409 numaHistogramGetValFromRank(NUMA       *na,
01410                             l_float32   rank,
01411                             l_float32  *prval)
01412 {
01413 l_int32    i, n;
01414 l_float32  startval, binsize, rankcount, total, sum, fract, val;
01415
01416     PROCNAME("numaHistogramGetValFromRank");
01417
01418     if (!prval)
01419         return ERROR_INT("prval not defined", procName, 1);
01420     *prval = 0.0;
01421     if (!na)
01422         return ERROR_INT("na not defined", procName, 1);
01423     if (rank < 0.0) {
01424         L_WARNING("rank < 0; setting to 0.0", procName);
01425         rank = 0.0;
01426     }
01427     if (rank > 1.0) {
01428         L_WARNING("rank > 1.0; setting to 1.0", procName);
01429         rank = 1.0;
01430     }
01431
01432     n = numaGetCount(na);
01433     numaGetXParameters(na, &startval, &binsize);
01434     numaGetSum(na, &total);
01435     rankcount = rank * total;  /* count that corresponds to rank */
01436     sum = 0.0;
01437     for (i = 0; i < n; i++) {
01438         numaGetFValue(na, i, &val);
01439         if (sum + val >= rankcount)
01440             break;
01441         sum += val;
01442     }
01443     if (val <= 0.0)  /* can == 0 if rank == 0.0 */
01444         fract = 0.0;
01445     else  /* sum + fract * val = rankcount */
01446         fract = (rankcount - sum) / val;
01447
01448     /* The use of the fraction of a bin allows a simple calculation
01449      * for the histogram value at the given rank. */
01450     *prval = startval + binsize * ((l_float32)i + fract);
01451
01452 /*    fprintf(stderr, "rank = %7.3f, val = %7.3f\n", rank, *prval); */
01453
01454     return 0;
01455 }
01456
01457
01458 /*!
01459  *  numaDiscretizeRankAndIntensity()
01460  *
01461  *      Input:  na (normalized histogram of probability density vs intensity)
01462  *              nbins (number of bins at which the rank is divided)
01463  *              &pnarbin (<optional return> rank bin value vs intensity)
01464  *              &pnam (<optional return> median intensity in a bin vs
01465  *                     rank bin value, with @nbins of discretized rank values)
01466  *              &pnar (<optional return> rank vs intensity; this is
01467  *                     a cumulative norm histogram)
01468  *              &pnabb (<optional return> intensity at the right bin boundary
01469  *                      vs rank bin)
01470  *      Return: 0 if OK, 1 on error
01471  *
01472  *  Notes:
01473  *      (1) We are inverting the rank(intensity) function to get
01474  *          the intensity(rank) function at @nbins equally spaced
01475  *          values of rank between 0.0 and 1.0.  We save integer values
01476  *          for the intensity.
01477  *      (2) We are using the word "intensity" to describe the type of
01478  *          array values, but any array of non-negative numbers will work.
01479  *      (3) The output arrays give the following mappings, where the
01480  *          input is a normalized histogram of array values:
01481  *             array values     -->  rank bin number  (narbin)
01482  *             rank bin number  -->  median array value in bin (nam)
01483  *             array values     -->  cumulative norm = rank  (nar)
01484  *             rank bin number  -->  array value at right bin edge (nabb)
01485  */
01486 l_int32
01487 numaDiscretizeRankAndIntensity(NUMA    *na,
01488                                l_int32  nbins,
01489                                NUMA   **pnarbin,
01490                                NUMA   **pnam,
01491                                NUMA   **pnar,
01492                                NUMA   **pnabb)
01493 {
01494 NUMA      *nar;  /* rank value as function of intensity */
01495 NUMA      *nam;  /* median intensity in the rank bins */
01496 NUMA      *nabb;  /* rank bin right boundaries (in intensity) */
01497 NUMA      *narbin;  /* binned rank value as a function of intensity */
01498 l_int32    i, j, npts, start, midfound, mcount, rightedge;
01499 l_float32  sum, midrank, endrank, val;
01500
01501     PROCNAME("numaDiscretizeRankAndIntensity");
01502
01503     if (!na)
01504         return ERROR_INT("na not defined", procName, 1);
01505     if (nbins < 2)
01506         return ERROR_INT("nbins must be > 1", procName, 1);
01507     if (!pnarbin && !pnam && !pnar && !pnabb)
01508         return ERROR_INT("no output requested", procName, 1);
01509
01510         /* Get cumulative normalized histogram (rank vs intensity value).
01511          * For a normalized histogram from an 8 bpp grayscale image
01512          * as input, we have 256 bins and 257 points in the
01513          * cumulative (rank) histogram. */
01514     npts = numaGetCount(na);
01515     nar = numaCreate(npts + 1);
01516     sum = 0.0;
01517     numaAddNumber(nar, sum);  /* left side of first bin */
01518     for (i = 0; i < npts; i++) {
01519         numaGetFValue(na, i, &val);
01520         sum += val;
01521         numaAddNumber(nar, sum);
01522     }
01523
01524     if ((nam = numaCreate(nbins)) == NULL)
01525         return ERROR_INT("nam not made", procName, 1);
01526     if ((narbin = numaCreate(npts)) == NULL)
01527         return ERROR_INT("narbin not made", procName, 1);
01528     if ((nabb = numaCreate(nbins)) == NULL)
01529         return ERROR_INT("nabb not made", procName, 1);
01530
01531         /* We find the intensity value at the right edge of each of
01532          * the rank bins.  We also find the median intensity in the bin,
01533          * where approximately half the samples are lower and half are
01534          * higher.  This can be considered as a simple approximation
01535          * for the average intensity in the bin. */
01536     start = 0;  /* index in nar */
01537     mcount = 0;  /* count of median values in rank bins; not to exceed nbins */
01538     for (i = 0; i < nbins; i++) {
01539         midrank = (l_float32)(i + 0.5) / (l_float32)(nbins);
01540         endrank = (l_float32)(i + 1.0) / (l_float32)(nbins);
01541         endrank = L_MAX(0.0, L_MIN(endrank - 0.001, 1.0));
01542         midfound = FALSE;
01543         for (j = start; j < npts; j++) {  /* scan up for each bin value */
01544             numaGetFValue(nar, j, &val);
01545                 /* Use (j == npts - 1) tests in case all weight is at top end */
01546             if ((!midfound && val >= midrank) ||
01547                 (mcount < nbins && j == npts - 1)) {
01548                 midfound = TRUE;
01549                 numaAddNumber(nam, j);
01550                 mcount++;
01551             }
01552             if ((val >= endrank) || (j == npts - 1)) {
01553                 numaAddNumber(nabb, j);
01554                 if (val == endrank)
01555                     start = j;
01556                 else
01557                     start = j - 1;
01558                 break;
01559             }
01560         }
01561     }
01562     numaSetValue(nabb, nbins - 1, npts - 1);  /* extend to max */
01563
01564         /* Error checking: did we get data in all bins? */
01565     if (mcount != nbins)
01566         L_WARNING_INT2("found data for %d bins; should be %d",
01567                        procName, mcount, nbins);
01568
01569         /* Generate LUT that maps from intensity to bin number */
01570     start = 0;
01571     for (i = 0; i < nbins; i++) {
01572         numaGetIValue(nabb, i, &rightedge);
01573         for (j = start; j < npts; j++) {
01574             if (j <= rightedge)
01575                 numaAddNumber(narbin, i);
01576             if (j > rightedge) {
01577                 start = j;
01578                 break;
01579             }
01580             if (j == npts - 1) {  /* we're done */
01581                 start = j + 1;
01582                 break;
01583             }
01584         }
01585     }
01586
01587     if (pnarbin)
01588         *pnarbin = narbin;
01589     else
01590         numaDestroy(&narbin);
01591     if (pnam)
01592         *pnam = nam;
01593     else
01594         numaDestroy(&nam);
01595     if (pnar)
01596         *pnar = nar;
01597     else
01598         numaDestroy(&nar);
01599     if (pnabb)
01600         *pnabb = nabb;
01601     else
01602         numaDestroy(&nabb);
01603     return 0;
01604 }
01605
01606
01607 /*!
01608  *  numaGetRankBinValues()
01609  *
01610  *      Input:  na (just an array of values)
01611  *              nbins (number of bins at which the rank is divided)
01612  *              &pnarbin (<optional return> rank bin value vs array value)
01613  *              &pnam (<optional return> median intensity in a bin vs
01614  *                     rank bin value, with @nbins of discretized rank values)
01615  *      Return: 0 if OK, 1 on error
01616  *
01617  *  Notes:
01618  *      (1) Simple interface for getting a binned rank representation
01619  *          of an input array of values.  This returns two mappings:
01620  *             array value     -->  rank bin number  (narbin)
01621  *             rank bin number -->  median array value in each rank bin (nam)
01622  */
01623 l_int32
01624 numaGetRankBinValues(NUMA    *na,
01625                      l_int32  nbins,
01626                      NUMA   **pnarbin,
01627                      NUMA   **pnam)
01628 {
01629 NUMA      *nah, *nan;  /* histo and normalized histo */
01630 l_int32    maxbins, discardval;
01631 l_float32  maxval, delx;
01632
01633     PROCNAME("numaGetRankBinValues");
01634
01635     if (pnarbin) *pnarbin = NULL;
01636     if (pnam) *pnam = NULL;
01637     if (!pnarbin && !pnam)
01638         return ERROR_INT("no output requested", procName, 1);
01639     if (!na)
01640         return ERROR_INT("na not defined", procName, 1);
01641     if (numaGetCount(na) == 0)
01642         return ERROR_INT("na is empty", procName, 1);
01643     if (nbins < 2)
01644         return ERROR_INT("nbins must be > 1", procName, 1);
01645
01646         /* Get normalized histogram  */
01647     numaGetMax(na, &maxval, NULL);
01648     maxbins = L_MIN(100002, (l_int32)maxval + 2);
01649     nah = numaMakeHistogram(na, maxbins, &discardval, NULL);
01650     nan = numaNormalizeHistogram(nah, 1.0);
01651
01652         /* Warn if there is a scale change.  This shouldn't happen
01653          * unless the max value is above 100000.  */
01654     numaGetXParameters(nan, NULL, &delx);
01655     if (delx > 1.0)
01656         L_WARNING_FLOAT("scale change: delx = %6.2f", procName, delx);
01657
01658         /* Rank bin the results */
01659     numaDiscretizeRankAndIntensity(nan, nbins, pnarbin, pnam, NULL, NULL);
01660     numaDestroy(&nah);
01661     numaDestroy(&nan);
01662     return 0;
01663 }
01664
01665
01666 /*----------------------------------------------------------------------*
01667  *                      Splitting a distribution                        *
01668  *----------------------------------------------------------------------*/
01669 /*!
01670  *  numaSplitDistribution()
01671  *
01672  *      Input:  na (histogram)
01673  *              scorefract (fraction of the max score, used to determine
01674  *                          the range over which the histogram min is searched)
01675  *              &splitindex (<optional return> index for splitting)
01676  *              &ave1 (<optional return> average of lower distribution)
01677  *              &ave2 (<optional return> average of upper distribution)
01678  *              &num1 (<optional return> population of lower distribution)
01679  *              &num2 (<optional return> population of upper distribution)
01680  *              &nascore (<optional return> for debugging; otherwise use null)
01681  *      Return: 0 if OK, 1 on error
01682  *
01683  *  Notes:
01684  *      (1) This function is intended to be used on a distribution of
01685  *          values that represent two sets, such as a histogram of
01686  *          pixel values for an image with a fg and bg, and the goal
01687  *          is to determine the averages of the two sets and the
01688  *          best splitting point.
01689  *      (2) The Otsu method finds a split point that divides the distribution
01690  *          into two parts by maximizing a score function that is the
01691  *          product of two terms:
01692  *            (a) the square of the difference of centroids, (ave1 - ave2)^2
01693  *            (b) fract1 * (1 - fract1)
01694  *          where fract1 is the fraction in the lower distribution.
01695  *      (3) This works well for images where the fg and bg are
01696  *          each relatively homogeneous and well-separated in color.
01697  *          However, if the actual fg and bg sets are very different
01698  *          in size, and the bg is highly varied, as can occur in some
01699  *          scanned document images, this will bias the split point
01700  *          into the larger "bump" (i.e., toward the point where the
01701  *          (b) term reaches its maximum of 0.25 at fract1 = 0.5.
01702  *          To avoid this, we define a range of values near the
01703  *          maximum of the score function, and choose the value within
01704  *          this range such that the histogram itself has a minimum value.
01705  *          The range is determined by scorefract: we include all abscissa
01706  *          values to the left and right of the value that maximizes the
01707  *          score, such that the score stays above (1 - scorefract) * maxscore.
01708  *          The intuition behind this modification is to try to find
01709  *          a split point that both has a high variance score and is
01710  *          at or near a minimum in the histogram, so that the histogram
01711  *          slope is small at the split point.
01712  *      (4) We normalize the score so that if the two distributions
01713  *          were of equal size and at opposite ends of the numa, the
01714  *          score would be 1.0.
01715  */
01716 l_int32
01717 numaSplitDistribution(NUMA       *na,
01718                       l_float32   scorefract,
01719                       l_int32    *psplitindex,
01720                       l_float32  *pave1,
01721                       l_float32  *pave2,
01722                       l_float32  *pnum1,
01723                       l_float32  *pnum2,
01724                       NUMA      **pnascore)
01725 {
01726 l_int32    i, n, bestsplit, minrange, maxrange, maxindex;
01727 l_float32  ave1, ave2, ave1prev, ave2prev;
01728 l_float32  num1, num2, num1prev, num2prev;
01729 l_float32  val, minval, sum, fract1;
01730 l_float32  norm, score, minscore, maxscore;
01731 NUMA      *nascore, *naave1, *naave2, *nanum1, *nanum2;
01732
01733     PROCNAME("numaSplitDistribution");
01734
01735     if (!na)
01736         return ERROR_INT("na not defined", procName, 1);
01737
01738     n = numaGetCount(na);
01739     if (n <= 1)
01740         return ERROR_INT("n = 1 in histogram", procName, 1);
01741     numaGetSum(na, &sum);
01742     if (sum <= 0.0)
01743         return ERROR_INT("sum <= 0.0", procName, 1);
01744     norm = 4.0 / ((n - 1) * (n - 1));
01745     ave1prev = 0.0;
01746     numaGetHistogramStats(na, 0.0, 1.0, &ave2prev, NULL, NULL, NULL);
01747     num1prev = 0.0;
01748     num2prev = sum;
01749     maxindex = n / 2;  /* initialize with something */
01750
01751         /* Split the histogram with [0 ... i] in the lower part
01752          * and [i+1 ... n-1] in upper part.  First, compute an otsu
01753          * score for each possible splitting.  */
01754     nascore = numaCreate(n);
01755     if (pave2) naave1 = numaCreate(n);
01756     if (pave2) naave2 = numaCreate(n);
01757     if (pnum1) nanum1 = numaCreate(n);
01758     if (pnum2) nanum2 = numaCreate(n);
01759     maxscore = 0.0;
01760     for (i = 0; i < n; i++) {
01761         numaGetFValue(na, i, &val);
01762         num1 = num1prev + val;
01763         if (num1 == 0)
01764             ave1 = ave1prev;
01765         else
01766             ave1 = (num1prev * ave1prev + i * val) / num1;
01767         num2 = num2prev - val;
01768         if (num2 == 0)
01769             ave2 = ave2prev;
01770         else
01771             ave2 = (num2prev * ave2prev - i * val) / num2;
01772         fract1 = num1 / sum;
01773         score = norm * (fract1 * (1 - fract1)) * (ave2 - ave1) * (ave2 - ave1);
01774         numaAddNumber(nascore, score);
01775         if (pave1) numaAddNumber(naave1, ave1);
01776         if (pave2) numaAddNumber(naave2, ave2);
01777         if (pnum1) numaAddNumber(nanum1, num1);
01778         if (pnum1) numaAddNumber(nanum2, num2);
01779         if (score > maxscore) {
01780             maxscore = score;
01781             maxindex = i;
01782         }
01783         num1prev = num1;
01784         num2prev = num2;
01785         ave1prev = ave1;
01786         ave2prev = ave2;
01787     }
01788
01789         /* Next, for all contiguous scores within a specified fraction
01790          * of the max, choose the split point as the value with the
01791          * minimum in the histogram. */
01792     minscore = (1. - scorefract) * maxscore;
01793     for (i = maxindex - 1; i >= 0; i--) {
01794         numaGetFValue(nascore, i, &val);
01795         if (val < minscore)
01796             break;
01797     }
01798     minrange = i + 1;
01799     for (i = maxindex + 1; i < n; i++) {
01800         numaGetFValue(nascore, i, &val);
01801         if (val < minscore)
01802             break;
01803     }
01804     maxrange = i - 1;
01805     numaGetFValue(na, minrange, &minval);
01806     bestsplit = minrange;
01807     for (i = minrange + 1; i <= maxrange; i++) {
01808         numaGetFValue(na, i, &val);
01809         if (val < minval) {
01810             minval = val;
01811             bestsplit = i;
01812         }
01813     }
01814
01815         /* Add one to the bestsplit value to get the threshold value,
01816          * because when we take a threshold, as in pixThresholdToBinary(),
01817          * we always choose the set with values below the threshold. */
01818     bestsplit = L_MIN(255, bestsplit + 1);
01819
01820     if (psplitindex) *psplitindex = bestsplit;
01821     if (pave1) numaGetFValue(naave1, bestsplit, pave1);
01822     if (pave2) numaGetFValue(naave2, bestsplit, pave2);
01823     if (pnum1) numaGetFValue(nanum1, bestsplit, pnum1);
01824     if (pnum2) numaGetFValue(nanum2, bestsplit, pnum2);
01825
01826     if (pnascore) {  /* debug mode */
01827         fprintf(stderr, "minrange = %d, maxrange = %d\n", minrange, maxrange);
01828         fprintf(stderr, "minval = %10.0f\n", minval);
01829         gplotSimple1(nascore, GPLOT_PNG, "/tmp/nascore",
01830                      "Score for split distribution");
01831         *pnascore = nascore;
01832     }
01833     else
01834         numaDestroy(&nascore);
01835
01836     if (pave1) numaDestroy(&naave1);
01837     if (pave2) numaDestroy(&naave2);
01838     if (pnum1) numaDestroy(&nanum1);
01839     if (pnum2) numaDestroy(&nanum2);
01840     return 0;
01841 }
01842
01843
01844 /*----------------------------------------------------------------------*
01845  *                             Extrema finding                          *
01846  *----------------------------------------------------------------------*/
01847 /*!
01848  *  numaFindPeaks()
01849  *
01850  *      Input:  source na
01851  *              max number of peaks to be found
01852  *              fract1  (min fraction of peak value)
01853  *              fract2  (min slope)
01854  *      Return: peak na, or null on error.
01855  *
01856  * Notes:
01857  *     (1) The returned na consists of sets of four numbers representing
01858  *         the peak, in the following order:
01859  *            left edge; peak center; right edge; normalized peak area
01860  */
01861 NUMA *
01862 numaFindPeaks(NUMA      *nas,
01863               l_int32    nmax,
01864               l_float32  fract1,
01865               l_float32  fract2)
01866 {
01867 l_int32    i, k, n, maxloc, lloc, rloc;
01868 l_float32  fmaxval, sum, total, newtotal, val, lastval;
01869 l_float32  peakfract;
01870 NUMA      *na, *napeak;
01871
01872     PROCNAME("numaFindPeaks");
01873
01874     if (!nas)
01875         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
01876     n = numaGetCount(nas);
01877     numaGetSum(nas, &total);
01878
01879         /* We munge this copy */
01880     if ((na = numaCopy(nas)) == NULL)
01881         return (NUMA *)ERROR_PTR("na not made", procName, NULL);
01882     if ((napeak = numaCreate(4 * nmax)) == NULL)
01883         return (NUMA *)ERROR_PTR("napeak not made", procName, NULL);
01884
01885     for (k = 0; k < nmax; k++) {
01886         numaGetSum(na, &newtotal);
01887         if (newtotal == 0.0)   /* sanity check */
01888             break;
01889         numaGetMax(na, &fmaxval, &maxloc);
01890         sum = fmaxval;
01891         lastval = fmaxval;
01892         lloc = 0;
01893         for (i = maxloc - 1; i >= 0; --i) {
01894             numaGetFValue(na, i, &val);
01895             if (val == 0.0) {
01896                 lloc = i + 1;
01897                 break;
01898             }
01899             if (val > fract1 * fmaxval) {
01900                 sum += val;
01901                 lastval = val;
01902                 continue;
01903             }
01904             if (lastval - val > fract2 * lastval) {
01905                 sum += val;
01906                 lastval = val;
01907                 continue;
01908             }
01909             lloc = i;
01910             break;
01911         }
01912         lastval = fmaxval;
01913         rloc = n - 1;
01914         for (i = maxloc + 1; i < n; ++i) {
01915             numaGetFValue(na, i, &val);
01916             if (val == 0.0) {
01917                 rloc = i - 1;
01918                 break;
01919             }
01920             if (val > fract1 * fmaxval) {
01921                 sum += val;
01922                 lastval = val;
01923                 continue;
01924             }
01925             if (lastval - val > fract2 * lastval) {
01926                 sum += val;
01927                 lastval = val;
01928                 continue;
01929             }
01930             rloc = i;
01931             break;
01932         }
01933         peakfract = sum / total;
01934         numaAddNumber(napeak, lloc);
01935         numaAddNumber(napeak, maxloc);
01936         numaAddNumber(napeak, rloc);
01937         numaAddNumber(napeak, peakfract);
01938
01939         for (i = lloc; i <= rloc; i++)
01940             numaSetValue(na, i, 0.0);
01941     }
01942
01943     numaDestroy(&na);
01944     return napeak;
01945 }
01946
01947
01948 /*!
01949  *  numaFindExtrema()
01950  *
01951  *      Input:  nas (input values)
01952  *              delta (relative amount to resolve peaks and valleys)
01953  *      Return: nad (locations of extrema), or null on error
01954  *
01955  *  Notes:
01956  *      (1) This returns a sequence of extrema (peaks and valleys).
01957  *      (2) The algorithm is analogous to that for determining
01958  *          mountain peaks.  Suppose we have a local peak, with
01959  *          bumps on the side.  Under what conditions can we consider
01960  *          those 'bumps' to be actual peaks?  The answer: if the
01961  *          bump is separated from the peak by a saddle that is at
01962  *          least 500 feet below the bump.
01963  *      (3) Operationally, suppose we are looking for a peak.
01964  *          We are keeping the largest value we've seen since the
01965  *          last valley, and are looking for a value that is delta
01966  *          BELOW our current peak.  When we find such a value,
01967  *          we label the peak, use the current value to label the
01968  *          valley, and then do the same operation in reverse (looking
01969  *          for a valley).
01970  */
01971 NUMA *
01972 numaFindExtrema(NUMA      *nas,
01973                 l_float32  delta)
01974 {
01975 l_int32    i, n, found, loc, direction;
01976 l_float32  startval, val, maxval, minval;
01977 NUMA      *nad;
01978
01979     PROCNAME("numaFindExtrema");
01980
01981     if (!nas)
01982         return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
01983
01984     n = numaGetCount(nas);
01985     nad = numaCreate(0);
01986
01987         /* We don't know if we'll find a peak or valley first,
01988          * but use the first element of nas as the reference point.
01989          * Break when we deviate by 'delta' from the first point. */
01990     numaGetFValue(nas, 0, &startval);
01991     found = FALSE;
01992     for (i = 1; i < n; i++) {
01993         numaGetFValue(nas, i, &val);
01994         if (L_ABS(val - startval) >= delta) {
01995             found = TRUE;
01996             break;
01997         }
01998     }
01999
02000     if (!found)
02001         return nad;  /* it's empty */
02002
02003         /* Are we looking for a peak or a valley? */
02004     if (val > startval) {  /* peak */
02005         direction = 1;
02006         maxval = val;
02007     }
02008     else {
02009         direction = -1;
02010         minval = val;
02011     }
02012     loc = i;
02013
02014         /* Sweep through the rest of the array, recording alternating
02015          * peak/valley extrema. */
02016     for (i = i + 1; i < n; i++) {
02017         numaGetFValue(nas, i, &val);
02018         if (direction == 1 && val > maxval ) {  /* new local max */
02019             maxval = val;
02020             loc = i;
02021         }
02022         else if (direction == -1 && val < minval ) {  /* new local min */
02023             minval = val;
02024             loc = i;
02025         }
02026         else if (direction == 1 && (maxval - val >= delta)) {
02027             numaAddNumber(nad, loc);  /* save the current max location */
02028             direction = -1;  /* reverse: start looking for a min */
02029             minval = val;
02030             loc = i;  /* current min location */
02031         }
02032         else if (direction == -1 && (val - minval >= delta)) {
02033             numaAddNumber(nad, loc);  /* save the current min location */
02034             direction = 1;  /* reverse: start looking for a max */
02035             maxval = val;
02036             loc = i;  /* current max location */
02037         }
02038     }
02039
02040         /* Save the final extremum */
02041 /*    numaAddNumber(nad, loc); */
02042     return nad;
02043 }
02044
02045
02046 /*!
02047  *  numaCountReversals()
02048  *
02049  *      Input:  nas (input values)
02050  *              minreversal (relative amount to resolve peaks and valleys)
02051  *              &nr (<optional return> number of reversals
02052  *              &nrpl (<optional return> reversal density: reversals/length)
02053  *      Return: 0 if OK, 1 on error
02054  *
02055  *  Notes:
02056  *      (1) The input numa is can be generated from pixExtractAlongLine().
02057  *          If so, the x parameters can be used to find the reversal
02058  *          frequency along a line.
02059  */
02060 l_int32
02061 numaCountReversals(NUMA       *nas,
02062                    l_float32   minreversal,
02063                    l_int32    *pnr,
02064                    l_float32  *pnrpl)
02065 {
02066 l_int32    n, nr;
02067 l_float32  delx, len;
02068 NUMA      *nat;
02069
02070     PROCNAME("numaCountReversals");
02071
02072     if (!pnr && !pnrpl)
02073         return ERROR_INT("neither &nr nor &nrpl are defined", procName, 1);
02074     if (pnr) *pnr = 0;
02075     if (pnrpl) *pnrpl = 0.0;
02076     if (!nas)
02077         return ERROR_INT("nas not defined", procName, 1);
02078
02079     n = numaGetCount(nas);
02080     nat = numaFindExtrema(nas, minreversal);
02081     nr = numaGetCount(nat);
02082     if (pnr) *pnr = nr;
02083     if (pnrpl) {
02084         numaGetXParameters(nas, NULL, &delx);
02085         len = delx * n;
02086         *pnrpl = (l_float32)nr / len;
02087     }
02088
02089     numaDestroy(&nat);
02090     return 0;
02091 }
02092
02093
02094 /*----------------------------------------------------------------------*
02095  *                Threshold crossings and frequency analysis            *
02096  *----------------------------------------------------------------------*/
02097 /*!
02098  *  numaSelectCrossingThreshold()
02099  *
02100  *      Input:  nax (<optional> numa of abscissa values; can be NULL)
02101  *              nay (signal)
02102  *              estthresh (estimated pixel threshold for crossing: e.g., for
02103  *                         images, white <--> black; typ. ~120)
02104  *              &bestthresh (<return> robust estimate of threshold to use)
02105  *      Return: 0 if OK, 1 on error
02106  *
02107  *  Note:
02108  *     (1) When a valid threshold is used, the number of crossings is
02109  *         a maximum, because none are missed.  If no threshold intersects
02110  *         all the crossings, the crossings must be determined with
02111  *         numaCrossingsByPeaks().
02112  *     (2) @estthresh is an input estimate of the threshold that should
02113  *         be used.  We compute the crossings with 41 thresholds
02114  *         (20 below and 20 above).  There is a range in which the
02115  *         number of crossings is a maximum.  Return a threshold
02116  *         in the center of this stable plateau of crossings.
02117  *         This can then be used with numaCrossingsByThreshold()
02118  *         to get a good estimate of crossing locations.
02119  */
02120 l_int32
02121 numaSelectCrossingThreshold(NUMA       *nax,
02122                             NUMA       *nay,
02123                             l_float32   estthresh,
02124                             l_float32  *pbestthresh)
02125 {
02126 l_int32    i, inrun, istart, iend, maxstart, maxend, runlen, maxrunlen;
02127 l_int32    val, maxval, nmax, count;
02128 l_float32  thresh, fmaxval, fmodeval;
02129 NUMA      *nat, *nac;
02130
02131     PROCNAME("numaSelectCrossingThreshold");
02132
02133     if (!nay)
02134         return ERROR_INT("nay not defined", procName, 1);
02135
02136         /* Compute the number of crossings for different thresholds */
02137     nat = numaCreate(41);
02138     for (i = 0; i < 41; i++) {
02139         thresh = estthresh - 80.0 + 4.0 * i;
02140         nac = numaCrossingsByThreshold(nax, nay, thresh);
02141         numaAddNumber(nat, numaGetCount(nac));
02142         numaDestroy(&nac);
02143     }
02144
02145         /* Find the center of the plateau of max crossings, which
02146          * extends from thresh[istart] to thresh[iend]. */
02147     numaGetMax(nat, &fmaxval, NULL);
02148     maxval = (l_int32)fmaxval;
02149     nmax = 0;
02150     for (i = 0; i < 41; i++) {
02151         numaGetIValue(nat, i, &val);
02152         if (val == maxval)
02153             nmax++;
02154     }
02155     if (nmax < 3) {  /* likely accidental max; try the mode */
02156         numaGetMode(nat, &fmodeval, &count);
02157         if (count > nmax && fmodeval > 0.5 * fmaxval)
02158             maxval = (l_int32)fmodeval;  /* use the mode */
02159     }
02160
02161     inrun = FALSE;
02162     iend = 40;
02163     maxrunlen = 0;
02164     for (i = 0; i < 41; i++) {
02165         numaGetIValue(nat, i, &val);
02166         if (val == maxval) {
02167             if (!inrun) {
02168                 istart = i;
02169                 inrun = TRUE;
02170             }
02171             continue;
02172         }
02173         if (inrun && (val != maxval)) {
02174             iend = i - 1;
02175             runlen = iend - istart + 1;
02176             inrun = FALSE;
02177             if (runlen > maxrunlen) {
02178                 maxstart = istart;
02179                 maxend = iend;
02180                 maxrunlen = runlen;
02181             }
02182         }
02183     }
02184     if (inrun) {
02185         runlen = i - istart;
02186         if (runlen > maxrunlen) {
02187             maxstart = istart;
02188             maxend = i - 1;
02189             maxrunlen = runlen;
02190         }
02191     }
02192
02193 #if 0
02194     foundfirst = FALSE;
02195     iend = 40;
02196     for (i = 0; i < 41; i++) {
02197         numaGetIValue(nat, i, &val);
02198         if (val == maxval) {
02199             if (!foundfirst) {
02200                 istart = i;
02201                 foundfirst = TRUE;
02202             }
02203         }
02204         if ((val != maxval) && foundfirst) {
02205             iend = i - 1;
02206             break;
02207         }
02208     }
02209     nmax = iend - istart + 1;
02210 #endif
02211
02212     *pbestthresh = estthresh - 80.0 + 2.0 * (l_float32)(maxstart + maxend);
02213
02214 #if  DEBUG_CROSSINGS
02215     fprintf(stderr, "\nCrossings attain a maximum at %d thresholds, between:\n"
02216                     "  thresh[%d] = %5.1f and thresh[%d] = %5.1f\n",
02217                     nmax, maxstart, estthresh - 80.0 + 4.0 * maxstart,
02218                     maxend, estthresh - 80.0 + 4.0 * maxend);
02219     fprintf(stderr, "The best choice: %5.1f\n", *pbestthresh);
02220     fprintf(stderr, "Number of crossings at the 41 thresholds:");
02221     numaWriteStream(stderr, nat);
02222 #endif  /* DEBUG_CROSSINGS */
02223
02224     numaDestroy(&nat);
02225     return 0;
02226 }
02227
02228
02229 /*!
02230  *  numaCrossingsByThreshold()
02231  *
02232  *      Input:  nax (<optional> numa of abscissa values; can be NULL)
02233  *              nay (numa of ordinate values, corresponding to nax)
02234  *              thresh (threshold value for nay)
02235  *      Return: nad (abscissa pts at threshold), or null on error
02236  *
02237  *  Notes:
02238  *      (1) If nax == NULL, we use startx and delx from nay to compute
02239  *          the crossing values in nad.
02240  */
02241 NUMA *
02242 numaCrossingsByThreshold(NUMA      *nax,
02243                          NUMA      *nay,
02244                          l_float32  thresh)
02245 {
02246 l_int32    i, n;
02247 l_float32  startx, delx;
02248 l_float32  xval1, xval2, yval1, yval2, delta1, delta2, crossval, fract;
02249 NUMA      *nad;
02250
02251     PROCNAME("numaCrossingsByThreshold");
02252
02253     if (!nay)
02254         return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
02255     n = numaGetCount(nay);
02256
02257     if (nax && (numaGetCount(nax) != n))
02258         return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
02259
02260     nad = numaCreate(0);
02261     numaGetFValue(nay, 0, &yval1);
02262     numaGetXParameters(nay, &startx, &delx);
02263     if (nax)
02264         numaGetFValue(nax, 0, &xval1);
02265     else
02266         xval1 = startx;
02267     for (i = 1; i < n; i++) {
02268         numaGetFValue(nay, i, &yval2);
02269         if (nax)
02270             numaGetFValue(nax, i, &xval2);
02271         else
02272             xval2 = startx + i * delx;
02273         delta1 = yval1 - thresh;
02274         delta2 = yval2 - thresh;
02275         if (delta1 == 0.0)
02276             numaAddNumber(nad, xval1);
02277         else if (delta2 == 0.0)
02278             numaAddNumber(nad, xval2);
02279         else if (delta1 * delta2 < 0.0) {  /* crossing */
02280             fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
02281             crossval = xval1 + fract * (xval2 - xval1);
02282             numaAddNumber(nad, crossval);
02283         }
02284         xval1 = xval2;
02285         yval1 = yval2;
02286     }
02287
02288     return nad;
02289 }
02290
02291
02292 /*!
02293  *  numaCrossingsByPeaks()
02294  *
02295  *      Input:  nax (<optional> numa of abscissa values)
02296  *              nay (numa of ordinate values, corresponding to nax)
02297  *              delta (parameter used to identify when a new peak can be found)
02298  *      Return: nad (abscissa pts at threshold), or null on error
02299  *
02300  *  Notes:
02301  *      (1) If nax == NULL, we use startx and delx from nay to compute
02302  *          the crossing values in nad.
02303  */
02304 NUMA *
02305 numaCrossingsByPeaks(NUMA      *nax,
02306                      NUMA      *nay,
02307                      l_float32  delta)
02308 {
02309 l_int32    i, j, n, np, previndex, curindex;
02310 l_float32  startx, delx;
02311 l_float32  xval1, xval2, yval1, yval2, delta1, delta2;
02312 l_float32  prevval, curval, thresh, crossval, fract;
02313 NUMA      *nap, *nad;
02314
02315     PROCNAME("numaCrossingsByPeaks");
02316
02317     if (!nax)
02318         return (NUMA *)ERROR_PTR("nax not defined", procName, NULL);
02319     if (!nay)
02320         return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
02321
02322     n = numaGetCount(nax);
02323     if (numaGetCount(nay) != n)
02324         return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
02325
02326         /* Find the extrema.  Also add last point in nay to get
02327          * the last transition (from the last peak to the end).
02328          * The number of crossings is 1 more than the number of extrema. */
02329     nap = numaFindExtrema(nay, delta);
02330     numaAddNumber(nap, n - 1);
02331     np = numaGetCount(nap);
02332     L_INFO_INT("Number of crossings: %d", procName, np);
02333
02334         /* Do all computation in index units of nax */
02335     nad = numaCreate(np);  /* output crossings, in nax units */
02336     previndex = 0;  /* prime the search with 1st point */
02337     numaGetFValue(nay, 0, &prevval);  /* prime the search with 1st point */
02338     numaGetXParameters(nay, &startx, &delx);
02339     for (i = 0; i < np; i++) {
02340         numaGetIValue(nap, i, &curindex);
02341         numaGetFValue(nay, curindex, &curval);
02342         thresh = (prevval + curval) / 2.0;
02343 /*        fprintf(stderr, "thresh[%d] = %7.3f\n", i, thresh); */
02344         if (nax)
02345             numaGetFValue(nax, previndex, &xval1);
02346         else
02347             xval1 = startx + previndex * delx;
02348         numaGetFValue(nay, previndex, &yval1);
02349         for (j = previndex + 1; j <= curindex; j++) {
02350             if (nax)
02351                 numaGetFValue(nax, j, &xval2);
02352             else
02353                 xval2 = startx + j * delx;
02354             numaGetFValue(nay, j, &yval2);
02355             delta1 = yval1 - thresh;
02356             delta2 = yval2 - thresh;
02357             if (delta1 == 0.0) {
02358                 numaAddNumber(nad, xval1);
02359                 break;
02360             }
02361             else if (delta2 == 0.0) {
02362                 numaAddNumber(nad, xval2);
02363                 break;
02364             }
02365             else if (delta1 * delta2 < 0.0) {  /* crossing */
02366                 fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
02367                 crossval = xval1 + fract * (xval2 - xval1);
02368                 numaAddNumber(nad, crossval);
02369                 break;
02370             }
02371             xval1 = xval2;
02372             yval1 = yval2;
02373         }
02374         previndex = curindex;
02375         prevval = curval;
02376     }
02377
02378     numaDestroy(&nap);
02379     return nad;
02380 }
02381
02382
02383 /*!
02384  *  numaEvalBestHaarParameters()
02385  *
02386  *      Input:  nas (numa of non-negative signal values)
02387  *              relweight (relative weight of (-1 comb) / (+1 comb)
02388  *                         contributions to the 'convolution'.  In effect,
02389  *                         the convolution kernel is a comb consisting of
02390  *                         alternating +1 and -weight.)
02391  *              nwidth (number of widths to consider)
02392  *              nshift (number of shifts to consider for each width)
02393  *              minwidth (smallest width to consider)
02394  *              maxwidth (largest width to consider)
02395  *              &bestwidth (<return> width giving largest score)
02396  *              &bestshift (<return> shift giving largest score)
02397  *              &bestscore (<optional return> convolution with
02398  *                          "Haar"-like comb)
02399  *      Return: 0 if OK, 1 on error
02400  *
02401  *  Notes:
02402  *      (1) This does a linear sweep of widths, evaluating at @nshift
02403  *          shifts for each width, computing the score from a convolution
02404  *          with a long comb, and finding the (width, shift) pair that
02405  *          gives the maximum score.  The best width is the "half-wavelength"
02406  *          of the signal.
02407  *      (2) The convolving function is a comb of alternating values
02408  *          +1 and -1 * relweight, separated by the width and phased by
02409  *          the shift.  This is similar to a Haar transform, except
02410  *          there the convolution is performed with a square wave.
02411  *      (3) The function is useful for finding the line spacing
02412  *          and strength of line signal from pixel sum projections.
02413  *      (4) The score is normalized to the size of nas divided by
02414  *          the number of half-widths.  For image applications, the input is
02415  *          typically an array of pixel projections, so one should
02416  *          normalize by dividing the score by the image width in the
02417  *          pixel projection direction.
02418  */
02419 l_int32
02420 numaEvalBestHaarParameters(NUMA       *nas,
02421                            l_float32   relweight,
02422                            l_int32     nwidth,
02423                            l_int32     nshift,
02424                            l_float32   minwidth,
02425                            l_float32   maxwidth,
02426                            l_float32  *pbestwidth,
02427                            l_float32  *pbestshift,
02428                            l_float32  *pbestscore)
02429 {
02430 l_int32    i, j;
02431 l_float32  delwidth, delshift, width, shift, score;
02432 l_float32  bestwidth, bestshift, bestscore;
02433
02434     PROCNAME("numaEvalBestHaarParameters");
02435
02436     if (!nas)
02437         return ERROR_INT("nas not defined", procName, 1);
02438     if (!pbestwidth || !pbestshift)
02439         return ERROR_INT("&bestwidth and &bestshift not defined", procName, 1);
02440
02441     bestscore = 0.0;
02442     delwidth = (maxwidth - minwidth) / (nwidth - 1.0);
02443     for (i = 0; i < nwidth; i++) {
02444         width = minwidth + delwidth * i;
02445         delshift = width / (l_float32)(nshift);
02446         for (j = 0; j < nshift; j++) {
02447             shift = j * delshift;
02448             numaEvalHaarSum(nas, width, shift, relweight, &score);
02449             if (score > bestscore) {
02450                 bestscore = score;
02451                 bestwidth = width;
02452                 bestshift = shift;
02453 #if  DEBUG_FREQUENCY
02454                 fprintf(stderr, "width = %7.3f, shift = %7.3f, score = %7.3f\n",
02455                         width, shift, score);
02456 #endif  /* DEBUG_FREQUENCY */
02457             }
02458         }
02459     }
02460
02461     *pbestwidth = bestwidth;
02462     *pbestshift = bestshift;
02463     if (pbestscore)
02464         *pbestscore = bestscore;
02465     return 0;
02466 }
02467
02468
02469 /*!
02470  *  numaEvalHaarSum()
02471  *
02472  *      Input:  nas (numa of non-negative signal values)
02473  *              width (distance between +1 and -1 in convolution comb)
02474  *              shift (phase of the comb: location of first +1)
02475  *              relweight (relative weight of (-1 comb) / (+1 comb)
02476  *                         contributions to the 'convolution'.  In effect,
02477  *                         the convolution kernel is a comb consisting of
02478  *                         alternating +1 and -weight.)
02479  *              &score (<return> convolution with "Haar"-like comb)
02480  *      Return: 0 if OK, 1 on error
02481  *
02482  *  Notes:
02483  *      (1) This does a convolution with a comb of alternating values
02484  *          +1 and -relweight, separated by the width and phased by the shift.
02485  *          This is similar to a Haar transform, except that for Haar,
02486  *            (1) the convolution kernel is symmetric about 0, so the
02487  *                relweight is 1.0, and
02488  *            (2) the convolution is performed with a square wave.
02489  *      (2) The score is normalized to the size of nas divided by
02490  *          twice the "width".  For image applications, the input is
02491  *          typically an array of pixel projections, so one should
02492  *          normalize by dividing the score by the image width in the
02493  *          pixel projection direction.
02494  *      (3) To get a Haar-like result, use relweight = 1.0.  For detecting
02495  *          signals where you expect every other sample to be close to
02496  *          zero, as with barcodes or filtered text lines, you can
02497  *          use relweight > 1.0.
02498  */
02499 l_int32
02500 numaEvalHaarSum(NUMA       *nas,
02501                 l_float32   width,
02502                 l_float32   shift,
02503                 l_float32   relweight,
02504                 l_float32  *pscore)
02505 {
02506 l_int32    i, n, nsamp, index;
02507 l_float32  score, weight, val;
02508
02509     PROCNAME("numaEvalHaarSum");
02510
02511     if (!pscore)
02512         return ERROR_INT("&score not defined", procName, 1);
02513     *pscore = 0.0;
02514     if (!nas)
02515         return ERROR_INT("nas not defined", procName, 1);
02516     if ((n = numaGetCount(nas)) < 2 * width)
02517         return ERROR_INT("nas size too small", procName, 1);
02518
02519     score = 0.0;
02520     nsamp = (l_int32)((n - shift) / width);
02521     for (i = 0; i < nsamp; i++) {
02522         index = (l_int32)(shift + i * width);
02523         weight = (i % 2) ? 1.0 : -1.0 * relweight;
02524         numaGetFValue(nas, index, &val);
02525         score += weight * val;
02526     }
02527
02528     *pscore = 2.0 * width * score / (l_float32)n;
02529     return 0;
02530 }
02531