Leptonica/scalelow_8c_source.html

00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015
00016
00017 /*
00018  *  scalelow.c
00019  *
00020  *         Color (interpolated) scaling: general case
00021  *                  void       scaleColorLILow()
00022  *
00023  *         Grayscale (interpolated) scaling: general case
00024  *                  void       scaleGrayLILow()
00025  *
00026  *         Color (interpolated) scaling: 2x upscaling
00027  *                  void       scaleColor2xLILow()
00028  *                  void       scaleColor2xLILineLow()
00029  *
00030  *         Grayscale (interpolated) scaling: 2x upscaling
00031  *                  void       scaleGray2xLILow()
00032  *                  void       scaleGray2xLILineLow()
00033  *
00034  *         Grayscale (interpolated) scaling: 4x upscaling
00035  *                  void       scaleGray4xLILow()
00036  *                  void       scaleGray4xLILineLow()
00037  *
00038  *         Grayscale and color scaling by closest pixel sampling
00039  *                  l_int32    scaleBySamplingLow()
00040  *
00041  *         Color and grayscale downsampling with (antialias) lowpass filter
00042  *                  l_int32    scaleSmoothLow()
00043  *                  void       scaleRGBToGray2Low()
00044  *
00045  *         Color and grayscale downsampling with (antialias) area mapping
00046  *                  l_int32    scaleColorAreaMapLow()
00047  *                  l_int32    scaleGrayAreaMapLow()
00048  *                  l_int32    scaleAreaMapLow2()
00049  *
00050  *         Binary scaling by closest pixel sampling
00051  *                  l_int32    scaleBinaryLow()
00052  *
00053  *         Scale-to-gray 2x
00054  *                  void       scaleToGray2Low()
00055  *                  l_uint32  *makeSumTabSG2()
00056  *                  l_uint8   *makeValTabSG2()
00057  *
00058  *         Scale-to-gray 3x
00059  *                  void       scaleToGray3Low()
00060  *                  l_uint32  *makeSumTabSG3()
00061  *                  l_uint8   *makeValTabSG3()
00062  *
00063  *         Scale-to-gray 4x
00064  *                  void       scaleToGray4Low()
00065  *                  l_uint32  *makeSumTabSG4()
00066  *                  l_uint8   *makeValTabSG4()
00067  *
00068  *         Scale-to-gray 6x
00069  *                  void       scaleToGray6Low()
00070  *                  l_uint8   *makeValTabSG6()
00071  *
00072  *         Scale-to-gray 8x
00073  *                  void       scaleToGray8Low()
00074  *                  l_uint8   *makeValTabSG8()
00075  *
00076  *         Scale-to-gray 16x
00077  *                  void       scaleToGray16Low()
00078  *
00079  *         Grayscale mipmap
00080  *                  l_int32    scaleMipmapLow()
00081  *
00082  */
00083
00084 #include <stdio.h>
00085 #include <stdlib.h>
00086 #include <string.h>
00087 #include "allheaders.h"
00088
00089 #ifndef  NO_CONSOLE_IO
00090 #define  DEBUG_OVERFLOW   0
00091 #define  DEBUG_UNROLLING  0
00092 #endif  /* ~NO_CONSOLE_IO */
00093
00094
00095 /*------------------------------------------------------------------*
00096  *            General linear interpolated color scaling             *
00097  *------------------------------------------------------------------*/
00098 /*!
00099  *  scaleColorLILow()
00100  *
00101  *  We choose to divide each pixel into 16 x 16 sub-pixels.
00102  *  Linear interpolation is equivalent to finding the
00103  *  fractional area (i.e., number of sub-pixels divided
00104  *  by 256) associated with each of the four nearest src pixels,
00105  *  and weighting each pixel value by this fractional area.
00106  *
00107  *  P3 speed is about 7 x 10^6 dst pixels/sec/GHz
00108  */
00109 void
00110 scaleColorLILow(l_uint32  *datad,
00111                l_int32    wd,
00112                l_int32    hd,
00113                l_int32    wpld,
00114                l_uint32  *datas,
00115                l_int32    ws,
00116                l_int32    hs,
00117                l_int32    wpls)
00118 {
00119 l_int32    i, j, wm2, hm2;
00120 l_int32    xpm, ypm;  /* location in src image, to 1/16 of a pixel */
00121 l_int32    xp, yp, xf, yf;  /* src pixel and pixel fraction coordinates */
00122 l_int32    v00r, v01r, v10r, v11r, v00g, v01g, v10g, v11g;
00123 l_int32    v00b, v01b, v10b, v11b, area00, area01, area10, area11;
00124 l_uint32   pixels1, pixels2, pixels3, pixels4, pixel;
00125 l_uint32  *lines, *lined;
00126 l_float32  scx, scy;
00127
00128         /* (scx, scy) are scaling factors that are applied to the
00129          * dest coords to get the corresponding src coords.
00130          * We need them because we iterate over dest pixels
00131          * and must find the corresponding set of src pixels. */
00132     scx = 16. * (l_float32)ws / (l_float32)wd;
00133     scy = 16. * (l_float32)hs / (l_float32)hd;
00134
00135     wm2 = ws - 2;
00136     hm2 = hs - 2;
00137
00138         /* Iterate over the destination pixels */
00139     for (i = 0; i < hd; i++) {
00140         ypm = (l_int32)(scy * (l_float32)i);
00141         yp = ypm >> 4;
00142         yf = ypm & 0x0f;
00143         lined = datad + i * wpld;
00144         lines = datas + yp * wpls;
00145         for (j = 0; j < wd; j++) {
00146             xpm = (l_int32)(scx * (l_float32)j);
00147             xp = xpm >> 4;
00148             xf = xpm & 0x0f;
00149
00150                 /* Do bilinear interpolation.  This is a simple
00151                  * generalization of the calculation in scaleGrayLILow().
00152                  * Without this, we could simply subsample:
00153                  *     *(lined + j) = *(lines + xp);
00154                  * which is faster but gives lousy results!  */
00155             pixels1 = *(lines + xp);
00156
00157             if (xp > wm2 || yp > hm2) {
00158                 if (yp > hm2 && xp <= wm2) {  /* pixels near bottom */
00159                     pixels2 = *(lines + xp + 1);
00160                     pixels3 = pixels1;
00161                     pixels4 = pixels2;
00162                 }
00163                 else if (xp > wm2 && yp <= hm2) {  /* pixels near right side */
00164                     pixels2 = pixels1;
00165                     pixels3 = *(lines + wpls + xp);
00166                     pixels4 = pixels3;
00167                 }
00168                 else {  /* pixels at LR corner */
00169                     pixels4 = pixels3 = pixels2 = pixels1;
00170                 }
00171             }
00172             else {
00173                 pixels2 = *(lines + xp + 1);
00174                 pixels3 = *(lines + wpls + xp);
00175                 pixels4 = *(lines + wpls + xp + 1);
00176             }
00177
00178             area00 = (16 - xf) * (16 - yf);
00179             area10 = xf * (16 - yf);
00180             area01 = (16 - xf) * yf;
00181             area11 = xf * yf;
00182             v00r = area00 * ((pixels1 >> L_RED_SHIFT) & 0xff);
00183             v00g = area00 * ((pixels1 >> L_GREEN_SHIFT) & 0xff);
00184             v00b = area00 * ((pixels1 >> L_BLUE_SHIFT) & 0xff);
00185             v10r = area10 * ((pixels2 >> L_RED_SHIFT) & 0xff);
00186             v10g = area10 * ((pixels2 >> L_GREEN_SHIFT) & 0xff);
00187             v10b = area10 * ((pixels2 >> L_BLUE_SHIFT) & 0xff);
00188             v01r = area01 * ((pixels3 >> L_RED_SHIFT) & 0xff);
00189             v01g = area01 * ((pixels3 >> L_GREEN_SHIFT) & 0xff);
00190             v01b = area01 * ((pixels3 >> L_BLUE_SHIFT) & 0xff);
00191             v11r = area11 * ((pixels4 >> L_RED_SHIFT) & 0xff);
00192             v11g = area11 * ((pixels4 >> L_GREEN_SHIFT) & 0xff);
00193             v11b = area11 * ((pixels4 >> L_BLUE_SHIFT) & 0xff);
00194             pixel = (((v00r + v10r + v01r + v11r + 128) << 16) & 0xff000000) |
00195                     (((v00g + v10g + v01g + v11g + 128) << 8) & 0x00ff0000) |
00196                     ((v00b + v10b + v01b + v11b + 128) & 0x0000ff00);
00197             *(lined + j) = pixel;
00198         }
00199     }
00200
00201     return;
00202 }
00203
00204
00205 /*------------------------------------------------------------------*
00206  *            General linear interpolated gray scaling              *
00207  *------------------------------------------------------------------*/
00208 /*!
00209  *  scaleGrayLILow()
00210  *
00211  *  We choose to divide each pixel into 16 x 16 sub-pixels.
00212  *  Linear interpolation is equivalent to finding the
00213  *  fractional area (i.e., number of sub-pixels divided
00214  *  by 256) associated with each of the four nearest src pixels,
00215  *  and weighting each pixel value by this fractional area.
00216  */
00217 void
00218 scaleGrayLILow(l_uint32  *datad,
00219                l_int32    wd,
00220                l_int32    hd,
00221                l_int32    wpld,
00222                l_uint32  *datas,
00223                l_int32    ws,
00224                l_int32    hs,
00225                l_int32    wpls)
00226 {
00227 l_int32    i, j, wm2, hm2;
00228 l_int32    xpm, ypm;  /* location in src image, to 1/16 of a pixel */
00229 l_int32    xp, yp, xf, yf;  /* src pixel and pixel fraction coordinates */
00230 l_int32    v00, v01, v10, v11, v00_val, v01_val, v10_val, v11_val;
00231 l_uint8    val;
00232 l_uint32  *lines, *lined;
00233 l_float32  scx, scy;
00234
00235         /* (scx, scy) are scaling factors that are applied to the
00236          * dest coords to get the corresponding src coords.
00237          * We need them because we iterate over dest pixels
00238          * and must find the corresponding set of src pixels. */
00239     scx = 16. * (l_float32)ws / (l_float32)wd;
00240     scy = 16. * (l_float32)hs / (l_float32)hd;
00241
00242     wm2 = ws - 2;
00243     hm2 = hs - 2;
00244
00245         /* Iterate over the destination pixels */
00246     for (i = 0; i < hd; i++) {
00247         ypm = (l_int32)(scy * (l_float32)i);
00248         yp = ypm >> 4;
00249         yf = ypm & 0x0f;
00250         lined = datad + i * wpld;
00251         lines = datas + yp * wpls;
00252         for (j = 0; j < wd; j++) {
00253             xpm = (l_int32)(scx * (l_float32)j);
00254             xp = xpm >> 4;
00255             xf = xpm & 0x0f;
00256
00257                 /* Do bilinear interpolation.  Without this, we could
00258                  * simply subsample:
00259                  *   SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xp));
00260                  * which is faster but gives lousy results!  */
00261             v00_val = GET_DATA_BYTE(lines, xp);
00262             if (xp > wm2 || yp > hm2) {
00263                 if (yp > hm2 && xp <= wm2) {  /* pixels near bottom */
00264                     v01_val = v00_val;
00265                     v10_val = GET_DATA_BYTE(lines, xp + 1);
00266                     v11_val = v10_val;
00267                 }
00268                 else if (xp > wm2 && yp <= hm2) {  /* pixels near right side */
00269                     v01_val = GET_DATA_BYTE(lines + wpls, xp);
00270                     v10_val = v00_val;
00271                     v11_val = v01_val;
00272                 }
00273                 else {  /* pixels at LR corner */
00274                     v10_val = v01_val = v11_val = v00_val;
00275                 }
00276             }
00277             else {
00278                 v10_val = GET_DATA_BYTE(lines, xp + 1);
00279                 v01_val = GET_DATA_BYTE(lines + wpls, xp);
00280                 v11_val = GET_DATA_BYTE(lines + wpls, xp + 1);
00281             }
00282
00283             v00 = (16 - xf) * (16 - yf) * v00_val;
00284             v10 = xf * (16 - yf) * v10_val;
00285             v01 = (16 - xf) * yf * v01_val;
00286             v11 = xf * yf * v11_val;
00287
00288             val = (l_uint8)((v00 + v01 + v10 + v11 + 128) / 256);
00289             SET_DATA_BYTE(lined, j, val);
00290         }
00291     }
00292
00293     return;
00294 }
00295
00296
00297 /*------------------------------------------------------------------*
00298  *                2x linear interpolated color scaling              *
00299  *------------------------------------------------------------------*/
00300 /*!
00301  *  scaleColor2xLILow()
00302  *
00303  *  This is a special case of 2x expansion by linear
00304  *  interpolation.  Each src pixel contains 4 dest pixels.
00305  *  The 4 dest pixels in src pixel 1 are numbered at
00306  *  their UL corners.  The 4 dest pixels in src pixel 1
00307  *  are related to that src pixel and its 3 neighboring
00308  *  src pixels as follows:
00309  *
00310  *             1-----2-----|-----|-----|
00311  *             |     |     |     |     |
00312  *             |     |     |     |     |
00313  *  src 1 -->  3-----4-----|     |     |  <-- src 2
00314  *             |     |     |     |     |
00315  *             |     |     |     |     |
00316  *             |-----|-----|-----|-----|
00317  *             |     |     |     |     |
00318  *             |     |     |     |     |
00319  *  src 3 -->  |     |     |     |     |  <-- src 4
00320  *             |     |     |     |     |
00321  *             |     |     |     |     |
00322  *             |-----|-----|-----|-----|
00323  *
00324  *           dest      src
00325  *           ----      ---
00326  *           dp1    =  sp1
00327  *           dp2    =  (sp1 + sp2) / 2
00328  *           dp3    =  (sp1 + sp3) / 2
00329  *           dp4    =  (sp1 + sp2 + sp3 + sp4) / 4
00330  *
00331  *  We iterate over the src pixels, and unroll the calculation
00332  *  for each set of 4 dest pixels corresponding to that src
00333  *  pixel, caching pixels for the next src pixel whenever possible.
00334  *  The method is exactly analogous to the one we use for
00335  *  scaleGray2xLILow() and its line version.
00336  *
00337  *  P3 speed is about 5 x 10^7 dst pixels/sec/GHz
00338  */
00339 void
00340 scaleColor2xLILow(l_uint32  *datad,
00341                   l_int32    wpld,
00342                   l_uint32  *datas,
00343                   l_int32    ws,
00344                   l_int32    hs,
00345                   l_int32    wpls)
00346 {
00347 l_int32    i, hsm;
00348 l_uint32  *lines, *lined;
00349
00350     hsm = hs - 1;
00351
00352         /* We're taking 2 src and 2 dest lines at a time,
00353          * and for each src line, we're computing 2 dest lines.
00354          * Call these 2 dest lines:  destline1 and destline2.
00355          * The first src line is used for destline 1.
00356          * On all but the last src line, both src lines are
00357          * used in the linear interpolation for destline2.
00358          * On the last src line, both destline1 and destline2
00359          * are computed using only that src line (because there
00360          * isn't a lower src line). */
00361
00362         /* iterate over all but the last src line */
00363     for (i = 0; i < hsm; i++) {
00364         lines = datas + i * wpls;
00365         lined = datad + 2 * i * wpld;
00366         scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 0);
00367     }
00368
00369         /* last src line */
00370     lines = datas + hsm * wpls;
00371     lined = datad + 2 * hsm * wpld;
00372     scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 1);
00373
00374     return;
00375 }
00376
00377
00378 /*!
00379  *  scaleColor2xLILineLow()
00380  *
00381  *      Input:  lined   (ptr to top destline, to be made from current src line)
00382  *              wpld
00383  *              lines   (ptr to current src line)
00384  *              ws
00385  *              wpls
00386  *              lastlineflag  (1 if last src line; 0 otherwise)
00387  *      Return: void
00388  *
00389  *  *** Warning: implicit assumption about RGB component ordering ***
00390  */
00391 void
00392 scaleColor2xLILineLow(l_uint32  *lined,
00393                       l_int32    wpld,
00394                       l_uint32  *lines,
00395                       l_int32    ws,
00396                       l_int32    wpls,
00397                       l_int32    lastlineflag)
00398 {
00399 l_int32    j, jd, wsm;
00400 l_int32    rval1, rval2, rval3, rval4, gval1, gval2, gval3, gval4;
00401 l_int32    bval1, bval2, bval3, bval4;
00402 l_uint32   pixels1, pixels2, pixels3, pixels4, pixel;
00403 l_uint32  *linesp, *linedp;
00404
00405     wsm = ws - 1;
00406
00407     if (lastlineflag == 0) {
00408         linesp = lines + wpls;
00409         linedp = lined + wpld;
00410         pixels1 = *lines;
00411         pixels3 = *linesp;
00412
00413             /* initialize with v(2) and v(4) */
00414         rval2 = pixels1 >> 24;
00415         gval2 = (pixels1 >> 16) & 0xff;
00416         bval2 = (pixels1 >> 8) & 0xff;
00417         rval4 = pixels3 >> 24;
00418         gval4 = (pixels3 >> 16) & 0xff;
00419         bval4 = (pixels3 >> 8) & 0xff;
00420
00421         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
00422                 /* shift in previous src values */
00423             rval1 = rval2;
00424             gval1 = gval2;
00425             bval1 = bval2;
00426             rval3 = rval4;
00427             gval3 = gval4;
00428             bval3 = bval4;
00429                 /* get new src values */
00430             pixels2 = *(lines + j + 1);
00431             pixels4 = *(linesp + j + 1);
00432             rval2 = pixels2 >> 24;
00433             gval2 = (pixels2 >> 16) & 0xff;
00434             bval2 = (pixels2 >> 8) & 0xff;
00435             rval4 = pixels4 >> 24;
00436             gval4 = (pixels4 >> 16) & 0xff;
00437             bval4 = (pixels4 >> 8) & 0xff;
00438                 /* save dest values */
00439             pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
00440             *(lined + jd) = pixel;                               /* pix 1 */
00441             pixel = ((((rval1 + rval2) << 23) & 0xff000000) |
00442                      (((gval1 + gval2) << 15) & 0x00ff0000) |
00443                      (((bval1 + bval2) << 7) & 0x0000ff00));
00444             *(lined + jd + 1) = pixel;                           /* pix 2 */
00445             pixel = ((((rval1 + rval3) << 23) & 0xff000000) |
00446                      (((gval1 + gval3) << 15) & 0x00ff0000) |
00447                      (((bval1 + bval3) << 7) & 0x0000ff00));
00448             *(linedp + jd) = pixel;                              /* pix 3 */
00449             pixel = ((((rval1 + rval2 + rval3 + rval4) << 22) & 0xff000000) |
00450                      (((gval1 + gval2 + gval3 + gval4) << 14) & 0x00ff0000) |
00451                      (((bval1 + bval2 + bval3 + bval4) << 6) & 0x0000ff00));
00452             *(linedp + jd + 1) = pixel;                          /* pix 4 */
00453         }
00454             /* last src pixel on line */
00455         rval1 = rval2;
00456         gval1 = gval2;
00457         bval1 = bval2;
00458         rval3 = rval4;
00459         gval3 = gval4;
00460         bval3 = bval4;
00461         pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
00462         *(lined + 2 * wsm) = pixel;                        /* pix 1 */
00463         *(lined + 2 * wsm + 1) = pixel;                    /* pix 2 */
00464         pixel = ((((rval1 + rval3) << 23) & 0xff000000) |
00465                  (((gval1 + gval3) << 15) & 0x00ff0000) |
00466                  (((bval1 + bval3) << 7) & 0x0000ff00));
00467         *(linedp + 2 * wsm) = pixel;                       /* pix 3 */
00468         *(linedp + 2 * wsm + 1) = pixel;                   /* pix 4 */
00469     }
00470     else {   /* last row of src pixels: lastlineflag == 1 */
00471         linedp = lined + wpld;
00472         pixels2 = *lines;
00473         rval2 = pixels2 >> 24;
00474         gval2 = (pixels2 >> 16) & 0xff;
00475         bval2 = (pixels2 >> 8) & 0xff;
00476         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
00477             rval1 = rval2;
00478             gval1 = gval2;
00479             bval1 = bval2;
00480             pixels2 = *(lines + j + 1);
00481             rval2 = pixels2 >> 24;
00482             gval2 = (pixels2 >> 16) & 0xff;
00483             bval2 = (pixels2 >> 8) & 0xff;
00484             pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
00485             *(lined + jd) = pixel;                            /* pix 1 */
00486             *(linedp + jd) = pixel;                           /* pix 2 */
00487             pixel = ((((rval1 + rval2) << 23) & 0xff000000) |
00488                      (((gval1 + gval2) << 15) & 0x00ff0000) |
00489                      (((bval1 + bval2) << 7) & 0x0000ff00));
00490             *(lined + jd + 1) = pixel;                        /* pix 3 */
00491             *(linedp + jd + 1) = pixel;                       /* pix 4 */
00492         }
00493         rval1 = rval2;
00494         gval1 = gval2;
00495         bval1 = bval2;
00496         pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
00497         *(lined + 2 * wsm) = pixel;                           /* pix 1 */
00498         *(lined + 2 * wsm + 1) = pixel;                       /* pix 2 */
00499         *(linedp + 2 * wsm) = pixel;                          /* pix 3 */
00500         *(linedp + 2 * wsm + 1) = pixel;                      /* pix 4 */
00501     }
00502
00503     return;
00504 }
00505
00506
00507 /*------------------------------------------------------------------*
00508  *                2x linear interpolated gray scaling               *
00509  *------------------------------------------------------------------*/
00510 /*!
00511  *  scaleGray2xLILow()
00512  *
00513  *  This is a special case of 2x expansion by linear
00514  *  interpolation.  Each src pixel contains 4 dest pixels.
00515  *  The 4 dest pixels in src pixel 1 are numbered at
00516  *  their UL corners.  The 4 dest pixels in src pixel 1
00517  *  are related to that src pixel and its 3 neighboring
00518  *  src pixels as follows:
00519  *
00520  *             1-----2-----|-----|-----|
00521  *             |     |     |     |     |
00522  *             |     |     |     |     |
00523  *  src 1 -->  3-----4-----|     |     |  <-- src 2
00524  *             |     |     |     |     |
00525  *             |     |     |     |     |
00526  *             |-----|-----|-----|-----|
00527  *             |     |     |     |     |
00528  *             |     |     |     |     |
00529  *  src 3 -->  |     |     |     |     |  <-- src 4
00530  *             |     |     |     |     |
00531  *             |     |     |     |     |
00532  *             |-----|-----|-----|-----|
00533  *
00534  *           dest      src
00535  *           ----      ---
00536  *           dp1    =  sp1
00537  *           dp2    =  (sp1 + sp2) / 2
00538  *           dp3    =  (sp1 + sp3) / 2
00539  *           dp4    =  (sp1 + sp2 + sp3 + sp4) / 4
00540  *
00541  *  We iterate over the src pixels, and unroll the calculation
00542  *  for each set of 4 dest pixels corresponding to that src
00543  *  pixel, caching pixels for the next src pixel whenever possible.
00544  */
00545 void
00546 scaleGray2xLILow(l_uint32  *datad,
00547                  l_int32    wpld,
00548                  l_uint32  *datas,
00549                  l_int32    ws,
00550                  l_int32    hs,
00551                  l_int32    wpls)
00552 {
00553 l_int32    i, hsm;
00554 l_uint32  *lines, *lined;
00555
00556     hsm = hs - 1;
00557
00558         /* We're taking 2 src and 2 dest lines at a time,
00559          * and for each src line, we're computing 2 dest lines.
00560          * Call these 2 dest lines:  destline1 and destline2.
00561          * The first src line is used for destline 1.
00562          * On all but the last src line, both src lines are
00563          * used in the linear interpolation for destline2.
00564          * On the last src line, both destline1 and destline2
00565          * are computed using only that src line (because there
00566          * isn't a lower src line). */
00567
00568         /* iterate over all but the last src line */
00569     for (i = 0; i < hsm; i++) {
00570         lines = datas + i * wpls;
00571         lined = datad + 2 * i * wpld;
00572         scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 0);
00573     }
00574
00575         /* last src line */
00576     lines = datas + hsm * wpls;
00577     lined = datad + 2 * hsm * wpld;
00578     scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 1);
00579
00580     return;
00581 }
00582
00583
00584 /*!
00585  *  scaleGray2xLILineLow()
00586  *
00587  *      Input:  lined   (ptr to top destline, to be made from current src line)
00588  *              wpld
00589  *              lines   (ptr to current src line)
00590  *              ws
00591  *              wpls
00592  *              lastlineflag  (1 if last src line; 0 otherwise)
00593  *      Return: void
00594  */
00595 void
00596 scaleGray2xLILineLow(l_uint32  *lined,
00597                      l_int32    wpld,
00598                      l_uint32  *lines,
00599                      l_int32    ws,
00600                      l_int32    wpls,
00601                      l_int32    lastlineflag)
00602 {
00603 l_int32    j, jd, wsm, w;
00604 l_int32    sval1, sval2, sval3, sval4;
00605 l_uint32  *linesp, *linedp;
00606 l_uint32   words, wordsp, wordd, worddp;
00607
00608     wsm = ws - 1;
00609
00610     if (lastlineflag == 0) {
00611         linesp = lines + wpls;
00612         linedp = lined + wpld;
00613
00614             /* Unroll the loop 4x and work on full words */
00615         words = lines[0];
00616         wordsp = linesp[0];
00617         sval2 = (words >> 24) & 0xff;
00618         sval4 = (wordsp >> 24) & 0xff;
00619         for (j = 0, jd = 0, w = 0; j + 3 < wsm; j += 4, jd += 8, w++) {
00620                 /* At the top of the loop,
00621                  * words == lines[w], wordsp == linesp[w]
00622                  * and the top bytes of those have been loaded into
00623                  * sval2 and sval4. */
00624             sval1 = sval2;
00625             sval2 = (words >> 16) & 0xff;
00626             sval3 = sval4;
00627             sval4 = (wordsp >> 16) & 0xff;
00628             wordd = (sval1 << 24) | (((sval1 + sval2) >> 1) << 16);
00629             worddp = (((sval1 + sval3) >> 1) << 24) |
00630                 (((sval1 + sval2 + sval3 + sval4) >> 2) << 16);
00631
00632             sval1 = sval2;
00633             sval2 = (words >> 8) & 0xff;
00634             sval3 = sval4;
00635             sval4 = (wordsp >> 8) & 0xff;
00636             wordd |= (sval1 << 8) | ((sval1 + sval2) >> 1);
00637             worddp |= (((sval1 + sval3) >> 1) << 8) |
00638                 ((sval1 + sval2 + sval3 + sval4) >> 2);
00639             lined[w * 2] = wordd;
00640             linedp[w * 2] = worddp;
00641
00642             sval1 = sval2;
00643             sval2 = words & 0xff;
00644             sval3 = sval4;
00645             sval4 = wordsp & 0xff;
00646             wordd = (sval1 << 24) |                              /* pix 1 */
00647                 (((sval1 + sval2) >> 1) << 16);                  /* pix 2 */
00648             worddp = (((sval1 + sval3) >> 1) << 24) |            /* pix 3 */
00649                 (((sval1 + sval2 + sval3 + sval4) >> 2) << 16);  /* pix 4 */
00650
00651                 /* Load the next word as we need its first byte */
00652             words = lines[w + 1];
00653             wordsp = linesp[w + 1];
00654             sval1 = sval2;
00655             sval2 = (words >> 24) & 0xff;
00656             sval3 = sval4;
00657             sval4 = (wordsp >> 24) & 0xff;
00658             wordd |= (sval1 << 8) |                              /* pix 1 */
00659                 ((sval1 + sval2) >> 1);                          /* pix 2 */
00660             worddp |= (((sval1 + sval3) >> 1) << 8) |            /* pix 3 */
00661                 ((sval1 + sval2 + sval3 + sval4) >> 2);          /* pix 4 */
00662             lined[w * 2 + 1] = wordd;
00663             linedp[w * 2 + 1] = worddp;
00664         }
00665
00666             /* Finish up the last word */
00667         for (; j < wsm; j++, jd += 2) {
00668             sval1 = sval2;
00669             sval3 = sval4;
00670             sval2 = GET_DATA_BYTE(lines, j + 1);
00671             sval4 = GET_DATA_BYTE(linesp, j + 1);
00672             SET_DATA_BYTE(lined, jd, sval1);                     /* pix 1 */
00673             SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2);   /* pix 2 */
00674             SET_DATA_BYTE(linedp, jd, (sval1 + sval3) / 2);      /* pix 3 */
00675             SET_DATA_BYTE(linedp, jd + 1,
00676                           (sval1 + sval2 + sval3 + sval4) / 4);  /* pix 4 */
00677         }
00678         sval1 = sval2;
00679         sval3 = sval4;
00680         SET_DATA_BYTE(lined, 2 * wsm, sval1);                     /* pix 1 */
00681         SET_DATA_BYTE(lined, 2 * wsm + 1, sval1);                 /* pix 2 */
00682         SET_DATA_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2);      /* pix 3 */
00683         SET_DATA_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2);  /* pix 4 */
00684
00685 #if DEBUG_UNROLLING
00686 #define CHECK_BYTE(a, b, c) if (GET_DATA_BYTE(a, b) != c) {\
00687      fprintf(stderr, "Error: mismatch at %d, %d vs %d\n", \
00688              j, GET_DATA_BYTE(a, b), c); }
00689
00690         sval2 = GET_DATA_BYTE(lines, 0);
00691         sval4 = GET_DATA_BYTE(linesp, 0);
00692         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
00693             sval1 = sval2;
00694             sval3 = sval4;
00695             sval2 = GET_DATA_BYTE(lines, j + 1);
00696             sval4 = GET_DATA_BYTE(linesp, j + 1);
00697             CHECK_BYTE(lined, jd, sval1);                     /* pix 1 */
00698             CHECK_BYTE(lined, jd + 1, (sval1 + sval2) / 2);   /* pix 2 */
00699             CHECK_BYTE(linedp, jd, (sval1 + sval3) / 2);      /* pix 3 */
00700             CHECK_BYTE(linedp, jd + 1,
00701                           (sval1 + sval2 + sval3 + sval4) / 4);  /* pix 4 */
00702         }
00703         sval1 = sval2;
00704         sval3 = sval4;
00705         CHECK_BYTE(lined, 2 * wsm, sval1);                     /* pix 1 */
00706         CHECK_BYTE(lined, 2 * wsm + 1, sval1);                 /* pix 2 */
00707         CHECK_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2);      /* pix 3 */
00708         CHECK_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2);  /* pix 4 */
00709 #undef CHECK_BYTE
00710 #endif
00711     }
00712     else {   /* last row of src pixels: lastlineflag == 1 */
00713         linedp = lined + wpld;
00714         sval2 = GET_DATA_BYTE(lines, 0);
00715         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
00716             sval1 = sval2;
00717             sval2 = GET_DATA_BYTE(lines, j + 1);
00718             SET_DATA_BYTE(lined, jd, sval1);                       /* pix 1 */
00719             SET_DATA_BYTE(linedp, jd, sval1);                      /* pix 3 */
00720             SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2);     /* pix 2 */
00721             SET_DATA_BYTE(linedp, jd + 1, (sval1 + sval2) / 2);    /* pix 4 */
00722         }
00723         sval1 = sval2;
00724         SET_DATA_BYTE(lined, 2 * wsm, sval1);                     /* pix 1 */
00725         SET_DATA_BYTE(lined, 2 * wsm + 1, sval1);                 /* pix 2 */
00726         SET_DATA_BYTE(linedp, 2 * wsm, sval1);                    /* pix 3 */
00727         SET_DATA_BYTE(linedp, 2 * wsm + 1, sval1);                /* pix 4 */
00728     }
00729
00730     return;
00731 }
00732
00733
00734 /*------------------------------------------------------------------*
00735  *               4x linear interpolated gray scaling                *
00736  *------------------------------------------------------------------*/
00737 /*!
00738  *  scaleGray4xLILow()
00739  *
00740  *  This is a special case of 4x expansion by linear
00741  *  interpolation.  Each src pixel contains 16 dest pixels.
00742  *  The 16 dest pixels in src pixel 1 are numbered at
00743  *  their UL corners.  The 16 dest pixels in src pixel 1
00744  *  are related to that src pixel and its 3 neighboring
00745  *  src pixels as follows:
00746  *
00747  *             1---2---3---4---|---|---|---|---|
00748  *             |   |   |   |   |   |   |   |   |
00749  *             5---6---7---8---|---|---|---|---|
00750  *             |   |   |   |   |   |   |   |   |
00751  *  src 1 -->  9---a---b---c---|---|---|---|---|  <-- src 2
00752  *             |   |   |   |   |   |   |   |   |
00753  *             d---e---f---g---|---|---|---|---|
00754  *             |   |   |   |   |   |   |   |   |
00755  *             |===|===|===|===|===|===|===|===|
00756  *             |   |   |   |   |   |   |   |   |
00757  *             |---|---|---|---|---|---|---|---|
00758  *             |   |   |   |   |   |   |   |   |
00759  *  src 3 -->  |---|---|---|---|---|---|---|---|  <-- src 4
00760  *             |   |   |   |   |   |   |   |   |
00761  *             |---|---|---|---|---|---|---|---|
00762  *             |   |   |   |   |   |   |   |   |
00763  *             |---|---|---|---|---|---|---|---|
00764  *
00765  *           dest      src
00766  *           ----      ---
00767  *           dp1    =  sp1
00768  *           dp2    =  (3 * sp1 + sp2) / 4
00769  *           dp3    =  (sp1 + sp2) / 2
00770  *           dp4    =  (sp1 + 3 * sp2) / 4
00771  *           dp5    =  (3 * sp1 + sp3) / 4
00772  *           dp6    =  (9 * sp1 + 3 * sp2 + 3 * sp3 + sp4) / 16
00773  *           dp7    =  (3 * sp1 + 3 * sp2 + sp3 + sp4) / 8
00774  *           dp8    =  (3 * sp1 + 9 * sp2 + 1 * sp3 + 3 * sp4) / 16
00775  *           dp9    =  (sp1 + sp3) / 2
00776  *           dp10   =  (3 * sp1 + sp2 + 3 * sp3 + sp4) / 8
00777  *           dp11   =  (sp1 + sp2 + sp3 + sp4) / 4
00778  *           dp12   =  (sp1 + 3 * sp2 + sp3 + 3 * sp4) / 8
00779  *           dp13   =  (sp1 + 3 * sp3) / 4
00780  *           dp14   =  (3 * sp1 + sp2 + 9 * sp3 + 3 * sp4) / 16
00781  *           dp15   =  (sp1 + sp2 + 3 * sp3 + 3 * sp4) / 8
00782  *           dp16   =  (sp1 + 3 * sp2 + 3 * sp3 + 9 * sp4) / 16
00783  *
00784  *  We iterate over the src pixels, and unroll the calculation
00785  *  for each set of 16 dest pixels corresponding to that src
00786  *  pixel, caching pixels for the next src pixel whenever possible.
00787  */
00788 void
00789 scaleGray4xLILow(l_uint32  *datad,
00790                  l_int32    wpld,
00791                  l_uint32  *datas,
00792                  l_int32    ws,
00793                  l_int32    hs,
00794                  l_int32    wpls)
00795 {
00796 l_int32    i, hsm;
00797 l_uint32  *lines, *lined;
00798
00799     hsm = hs - 1;
00800
00801         /* We're taking 2 src and 4 dest lines at a time,
00802          * and for each src line, we're computing 4 dest lines.
00803          * Call these 4 dest lines:  destline1 - destline4.
00804          * The first src line is used for destline 1.
00805          * Two src lines are used for all other dest lines,
00806          * except for the last 4 dest lines, which are computed
00807          * using only the last src line. */
00808
00809         /* iterate over all but the last src line */
00810     for (i = 0; i < hsm; i++) {
00811         lines = datas + i * wpls;
00812         lined = datad + 4 * i * wpld;
00813         scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 0);
00814     }
00815
00816         /* last src line */
00817     lines = datas + hsm * wpls;
00818     lined = datad + 4 * hsm * wpld;
00819     scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 1);
00820
00821     return;
00822 }
00823
00824
00825 /*!
00826  *  scaleGray4xLILineLow()
00827  *
00828  *      Input:  lined   (ptr to top destline, to be made from current src line)
00829  *              wpld
00830  *              lines   (ptr to current src line)
00831  *              ws
00832  *              wpls
00833  *              lastlineflag  (1 if last src line; 0 otherwise)
00834  *      Return: void
00835  */
00836 void
00837 scaleGray4xLILineLow(l_uint32  *lined,
00838                      l_int32    wpld,
00839                      l_uint32  *lines,
00840                      l_int32    ws,
00841                      l_int32    wpls,
00842                      l_int32    lastlineflag)
00843 {
00844 l_int32    j, jd, wsm, wsm4;
00845 l_int32    s1, s2, s3, s4, s1t, s2t, s3t, s4t;
00846 l_uint32  *linesp, *linedp1, *linedp2, *linedp3;
00847
00848     wsm = ws - 1;
00849     wsm4 = 4 * wsm;
00850
00851     if (lastlineflag == 0) {
00852         linesp = lines + wpls;
00853         linedp1 = lined + wpld;
00854         linedp2 = lined + 2 * wpld;
00855         linedp3 = lined + 3 * wpld;
00856         s2 = GET_DATA_BYTE(lines, 0);
00857         s4 = GET_DATA_BYTE(linesp, 0);
00858         for (j = 0, jd = 0; j < wsm; j++, jd += 4) {
00859             s1 = s2;
00860             s3 = s4;
00861             s2 = GET_DATA_BYTE(lines, j + 1);
00862             s4 = GET_DATA_BYTE(linesp, j + 1);
00863             s1t = 3 * s1;
00864             s2t = 3 * s2;
00865             s3t = 3 * s3;
00866             s4t = 3 * s4;
00867             SET_DATA_BYTE(lined, jd, s1);                             /* d1 */
00868             SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4);             /* d2 */
00869             SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2);              /* d3 */
00870             SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4);             /* d4 */
00871             SET_DATA_BYTE(linedp1, jd, (s1t + s3) / 4);                /* d5 */
00872             SET_DATA_BYTE(linedp1, jd + 1, (9*s1 + s2t + s3t + s4) / 16); /*d6*/
00873             SET_DATA_BYTE(linedp1, jd + 2, (s1t + s2t + s3 + s4) / 8); /* d7 */
00874             SET_DATA_BYTE(linedp1, jd + 3, (s1t + 9*s2 + s3 + s4t) / 16);/*d8*/
00875             SET_DATA_BYTE(linedp2, jd, (s1 + s3) / 2);                /* d9 */
00876             SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2 + s3t + s4) / 8);/* d10 */
00877             SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2 + s3 + s4) / 4);  /* d11 */
00878             SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t + s3 + s4t) / 8);/* d12 */
00879             SET_DATA_BYTE(linedp3, jd, (s1 + s3t) / 4);               /* d13 */
00880             SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2 + 9*s3 + s4t) / 16);/*d14*/
00881             SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2 + s3t + s4t) / 8); /* d15 */
00882             SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t + s3t + 9*s4) / 16);/*d16*/
00883         }
00884         s1 = s2;
00885         s3 = s4;
00886         s1t = 3 * s1;
00887         s3t = 3 * s3;
00888         SET_DATA_BYTE(lined, wsm4, s1);                               /* d1 */
00889         SET_DATA_BYTE(lined, wsm4 + 1, s1);                           /* d2 */
00890         SET_DATA_BYTE(lined, wsm4 + 2, s1);                           /* d3 */
00891         SET_DATA_BYTE(lined, wsm4 + 3, s1);                           /* d4 */
00892         SET_DATA_BYTE(linedp1, wsm4, (s1t + s3) / 4);                 /* d5 */
00893         SET_DATA_BYTE(linedp1, wsm4 + 1, (s1t + s3) / 4);             /* d6 */
00894         SET_DATA_BYTE(linedp1, wsm4 + 2, (s1t + s3) / 4);             /* d7 */
00895         SET_DATA_BYTE(linedp1, wsm4 + 3, (s1t + s3) / 4);             /* d8 */
00896         SET_DATA_BYTE(linedp2, wsm4, (s1 + s3) / 2);                  /* d9 */
00897         SET_DATA_BYTE(linedp2, wsm4 + 1, (s1 + s3) / 2);              /* d10 */
00898         SET_DATA_BYTE(linedp2, wsm4 + 2, (s1 + s3) / 2);              /* d11 */
00899         SET_DATA_BYTE(linedp2, wsm4 + 3, (s1 + s3) / 2);              /* d12 */
00900         SET_DATA_BYTE(linedp3, wsm4, (s1 + s3t) / 4);                 /* d13 */
00901         SET_DATA_BYTE(linedp3, wsm4 + 1, (s1 + s3t) / 4);             /* d14 */
00902         SET_DATA_BYTE(linedp3, wsm4 + 2, (s1 + s3t) / 4);             /* d15 */
00903         SET_DATA_BYTE(linedp3, wsm4 + 3, (s1 + s3t) / 4);             /* d16 */
00904     }
00905     else {   /* last row of src pixels: lastlineflag == 1 */
00906         linedp1 = lined + wpld;
00907         linedp2 = lined + 2 * wpld;
00908         linedp3 = lined + 3 * wpld;
00909         s2 = GET_DATA_BYTE(lines, 0);
00910         for (j = 0, jd = 0; j < wsm; j++, jd += 4) {
00911             s1 = s2;
00912             s2 = GET_DATA_BYTE(lines, j + 1);
00913             s1t = 3 * s1;
00914             s2t = 3 * s2;
00915             SET_DATA_BYTE(lined, jd, s1);                            /* d1 */
00916             SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4 );           /* d2 */
00917             SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2 );            /* d3 */
00918             SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4 );           /* d4 */
00919             SET_DATA_BYTE(linedp1, jd, s1);                          /* d5 */
00920             SET_DATA_BYTE(linedp1, jd + 1, (s1t + s2) / 4 );         /* d6 */
00921             SET_DATA_BYTE(linedp1, jd + 2, (s1 + s2) / 2 );          /* d7 */
00922             SET_DATA_BYTE(linedp1, jd + 3, (s1 + s2t) / 4 );         /* d8 */
00923             SET_DATA_BYTE(linedp2, jd, s1);                          /* d9 */
00924             SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2) / 4 );         /* d10 */
00925             SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2) / 2 );          /* d11 */
00926             SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t) / 4 );         /* d12 */
00927             SET_DATA_BYTE(linedp3, jd, s1);                          /* d13 */
00928             SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2) / 4 );         /* d14 */
00929             SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2) / 2 );          /* d15 */
00930             SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t) / 4 );         /* d16 */
00931         }
00932         s1 = s2;
00933         SET_DATA_BYTE(lined, wsm4, s1);                              /* d1 */
00934         SET_DATA_BYTE(lined, wsm4 + 1, s1);                          /* d2 */
00935         SET_DATA_BYTE(lined, wsm4 + 2, s1);                          /* d3 */
00936         SET_DATA_BYTE(lined, wsm4 + 3, s1);                          /* d4 */
00937         SET_DATA_BYTE(linedp1, wsm4, s1);                            /* d5 */
00938         SET_DATA_BYTE(linedp1, wsm4 + 1, s1);                        /* d6 */
00939         SET_DATA_BYTE(linedp1, wsm4 + 2, s1);                        /* d7 */
00940         SET_DATA_BYTE(linedp1, wsm4 + 3, s1);                        /* d8 */
00941         SET_DATA_BYTE(linedp2, wsm4, s1);                            /* d9 */
00942         SET_DATA_BYTE(linedp2, wsm4 + 1, s1);                        /* d10 */
00943         SET_DATA_BYTE(linedp2, wsm4 + 2, s1);                        /* d11 */
00944         SET_DATA_BYTE(linedp2, wsm4 + 3, s1);                        /* d12 */
00945         SET_DATA_BYTE(linedp3, wsm4, s1);                            /* d13 */
00946         SET_DATA_BYTE(linedp3, wsm4 + 1, s1);                        /* d14 */
00947         SET_DATA_BYTE(linedp3, wsm4 + 2, s1);                        /* d15 */
00948         SET_DATA_BYTE(linedp3, wsm4 + 3, s1);                        /* d16 */
00949     }
00950
00951     return;
00952 }
00953
00954
00955 /*------------------------------------------------------------------*
00956  *       Grayscale and color scaling by closest pixel sampling      *
00957  *------------------------------------------------------------------*/
00958 /*!
00959  *  scaleBySamplingLow()
00960  *
00961  *  Notes:
00962  *      (1) The dest must be cleared prior to this operation,
00963  *          and we clear it here in the low-level code.
00964  *      (2) We reuse dest pixels and dest pixel rows whenever
00965  *          possible.  This speeds the upscaling; downscaling
00966  *          is done by strict subsampling and is unaffected.
00967  *      (3) Because we are sampling and not interpolating, this
00968  *          routine works directly, without conversion to full
00969  *          RGB color, for 2, 4 or 8 bpp palette color images.
00970  */
00971 l_int32
00972 scaleBySamplingLow(l_uint32  *datad,
00973                    l_int32    wd,
00974                    l_int32    hd,
00975                    l_int32    wpld,
00976                    l_uint32  *datas,
00977                    l_int32    ws,
00978                    l_int32    hs,
00979                    l_int32    d,
00980                    l_int32    wpls)
00981 {
00982 l_int32    i, j, bpld;
00983 l_int32    xs, prevxs, sval;
00984 l_int32   *srow, *scol;
00985 l_uint32   csval;
00986 l_uint32  *lines, *prevlines, *lined, *prevlined;
00987 l_float32  wratio, hratio;
00988
00989     PROCNAME("scaleBySamplingLow");
00990
00991         /* clear dest */
00992     bpld = 4 * wpld;
00993     memset((char *)datad, 0, hd * bpld);
00994
00995         /* the source row corresponding to dest row i ==> srow[i]
00996          * the source col corresponding to dest col j ==> scol[j]  */
00997     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
00998         return ERROR_INT("srow not made", procName, 1);
00999     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
01000         return ERROR_INT("scol not made", procName, 1);
01001
01002     wratio = (l_float32)ws / (l_float32)wd;
01003     hratio = (l_float32)hs / (l_float32)hd;
01004     for (i = 0; i < hd; i++)
01005         srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1);
01006     for (j = 0; j < wd; j++)
01007         scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1);
01008
01009     prevlines = NULL;
01010     for (i = 0; i < hd; i++) {
01011         lines = datas + srow[i] * wpls;
01012         lined = datad + i * wpld;
01013         if (lines != prevlines) {  /* make dest from new source row */
01014             prevxs = -1;
01015             sval = 0;
01016             csval = 0;
01017             switch (d)
01018             {
01019             case 2:
01020                 for (j = 0; j < wd; j++) {
01021                     xs = scol[j];
01022                     if (xs != prevxs) {  /* get dest pix from source col */
01023                         sval = GET_DATA_DIBIT(lines, xs);
01024                         SET_DATA_DIBIT(lined, j, sval);
01025                         prevxs = xs;
01026                     }
01027                     else   /* copy prev dest pix */
01028                         SET_DATA_DIBIT(lined, j, sval);
01029                 }
01030                 break;
01031             case 4:
01032                 for (j = 0; j < wd; j++) {
01033                     xs = scol[j];
01034                     if (xs != prevxs) {  /* get dest pix from source col */
01035                         sval = GET_DATA_QBIT(lines, xs);
01036                         SET_DATA_QBIT(lined, j, sval);
01037                         prevxs = xs;
01038                     }
01039                     else   /* copy prev dest pix */
01040                         SET_DATA_QBIT(lined, j, sval);
01041                 }
01042                 break;
01043             case 8:
01044                 for (j = 0; j < wd; j++) {
01045                     xs = scol[j];
01046                     if (xs != prevxs) {  /* get dest pix from source col */
01047                         sval = GET_DATA_BYTE(lines, xs);
01048                         SET_DATA_BYTE(lined, j, sval);
01049                         prevxs = xs;
01050                     }
01051                     else   /* copy prev dest pix */
01052                         SET_DATA_BYTE(lined, j, sval);
01053                 }
01054                 break;
01055             case 16:
01056                 for (j = 0; j < wd; j++) {
01057                     xs = scol[j];
01058                     if (xs != prevxs) {  /* get dest pix from source col */
01059                         sval = GET_DATA_TWO_BYTES(lines, xs);
01060                         SET_DATA_TWO_BYTES(lined, j, sval);
01061                         prevxs = xs;
01062                     }
01063                     else   /* copy prev dest pix */
01064                         SET_DATA_TWO_BYTES(lined, j, sval);
01065                 }
01066                 break;
01067             case 32:
01068                 for (j = 0; j < wd; j++) {
01069                     xs = scol[j];
01070                     if (xs != prevxs) {  /* get dest pix from source col */
01071                         csval = lines[xs];
01072                         lined[j] = csval;
01073                         prevxs = xs;
01074                     }
01075                     else   /* copy prev dest pix */
01076                         lined[j] = csval;
01077                 }
01078                 break;
01079             default:
01080                 return ERROR_INT("pixel depth not supported", procName, 1);
01081                 break;
01082             }
01083         }
01084         else {  /* lines == prevlines; copy prev dest row */
01085             prevlined = lined - wpld;
01086             memcpy((char *)lined, (char *)prevlined, bpld);
01087         }
01088         prevlines = lines;
01089     }
01090
01091     FREE(srow);
01092     FREE(scol);
01093
01094     return 0;
01095 }
01096
01097
01098 /*------------------------------------------------------------------*
01099  *    Color and grayscale downsampling with (antialias) smoothing   *
01100  *------------------------------------------------------------------*/
01101 /*!
01102  *  scaleSmoothLow()
01103  *
01104  *  Notes:
01105  *      (1) This function is called on 8 or 32 bpp src and dest images.
01106  *      (2) size is the full width of the lowpass smoothing filter.
01107  *          It is correlated with the reduction ratio, being the
01108  *          nearest integer such that size is approximately equal to hs / hd.
01109  */
01110 l_int32
01111 scaleSmoothLow(l_uint32  *datad,
01112                l_int32    wd,
01113                l_int32    hd,
01114                l_int32    wpld,
01115                l_uint32  *datas,
01116                l_int32    ws,
01117                l_int32    hs,
01118                l_int32    d,
01119                l_int32    wpls,
01120                l_int32    size)
01121 {
01122 l_int32    i, j, m, n, xstart;
01123 l_int32    val, rval, gval, bval;
01124 l_int32   *srow, *scol;
01125 l_uint32  *lines, *lined, *line, *ppixel;
01126 l_uint32   pixel;
01127 l_float32  wratio, hratio, norm;
01128
01129     PROCNAME("scaleSmoothLow");
01130
01131         /* Clear dest */
01132     memset((char *)datad, 0, 4 * wpld * hd);
01133
01134         /* Each dest pixel at (j,i) is computed as the average
01135            of size^2 corresponding src pixels.
01136            We store the UL corner location of the square of
01137            src pixels that correspond to dest pixel (j,i).
01138            The are labelled by the arrays srow[i] and scol[j]. */
01139     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
01140         return ERROR_INT("srow not made", procName, 1);
01141     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
01142         return ERROR_INT("scol not made", procName, 1);
01143
01144     norm = 1. / (l_float32)(size * size);
01145     wratio = (l_float32)ws / (l_float32)wd;
01146     hratio = (l_float32)hs / (l_float32)hd;
01147     for (i = 0; i < hd; i++)
01148         srow[i] = L_MIN((l_int32)(hratio * i), hs - size);
01149     for (j = 0; j < wd; j++)
01150         scol[j] = L_MIN((l_int32)(wratio * j), ws - size);
01151
01152         /* For each dest pixel, compute average */
01153     if (d == 8) {
01154         for (i = 0; i < hd; i++) {
01155             lines = datas + srow[i] * wpls;
01156             lined = datad + i * wpld;
01157             for (j = 0; j < wd; j++) {
01158                 xstart = scol[j];
01159                 val = 0;
01160                 for (m = 0; m < size; m++) {
01161                     line = lines + m * wpls;
01162                     for (n = 0; n < size; n++) {
01163                         val += GET_DATA_BYTE(line, xstart + n);
01164                     }
01165                 }
01166                 val = (l_int32)((l_float32)val * norm);
01167                 SET_DATA_BYTE(lined, j, val);
01168             }
01169         }
01170     }
01171     else {  /* d == 32 */
01172         for (i = 0; i < hd; i++) {
01173             lines = datas + srow[i] * wpls;
01174             lined = datad + i * wpld;
01175             for (j = 0; j < wd; j++) {
01176                 xstart = scol[j];
01177                 rval = gval = bval = 0;
01178                 for (m = 0; m < size; m++) {
01179                     ppixel = lines + m * wpls + xstart;
01180                     for (n = 0; n < size; n++) {
01181                         pixel = *(ppixel + n);
01182                         rval += (pixel >> L_RED_SHIFT) & 0xff;
01183                         gval += (pixel >> L_GREEN_SHIFT) & 0xff;
01184                         bval += (pixel >> L_BLUE_SHIFT) & 0xff;
01185                     }
01186                 }
01187                 rval = (l_int32)((l_float32)rval * norm);
01188                 gval = (l_int32)((l_float32)gval * norm);
01189                 bval = (l_int32)((l_float32)bval * norm);
01190                 *(lined + j) = rval << L_RED_SHIFT |
01191                                gval << L_GREEN_SHIFT |
01192                                bval << L_BLUE_SHIFT;
01193             }
01194         }
01195     }
01196
01197     FREE(srow);
01198     FREE(scol);
01199     return 0;
01200 }
01201
01202
01203 /*!
01204  *  scaleRGBToGray2Low()
01205  *
01206  *  Note: This function is called with 32 bpp RGB src and 8 bpp,
01207  *        half-resolution dest.  The weights should add to 1.0.
01208  */
01209 void
01210 scaleRGBToGray2Low(l_uint32  *datad,
01211                    l_int32    wd,
01212                    l_int32    hd,
01213                    l_int32    wpld,
01214                    l_uint32  *datas,
01215                    l_int32    wpls,
01216                    l_float32  rwt,
01217                    l_float32  gwt,
01218                    l_float32  bwt)
01219 {
01220 l_int32    i, j, val, rval, gval, bval;
01221 l_uint32  *lines, *lined;
01222 l_uint32   pixel;
01223
01224     rwt *= 0.25;
01225     gwt *= 0.25;
01226     bwt *= 0.25;
01227     for (i = 0; i < hd; i++) {
01228         lines = datas + 2 * i * wpls;
01229         lined = datad + i * wpld;
01230         for (j = 0; j < wd; j++) {
01231                 /* Sum each of the color components from 4 src pixels */
01232             pixel = *(lines + 2 * j);
01233             rval = (pixel >> L_RED_SHIFT) & 0xff;
01234             gval = (pixel >> L_GREEN_SHIFT) & 0xff;
01235             bval = (pixel >> L_BLUE_SHIFT) & 0xff;
01236             pixel = *(lines + 2 * j + 1);
01237             rval += (pixel >> L_RED_SHIFT) & 0xff;
01238             gval += (pixel >> L_GREEN_SHIFT) & 0xff;
01239             bval += (pixel >> L_BLUE_SHIFT) & 0xff;
01240             pixel = *(lines + wpls + 2 * j);
01241             rval += (pixel >> L_RED_SHIFT) & 0xff;
01242             gval += (pixel >> L_GREEN_SHIFT) & 0xff;
01243             bval += (pixel >> L_BLUE_SHIFT) & 0xff;
01244             pixel = *(lines + wpls + 2 * j + 1);
01245             rval += (pixel >> L_RED_SHIFT) & 0xff;
01246             gval += (pixel >> L_GREEN_SHIFT) & 0xff;
01247             bval += (pixel >> L_BLUE_SHIFT) & 0xff;
01248                 /* Generate the dest byte as a weighted sum of the averages */
01249             val = (l_int32)(rwt * rval + gwt * gval + bwt * bval);
01250             SET_DATA_BYTE(lined, j, val);
01251         }
01252     }
01253 }
01254
01255
01256 /*------------------------------------------------------------------*
01257  *                  General area mapped gray scaling                *
01258  *------------------------------------------------------------------*/
01259 /*!
01260  *  scaleColorAreaMapLow()
01261  *
01262  *  This should only be used for downscaling.
01263  *  We choose to divide each pixel into 16 x 16 sub-pixels.
01264  *  This is much slower than scaleSmoothLow(), but it gives a
01265  *  better representation, esp. for downscaling factors between
01266  *  1.5 and 5.  All src pixels are subdivided into 256 sub-pixels,
01267  *  and are weighted by the number of sub-pixels covered by
01268  *  the dest pixel.  This is about 2x slower than scaleSmoothLow(),
01269  *  but the results are significantly better on small text.
01270  */
01271 void
01272 scaleColorAreaMapLow(l_uint32  *datad,
01273                     l_int32    wd,
01274                     l_int32    hd,
01275                     l_int32    wpld,
01276                     l_uint32  *datas,
01277                     l_int32    ws,
01278                     l_int32    hs,
01279                     l_int32    wpls)
01280 {
01281 l_int32    i, j, k, m, wm2, hm2;
01282 l_int32    area00, area10, area01, area11, areal, arear, areat, areab;
01283 l_int32    xu, yu;  /* UL corner in src image, to 1/16 of a pixel */
01284 l_int32    xl, yl;  /* LR corner in src image, to 1/16 of a pixel */
01285 l_int32    xup, yup, xuf, yuf;  /* UL src pixel: integer and fraction */
01286 l_int32    xlp, ylp, xlf, ylf;  /* LR src pixel: integer and fraction */
01287 l_int32    delx, dely, area;
01288 l_int32    v00r, v00g, v00b;  /* contrib. from UL src pixel */
01289 l_int32    v01r, v01g, v01b;  /* contrib. from LL src pixel */
01290 l_int32    v10r, v10g, v10b;  /* contrib from UR src pixel */
01291 l_int32    v11r, v11g, v11b;  /* contrib from LR src pixel */
01292 l_int32    vinr, ving, vinb;  /* contrib from all full interior src pixels */
01293 l_int32    vmidr, vmidg, vmidb;  /* contrib from side parts */
01294 l_int32    rval, gval, bval;
01295 l_uint32   pixel00, pixel10, pixel01, pixel11, pixel;
01296 l_uint32  *lines, *lined;
01297 l_float32  scx, scy;
01298
01299         /* (scx, scy) are scaling factors that are applied to the
01300          * dest coords to get the corresponding src coords.
01301          * We need them because we iterate over dest pixels
01302          * and must find the corresponding set of src pixels. */
01303     scx = 16. * (l_float32)ws / (l_float32)wd;
01304     scy = 16. * (l_float32)hs / (l_float32)hd;
01305
01306     wm2 = ws - 2;
01307     hm2 = hs - 2;
01308
01309         /* Iterate over the destination pixels */
01310     for (i = 0; i < hd; i++) {
01311         yu = (l_int32)(scy * i);
01312         yl = (l_int32)(scy * (i + 1.0));
01313         yup = yu >> 4;
01314         yuf = yu & 0x0f;
01315         ylp = yl >> 4;
01316         ylf = yl & 0x0f;
01317         dely = ylp - yup;
01318         lined = datad + i * wpld;
01319         lines = datas + yup * wpls;
01320         for (j = 0; j < wd; j++) {
01321             xu = (l_int32)(scx * j);
01322             xl = (l_int32)(scx * (j + 1.0));
01323             xup = xu >> 4;
01324             xuf = xu & 0x0f;
01325             xlp = xl >> 4;
01326             xlf = xl & 0x0f;
01327             delx = xlp - xup;
01328
01329                 /* If near the edge, just use a src pixel value */
01330             if (xlp > wm2 || ylp > hm2) {
01331                 *(lined + j) = *(lines + xup);
01332                 continue;
01333             }
01334
01335                 /* Area summed over, in subpixels.  This varies
01336                  * due to the quantization, so we can't simply take
01337                  * the area to be a constant: area = scx * scy. */
01338             area = ((16 - xuf) + 16 * (delx - 1) + xlf) *
01339                    ((16 - yuf) + 16 * (dely - 1) + ylf);
01340
01341                 /* Do area map summation */
01342             pixel00 = *(lines + xup);
01343             pixel10 = *(lines + xlp);
01344             pixel01 = *(lines + dely * wpls +  xup);
01345             pixel11 = *(lines + dely * wpls +  xlp);
01346             area00 = (16 - xuf) * (16 - yuf);
01347             area10 = xlf * (16 - yuf);
01348             area01 = (16 - xuf) * ylf;
01349             area11 = xlf * ylf;
01350             v00r = area00 * ((pixel00 >> L_RED_SHIFT) & 0xff);
01351             v00g = area00 * ((pixel00 >> L_GREEN_SHIFT) & 0xff);
01352             v00b = area00 * ((pixel00 >> L_BLUE_SHIFT) & 0xff);
01353             v10r = area10 * ((pixel10 >> L_RED_SHIFT) & 0xff);
01354             v10g = area10 * ((pixel10 >> L_GREEN_SHIFT) & 0xff);
01355             v10b = area10 * ((pixel10 >> L_BLUE_SHIFT) & 0xff);
01356             v01r = area01 * ((pixel01 >> L_RED_SHIFT) & 0xff);
01357             v01g = area01 * ((pixel01 >> L_GREEN_SHIFT) & 0xff);
01358             v01b = area01 * ((pixel01 >> L_BLUE_SHIFT) & 0xff);
01359             v11r = area11 * ((pixel11 >> L_RED_SHIFT) & 0xff);
01360             v11g = area11 * ((pixel11 >> L_GREEN_SHIFT) & 0xff);
01361             v11b = area11 * ((pixel11 >> L_BLUE_SHIFT) & 0xff);
01362             vinr = ving = vinb = 0;
01363             for (k = 1; k < dely; k++) {  /* for full src pixels */
01364                 for (m = 1; m < delx; m++) {
01365                     pixel = *(lines + k * wpls + xup + m);
01366                     vinr += 256 * ((pixel >> L_RED_SHIFT) & 0xff);
01367                     ving += 256 * ((pixel >> L_GREEN_SHIFT) & 0xff);
01368                     vinb += 256 * ((pixel >> L_BLUE_SHIFT) & 0xff);
01369                 }
01370             }
01371             vmidr = vmidg = vmidb = 0;
01372             areal = (16 - xuf) * 16;
01373             arear = xlf * 16;
01374             areat = 16 * (16 - yuf);
01375             areab = 16 * ylf;
01376             for (k = 1; k < dely; k++) {  /* for left side */
01377                 pixel = *(lines + k * wpls + xup);
01378                 vmidr += areal * ((pixel >> L_RED_SHIFT) & 0xff);
01379                 vmidg += areal * ((pixel >> L_GREEN_SHIFT) & 0xff);
01380                 vmidb += areal * ((pixel >> L_BLUE_SHIFT) & 0xff);
01381             }
01382             for (k = 1; k < dely; k++) {  /* for right side */
01383                 pixel = *(lines + k * wpls + xlp);
01384                 vmidr += arear * ((pixel >> L_RED_SHIFT) & 0xff);
01385                 vmidg += arear * ((pixel >> L_GREEN_SHIFT) & 0xff);
01386                 vmidb += arear * ((pixel >> L_BLUE_SHIFT) & 0xff);
01387             }
01388             for (m = 1; m < delx; m++) {  /* for top side */
01389                 pixel = *(lines + xup + m);
01390                 vmidr += areat * ((pixel >> L_RED_SHIFT) & 0xff);
01391                 vmidg += areat * ((pixel >> L_GREEN_SHIFT) & 0xff);
01392                 vmidb += areat * ((pixel >> L_BLUE_SHIFT) & 0xff);
01393             }
01394             for (m = 1; m < delx; m++) {  /* for bottom side */
01395                 pixel = *(lines + dely * wpls + xup + m);
01396                 vmidr += areab * ((pixel >> L_RED_SHIFT) & 0xff);
01397                 vmidg += areab * ((pixel >> L_GREEN_SHIFT) & 0xff);
01398                 vmidb += areab * ((pixel >> L_BLUE_SHIFT) & 0xff);
01399             }
01400
01401                 /* Sum all the contributions */
01402             rval = (v00r + v01r + v10r + v11r + vinr + vmidr + 128) / area;
01403             gval = (v00g + v01g + v10g + v11g + ving + vmidg + 128) / area;
01404             bval = (v00b + v01b + v10b + v11b + vinb + vmidb + 128) / area;
01405 #if  DEBUG_OVERFLOW
01406             if (rval > 255) fprintf(stderr, "rval ovfl: %d\n", rval);
01407             if (rval > 255) fprintf(stderr, "gval ovfl: %d\n", gval);
01408             if (rval > 255) fprintf(stderr, "bval ovfl: %d\n", bval);
01409 #endif  /* DEBUG_OVERFLOW */
01410             composeRGBPixel(rval, gval, bval, lined + j);
01411         }
01412     }
01413
01414     return;
01415 }
01416
01417
01418 /*!
01419  *  scaleGrayAreaMapLow()
01420  *
01421  *  This should only be used for downscaling.
01422  *  We choose to divide each pixel into 16 x 16 sub-pixels.
01423  *  This is about 2x slower than scaleSmoothLow(), but the results
01424  *  are significantly better on small text, esp. for downscaling
01425  *  factors between 1.5 and 5.  All src pixels are subdivided
01426  *  into 256 sub-pixels, and are weighted by the number of
01427  *  sub-pixels covered by the dest pixel.
01428  */
01429 void
01430 scaleGrayAreaMapLow(l_uint32  *datad,
01431                     l_int32    wd,
01432                     l_int32    hd,
01433                     l_int32    wpld,
01434                     l_uint32  *datas,
01435                     l_int32    ws,
01436                     l_int32    hs,
01437                     l_int32    wpls)
01438 {
01439 l_int32    i, j, k, m, wm2, hm2;
01440 l_int32    xu, yu;  /* UL corner in src image, to 1/16 of a pixel */
01441 l_int32    xl, yl;  /* LR corner in src image, to 1/16 of a pixel */
01442 l_int32    xup, yup, xuf, yuf;  /* UL src pixel: integer and fraction */
01443 l_int32    xlp, ylp, xlf, ylf;  /* LR src pixel: integer and fraction */
01444 l_int32    delx, dely, area;
01445 l_int32    v00;  /* contrib. from UL src pixel */
01446 l_int32    v01;  /* contrib. from LL src pixel */
01447 l_int32    v10;  /* contrib from UR src pixel */
01448 l_int32    v11;  /* contrib from LR src pixel */
01449 l_int32    vin;  /* contrib from all full interior src pixels */
01450 l_int32    vmid;  /* contrib from side parts that are full in 1 direction */
01451 l_int32    val;
01452 l_uint32  *lines, *lined;
01453 l_float32  scx, scy;
01454
01455         /* (scx, scy) are scaling factors that are applied to the
01456          * dest coords to get the corresponding src coords.
01457          * We need them because we iterate over dest pixels
01458          * and must find the corresponding set of src pixels. */
01459     scx = 16. * (l_float32)ws / (l_float32)wd;
01460     scy = 16. * (l_float32)hs / (l_float32)hd;
01461
01462     wm2 = ws - 2;
01463     hm2 = hs - 2;
01464
01465         /* Iterate over the destination pixels */
01466     for (i = 0; i < hd; i++) {
01467         yu = (l_int32)(scy * i);
01468         yl = (l_int32)(scy * (i + 1.0));
01469         yup = yu >> 4;
01470         yuf = yu & 0x0f;
01471         ylp = yl >> 4;
01472         ylf = yl & 0x0f;
01473         dely = ylp - yup;
01474         lined = datad + i * wpld;
01475         lines = datas + yup * wpls;
01476         for (j = 0; j < wd; j++) {
01477             xu = (l_int32)(scx * j);
01478             xl = (l_int32)(scx * (j + 1.0));
01479             xup = xu >> 4;
01480             xuf = xu & 0x0f;
01481             xlp = xl >> 4;
01482             xlf = xl & 0x0f;
01483             delx = xlp - xup;
01484
01485                 /* If near the edge, just use a src pixel value */
01486             if (xlp > wm2 || ylp > hm2) {
01487                 SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xup));
01488                 continue;
01489             }
01490
01491                 /* Area summed over, in subpixels.  This varies
01492                  * due to the quantization, so we can't simply take
01493                  * the area to be a constant: area = scx * scy. */
01494             area = ((16 - xuf) + 16 * (delx - 1) + xlf) *
01495                    ((16 - yuf) + 16 * (dely - 1) + ylf);
01496
01497                 /* Do area map summation */
01498             v00 = (16 - xuf) * (16 - yuf) * GET_DATA_BYTE(lines, xup);
01499             v10 = xlf * (16 - yuf) * GET_DATA_BYTE(lines, xlp);
01500             v01 = (16 - xuf) * ylf * GET_DATA_BYTE(lines + dely * wpls, xup);
01501             v11 = xlf * ylf * GET_DATA_BYTE(lines + dely * wpls, xlp);
01502             for (vin = 0, k = 1; k < dely; k++) {  /* for full src pixels */
01503                  for (m = 1; m < delx; m++) {
01504                      vin += 256 * GET_DATA_BYTE(lines + k * wpls, xup + m);
01505                  }
01506             }
01507             for (vmid = 0, k = 1; k < dely; k++)  /* for left side */
01508                 vmid += (16 - xuf) * 16 * GET_DATA_BYTE(lines + k * wpls, xup);
01509             for (k = 1; k < dely; k++)  /* for right side */
01510                 vmid += xlf * 16 * GET_DATA_BYTE(lines + k * wpls, xlp);
01511             for (m = 1; m < delx; m++)  /* for top side */
01512                 vmid += 16 * (16 - yuf) * GET_DATA_BYTE(lines, xup + m);
01513             for (m = 1; m < delx; m++)  /* for bottom side */
01514                 vmid += 16 * ylf * GET_DATA_BYTE(lines + dely * wpls, xup + m);
01515             val = (v00 + v01 + v10 + v11 + vin + vmid + 128) / area;
01516 #if  DEBUG_OVERFLOW
01517             if (val > 255) fprintf(stderr, "val overflow: %d\n", val);
01518 #endif  /* DEBUG_OVERFLOW */
01519             SET_DATA_BYTE(lined, j, val);
01520         }
01521     }
01522
01523     return;
01524 }
01525
01526
01527 /*------------------------------------------------------------------*
01528  *                     2x area mapped downscaling                   *
01529  *------------------------------------------------------------------*/
01530 /*!
01531  *  scaleAreaMapLow2()
01532  *
01533  *  Note: This function is called with either 8 bpp gray or 32 bpp RGB.
01534  *        The result is a 2x reduced dest.
01535  */
01536 void
01537 scaleAreaMapLow2(l_uint32  *datad,
01538                  l_int32    wd,
01539                  l_int32    hd,
01540                  l_int32    wpld,
01541                  l_uint32  *datas,
01542                  l_int32    d,
01543                  l_int32    wpls)
01544 {
01545 l_int32    i, j, val, rval, gval, bval;
01546 l_uint32  *lines, *lined;
01547 l_uint32   pixel;
01548
01549     if (d == 8) {
01550         for (i = 0; i < hd; i++) {
01551             lines = datas + 2 * i * wpls;
01552             lined = datad + i * wpld;
01553             for (j = 0; j < wd; j++) {
01554                     /* Average each dest pixel using 4 src pixels */
01555                 val = GET_DATA_BYTE(lines, 2 * j);
01556                 val += GET_DATA_BYTE(lines, 2 * j + 1);
01557                 val += GET_DATA_BYTE(lines + wpls, 2 * j);
01558                 val += GET_DATA_BYTE(lines + wpls, 2 * j + 1);
01559                 val >>= 2;
01560                 SET_DATA_BYTE(lined, j, val);
01561             }
01562         }
01563     }
01564     else {  /* d == 32 */
01565         for (i = 0; i < hd; i++) {
01566             lines = datas + 2 * i * wpls;
01567             lined = datad + i * wpld;
01568             for (j = 0; j < wd; j++) {
01569                     /* Average each of the color components from 4 src pixels */
01570                 pixel = *(lines + 2 * j);
01571                 rval = (pixel >> L_RED_SHIFT) & 0xff;
01572                 gval = (pixel >> L_GREEN_SHIFT) & 0xff;
01573                 bval = (pixel >> L_BLUE_SHIFT) & 0xff;
01574                 pixel = *(lines + 2 * j + 1);
01575                 rval += (pixel >> L_RED_SHIFT) & 0xff;
01576                 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
01577                 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
01578                 pixel = *(lines + wpls + 2 * j);
01579                 rval += (pixel >> L_RED_SHIFT) & 0xff;
01580                 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
01581                 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
01582                 pixel = *(lines + wpls + 2 * j + 1);
01583                 rval += (pixel >> L_RED_SHIFT) & 0xff;
01584                 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
01585                 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
01586                 composeRGBPixel(rval >> 2, gval >> 2, bval >> 2, &pixel);
01587                 *(lined + j) = pixel;
01588             }
01589         }
01590     }
01591     return;
01592 }
01593
01594
01595 /*------------------------------------------------------------------*
01596  *              Binary scaling by closest pixel sampling            *
01597  *------------------------------------------------------------------*/
01598 /*
01599  *  scaleBinaryLow()
01600  *
01601  *  Notes:
01602  *      (1) The dest must be cleared prior to this operation,
01603  *          and we clear it here in the low-level code.
01604  *      (2) We reuse dest pixels and dest pixel rows whenever
01605  *          possible for upscaling; downscaling is done by
01606  *          strict subsampling.
01607  */
01608 l_int32
01609 scaleBinaryLow(l_uint32  *datad,
01610                l_int32    wd,
01611                l_int32    hd,
01612                l_int32    wpld,
01613                l_uint32  *datas,
01614                l_int32    ws,
01615                l_int32    hs,
01616                l_int32    wpls)
01617 {
01618 l_int32    i, j, bpld;
01619 l_int32    xs, prevxs, sval;
01620 l_int32   *srow, *scol;
01621 l_uint32  *lines, *prevlines, *lined, *prevlined;
01622 l_float32  wratio, hratio;
01623
01624     PROCNAME("scaleBinaryLow");
01625
01626         /* clear dest */
01627     bpld = 4 * wpld;
01628     memset((char *)datad, 0, hd * bpld);
01629
01630         /* The source row corresponding to dest row i ==> srow[i]
01631          * The source col corresponding to dest col j ==> scol[j]  */
01632     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
01633         return ERROR_INT("srow not made", procName, 1);
01634     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
01635         return ERROR_INT("scol not made", procName, 1);
01636
01637     wratio = (l_float32)ws / (l_float32)wd;
01638     hratio = (l_float32)hs / (l_float32)hd;
01639     for (i = 0; i < hd; i++)
01640         srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1);
01641     for (j = 0; j < wd; j++)
01642         scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1);
01643
01644     prevlines = NULL;
01645     prevxs = -1;
01646     sval = 0;
01647     for (i = 0; i < hd; i++) {
01648         lines = datas + srow[i] * wpls;
01649         lined = datad + i * wpld;
01650         if (lines != prevlines) {  /* make dest from new source row */
01651             for (j = 0; j < wd; j++) {
01652                 xs = scol[j];
01653                 if (xs != prevxs) {  /* get dest pix from source col */
01654                     if ((sval = GET_DATA_BIT(lines, xs)))
01655                         SET_DATA_BIT(lined, j);
01656                     prevxs = xs;
01657                 }
01658                 else {  /* copy prev dest pix, if set */
01659                     if (sval)
01660                         SET_DATA_BIT(lined, j);
01661                 }
01662             }
01663         }
01664         else {  /* lines == prevlines; copy prev dest row */
01665             prevlined = lined - wpld;
01666             memcpy((char *)lined, (char *)prevlined, bpld);
01667         }
01668         prevlines = lines;
01669     }
01670
01671     FREE(srow);
01672     FREE(scol);
01673
01674     return 0;
01675 }
01676
01677
01678 /*------------------------------------------------------------------*
01679  *                         Scale-to-gray 2x                         *
01680  *------------------------------------------------------------------*/
01681 /*!
01682  *  scaleToGray2Low()
01683  *
01684  *      Input:  usual image variables
01685  *              sumtab  (made from makeSumTabSG2())
01686  *              valtab  (made from makeValTabSG2())
01687  *      Return: 0 if OK; 1 on error.
01688  *
01689  *  The output is processed in sets of 4 output bytes on a row,
01690  *  corresponding to 4 2x2 bit-blocks in the input image.
01691  *  Two lookup tables are used.  The first, sumtab, gets the
01692  *  sum of ON pixels in 4 sets of two adjacent bits,
01693  *  storing the result in 4 adjacent bytes.  After sums from
01694  *  two rows have been added, the second table, valtab,
01695  *  converts from the sum of ON pixels in the 2x2 block to
01696  *  an 8 bpp grayscale value between 0 (for 4 bits ON)
01697  *  and 255 (for 0 bits ON).
01698  */
01699 void
01700 scaleToGray2Low(l_uint32  *datad,
01701                 l_int32    wd,
01702                 l_int32    hd,
01703                 l_int32    wpld,
01704                 l_uint32  *datas,
01705                 l_int32    wpls,
01706                 l_uint32  *sumtab,
01707                 l_uint8   *valtab)
01708 {
01709 l_int32    i, j, l, k, m, wd4, extra;
01710 l_uint32   sbyte1, sbyte2, sum;
01711 l_uint32  *lines, *lined;
01712
01713         /* i indexes the dest lines
01714          * l indexes the source lines
01715          * j indexes the dest bytes
01716          * k indexes the source bytes
01717          * We take two bytes from the source (in 2 lines of 8 pixels
01718          * each) and convert them into four 8 bpp bytes of the dest. */
01719     wd4 = wd & 0xfffffffc;
01720     extra = wd - wd4;
01721     for (i = 0, l = 0; i < hd; i++, l += 2) {
01722         lines = datas + l * wpls;
01723         lined = datad + i * wpld;
01724         for (j = 0, k = 0; j < wd4; j += 4, k++) {
01725             sbyte1 = GET_DATA_BYTE(lines, k);
01726             sbyte2 = GET_DATA_BYTE(lines + wpls, k);
01727             sum = sumtab[sbyte1] + sumtab[sbyte2];
01728             SET_DATA_BYTE(lined, j, valtab[sum >> 24]);
01729             SET_DATA_BYTE(lined, j + 1, valtab[(sum >> 16) & 0xff]);
01730             SET_DATA_BYTE(lined, j + 2, valtab[(sum >> 8) & 0xff]);
01731             SET_DATA_BYTE(lined, j + 3, valtab[sum & 0xff]);
01732         }
01733         if (extra > 0) {
01734             sbyte1 = GET_DATA_BYTE(lines, k);
01735             sbyte2 = GET_DATA_BYTE(lines + wpls, k);
01736             sum = sumtab[sbyte1] + sumtab[sbyte2];
01737             for (m = 0; m < extra; m++) {
01738                 SET_DATA_BYTE(lined, j + m,
01739                               valtab[((sum >> (24 - 8 * m)) & 0xff)]);
01740             }
01741         }
01742
01743     }
01744
01745     return;
01746 }
01747
01748
01749 /*!
01750  *  makeSumTabSG2()
01751  *
01752  *  Returns a table of 256 l_uint32s, giving the four output
01753  *  8-bit grayscale sums corresponding to 8 input bits of a binary
01754  *  image, for a 2x scale-to-gray op.  The sums from two
01755  *  adjacent scanlines are then added and transformed to
01756  *  output four 8 bpp pixel values, using makeValTabSG2().
01757  */
01758 l_uint32  *
01759 makeSumTabSG2(void)
01760 {
01761 l_int32    i;
01762 l_int32    sum[] = {0, 1, 1, 2};
01763 l_uint32  *tab;
01764
01765     PROCNAME("makeSumTabSG2");
01766
01767     if ((tab = (l_uint32 *)CALLOC(256, sizeof(l_uint32))) == NULL)
01768         return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
01769
01770         /* Pack the four sums separately in four bytes */
01771     for (i = 0; i < 256; i++) {
01772         tab[i] = (sum[i & 0x3] | sum[(i >> 2) & 0x3] << 8 |
01773                   sum[(i >> 4) & 0x3] << 16 | sum[(i >> 6) & 0x3] << 24);
01774     }
01775
01776     return tab;
01777 }
01778
01779
01780 /*!
01781  *  makeValTabSG2()
01782  *
01783  *  Returns an 8 bit value for the sum of ON pixels
01784  *  in a 2x2 square, according to
01785  *
01786  *         val = 255 - (255 * sum)/4
01787  *
01788  *  where sum is in set {0,1,2,3,4}
01789  */
01790 l_uint8 *
01791 makeValTabSG2(void)
01792 {
01793 l_int32   i;
01794 l_uint8  *tab;
01795
01796     PROCNAME("makeValTabSG2");
01797
01798     if ((tab = (l_uint8 *)CALLOC(5, sizeof(l_uint8))) == NULL)
01799         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
01800
01801     for (i = 0; i < 5; i++)
01802         tab[i] = 255 - (i * 255) / 4;
01803
01804     return tab;
01805 }
01806
01807
01808 /*------------------------------------------------------------------*
01809  *                         Scale-to-gray 3x                         *
01810  *------------------------------------------------------------------*/
01811 /*!
01812  *  scaleToGray3Low()
01813  *
01814  *      Input:  usual image variables
01815  *              sumtab  (made from makeSumTabSG3())
01816  *              valtab  (made from makeValTabSG3())
01817  *      Return: 0 if OK; 1 on error
01818  *
01819  *  Each set of 8 3x3 bit-blocks in the source image, which
01820  *  consist of 72 pixels arranged 24 pixels wide by 3 scanlines,
01821  *  is converted to a row of 8 8-bit pixels in the dest image.
01822  *  These 72 pixels of the input image are runs of 24 pixels
01823  *  in three adjacent scanlines.  Each run of 24 pixels is
01824  *  stored in the 24 LSbits of a 32-bit word.  We use 2 LUTs.
01825  *  The first, sumtab, takes 6 of these bits and stores
01826  *  sum, taken 3 bits at a time, in two bytes.  (See
01827  *  makeSumTabSG3).  This is done for each of the 3 scanlines,
01828  *  and the results are added.  We now have the sum of ON pixels
01829  *  in the first two 3x3 blocks in two bytes.  The valtab LUT
01830  *  then converts these values (which go from 0 to 9) to
01831  *  grayscale values between between 255 and 0.  (See makeValTabSG3).
01832  *  This process is repeated for each of the other 3 sets of
01833  *  6x3 input pixels, giving 8 output pixels in total.
01834  *
01835  *  Note: because the input image is processed in groups of
01836  *        24 x 3 pixels, the process clips the input height to
01837  *        (h - h % 3) and the input width to (w - w % 24).
01838  */
01839 void
01840 scaleToGray3Low(l_uint32  *datad,
01841                 l_int32    wd,
01842                 l_int32    hd,
01843                 l_int32    wpld,
01844                 l_uint32  *datas,
01845                 l_int32    wpls,
01846                 l_uint32  *sumtab,
01847                 l_uint8   *valtab)
01848 {
01849 l_int32    i, j, l, k;
01850 l_uint32   threebytes1, threebytes2, threebytes3, sum;
01851 l_uint32  *lines, *lined;
01852
01853         /* i indexes the dest lines
01854          * l indexes the source lines
01855          * j indexes the dest bytes
01856          * k indexes the source bytes
01857          * We take 9 bytes from the source (72 binary pixels
01858          * in three lines of 24 pixels each) and convert it
01859          * into 8 bytes of the dest (8 8bpp pixels in one line)   */
01860     for (i = 0, l = 0; i < hd; i++, l += 3) {
01861         lines = datas + l * wpls;
01862         lined = datad + i * wpld;
01863         for (j = 0, k = 0; j < wd; j += 8, k += 3) {
01864             threebytes1 = (GET_DATA_BYTE(lines, k) << 16) |
01865                           (GET_DATA_BYTE(lines, k + 1) << 8) |
01866                           GET_DATA_BYTE(lines, k + 2);
01867             threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) |
01868                           (GET_DATA_BYTE(lines + wpls, k + 1) << 8) |
01869                           GET_DATA_BYTE(lines + wpls, k + 2);
01870             threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) |
01871                           (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) |
01872                           GET_DATA_BYTE(lines + 2 * wpls, k + 2);
01873
01874             sum = sumtab[(threebytes1 >> 18)] +
01875                   sumtab[(threebytes2 >> 18)] +
01876                   sumtab[(threebytes3 >> 18)];
01877             SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]);
01878             SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
01879
01880             sum = sumtab[((threebytes1 >> 12) & 0x3f)] +
01881                   sumtab[((threebytes2 >> 12) & 0x3f)] +
01882                   sumtab[((threebytes3 >> 12) & 0x3f)];
01883             SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 2)]);
01884             SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]);
01885
01886             sum = sumtab[((threebytes1 >> 6) & 0x3f)] +
01887                   sumtab[((threebytes2 >> 6) & 0x3f)] +
01888                   sumtab[((threebytes3 >> 6) & 0x3f)];
01889             SET_DATA_BYTE(lined, j + 4, valtab[GET_DATA_BYTE(&sum, 2)]);
01890             SET_DATA_BYTE(lined, j + 5, valtab[GET_DATA_BYTE(&sum, 3)]);
01891
01892             sum = sumtab[(threebytes1 & 0x3f)] +
01893                   sumtab[(threebytes2 & 0x3f)] +
01894                   sumtab[(threebytes3 & 0x3f)];
01895             SET_DATA_BYTE(lined, j + 6, valtab[GET_DATA_BYTE(&sum, 2)]);
01896             SET_DATA_BYTE(lined, j + 7, valtab[GET_DATA_BYTE(&sum, 3)]);
01897         }
01898     }
01899
01900     return;
01901 }
01902
01903
01904
01905 /*!
01906  *  makeSumTabSG3()
01907  *
01908  *  Returns a table of 64 l_uint32s, giving the two output
01909  *  8-bit grayscale sums corresponding to 6 input bits of a binary
01910  *  image, for a 3x scale-to-gray op.  In practice, this would
01911  *  be used three times (on adjacent scanlines), and the sums would
01912  *  be added and then transformed to output 8 bpp pixel values,
01913  *  using makeValTabSG3().
01914  */
01915 l_uint32  *
01916 makeSumTabSG3(void)
01917 {
01918 l_int32    i;
01919 l_int32    sum[] = {0, 1, 1, 2, 1, 2, 2, 3};
01920 l_uint32  *tab;
01921
01922     PROCNAME("makeSumTabSG3");
01923
01924     if ((tab = (l_uint32 *)CALLOC(64, sizeof(l_uint32))) == NULL)
01925         return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
01926
01927         /* Pack the two sums separately in two bytes */
01928     for (i = 0; i < 64; i++) {
01929         tab[i] = (sum[i & 0x07]) | (sum[(i >> 3) & 0x07] << 8);
01930     }
01931
01932     return tab;
01933 }
01934
01935
01936 /*!
01937  *  makeValTabSG3()
01938  *
01939  *  Returns an 8 bit value for the sum of ON pixels
01940  *  in a 3x3 square, according to
01941  *      val = 255 - (255 * sum)/9
01942  *  where sum is in set {0, ... ,9}
01943  */
01944 l_uint8 *
01945 makeValTabSG3(void)
01946 {
01947 l_int32   i;
01948 l_uint8  *tab;
01949
01950     PROCNAME("makeValTabSG3");
01951
01952     if ((tab = (l_uint8 *)CALLOC(10, sizeof(l_uint8))) == NULL)
01953         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
01954
01955     for (i = 0; i < 10; i++)
01956         tab[i] = 0xff - (i * 255) / 9;
01957
01958     return tab;
01959 }
01960
01961
01962 /*------------------------------------------------------------------*
01963  *                         Scale-to-gray 4x                         *
01964  *------------------------------------------------------------------*/
01965 /*!
01966  *  scaleToGray4Low()
01967  *
01968  *      Input:  usual image variables
01969  *              sumtab  (made from makeSumTabSG4())
01970  *              valtab  (made from makeValTabSG4())
01971  *      Return: 0 if OK; 1 on error.
01972  *
01973  *  The output is processed in sets of 2 output bytes on a row,
01974  *  corresponding to 2 4x4 bit-blocks in the input image.
01975  *  Two lookup tables are used.  The first, sumtab, gets the
01976  *  sum of ON pixels in two sets of four adjacent bits,
01977  *  storing the result in 2 adjacent bytes.  After sums from
01978  *  four rows have been added, the second table, valtab,
01979  *  converts from the sum of ON pixels in the 4x4 block to
01980  *  an 8 bpp grayscale value between 0 (for 16 bits ON)
01981  *  and 255 (for 0 bits ON).
01982  */
01983 void
01984 scaleToGray4Low(l_uint32  *datad,
01985                 l_int32    wd,
01986                 l_int32    hd,
01987                 l_int32    wpld,
01988                 l_uint32  *datas,
01989                 l_int32    wpls,
01990                 l_uint32  *sumtab,
01991                 l_uint8   *valtab)
01992 {
01993 l_int32    i, j, l, k;
01994 l_uint32   sbyte1, sbyte2, sbyte3, sbyte4, sum;
01995 l_uint32  *lines, *lined;
01996
01997         /* i indexes the dest lines
01998          * l indexes the source lines
01999          * j indexes the dest bytes
02000          * k indexes the source bytes
02001          * We take four bytes from the source (in 4 lines of 8 pixels
02002          * each) and convert it into two 8 bpp bytes of the dest. */
02003     for (i = 0, l = 0; i < hd; i++, l += 4) {
02004         lines = datas + l * wpls;
02005         lined = datad + i * wpld;
02006         for (j = 0, k = 0; j < wd; j += 2, k++) {
02007             sbyte1 = GET_DATA_BYTE(lines, k);
02008             sbyte2 = GET_DATA_BYTE(lines + wpls, k);
02009             sbyte3 = GET_DATA_BYTE(lines + 2 * wpls, k);
02010             sbyte4 = GET_DATA_BYTE(lines + 3 * wpls, k);
02011             sum = sumtab[sbyte1] + sumtab[sbyte2] +
02012                   sumtab[sbyte3] + sumtab[sbyte4];
02013             SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]);
02014             SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
02015         }
02016     }
02017
02018     return;
02019 }
02020
02021
02022 /*!
02023  *  makeSumTabSG4()
02024  *
02025  *  Returns a table of 256 l_uint32s, giving the two output
02026  *  8-bit grayscale sums corresponding to 8 input bits of a binary
02027  *  image, for a 4x scale-to-gray op.  The sums from four
02028  *  adjacent scanlines are then added and transformed to
02029  *  output 8 bpp pixel values, using makeValTabSG4().
02030  */
02031 l_uint32  *
02032 makeSumTabSG4(void)
02033 {
02034 l_int32    i;
02035 l_int32    sum[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
02036 l_uint32  *tab;
02037
02038     PROCNAME("makeSumTabSG4");
02039
02040     if ((tab = (l_uint32 *)CALLOC(256, sizeof(l_uint32))) == NULL)
02041         return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
02042
02043         /* Pack the two sums separately in two bytes */
02044     for (i = 0; i < 256; i++) {
02045         tab[i] = (sum[i & 0xf]) | (sum[(i >> 4) & 0xf] << 8);
02046     }
02047
02048     return tab;
02049 }
02050
02051
02052 /*!
02053  *  makeValTabSG4()
02054  *
02055  *  Returns an 8 bit value for the sum of ON pixels
02056  *  in a 4x4 square, according to
02057  *
02058  *         val = 255 - (255 * sum)/16
02059  *
02060  *  where sum is in set {0, ... ,16}
02061  */
02062 l_uint8 *
02063 makeValTabSG4(void)
02064 {
02065 l_int32   i;
02066 l_uint8  *tab;
02067
02068     PROCNAME("makeValTabSG4");
02069
02070     if ((tab = (l_uint8 *)CALLOC(17, sizeof(l_uint8))) == NULL)
02071         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
02072
02073     for (i = 0; i < 17; i++)
02074         tab[i] = 0xff - (i * 255) / 16;
02075
02076     return tab;
02077 }
02078
02079
02080 /*------------------------------------------------------------------*
02081  *                         Scale-to-gray 6x                         *
02082  *------------------------------------------------------------------*/
02083 /*!
02084  *  scaleToGray6Low()
02085  *
02086  *      Input:  usual image variables
02087  *              tab8  (made from makePixelSumTab8())
02088  *              valtab  (made from makeValTabSG6())
02089  *      Return: 0 if OK; 1 on error
02090  *
02091  *  Each set of 4 6x6 bit-blocks in the source image, which
02092  *  consist of 144 pixels arranged 24 pixels wide by 6 scanlines,
02093  *  is converted to a row of 4 8-bit pixels in the dest image.
02094  *  These 144 pixels of the input image are runs of 24 pixels
02095  *  in six adjacent scanlines.  Each run of 24 pixels is
02096  *  stored in the 24 LSbits of a 32-bit word.  We use 2 LUTs.
02097  *  The first, tab8, takes 6 of these bits and stores
02098  *  sum in one byte.  This is done for each of the 6 scanlines,
02099  *  and the results are added.
02100  *  We now have the sum of ON pixels in the first 6x6 block.  The
02101  *  valtab LUT then converts these values (which go from 0 to 36) to
02102  *  grayscale values between between 255 and 0.  (See makeValTabSG6).
02103  *  This process is repeated for each of the other 3 sets of
02104  *  6x6 input pixels, giving 4 output pixels in total.
02105  *
02106  *  Note: because the input image is processed in groups of
02107  *        24 x 6 pixels, the process clips the input height to
02108  *        (h - h % 6) and the input width to (w - w % 24).
02109  *
02110  */
02111 void
02112 scaleToGray6Low(l_uint32  *datad,
02113                 l_int32    wd,
02114                 l_int32    hd,
02115                 l_int32    wpld,
02116                 l_uint32  *datas,
02117                 l_int32    wpls,
02118                 l_int32   *tab8,
02119                 l_uint8   *valtab)
02120 {
02121 l_int32    i, j, l, k;
02122 l_uint32   threebytes1, threebytes2, threebytes3;
02123 l_uint32   threebytes4, threebytes5, threebytes6, sum;
02124 l_uint32  *lines, *lined;
02125
02126         /* i indexes the dest lines
02127          * l indexes the source lines
02128          * j indexes the dest bytes
02129          * k indexes the source bytes
02130          * We take 18 bytes from the source (144 binary pixels
02131          * in six lines of 24 pixels each) and convert it
02132          * into 4 bytes of the dest (four 8 bpp pixels in one line)   */
02133     for (i = 0, l = 0; i < hd; i++, l += 6) {
02134         lines = datas + l * wpls;
02135         lined = datad + i * wpld;
02136         for (j = 0, k = 0; j < wd; j += 4, k += 3) {
02137                 /* First grab the 18 bytes, 3 at a time, and put each set
02138                  * of 3 bytes into the LS bytes of a 32-bit word. */
02139             threebytes1 = (GET_DATA_BYTE(lines, k) << 16) |
02140                           (GET_DATA_BYTE(lines, k + 1) << 8) |
02141                           GET_DATA_BYTE(lines, k + 2);
02142             threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) |
02143                           (GET_DATA_BYTE(lines + wpls, k + 1) << 8) |
02144                           GET_DATA_BYTE(lines + wpls, k + 2);
02145             threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) |
02146                           (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) |
02147                           GET_DATA_BYTE(lines + 2 * wpls, k + 2);
02148             threebytes4 = (GET_DATA_BYTE(lines + 3 * wpls, k) << 16) |
02149                           (GET_DATA_BYTE(lines + 3 * wpls, k + 1) << 8) |
02150                           GET_DATA_BYTE(lines + 3 * wpls, k + 2);
02151             threebytes5 = (GET_DATA_BYTE(lines + 4 * wpls, k) << 16) |
02152                           (GET_DATA_BYTE(lines + 4 * wpls, k + 1) << 8) |
02153                           GET_DATA_BYTE(lines + 4 * wpls, k + 2);
02154             threebytes6 = (GET_DATA_BYTE(lines + 5 * wpls, k) << 16) |
02155                           (GET_DATA_BYTE(lines + 5 * wpls, k + 1) << 8) |
02156                           GET_DATA_BYTE(lines + 5 * wpls, k + 2);
02157
02158                 /* Sum first set of 36 bits and convert to 0-255 */
02159             sum = tab8[(threebytes1 >> 18)] +
02160                   tab8[(threebytes2 >> 18)] +
02161                   tab8[(threebytes3 >> 18)] +
02162                   tab8[(threebytes4 >> 18)] +
02163                   tab8[(threebytes5 >> 18)] +
02164                    tab8[(threebytes6 >> 18)];
02165             SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 3)]);
02166
02167                 /* Ditto for second set */
02168             sum = tab8[((threebytes1 >> 12) & 0x3f)] +
02169                   tab8[((threebytes2 >> 12) & 0x3f)] +
02170                   tab8[((threebytes3 >> 12) & 0x3f)] +
02171                   tab8[((threebytes4 >> 12) & 0x3f)] +
02172                   tab8[((threebytes5 >> 12) & 0x3f)] +
02173                   tab8[((threebytes6 >> 12) & 0x3f)];
02174             SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
02175
02176             sum = tab8[((threebytes1 >> 6) & 0x3f)] +
02177                   tab8[((threebytes2 >> 6) & 0x3f)] +
02178                   tab8[((threebytes3 >> 6) & 0x3f)] +
02179                   tab8[((threebytes4 >> 6) & 0x3f)] +
02180                   tab8[((threebytes5 >> 6) & 0x3f)] +
02181                   tab8[((threebytes6 >> 6) & 0x3f)];
02182             SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 3)]);
02183
02184             sum = tab8[(threebytes1 & 0x3f)] +
02185                   tab8[(threebytes2 & 0x3f)] +
02186                   tab8[(threebytes3 & 0x3f)] +
02187                   tab8[(threebytes4 & 0x3f)] +
02188                   tab8[(threebytes5 & 0x3f)] +
02189                   tab8[(threebytes6 & 0x3f)];
02190             SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]);
02191         }
02192     }
02193
02194     return;
02195 }
02196
02197
02198 /*!
02199  *  makeValTabSG6()
02200  *
02201  *  Returns an 8 bit value for the sum of ON pixels
02202  *  in a 6x6 square, according to
02203  *      val = 255 - (255 * sum)/36
02204  *  where sum is in set {0, ... ,36}
02205  */
02206 l_uint8 *
02207 makeValTabSG6(void)
02208 {
02209 l_int32   i;
02210 l_uint8  *tab;
02211
02212     PROCNAME("makeValTabSG6");
02213
02214     if ((tab = (l_uint8 *)CALLOC(37, sizeof(l_uint8))) == NULL)
02215         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
02216
02217     for (i = 0; i < 37; i++)
02218         tab[i] = 0xff - (i * 255) / 36;
02219
02220     return tab;
02221 }
02222
02223
02224 /*------------------------------------------------------------------*
02225  *                         Scale-to-gray 8x                         *
02226  *------------------------------------------------------------------*/
02227 /*!
02228  *  scaleToGray8Low()
02229  *
02230  *      Input:  usual image variables
02231  *              tab8  (made from makePixelSumTab8())
02232  *              valtab  (made from makeValTabSG8())
02233  *      Return: 0 if OK; 1 on error.
02234  *
02235  *  The output is processed one dest byte at a time,
02236  *  corresponding to 8 rows of src bytes in the input image.
02237  *  Two lookup tables are used.  The first, tab8, gets the
02238  *  sum of ON pixels in a byte.  After sums from 8 rows have
02239  *  been added, the second table, valtab, converts from this
02240  *  value (which is between 0 and 64) to an 8 bpp grayscale
02241  *  value between 0 (for all 64 bits ON) and 255 (for 0 bits ON).
02242  */
02243 void
02244 scaleToGray8Low(l_uint32  *datad,
02245                 l_int32    wd,
02246                 l_int32    hd,
02247                 l_int32    wpld,
02248                 l_uint32  *datas,
02249                 l_int32    wpls,
02250                 l_int32   *tab8,
02251                 l_uint8   *valtab)
02252 {
02253 l_int32    i, j, k;
02254 l_int32    sbyte0, sbyte1, sbyte2, sbyte3, sbyte4, sbyte5, sbyte6, sbyte7, sum;
02255 l_uint32  *lines, *lined;
02256
02257         /* i indexes the dest lines
02258          * k indexes the source lines
02259          * j indexes the src and dest bytes
02260          * We take 8 bytes from the source (in 8 lines of 8 pixels
02261          * each) and convert it into one 8 bpp byte of the dest. */
02262     for (i = 0, k = 0; i < hd; i++, k += 8) {
02263         lines = datas + k * wpls;
02264         lined = datad + i * wpld;
02265         for (j = 0; j < wd; j++) {
02266             sbyte0 = GET_DATA_BYTE(lines, j);
02267             sbyte1 = GET_DATA_BYTE(lines + wpls, j);
02268             sbyte2 = GET_DATA_BYTE(lines + 2 * wpls, j);
02269             sbyte3 = GET_DATA_BYTE(lines + 3 * wpls, j);
02270             sbyte4 = GET_DATA_BYTE(lines + 4 * wpls, j);
02271             sbyte5 = GET_DATA_BYTE(lines + 5 * wpls, j);
02272             sbyte6 = GET_DATA_BYTE(lines + 6 * wpls, j);
02273             sbyte7 = GET_DATA_BYTE(lines + 7 * wpls, j);
02274             sum = tab8[sbyte0] + tab8[sbyte1] +
02275                   tab8[sbyte2] + tab8[sbyte3] +
02276                   tab8[sbyte4] + tab8[sbyte5] +
02277                   tab8[sbyte6] + tab8[sbyte7];
02278             SET_DATA_BYTE(lined, j, valtab[sum]);
02279         }
02280     }
02281
02282     return;
02283 }
02284
02285
02286 /*!
02287  *  makeValTabSG8()
02288  *
02289  *  Returns an 8 bit value for the sum of ON pixels
02290  *  in an 8x8 square, according to
02291  *      val = 255 - (255 * sum)/64
02292  *  where sum is in set {0, ... ,64}
02293  */
02294 l_uint8 *
02295 makeValTabSG8(void)
02296 {
02297 l_int32   i;
02298 l_uint8  *tab;
02299
02300     PROCNAME("makeValTabSG8");
02301
02302     if ((tab = (l_uint8 *)CALLOC(65, sizeof(l_uint8))) == NULL)
02303         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
02304
02305     for (i = 0; i < 65; i++)
02306         tab[i] = 0xff - (i * 255) / 64;
02307
02308     return tab;
02309 }
02310
02311
02312 /*------------------------------------------------------------------*
02313  *                         Scale-to-gray 16x                        *
02314  *------------------------------------------------------------------*/
02315 /*!
02316  *  scaleToGray16Low()
02317  *
02318  *      Input:  usual image variables
02319  *              tab8  (made from makePixelSumTab8())
02320  *      Return: 0 if OK; 1 on error.
02321  *
02322  *  The output is processed one dest byte at a time, corresponding
02323  *  to 16 rows consisting each of 2 src bytes in the input image.
02324  *  This uses one lookup table, tab8, which gives the sum of
02325  *  ON pixels in a byte.  After summing for all ON pixels in the
02326  *  32 src bytes, which is between 0 and 256, this is converted
02327  *  to an 8 bpp grayscale value between 0 (for 255 or 256 bits ON)
02328  *  and 255 (for 0 bits ON).
02329  */
02330 void
02331 scaleToGray16Low(l_uint32  *datad,
02332                   l_int32    wd,
02333                  l_int32    hd,
02334                  l_int32    wpld,
02335                  l_uint32  *datas,
02336                  l_int32    wpls,
02337                  l_int32   *tab8)
02338 {
02339 l_int32    i, j, k, m;
02340 l_int32    sum;
02341 l_uint32  *lines, *lined;
02342
02343         /* i indexes the dest lines
02344          * k indexes the source lines
02345          * j indexes the dest bytes
02346          * m indexes the src bytes
02347          * We take 32 bytes from the source (in 16 lines of 16 pixels
02348          * each) and convert it into one 8 bpp byte of the dest. */
02349     for (i = 0, k = 0; i < hd; i++, k += 16) {
02350         lines = datas + k * wpls;
02351         lined = datad + i * wpld;
02352         for (j = 0; j < wd; j++) {
02353             m = 2 * j;
02354             sum = tab8[GET_DATA_BYTE(lines, m)];
02355             sum += tab8[GET_DATA_BYTE(lines, m + 1)];
02356             sum += tab8[GET_DATA_BYTE(lines + wpls, m)];
02357             sum += tab8[GET_DATA_BYTE(lines + wpls, m + 1)];
02358             sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m)];
02359             sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m + 1)];
02360             sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m)];
02361             sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m + 1)];
02362             sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m)];
02363             sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m + 1)];
02364             sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m)];
02365             sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m + 1)];
02366             sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m)];
02367             sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m + 1)];
02368             sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m)];
02369             sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m + 1)];
02370             sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m)];
02371             sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m + 1)];
02372             sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m)];
02373             sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m + 1)];
02374             sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m)];
02375             sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m + 1)];
02376             sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m)];
02377             sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m + 1)];
02378             sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m)];
02379             sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m + 1)];
02380             sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m)];
02381             sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m + 1)];
02382             sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m)];
02383             sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m + 1)];
02384             sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m)];
02385             sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m + 1)];
02386             sum = L_MIN(sum, 255);
02387             SET_DATA_BYTE(lined, j, 255 - sum);
02388         }
02389     }
02390
02391     return;
02392 }
02393
02394
02395
02396 /*------------------------------------------------------------------*
02397  *                         Grayscale mipmap                         *
02398  *------------------------------------------------------------------*/
02399 /*!
02400  *  scaleMipmapLow()
02401  *
02402  *  See notes in scale.c for pixScaleToGrayMipmap().  This function
02403  *  is here for pedagogical reasons.  It gives poor results on document
02404  *  images because of aliasing.
02405  */
02406 l_int32
02407 scaleMipmapLow(l_uint32  *datad,
02408                l_int32    wd,
02409                l_int32    hd,
02410                l_int32    wpld,
02411                l_uint32  *datas1,
02412                l_int32    wpls1,
02413                l_uint32  *datas2,
02414                l_int32    wpls2,
02415                l_float32  red)
02416 {
02417 l_int32    i, j, val1, val2, val, row2, col2;
02418 l_int32   *srow, *scol;
02419 l_uint32  *lines1, *lines2, *lined;
02420 l_float32  ratio, w1, w2;
02421
02422     PROCNAME("scaleMipmapLow");
02423
02424         /* Clear dest */
02425     memset((char *)datad, 0, 4 * wpld * hd);
02426
02427         /* Each dest pixel at (j,i) is computed by interpolating
02428            between the two src images at the corresponding location.
02429            We store the UL corner locations of the square of
02430            src pixels in thelower-resolution image that correspond
02431            to dest pixel (j,i).  The are labelled by the arrays
02432            srow[i], scol[j].  The UL corner locations of the higher
02433            resolution src pixels are obtained from these arrays
02434            by multiplying by 2. */
02435     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
02436         return ERROR_INT("srow not made", procName, 1);
02437     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
02438         return ERROR_INT("scol not made", procName, 1);
02439     ratio = 1. / (2. * red);  /* 0.5 for red = 1, 1 for red = 0.5 */
02440     for (i = 0; i < hd; i++)
02441         srow[i] = (l_int32)(ratio * i);
02442     for (j = 0; j < wd; j++)
02443         scol[j] = (l_int32)(ratio * j);
02444
02445         /* Get weights for linear interpolation: these are the
02446          * 'distances' of the dest image plane from the two
02447          * src image planes. */
02448     w1 = 2. * red - 1.;   /* w1 --> 1 as red --> 1 */
02449     w2 = 1. - w1;
02450
02451         /* For each dest pixel, compute linear interpolation */
02452     for (i = 0; i < hd; i++) {
02453         row2 = srow[i];
02454         lines1 = datas1 + 2 * row2 * wpls1;
02455         lines2 = datas2 + row2 * wpls2;
02456         lined = datad + i * wpld;
02457         for (j = 0; j < wd; j++) {
02458             col2 = scol[j];
02459             val1 = GET_DATA_BYTE(lines1, 2 * col2);
02460             val2 = GET_DATA_BYTE(lines2, col2);
02461             val = (l_int32)(w1 * val1 + w2 * val2);
02462             SET_DATA_BYTE(lined, j, val);
02463         }
02464     }
02465
02466     FREE(srow);
02467     FREE(scol);
02468     return 0;
02469 }