Leptonica 1.68
C Image Processing Library

textops.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 
00017 /*
00018  *  textops.c
00019  *
00020  *    Font layout
00021  *       PIX             *pixAddSingleTextblock()
00022  *       l_int32          pixSetTextblock()
00023  *       l_int32          pixSetTextline()
00024  *
00025  *    Text size estimation and partitioning
00026  *       SARRAY          *bmfGetLineStrings()
00027  *       NUMA            *bmfGetWordWidths()
00028  *       l_int32          bmfGetStringWidth()
00029  *
00030  *    Text splitting
00031  *       SARRAY          *splitStringToParagraphs()
00032  *       static l_int32   stringAllWhitespace() 
00033  *       static l_int32   stringLeadingWhitespace() 
00034  *
00035  *    This is a simple utility to put text on images.  One font and style
00036  *    is provided, with a variety of pt sizes.  For example, to put a
00037  *    line of green 10 pt text on an image, with the beginning baseline
00038  *    at (50, 50):
00039  *        L_BMF  *bmf = bmfCreate("./fonts", 10);
00040  *        const char *textstr = "This is a funny cat";
00041  *        pixSetTextline(pixs, bmf, textstr, 0x00ff0000, 50, 50, NULL, NULL);
00042  *
00043  *    A simpler interface for adding text to an image is pixAddSingleTextblock().
00044  *    See prog/writetext_reg.c for examples of its use.
00045  */
00046 
00047 #include <string.h>
00048 #include "allheaders.h"
00049 
00050 static l_int32 stringAllWhitespace(char *textstr, l_int32 *pval);
00051 static l_int32 stringLeadingWhitespace(char *textstr, l_int32 *pval);
00052 
00053 
00054 /*---------------------------------------------------------------------*
00055  *                                 Font layout                         *
00056  *---------------------------------------------------------------------*/
00057 /*!
00058  *  pixAddSingleTextblock()
00059  *
00060  *      Input:  pixs (input pix; colormap ok)
00061  *              bmf (bitmap font data)
00062  *              textstr (<optional> text string to be added)
00063  *              val (color to set the text)
00064  *              location (L_ADD_ABOVE, L_ADD_AT_TOP, L_ADD_AT_BOTTOM,
00065  *                        L_ADD_BELOW)
00066  *              &overflow (<optional return> 1 if text overflows
00067  *                         allocated region and is clipped; 0 otherwise)
00068  *      Return: pixd (new pix with rendered text), or null on error
00069  *
00070  *  Notes:
00071  *      (1) This function paints a set of lines of text over an image.
00072  *          If @location is L_ADD_ABOVE or L_ADD_BELOW, the pix size
00073  *          is expanded with a border and rendered over the border.
00074  *      (2) @val is the pixel value to be painted through the font mask.
00075  *          It should be chosen to agree with the depth of pixs.
00076  *          If it is out of bounds, an intermediate value is chosen.
00077  *          For RGB, use hex notation: 0xRRGGBB00, where RR is the
00078  *          hex representation of the red intensity, etc.
00079  *      (3) If textstr == NULL, use the text field in the pix.
00080  *      (4) If there is a colormap, this does the best it can to use
00081  *          the requested color, or something similar to it.
00082  *      (5) Typical usage is for labelling a pix with some text data.
00083  */
00084 PIX *
00085 pixAddSingleTextblock(PIX         *pixs,
00086                       L_BMF       *bmf,
00087                       const char  *textstr,
00088                       l_uint32     val,
00089                       l_int32      location,
00090                       l_int32     *poverflow)
00091 {
00092 char     *linestr;
00093 l_int32   w, h, d, i, y, xstart, ystart, extra, spacer, rval, gval, bval;
00094 l_int32   nlines, htext, ovf, overflow, offset, index;
00095 l_uint32  textcolor;
00096 PIX      *pixd;
00097 PIXCMAP  *cmap, *cmapd;
00098 SARRAY   *salines;
00099 
00100     PROCNAME("pixAddSingleTextblock");
00101 
00102     if (!pixs)
00103         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
00104     if (!bmf)
00105         return (PIX *)ERROR_PTR("bmf not defined", procName, NULL);
00106     if (val < 0)
00107         return (PIX *)ERROR_PTR("val must be >= 0", procName, NULL);
00108     if (location != L_ADD_ABOVE && location != L_ADD_AT_TOP &&
00109         location != L_ADD_AT_BOTTOM && location != L_ADD_BELOW)
00110         return (PIX *)ERROR_PTR("invalid location", procName, NULL);
00111 
00112     if (!textstr)
00113         textstr = pixGetText(pixs);
00114     if (!textstr) {
00115         L_ERROR("no textstring defined", procName);
00116         return pixCopy(NULL, pixs);
00117     }
00118 
00119         /* Make sure the "color" value for the text will work
00120          * for the pix.  If the pix is not colormapped and the
00121          * value is out of range, set it to mid-range. */
00122     pixGetDimensions(pixs, &w, &h, &d);
00123     cmap = pixGetColormap(pixs);
00124     if (d == 1 && val > 1)
00125         val = 1;
00126     else if (d == 2 && val > 3 && !cmap)
00127         val = 2;
00128     else if (d == 4 && val > 15 && !cmap)
00129         val = 8;
00130     else if (d == 8 && val > 0xff && !cmap)
00131         val = 128;
00132     else if (d == 16 && val > 0xffff)
00133         val = 0x8000;
00134     else if (d == 32 && val < 256)
00135         val = 0x80808000;
00136 
00137     xstart = (l_int32)(0.1 * w);
00138     salines = bmfGetLineStrings(bmf, textstr, w - 2 * xstart, 0, &htext);
00139     if (!salines)
00140         return (PIX *)ERROR_PTR("line string sa not made", procName, NULL);
00141     nlines = sarrayGetCount(salines);
00142 
00143         /* Add white border if required */
00144     spacer = 10;  /* pixels away from image boundary or added border */
00145     if (location == L_ADD_ABOVE || location == L_ADD_BELOW) {
00146         extra = htext + 2 * spacer;
00147         pixd = pixCreate(w, h + extra, d);
00148         if (cmap) {
00149             cmapd = pixcmapCopy(cmap);
00150             pixSetColormap(pixd, cmapd);
00151         }
00152         pixSetBlackOrWhite(pixd, L_BRING_IN_WHITE);
00153         if (location == L_ADD_ABOVE)
00154             pixRasterop(pixd, 0, extra, w, h, PIX_SRC, pixs, 0, 0);
00155         else  /* add below */
00156             pixRasterop(pixd, 0, 0, w, h, PIX_SRC, pixs, 0, 0);
00157     }
00158     else
00159         pixd = pixCopy(NULL, pixs);
00160     cmapd = pixGetColormap(pixd);
00161 
00162         /* bmf->baselinetab[93] is the approximate distance from
00163          * the top of the tallest character to the baseline.  93 was chosen
00164          * at random, as all the baselines are essentially equal for
00165          * each character in a font. */
00166     offset = bmf->baselinetab[93];
00167     if (location == L_ADD_ABOVE || location == L_ADD_AT_TOP)
00168         ystart = offset + spacer;
00169     else if (location == L_ADD_AT_BOTTOM)
00170         ystart = h - htext - spacer + offset;
00171     else   /* add below */
00172         ystart = h + offset + spacer;
00173 
00174         /* If cmapped, add the color if necessary to the cmap.  If the
00175          * cmap is full, use the nearest color to the requested color. */
00176     if (cmapd) {
00177         extractRGBValues(val, &rval, &gval, &bval);
00178         pixcmapAddNearestColor(cmapd, rval, gval, bval, &index);
00179         pixcmapGetColor(cmapd, index, &rval, &gval, &bval);
00180         composeRGBPixel(rval, gval, bval, &textcolor);
00181     } else
00182         textcolor = val;
00183 
00184         /* Keep track of overflow condition on line width */
00185     overflow = 0;
00186     for (i = 0, y = ystart; i < nlines; i++) {
00187         linestr = sarrayGetString(salines, i, 0);
00188         pixSetTextline(pixd, bmf, linestr, textcolor,
00189                        xstart, y, NULL, &ovf);
00190         y += bmf->lineheight + bmf->vertlinesep;
00191         if (ovf)
00192             overflow = 1;
00193     }
00194 
00195        /* Also consider vertical overflow where there is too much text to
00196         * fit inside the image: the cases L_ADD_AT_TOP and L_ADD_AT_BOTTOM.
00197         *  The text requires a total of htext + 2 * spacer vertical pixels. */
00198     if (location == L_ADD_AT_TOP || location == L_ADD_AT_BOTTOM) {
00199         if (h < htext + 2 * spacer)
00200             overflow = 1;
00201     }
00202     if (poverflow)
00203         *poverflow = overflow;
00204 
00205     sarrayDestroy(&salines);
00206     return pixd;
00207 }
00208 
00209 
00210 /*!
00211  *  pixSetTextblock()
00212  *
00213  *      Input:  pixs (input image)
00214  *              bmf (bitmap font data)
00215  *              textstr (block text string to be set)
00216  *              val (color to set the text)
00217  *              x0 (left edge for each line of text)
00218  *              y0 (baseline location for the first text line)
00219  *              wtext (max width of each line of generated text)
00220  *              firstindent (indentation of first line, in x-widths)
00221  *              &overflow (<optional return> 0 if text is contained in
00222  *                         input pix; 1 if it is clipped)
00223  *      Return: 0 if OK, 1 on error
00224  *
00225  *  Notes:
00226  *      (1) This function paints a set of lines of text over an image.
00227  *      (2) @val is the pixel value to be painted through the font mask.
00228  *          It should be chosen to agree with the depth of pixs.
00229  *          If it is out of bounds, an intermediate value is chosen.
00230  *          For RGB, use hex notation: 0xRRGGBB00, where RR is the
00231  *          hex representation of the red intensity, etc.
00232  *          The last two hex digits are 00 (byte value 0), assigned to
00233  *          the A component.  Note that, as usual, RGBA proceeds from 
00234  *          left to right in the order from MSB to LSB (see pix.h
00235  *          for details).
00236  *      (3) If there is a colormap, this does the best it can to use
00237  *          the requested color, or something similar to it.
00238  */
00239 l_int32
00240 pixSetTextblock(PIX         *pixs,
00241                 L_BMF       *bmf,
00242                 const char  *textstr,
00243                 l_uint32     val,
00244                 l_int32      x0,
00245                 l_int32      y0,
00246                 l_int32      wtext,
00247                 l_int32      firstindent,
00248                 l_int32     *poverflow)
00249 {
00250 char     *linestr;
00251 l_int32   d, h, i, w, x, y, nlines, htext, xwidth, wline, ovf, overflow;
00252 SARRAY   *salines;
00253 PIXCMAP  *cmap;
00254 
00255     PROCNAME("pixSetTextblock");
00256 
00257     if (!pixs)
00258         return ERROR_INT("pixs not defined", procName, 1);
00259     if (!bmf)
00260         return ERROR_INT("bmf not defined", procName, 1);
00261     if (!textstr)
00262         return ERROR_INT("textstr not defined", procName, 1);
00263     if (val < 0)
00264         return ERROR_INT("val must be >= 0", procName, 1);
00265 
00266         /* Make sure the "color" value for the text will work
00267          * for the pix.  If the pix is not colormapped and the
00268          * value is out of range, set it to mid-range. */
00269     pixGetDimensions(pixs, &w, &h, &d);
00270     cmap = pixGetColormap(pixs);
00271     if (d == 1 && val > 1)
00272         val = 1;
00273     else if (d == 2 && val > 3 && !cmap)
00274         val = 2;
00275     else if (d == 4 && val > 15 && !cmap)
00276         val = 8;
00277     else if (d == 8 && val > 0xff && !cmap)
00278         val = 128;
00279     else if (d == 16 && val > 0xffff)
00280         val = 0x8000;
00281     else if (d == 32 && val < 256)
00282         val = 0x80808000;
00283 
00284     if (w < x0 + wtext) {
00285         L_WARNING("reducing width of textblock", procName);
00286         wtext = w - x0 - w / 10;
00287         if (wtext <= 0)
00288             return ERROR_INT("wtext too small; no room for text", procName, 1);
00289     }
00290 
00291     salines = bmfGetLineStrings(bmf, textstr, wtext, firstindent, &htext);
00292     if (!salines)
00293         return ERROR_INT("line string sa not made", procName, 1);
00294     nlines = sarrayGetCount(salines);
00295     bmfGetWidth(bmf, 'x', &xwidth);
00296 
00297     y = y0;
00298     overflow = 0;
00299     for (i = 0; i < nlines; i++) {
00300         if (i == 0)
00301             x = x0 + firstindent * xwidth;
00302         else
00303             x = x0;
00304         linestr = sarrayGetString(salines, i, 0);
00305         pixSetTextline(pixs, bmf, linestr, val, x, y, &wline, &ovf);
00306         y += bmf->lineheight + bmf->vertlinesep;
00307         if (ovf)
00308             overflow = 1;
00309     }
00310 
00311        /* (y0 - baseline) is the top of the printed text.  Character
00312         * 93 was chosen at random, as all the baselines are essentially
00313         * equal for each character in a font. */
00314     if (h < y0 - bmf->baselinetab[93] + htext)
00315         overflow = 1;
00316     if (poverflow)
00317         *poverflow = overflow;
00318 
00319     sarrayDestroy(&salines);
00320     return 0;
00321 }
00322 
00323 
00324 /*!
00325  *  pixSetTextline()
00326  *
00327  *      Input:  pixs (input image)
00328  *              bmf (bitmap font data)
00329  *              textstr (text string to be set on the line)
00330  *              val (color to set the text)
00331  *              x0 (left edge for first char)
00332  *              y0 (baseline location for all text on line)
00333  *              &width (<optional return> width of generated text)
00334  *              &overflow (<optional return> 0 if text is contained in
00335  *                         input pix; 1 if it is clipped)
00336  *      Return: 0 if OK, 1 on error
00337  *
00338  *  Notes:
00339  *      (1) This function paints a line of text over an image.
00340  *      (2) @val is the pixel value to be painted through the font mask.
00341  *          It should be chosen to agree with the depth of pixs.
00342  *          If it is out of bounds, an intermediate value is chosen.
00343  *          For RGB, use hex notation: 0xRRGGBB00, where RR is the
00344  *          hex representation of the red intensity, etc.
00345  *          The last two hex digits are 00 (byte value 0), assigned to
00346  *          the A component.  Note that, as usual, RGBA proceeds from 
00347  *          left to right in the order from MSB to LSB (see pix.h
00348  *          for details).
00349  *      (3) If there is a colormap, this does the best it can to use
00350  *          the requested color, or something similar to it.
00351  */
00352 l_int32
00353 pixSetTextline(PIX         *pixs,
00354                L_BMF       *bmf,
00355                const char  *textstr,
00356                l_uint32     val,
00357                l_int32      x0,
00358                l_int32      y0,
00359                l_int32     *pwidth,
00360                l_int32     *poverflow)
00361 {
00362 char      chr; 
00363 l_int32   d, i, x, w, nchar, baseline, index, rval, gval, bval;
00364 l_uint32  textcolor;
00365 PIX      *pix;
00366 PIXCMAP  *cmap;
00367 
00368     PROCNAME("pixSetTextline");
00369 
00370     if (!pixs)
00371         return ERROR_INT("pixs not defined", procName, 1);
00372     if (!bmf)
00373         return ERROR_INT("bmf not defined", procName, 1);
00374     if (!textstr)
00375         return ERROR_INT("teststr not defined", procName, 1);
00376     if (val < 0) {
00377         L_WARNING("val must be non-negative; setting to 0", procName);
00378         val = 0;
00379     }
00380 
00381     d = pixGetDepth(pixs);
00382     cmap = pixGetColormap(pixs);
00383     if (d == 1 && val > 1)
00384         val = 1;
00385     else if (d == 2 && val > 3 && !cmap)
00386         val = 2;
00387     else if (d == 4 && val > 15 && !cmap)
00388         val = 8;
00389     else if (d == 8 && val > 0xff && !cmap)
00390         val = 128;
00391     else if (d == 16 && val > 0xffff)
00392         val = 0x8000;
00393     else if (d == 32 && val < 256)
00394         val = 0x80808000;
00395 
00396         /* If cmapped, add the color if necessary to the cmap.  If the
00397          * cmap is full, use the nearest color to the requested color. */
00398     if (cmap) {
00399         extractRGBValues(val, &rval, &gval, &bval);
00400         pixcmapAddNearestColor(cmap, rval, gval, bval, &index);
00401         pixcmapGetColor(cmap, index, &rval, &gval, &bval);
00402         composeRGBPixel(rval, gval, bval, &textcolor);
00403     } else
00404         textcolor = val;
00405 
00406     nchar = strlen(textstr);
00407     x = x0;
00408     for (i = 0; i < nchar; i++) {
00409         chr = textstr[i];
00410         if ((l_int32)chr == 10) continue;  /* NL */
00411         pix = bmfGetPix(bmf, chr);
00412         bmfGetBaseline(bmf, chr, &baseline);
00413         pixPaintThroughMask(pixs, pix, x, y0 - baseline, textcolor);
00414         w = pixGetWidth(pix);
00415         x += w + bmf->kernwidth;
00416         pixDestroy(&pix);
00417     }
00418 
00419     if (pwidth)
00420         *pwidth = x - bmf->kernwidth - x0;
00421     if (poverflow)
00422         *poverflow = (x > pixGetWidth(pixs) - 1) ? 1 : 0;
00423     return 0;
00424 }
00425 
00426 
00427 /*---------------------------------------------------------------------*
00428  *                   Text size estimation and partitioning             *
00429  *---------------------------------------------------------------------*/
00430 /*!
00431  *  bmfGetLineStrings()
00432  *
00433  *      Input:  bmf
00434  *              textstr
00435  *              maxw (max width of a text line in pixels)
00436  *              firstindent (indentation of first line, in x-widths)
00437  *              &h (<return> height required to hold text bitmap)
00438  *      Return: sarray of text strings for each line, or null on error
00439  *
00440  *  Notes:
00441  *      (1) Divides the input text string into an array of text strings,
00442  *          each of which will fit within maxw bits of width.
00443  */
00444 SARRAY *
00445 bmfGetLineStrings(L_BMF       *bmf,
00446                   const char  *textstr,
00447                   l_int32      maxw,
00448                   l_int32      firstindent,
00449                   l_int32     *ph)
00450 {
00451 char    *linestr;
00452 l_int32  i, ifirst, sumw, newsum, w, nwords, nlines, len, xwidth;
00453 NUMA    *na;
00454 SARRAY  *sa, *sawords;
00455 
00456     PROCNAME("bmfGetLineStrings");
00457 
00458     if (!bmf)
00459         return (SARRAY *)ERROR_PTR("bmf not defined", procName, NULL);
00460     if (!textstr)
00461         return (SARRAY *)ERROR_PTR("teststr not defined", procName, NULL);
00462 
00463     if ((sawords = sarrayCreateWordsFromString(textstr)) == NULL)
00464         return (SARRAY *)ERROR_PTR("sawords not made", procName, NULL);
00465 
00466     if ((na = bmfGetWordWidths(bmf, textstr, sawords)) == NULL)
00467         return (SARRAY *)ERROR_PTR("na not made", procName, NULL);
00468     nwords = numaGetCount(na);
00469     if (nwords == 0)
00470         return (SARRAY *)ERROR_PTR("no words in textstr", procName, NULL);
00471     bmfGetWidth(bmf, 'x', &xwidth);
00472 
00473     if ((sa = sarrayCreate(0)) == NULL)
00474         return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
00475 
00476     ifirst = 0;
00477     numaGetIValue(na, 0, &w);
00478     sumw = firstindent * xwidth + w;
00479     for (i = 1; i < nwords; i++) {
00480         numaGetIValue(na, i, &w);
00481         newsum = sumw + bmf->spacewidth + w;
00482         if (newsum > maxw) {
00483             linestr = sarrayToStringRange(sawords, ifirst, i - ifirst, 2);
00484             if (!linestr)
00485                 continue;
00486             len = strlen(linestr);
00487             if (len > 0)  /* it should always be */
00488                 linestr[len - 1] = '\0';  /* remove the last space */
00489             sarrayAddString(sa, linestr, 0);
00490             ifirst = i;
00491             sumw = w;
00492         }
00493         else
00494             sumw += bmf->spacewidth + w;
00495     }
00496     linestr = sarrayToStringRange(sawords, ifirst, nwords - ifirst, 2);
00497     if (linestr)
00498         sarrayAddString(sa, linestr, 0);
00499     nlines = sarrayGetCount(sa);
00500     *ph = nlines * bmf->lineheight + (nlines - 1) * bmf->vertlinesep;
00501 
00502     sarrayDestroy(&sawords);
00503     numaDestroy(&na);
00504     return sa;
00505 }
00506 
00507 
00508 /*!
00509  *  bmfGetWordWidths()
00510  *
00511  *      Input:  bmf
00512  *              textstr
00513  *              sa (of individual words)
00514  *      Return: numa (of word lengths in pixels for the font represented
00515  *                    by the bmf), or null on error
00516  */
00517 NUMA *
00518 bmfGetWordWidths(L_BMF       *bmf,
00519                  const char  *textstr,
00520                  SARRAY      *sa)
00521 {
00522 char    *wordstr;
00523 l_int32  i, nwords, width;
00524 NUMA    *na;
00525 
00526     PROCNAME("bmfGetWordWidths");
00527 
00528     if (!bmf)
00529         return (NUMA *)ERROR_PTR("bmf not defined", procName, NULL);
00530     if (!textstr)
00531         return (NUMA *)ERROR_PTR("teststr not defined", procName, NULL);
00532     if (!sa)
00533         return (NUMA *)ERROR_PTR("sa not defined", procName, NULL);
00534 
00535     nwords = sarrayGetCount(sa);
00536     if ((na = numaCreate(nwords)) == NULL)
00537         return (NUMA *)ERROR_PTR("na not made", procName, NULL);
00538 
00539     for (i = 0; i < nwords; i++) {
00540         wordstr = sarrayGetString(sa, i, 0);  /* not a copy */
00541         bmfGetStringWidth(bmf, wordstr, &width);
00542         numaAddNumber(na, width);
00543     }
00544 
00545     return na;
00546 }
00547 
00548 
00549 /*!
00550  *  bmfGetStringWidth()
00551  *
00552  *      Input:  bmf
00553  *              textstr
00554  *              &w (<return> width of text string, in pixels for the
00555  *                 font represented by the bmf)
00556  *      Return: 0 if OK, 1 on error
00557  */
00558 l_int32
00559 bmfGetStringWidth(L_BMF       *bmf,
00560                   const char  *textstr,
00561                   l_int32     *pw)
00562 {
00563 char     chr; 
00564 l_int32  i, w, width, nchar;
00565 
00566     PROCNAME("bmfGetStringWidth");
00567 
00568     if (!bmf)
00569         return ERROR_INT("bmf not defined", procName, 1);
00570     if (!textstr)
00571         return ERROR_INT("teststr not defined", procName, 1);
00572     if (!pw)
00573         return ERROR_INT("&w not defined", procName, 1);
00574 
00575     nchar = strlen(textstr);
00576     w = 0;
00577     for (i = 0; i < nchar; i++) {
00578         chr = textstr[i];
00579         bmfGetWidth(bmf, chr, &width);
00580         if (width != UNDEF)
00581             w += width + bmf->kernwidth;
00582     }
00583     w -= bmf->kernwidth;  /* remove last one */
00584 
00585     *pw = w;
00586     return 0;
00587 }
00588 
00589 
00590 
00591 /*---------------------------------------------------------------------*
00592  *                             Text splitting                          *
00593  *---------------------------------------------------------------------*/
00594 /*!
00595  *  splitStringToParagraphs()
00596  *
00597  *      Input:  textstring
00598  *              splitting flag (see enum in bmf.h; valid values in {1,2,3})
00599  *      Return: sarray (where each string is a paragraph of the input),
00600  *                      or null on error.
00601  */
00602 SARRAY *
00603 splitStringToParagraphs(char    *textstr,
00604                         l_int32  splitflag)
00605 {
00606 char    *linestr, *parastring;
00607 l_int32  nlines, i, allwhite, leadwhite;
00608 SARRAY  *salines, *satemp, *saout;
00609 
00610     PROCNAME("splitStringToParagraphs");
00611 
00612     if (!textstr)
00613         return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL);
00614 
00615     if ((salines = sarrayCreateLinesFromString(textstr, 1)) == NULL)
00616         return (SARRAY *)ERROR_PTR("salines not made", procName, NULL);
00617     nlines = sarrayGetCount(salines);
00618     saout = sarrayCreate(0);
00619     satemp = sarrayCreate(0);
00620 
00621     linestr = sarrayGetString(salines, 0, 0);
00622     sarrayAddString(satemp, linestr, 1);
00623     for (i = 1; i < nlines; i++) {
00624         linestr = sarrayGetString(salines, i, 0);
00625         stringAllWhitespace(linestr, &allwhite);
00626         stringLeadingWhitespace(linestr, &leadwhite);
00627         if ((splitflag == SPLIT_ON_LEADING_WHITE && leadwhite) ||
00628             (splitflag == SPLIT_ON_BLANK_LINE && allwhite) ||
00629             (splitflag == SPLIT_ON_BOTH && (allwhite || leadwhite))) {
00630             parastring = sarrayToString(satemp, 1);  /* add nl to each line */
00631             sarrayAddString(saout, parastring, 0);  /* insert */
00632             sarrayDestroy(&satemp);
00633             satemp = sarrayCreate(0);
00634         }
00635         sarrayAddString(satemp, linestr, 1);
00636     }
00637     parastring = sarrayToString(satemp, 1);  /* add nl to each line */
00638     sarrayAddString(saout, parastring, 0);  /* insert */
00639     sarrayDestroy(&satemp);
00640 
00641     return saout;
00642 }
00643 
00644 
00645 /*!
00646  *  stringAllWhitespace()
00647  *
00648  *      Input:  textstring
00649  *              &val (<return> 1 if all whitespace; 0 otherwise)
00650  *      Return: 0 if OK, 1 on error
00651  */
00652 static l_int32
00653 stringAllWhitespace(char     *textstr,
00654                     l_int32  *pval)
00655 {
00656 l_int32  len, i;
00657 
00658     PROCNAME("stringAllWhitespace");
00659 
00660     if (!textstr)
00661         return ERROR_INT("textstr not defined", procName, 1);
00662     if (!pval)
00663         return ERROR_INT("&va not defined", procName, 1);
00664 
00665     len = strlen(textstr);
00666     *pval = 1;
00667     for (i = 0; i < len; i++) {
00668         if (textstr[i] != ' ' && textstr[i] != '\t' && textstr[i] != '\n') {
00669             *pval = 0;
00670             return 0;
00671         }
00672     }
00673     return 0;
00674 }
00675 
00676 
00677 /*!
00678  *  stringLeadingWhitespace()
00679  *
00680  *      Input:  textstring
00681  *              &val (<return> 1 if leading char is ' ' or '\t'; 0 otherwise)
00682  *      Return: 0 if OK, 1 on error
00683  */
00684 static l_int32
00685 stringLeadingWhitespace(char     *textstr,
00686                         l_int32  *pval)
00687 {
00688     PROCNAME("stringLeadingWhitespace");
00689 
00690     if (!textstr)
00691         return ERROR_INT("textstr not defined", procName, 1);
00692     if (!pval)
00693         return ERROR_INT("&va not defined", procName, 1);
00694 
00695     *pval = 0;
00696     if (textstr[0] == ' ' || textstr[0] == '\t')
00697         *pval = 1;
00698 
00699     return 0;
00700 }
00701 
00702 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines