Leptonica/utils_8c_source.html

00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015
00016
00017 /*
00018  *  utils.c
00019  *
00020  *       Error, warning and info procs; all invoked by macros
00021  *           l_int32    returnErrorInt()
00022  *           l_float32  returnErrorFloat()
00023  *           void      *returnErrorPtr()
00024  *           void       l_error()
00025  *           void       l_errorString()
00026  *           void       l_errorInt()
00027  *           void       l_errorFloat()
00028  *           void       l_warning()
00029  *           void       l_warningString()
00030  *           void       l_warningInt()
00031  *           void       l_warningInt2()
00032  *           void       l_warningFloat()
00033  *           void       l_warningFloat2()
00034  *           void       l_info()
00035  *           void       l_infoString()
00036  *           void       l_infoInt()
00037  *           void       l_infoInt2()
00038  *           void       l_infoFloat()
00039  *           void       l_infoFloat2()
00040  *
00041  *       Safe string procs
00042  *           char      *stringNew()
00043  *           l_int32    stringCopy()
00044  *           l_int32    stringReplace()
00045  *           l_int32    stringLength()
00046  *           l_int32    stringCat()
00047  *           char      *stringJoin()
00048  *           char      *stringReverse()
00049  *           char      *strtokSafe()
00050  *           l_int32    stringSplitOnToken()
00051  *
00052  *       Find and replace string and array procs
00053  *           char      *stringRemoveChars()
00054  *           l_int32    stringFindSubstr()
00055  *           char      *stringReplaceSubstr()
00056  *           char      *stringReplaceEachSubstr()
00057  *           NUMA      *arrayFindEachSequence()
00058  *           l_int32    arrayFindSequence()
00059  *
00060  *       Safe realloc
00061  *           void      *reallocNew()
00062  *
00063  *       Read and write between file and memory
00064  *           l_uint8   *l_binaryRead()
00065  *           l_uint8   *l_binaryReadStream()
00066  *           l_int32    l_binaryWrite()
00067  *           l_int32    nbytesInFile()
00068  *           l_int32    fnbytesInFile()
00069  *
00070  *       Copy in memory
00071  *           l_uint8   *l_binaryCopy()
00072  *
00073  *       File copy operations
00074  *           l_int32    fileCopy()
00075  *           l_int32    fileConcatenate()
00076  *           l_int32    fileAppendString()
00077  *
00078  *       Test files for equivalence
00079  *           l_int32    filesAreIdentical()
00080  *
00081  *       Byte-swapping data conversion
00082  *           l_uint16   convertOnBigEnd16()
00083  *           l_uint32   convertOnBigEnd32()
00084  *           l_uint16   convertOnLittleEnd16()
00085  *           l_uint32   convertOnLittleEnd32()
00086  *
00087  *       Opening file streams
00088  *           FILE      *fopenReadStream()
00089  *           FILE      *fopenWriteStream()
00090  *
00091  *       Functions to avoid C-runtime boundary crossing with Windows DLLs
00092  *           FILE      *lept_fopen()
00093  *           l_int32    lept_fclose()
00094  *           void       lept_calloc()
00095  *           void       lept_free()
00096  *
00097  *       Cross-platform file system operations
00098  *           l_int32    lept_mkdir()
00099  *           l_int32    lept_rmdir()
00100  *           l_int32    lept_mv()
00101  *           l_int32    lept_rm()
00102  *           l_int32    lept_cp()
00103  *
00104  *       File name operations
00105  *           l_int32    splitPathAtDirectory()
00106  *           l_int32    splitPathAtExtension()
00107  *           char      *pathJoin()
00108  *           char      *genPathname()
00109  *           char      *genTempFilename()
00110  *           l_int32    extractNumberFromFilename()
00111  *
00112  *       Generate random integer in given range
00113  *           l_int32    genRandomIntegerInRange()
00114  *
00115  *       Leptonica version number
00116  *           char      *getLeptonicaVersion()
00117  *
00118  *       Timing
00119  *           void       startTimer()
00120  *           l_float32  stopTimer()
00121  *           L_TIMER    startTimerNested()
00122  *           l_float32  stopTimerNested()
00123  *           void       l_getCurrentTime()
00124  *           void       l_getFormattedDate()
00125  *
00126  *       Deprecated binary read functions  (don't use these!)
00127  *           l_uint8   *arrayRead()
00128  *           l_uint8   *arrayReadStream()
00129  *
00130  *
00131  *  Notes on cross-platform development
00132  *  -----------------------------------
00133  *  (1) With the exception of splitPathAtDirectory() and
00134  *      splitPathAtExtension(), all input pathnames must have unix separators.
00135  *  (2) The conversion from unix to windows pathnames happens in genPathname().
00136  *  (3) Use fopenReadStream() and fopenWriteStream() to open files,
00137  *      because these use genPathname() to find the platform-dependent
00138  *      filenames.  Likewise for l_binaryRead() and l_binaryWrite().
00139  *  (4) For moving, copying and removing files and directories,
00140  *      use the lept_*() file system shell wrappers:
00141  *         lept_mkdir(), lept_rmdir(), lept_mv(), lept_rm() and lept_cp().
00142  *  (5) Use the lept_*() C library wrappers:
00143  *         lept_fopen(), lept_fclose(), lept_calloc() and lept_free().
00144  */
00145
00146 #include <string.h>
00147 #include <time.h>
00148 #ifdef _MSC_VER
00149 #include <process.h>
00150 #else
00151 #include <unistd.h>
00152 #endif   /* _MSC_VER */
00153 #include "allheaders.h"
00154
00155 #ifdef _WIN32
00156 #include <windows.h>
00157 static const char sepchar = '\\';
00158 #else
00159 #include <sys/stat.h>  /* for mkdir(2) */
00160 #include <sys/types.h>
00161 static const char sepchar = '/';
00162 #endif
00163
00164
00165 /*----------------------------------------------------------------------*
00166  *                 Error, warning and info message procs                *
00167  *                                                                      *
00168  *            ---------------------  N.B. ---------------------         *
00169  *                                                                      *
00170  *    (1) These functions all print messages to stderr.                 *
00171  *                                                                      *
00172  *    (2) They must be invoked only by macros, which are in             *
00173  *        environ.h, so that the print output can be disabled           *
00174  *        at compile time, using -DNO_CONSOLE_IO.                       *
00175  *                                                                      *
00176  *----------------------------------------------------------------------*/
00177 /*!
00178  *  returnErrorInt()
00179  *
00180  *      Input:  msg (error message)
00181  *              procname
00182  *              ival (return val)
00183  *      Return: ival (typically 1)
00184  */
00185 l_int32
00186 returnErrorInt(const char  *msg,
00187                const char  *procname,
00188                l_int32      ival)
00189 {
00190     fprintf(stderr, "Error in %s: %s\n", procname, msg);
00191     return ival;
00192 }
00193
00194
00195 /*!
00196  *  returnErrorFloat()
00197  *
00198  *      Input:  msg (error message)
00199  *              procname
00200  *              fval (return val)
00201  *      Return: fval
00202  */
00203 l_float32
00204 returnErrorFloat(const char  *msg,
00205                  const char  *procname,
00206                  l_float32    fval)
00207 {
00208     fprintf(stderr, "Error in %s: %s\n", procname, msg);
00209     return fval;
00210 }
00211
00212
/*!
 *  returnErrorPtr()
 *
 *      Input:  msg (error message)
 *              procname (name of the function reporting the error)
 *              pval  (return val)
 *      Return: pval (typically null)
 *
 *  Notes:
 *      (1) Prints "Error in <procname>: <msg>" to stderr and hands
 *          @pval back unchanged.
 *      (2) A null @msg or @procname is printed as "(null)"; handing a
 *          null pointer to a %s conversion is undefined behavior.
 */
void *
returnErrorPtr(const char  *msg,
               const char  *procname,
               void        *pval)
{
    fprintf(stderr, "Error in %s: %s\n",
            procname ? procname : "(null)", msg ? msg : "(null)");
    return pval;
}
00229
00230
/*!
 *  l_error()
 *
 *      Input: msg (error message)
 *             procname (name of the function reporting the error)
 *
 *  Notes:
 *      (1) Prints "Error in <procname>: <msg>" to stderr.
 *      (2) A null @msg or @procname is printed as "(null)"; handing a
 *          null pointer to a %s conversion is undefined behavior.
 */
void
l_error(const char  *msg,
        const char  *procname)
{
    fprintf(stderr, "Error in %s: %s\n",
            procname ? procname : "(null)", msg ? msg : "(null)");
}
00244
00245
/*!
 *  l_errorString()
 *
 *      Input: msg (error message; must include one '%s')
 *             procname (name of the function reporting the error)
 *             str (embedded in error message via %s)
 *
 *  Notes:
 *      (1) Prints "Error in <procname>: <msg>" to stderr, with @str
 *          substituted for the '%s' in @msg.
 *      (2) @msg is used as a printf format.  @procname is passed as
 *          an argument rather than spliced into the format, so it is
 *          always printed literally.
 */
void
l_errorString(const char  *msg,
              const char  *procname,
              const char  *str)
{
size_t  fmtsize;
char   *fmt;

    if (!msg || !procname || !str) {
            /* Never forward a null procname to the error printer */
        L_ERROR("msg, procname or str not defined in l_errorString()",
                procname ? procname : "l_errorString");
        return;
    }

        /* Room for the fixed prefix, msg, newline and terminating nul */
    fmtsize = strlen(msg) + 32;
    if ((fmt = (char *)CALLOC(fmtsize, sizeof(char))) == NULL) {
        L_ERROR("charbuf not made in l_errorString()", procname);
        return;
    }

        /* Build "Error in %s: <msg>\n"; the escaped %s takes procname */
    snprintf(fmt, fmtsize, "Error in %%s: %s\n", msg);
    fprintf(stderr, fmt, procname, str);

    FREE(fmt);
}
00279
00280
00281 /*!
00282  *  l_errorInt()
00283  *
00284  *      Input: msg (error message; must include '%d')
00285  *             procname
00286  *             ival (embedded in error message via %d)
00287  */
00288 void
00289 l_errorInt(const char  *msg,
00290            const char  *procname,
00291            l_int32      ival)
00292 {
00293 l_int32  bufsize;
00294 char    *charbuf;
00295
00296     if (!msg || !procname) {
00297         L_ERROR("msg or procname not defined in l_errorInt()", procname);
00298         return;
00299     }
00300
00301     bufsize = strlen(msg) + strlen(procname) + 128;
00302     if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
00303         L_ERROR("charbuf not made in l_errorInt()", procname);
00304         return;
00305     }
00306
00307     sprintf(charbuf, "Error in %s: %s\n", procname, msg);
00308     fprintf(stderr, charbuf, ival);
00309
00310     FREE(charbuf);
00311     return;
00312 }
00313
00314
00315 /*!
00316  *  l_errorFloat()
00317  *
00318  *      Input: msg (error message; must include '%f')
00319  *             procname
00320  *             fval (embedded in error message via %f)
00321  */
00322 void
00323 l_errorFloat(const char  *msg,
00324              const char  *procname,
00325              l_float32    fval)
00326 {
00327 l_int32  bufsize;
00328 char    *charbuf;
00329
00330     if (!msg || !procname) {
00331         L_ERROR("msg or procname not defined in l_errorFloat()", procname);
00332         return;
00333     }
00334
00335     bufsize = strlen(msg) + strlen(procname) + 128;
00336     if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
00337         L_ERROR("charbuf not made in l_errorFloat()", procname);
00338         return;
00339     }
00340
00341     sprintf(charbuf, "Error in %s: %s\n", procname, msg);
00342     fprintf(stderr, charbuf, fval);
00343
00344     FREE(charbuf);
00345     return;
00346 }
00347
00348
/*!
 *  l_warning()
 *
 *      Input: msg (warning message)
 *             procname (name of the function issuing the warning)
 *
 *  Notes:
 *      (1) Prints "Warning in <procname>: <msg>" to stderr.
 *      (2) A null @msg or @procname is printed as "(null)"; handing a
 *          null pointer to a %s conversion is undefined behavior.
 */
void
l_warning(const char  *msg,
          const char  *procname)
{
    fprintf(stderr, "Warning in %s: %s\n",
            procname ? procname : "(null)", msg ? msg : "(null)");
}
00362
00363
/*!
 *  l_warningString()
 *
 *      Input: msg (warning message; must include one '%s')
 *             procname (name of the function issuing the warning)
 *             str (embedded in warning message via %s)
 *
 *  Notes:
 *      (1) Prints "Warning in <procname>: <msg>" to stderr, with @str
 *          substituted for the '%s' in @msg.
 *      (2) @msg is used as a printf format.  @procname is passed as
 *          an argument rather than spliced into the format, so it is
 *          always printed literally.
 */
void
l_warningString(const char  *msg,
                const char  *procname,
                const char  *str)
{
size_t  fmtsize;
char   *fmt;

    if (!msg || !procname || !str) {
            /* Never forward a null procname to the error printer */
        L_ERROR("msg, procname or str not defined in l_warningString()",
                procname ? procname : "l_warningString");
        return;
    }

        /* Room for the fixed prefix, msg, newline and terminating nul */
    fmtsize = strlen(msg) + 32;
    if ((fmt = (char *)CALLOC(fmtsize, sizeof(char))) == NULL) {
        L_ERROR("charbuf not made in l_warningString()", procname);
        return;
    }

        /* Build "Warning in %s: <msg>\n"; the escaped %s takes procname */
    snprintf(fmt, fmtsize, "Warning in %%s: %s\n", msg);
    fprintf(stderr, fmt, procname, str);

    FREE(fmt);
}
00397
00398
00399 /*!
00400  *  l_warningInt()
00401  *
00402  *      Input: msg (warning message; must include '%d')
00403  *             procname
00404  *             ival (embedded in warning message via %d)
00405  */
00406 void
00407 l_warningInt(const char  *msg,
00408              const char  *procname,
00409              l_int32      ival)
00410 {
00411 l_int32  bufsize;
00412 char    *charbuf;
00413
00414     if (!msg || !procname) {
00415         L_ERROR("msg or procname not defined in l_warningInt()", procname);
00416         return;
00417     }
00418
00419     bufsize = strlen(msg) + strlen(procname) + 128;
00420     if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
00421         L_ERROR("charbuf not made in l_warningInt()", procname);
00422         return;
00423     }
00424
00425     sprintf(charbuf, "Warning in %s: %s\n", procname, msg);
00426     fprintf(stderr, charbuf, ival);
00427
00428     FREE(charbuf);
00429     return;
00430 }
00431
00432
00433 /*!
00434  *  l_warningInt2()
00435  *
00436  *      Input: msg (warning message; must include '%d')
00437  *             procname
00438  *             ival1, ival2 (two args, embedded in message via %d)
00439  */
00440 void
00441 l_warningInt2(const char  *msg,
00442               const char  *procname,
00443               l_int32      ival1,
00444               l_int32      ival2)
00445 {
00446 l_int32  bufsize;
00447 char    *charbuf;
00448
00449     if (!msg || !procname) {
00450         L_ERROR("msg or procname not defined in l_warningInt2()", procname);
00451         return;
00452     }
00453
00454     bufsize = strlen(msg) + strlen(procname) + 128;
00455     if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
00456         L_ERROR("charbuf not made in l_warningInt()", procname);
00457         return;
00458     }
00459
00460     sprintf(charbuf, "Warning in %s: %s\n", procname, msg);
00461     fprintf(stderr, charbuf, ival1, ival2);
00462
00463     FREE(charbuf);
00464     return;
00465 }
00466
00467
00468 /*!
00469  *  l_warningFloat()
00470  *
00471  *      Input: msg (warning message; must include '%f')
00472  *             procname
00473  *             fval (embedded in warning message via %f)
00474  */
00475 void
00476 l_warningFloat(const char  *msg,
00477                const char  *procname,
00478                l_float32    fval)
00479 {
00480 l_int32  bufsize;
00481 char    *charbuf;
00482
00483     if (!msg || !procname) {
00484         L_ERROR("msg or procname not defined in l_warningFloat()", procname);
00485         return;
00486     }
00487
00488     bufsize = strlen(msg) + strlen(procname) + 128;
00489     if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
00490         L_ERROR("charbuf not made in l_warningFloat()", procname);
00491         return;
00492     }
00493
00494     sprintf(charbuf, "Warning in %s: %s\n", procname, msg);
00495     fprintf(stderr, charbuf, fval);
00496
00497     FREE(charbuf);
00498     return;
00499 }
00500
00501
00502 /*!
00503  *  l_warningFloat2()
00504  *
00505  *      Input: msg (warning message; must include '%f')
00506  *             procname
00507  *             fval1, fval2 (two args, embedded in message via %f)
00508  */
00509 void
00510 l_warningFloat2(const char  *msg,
00511                 const char  *procname,
00512                 l_float32    fval1,
00513                 l_float32    fval2)
00514 {
00515 l_int32  bufsize;
00516 char    *charbuf;
00517
00518     if (!msg || !procname) {
00519         L_ERROR("msg or procname not defined in l_warningFloat2()", procname);
00520         return;
00521     }
00522
00523     bufsize = strlen(msg) + strlen(procname) + 128;
00524     if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
00525         L_ERROR("charbuf not made in l_warningFloat()", procname);
00526         return;
00527     }
00528
00529     sprintf(charbuf, "Warning in %s: %s\n", procname, msg);
00530     fprintf(stderr, charbuf, fval1, fval2);
00531
00532     FREE(charbuf);
00533     return;
00534 }
00535
00536
/*!
 *  l_info()
 *
 *      Input: msg (info message)
 *             procname (name of the function issuing the message)
 *
 *  Notes:
 *      (1) Prints "Info in <procname>: <msg>" to stderr.
 *      (2) A null @msg or @procname is printed as "(null)"; handing a
 *          null pointer to a %s conversion is undefined behavior.
 */
void
l_info(const char  *msg,
       const char  *procname)
{
    fprintf(stderr, "Info in %s: %s\n",
            procname ? procname : "(null)", msg ? msg : "(null)");
}
00550
00551
/*!
 *  l_infoString()
 *
 *      Input: msg (info message; must include one '%s')
 *             procname (name of the function issuing the message)
 *             str (embedded in info message via %s)
 *
 *  Notes:
 *      (1) Prints "Info in <procname>: <msg>" to stderr, with @str
 *          substituted for the '%s' in @msg.
 *      (2) @msg is used as a printf format.  @procname is passed as
 *          an argument rather than spliced into the format, so it is
 *          always printed literally.
 */
void
l_infoString(const char  *msg,
             const char  *procname,
             const char  *str)
{
size_t  fmtsize;
char   *fmt;

    if (!msg || !procname || !str) {
            /* Never forward a null procname to the error printer */
        L_ERROR("msg, procname or str not defined in l_infoString()",
                procname ? procname : "l_infoString");
        return;
    }

        /* Room for the fixed prefix, msg, newline and terminating nul */
    fmtsize = strlen(msg) + 32;
    if ((fmt = (char *)CALLOC(fmtsize, sizeof(char))) == NULL) {
        L_ERROR("charbuf not made in l_infoString()", procname);
        return;
    }

        /* Build "Info in %s: <msg>\n"; the escaped %s takes procname */
    snprintf(fmt, fmtsize, "Info in %%s: %s\n", msg);
    fprintf(stderr, fmt, procname, str);

    FREE(fmt);
}
00584
00585
00586 /*!
00587  *  l_infoInt()
00588  *
00589  *      Input: msg (info message; must include '%d')
00590  *             procname
00591  *             ival (embedded in info message via %d)
00592  */
00593 void
00594 l_infoInt(const char  *msg,
00595           const char  *procname,
00596           l_int32      ival)
00597 {
00598 l_int32  bufsize;
00599 char    *charbuf;
00600
00601     if (!msg || !procname) {
00602         L_ERROR("msg or procname not defined in l_infoInt()", procname);
00603         return;
00604     }
00605
00606     bufsize = strlen(msg) + strlen(procname) + 128;
00607     if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
00608         L_ERROR("charbuf not made in l_infoInt()", procname);
00609         return;
00610     }
00611
00612     sprintf(charbuf, "Info in %s: %s\n", procname, msg);
00613     fprintf(stderr, charbuf, ival);
00614
00615     FREE(charbuf);
00616     return;
00617 }
00618
00619
00620 /*!
00621  *  l_infoInt2()
00622  *
00623  *      Input: msg (info message; must include two '%d')
00624  *             procname
00625  *             ival1, ival2 (two args, embedded in info message via %d)
00626  */
00627 void
00628 l_infoInt2(const char  *msg,
00629            const char  *procname,
00630            l_int32      ival1,
00631            l_int32      ival2)
00632 {
00633 l_int32  bufsize;
00634 char    *charbuf;
00635
00636     if (!msg || !procname) {
00637         L_ERROR("msg or procname not defined in l_infoInt2()", procname);
00638         return;
00639     }
00640
00641     bufsize = strlen(msg) + strlen(procname) + 128;
00642     if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
00643         L_ERROR("charbuf not made in l_infoInt2()", procname);
00644         return;
00645     }
00646
00647     sprintf(charbuf, "Info in %s: %s\n", procname, msg);
00648     fprintf(stderr, charbuf, ival1, ival2);
00649
00650     FREE(charbuf);
00651     return;
00652 }
00653
00654
00655 /*!
00656  *  l_infoFloat()
00657  *
00658  *      Input: msg (info message; must include '%f')
00659  *             procname
00660  *             fval (embedded in info message via %f)
00661  */
00662 void
00663 l_infoFloat(const char  *msg,
00664             const char  *procname,
00665             l_float32    fval)
00666 {
00667 l_int32  bufsize;
00668 char    *charbuf;
00669
00670     if (!msg || !procname) {
00671         L_ERROR("msg or procname not defined in l_infoFloat()", procname);
00672         return;
00673     }
00674
00675     bufsize = strlen(msg) + strlen(procname) + 128;
00676     if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
00677         L_ERROR("charbuf not made in l_infoFloat()", procname);
00678         return;
00679     }
00680
00681     sprintf(charbuf, "Info in %s: %s\n", procname, msg);
00682     fprintf(stderr, charbuf, fval);
00683
00684     FREE(charbuf);
00685     return;
00686 }
00687
00688
00689 /*!
00690  *  l_infoFloat2()
00691  *
00692  *      Input: msg (info message; must include two '%f')
00693  *             procname
00694  *             fval1, fval2 (two args, embedded in info message via %f)
00695  */
00696 void
00697 l_infoFloat2(const char  *msg,
00698              const char  *procname,
00699              l_float32    fval1,
00700              l_float32    fval2)
00701 {
00702 l_int32  bufsize;
00703 char    *charbuf;
00704
00705     if (!msg || !procname) {
00706         L_ERROR("msg or procname not defined in l_infoFloat2()", procname);
00707         return;
00708     }
00709
00710     bufsize = strlen(msg) + strlen(procname) + 128;
00711     if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
00712         L_ERROR("charbuf not made in l_infoFloat()", procname);
00713         return;
00714     }
00715
00716     sprintf(charbuf, "Info in %s: %s\n", procname, msg);
00717     fprintf(stderr, charbuf, fval1, fval2);
00718
00719     FREE(charbuf);
00720     return;
00721 }
00722
00723
00724
00725 /*--------------------------------------------------------------------*
00726  *                       Safe string operations                       *
00727  *--------------------------------------------------------------------*/
00728 /*!
00729  *  stringNew()
00730  *
00731  *      Input:  src string
00732  *      Return: dest copy of src string, or null on error
00733  */
00734 char *
00735 stringNew(const char  *src)
00736 {
00737 l_int32  len;
00738 char    *dest;
00739
00740     PROCNAME("stringNew");
00741
00742     if (!src)
00743         return (char *)ERROR_PTR("src not defined", procName, NULL);
00744
00745     len = strlen(src);
00746     if ((dest = (char *)CALLOC(len + 1, sizeof(char))) == NULL)
00747         return (char *)ERROR_PTR("dest not made", procName, NULL);
00748
00749     stringCopy(dest, src, len);
00750     return dest;
00751 }
00752
00753
00754 /*!
00755  *  stringCopy()
00756  *
00757  *      Input:  dest (existing byte buffer)
00758  *              src string (can be null)
00759  *              n (max number of characters to copy)
00760  *      Return: 0 if OK, 1 on error
00761  *
00762  *  Notes:
00763  *      (1) Relatively safe wrapper for strncpy, that checks the input,
00764  *          and does not complain if @src is null or @n < 1.
00765  *          If @n < 1, this is a no-op.
00766  *      (2) @dest needs to be at least @n bytes in size.
00767  *      (3) We don't call strncpy() because valgrind complains about
00768  *          use of uninitialized values.
00769  */
00770 l_int32
00771 stringCopy(char        *dest,
00772            const char  *src,
00773            l_int32      n)
00774 {
00775 l_int32  i;
00776
00777     PROCNAME("stringCopy");
00778
00779     if (!dest)
00780         return ERROR_INT("dest not defined", procName, 1);
00781     if (!src || n < 1)
00782         return 0;
00783
00784         /* Implementation of strncpy that valgrind doesn't complain about */
00785     for (i = 0; i < n && src[i] != '\0'; i++)
00786         dest[i] = src[i];
00787     for (; i < n; i++)
00788         dest[i] = '\0';
00789     return 0;
00790 }
00791
00792
00793 /*!
00794  *  stringReplace()
00795  *
00796  *      Input:  &dest string (<return> copy)
00797  *              src string
00798  *      Return: 0 if OK; 1 on error
00799  *
00800  *  Notes:
00801  *      (1) Frees any existing dest string
00802  *      (2) Puts a copy of src string in the dest
00803  *      (3) If either or both strings are null, does something reasonable.
00804  */
00805 l_int32
00806 stringReplace(char       **pdest,
00807               const char  *src)
00808 {
00809 char    *scopy;
00810 l_int32  len;
00811
00812     PROCNAME("stringReplace");
00813
00814     if (!pdest)
00815         return ERROR_INT("pdest not defined", procName, 1);
00816
00817     if (*pdest)
00818         FREE(*pdest);
00819
00820     if (src) {
00821         len = strlen(src);
00822         if ((scopy = (char *)CALLOC(len + 1, sizeof(char))) == NULL)
00823             return ERROR_INT("scopy not made", procName, 1);
00824         stringCopy(scopy, src, len);
00825         *pdest = scopy;
00826     }
00827     else
00828         *pdest = NULL;
00829
00830     return 0;
00831 }
00832
00833
00834 /*!
00835  *  stringLength()
00836  *
00837  *      Input:  src string (can be null or null-terminated string)
00838  *              size (size of src buffer)
00839  *      Return: length of src in bytes.
00840  *
00841  *  Notes:
00842  *      (1) Safe implementation of strlen that only checks size bytes
00843  *          for trailing NUL.
00844  *      (2) Valid returned string lengths are between 0 and size - 1.
00845  *          If size bytes are checked without finding a NUL byte, then
00846  *          an error is indicated by returning size.
00847  */
00848 l_int32
00849 stringLength(const char  *src,
00850              size_t       size)
00851 {
00852 l_int32  i;
00853
00854     PROCNAME("stringLength");
00855
00856     if (!src)
00857         return ERROR_INT("src not defined", procName, 0);
00858     if (size < 1)
00859         return 0;
00860
00861     for (i = 0; i < size; i++) {
00862         if (src[i] == '\0')
00863             return i;
00864     }
00865     return size;
00866 }
00867
00868
00869 /*!
00870  *  stringCat()
00871  *
00872  *      Input:  dest (null-terminated byte buffer)
00873  *              size (size of dest)
00874  *              src string (can be null or null-terminated string)
00875  *      Return: number of bytes added to dest; -1 on error
00876  *
00877  *  Notes:
00878  *      (1) Alternative implementation of strncat, that checks the input,
00879  *          is easier to use (since the size of the dest buffer is specified
00880  *          rather than the number of bytes to copy), and does not complain
00881  *          if @src is null.
00882  *      (2) Never writes past end of dest.
00883  *      (3) If it can't append src (an error), it does nothing.
00884  *      (4) N.B. The order of 2nd and 3rd args is reversed from that in
00885  *          strncat, as in the Windows function strcat_s().
00886  */
00887 l_int32
00888 stringCat(char        *dest,
00889           size_t       size,
00890           const char  *src)
00891 {
00892 l_int32  i, n;
00893 l_int32  lendest, lensrc;
00894
00895     PROCNAME("stringCat");
00896
00897     if (!dest)
00898         return ERROR_INT("dest not defined", procName, -1);
00899     if (size < 1)
00900         return ERROR_INT("size < 1; too small", procName, -1);
00901     if (!src)
00902         return 0;
00903
00904     lendest = stringLength(dest, size);
00905     if (lendest == size)
00906         return ERROR_INT("no terminating nul byte", procName, -1);
00907     lensrc = stringLength(src, size);
00908     if (lensrc == 0)
00909         return 0;
00910     n = (lendest + lensrc > size - 1 ? size - lendest - 1 : lensrc);
00911     if (n < 1)
00912         return ERROR_INT("dest too small for append", procName, -1);
00913
00914     for (i = 0; i < n; i++)
00915         dest[lendest + i] = src[i];
00916     dest[lendest + n] = '\0';
00917     return n;
00918 }
00919
00920
00921 /*!
00922  *  stringJoin()
00923  *
00924  *      Input:  src1 string (<optional> can be null)
00925  *              src2 string (<optional> can be null)
00926  *      Return: concatenated string, or null on error
00927  *
00928  *  Notes:
00929  *      (1) This is a safe version of strcat; it makes a new string.
00930  *      (2) It is not an error if either or both of the strings
00931  *          are empty, or if either or both of the pointers are null.
00932  */
00933 char *
00934 stringJoin(const char  *src1,
00935            const char  *src2)
00936 {
00937 char    *dest;
00938 l_int32  srclen1, srclen2, destlen;
00939
00940     PROCNAME("stringJoin");
00941
00942     srclen1 = (src1) ? strlen(src1) : 0;
00943     srclen2 = (src2) ? strlen(src2) : 0;
00944     destlen = srclen1 + srclen2 + 3;
00945
00946     if ((dest = (char *)CALLOC(destlen, sizeof(char))) == NULL)
00947         return (char *)ERROR_PTR("calloc fail for dest", procName, NULL);
00948
00949     if (src1)
00950         stringCopy(dest, src1, srclen1);
00951     if (src2)
00952         strncat(dest, src2, srclen2);
00953     return dest;
00954 }
00955
00956
00957 /*!
00958  *  stringReverse()
00959  *
00960  *      Input:  src (string)
00961  *      Return: dest (newly-allocated reversed string)
00962  */
00963 char *
00964 stringReverse(const char  *src)
00965 {
00966 char    *dest;
00967 l_int32  i, len;
00968
00969     PROCNAME("stringReverse");
00970
00971     if (!src)
00972         return (char *)ERROR_PTR("src not defined", procName, NULL);
00973     len = strlen(src);
00974     if ((dest = (char *)CALLOC(len + 1, sizeof(char))) == NULL)
00975         return (char *)ERROR_PTR("calloc fail for dest", procName, NULL);
00976     for (i = 0; i < len; i++)
00977         dest[i] = src[len - 1 - i];
00978
00979     return dest;
00980 }
00981
00982
00983 /*!
00984  *  strtokSafe()
00985  *
00986  *      Input:  cstr (input string to be sequentially parsed;
00987  *                    use NULL after the first call)
00988  *              seps (a string of character separators)
00989  *              &saveptr (<return> ptr to the next char after
00990  *                        the last encountered separator)
00991  *      Return: substr (a new string that is copied from the previous
00992  *                      saveptr up to but not including the next
00993  *                      separator character), or NULL if end of cstr.
00994  *
00995  *  Notes:
00996  *      (1) This is a thread-safe implementation of strtok.
00997  *      (2) It has the same interface as strtok_r.
00998  *      (3) It differs from strtok_r in usage in two respects:
00999  *          (a) the input string is not altered
01000  *          (b) each returned substring is newly allocated and must
01001  *              be freed after use.
01002  *      (4) Let me repeat that.  This is "safe" because the input
01003  *          string is not altered and because each returned string
01004  *          is newly allocated on the heap.
01005  *      (5) It is here because, surprisingly, some C libraries don't
01006  *          include strtok_r.
01007  *      (6) Important usage points:
01008  *          - Input the string to be parsed on the first invocation.
01009  *          - Then input NULL after that; the value returned in saveptr
01010  *            is used in all subsequent calls.
01011  *      (7) This is only slightly slower than strtok_k.
01012  */
01013 char *
01014 strtokSafe(char        *cstr,
01015            const char  *seps,
01016            char       **psaveptr)
01017 {
01018 char     nextc;
01019 char    *start, *substr;
01020 l_int32  istart, i, j, nchars;
01021
01022     PROCNAME("strtokSafe");
01023
01024     if (!seps)
01025         return (char *)ERROR_PTR("seps not defined", procName, NULL);
01026     if (!psaveptr)
01027         return (char *)ERROR_PTR("&saveptr not defined", procName, NULL);
01028
01029     if (!cstr)
01030         start = *psaveptr;
01031     else
01032         start = cstr;
01033     if (!start)  /* nothing to do */
01034         return NULL;
01035
01036         /* First time, scan for the first non-sep character */
01037     istart = 0;
01038     if (cstr) {
01039         for (istart = 0;; istart++) {
01040             if ((nextc = start[istart]) == '\0') {
01041                 *psaveptr = NULL;  /* in case caller doesn't check ret value */
01042                 return NULL;
01043             }
01044             if (!strchr(seps, nextc))
01045                 break;
01046         }
01047     }
01048
01049         /* Scan through, looking for a sep character; if none is
01050          * found, 'i' will be at the end of the string. */
01051     for (i = istart;; i++) {
01052         if ((nextc = start[i]) == '\0')
01053             break;
01054         if (strchr(seps, nextc))
01055             break;
01056     }
01057
01058         /* Save the substring */
01059     nchars = i - istart;
01060     substr = (char *)CALLOC(nchars + 1, sizeof(char));
01061     stringCopy(substr, start + istart, nchars);
01062
01063         /* Look for the next non-sep character.
01064          * If this is the last substring, return a null saveptr. */
01065     for (j = i;; j++) {
01066         if ((nextc = start[j]) == '\0') {
01067             *psaveptr = NULL;  /* no more non-sep characters */
01068             break;
01069         }
01070         if (!strchr(seps, nextc)) {
01071             *psaveptr = start + j;  /* start here on next call */
01072                 break;
01073         }
01074     }
01075
01076     return substr;
01077 }
01078
01079
01080 /*!
01081  *  stringSplitOnToken()
01082  *
01083  *      Input:  cstr (input string to be split; not altered)
01084  *              seps (a string of character separators)
01085  *              &head (<return> ptr to copy of the input string, up to
01086  *                     the first separator token encountered)
01087  *              &tail (<return> ptr to copy of the part of the input string
01088  *                     starting with the first non-separator character
01089  *                     that occurs after the first separator is found)
01090  *      Return: 0 if OK, 1 on error
01091  *
01092  *  Notes:
01093  *      (1) The input string is not altered; all split parts are new strings.
01094  *      (2) The split occurs around the first consecutive sequence of
01095  *          tokens encountered.
01096  *      (3) The head goes from the beginning of the string up to
01097  *          but not including the first token found.
01098  *      (4) The tail contains the second part of the string, starting
01099  *          with the first char in that part that is NOT a token.
01100  *      (5) If no separator token is found, 'head' contains a copy
01101  *          of the input string and 'tail' is null.
01102  */
01103 l_int32
01104 stringSplitOnToken(char        *cstr,
01105                    const char  *seps,
01106                    char       **phead,
01107                    char       **ptail)
01108 {
01109 char  *saveptr;
01110
01111     PROCNAME("stringSplitOnToken");
01112
01113     if (!phead)
01114         return ERROR_INT("&head not defined", procName, 1);
01115     if (!ptail)
01116         return ERROR_INT("&tail not defined", procName, 1);
01117     *phead = *ptail = NULL;
01118     if (!cstr)
01119         return ERROR_INT("cstr not defined", procName, 1);
01120     if (!seps)
01121         return ERROR_INT("seps not defined", procName, 1);
01122
01123     *phead = strtokSafe(cstr, seps, &saveptr);
01124     if (saveptr)
01125         *ptail = stringNew(saveptr);
01126     return 0;
01127 }
01128
01129
01130 /*--------------------------------------------------------------------*
01131  *                       Find and replace procs                       *
01132  *--------------------------------------------------------------------*/
01133 /*!
01134  *  stringRemoveChars()
01135  *
01136  *      Input:  src (input string; can be of zero length)
01137  *              remchars  (string of chars to be removed from src)
01138  *      Return: dest (string with specified chars removed), or null on error
01139  */
01140 char *
01141 stringRemoveChars(const char  *src,
01142                   const char  *remchars)
01143 {
01144 char     ch;
01145 char    *dest;
01146 l_int32  nsrc, i, k;
01147
01148     PROCNAME("stringRemoveChars");
01149
01150     if (!src)
01151         return (char *)ERROR_PTR("src not defined", procName, NULL);
01152     if (!remchars)
01153         return stringNew(src);
01154
01155     if ((dest = (char *)CALLOC(strlen(src) + 1, sizeof(char))) == NULL)
01156         return (char *)ERROR_PTR("dest not made", procName, NULL);
01157     nsrc = strlen(src);
01158     for (i = 0, k = 0; i < nsrc; i++) {
01159         ch = src[i];
01160         if (!strchr(remchars, ch))
01161             dest[k++] = ch;
01162     }
01163
01164     return dest;
01165 }
01166
01167
01168 /*!
01169  *  stringFindSubstr()
01170  *
01171  *      Input:  src (input string; can be of zero length)
01172  *              sub (substring to be searched for)
01173  *              &loc (<return optional> location of substring in src)
01174  *      Return: 1 if found; 0 if not found or on error
01175  *
01176  *  Notes:
01177  *      (1) This is a wrapper around strstr().
01178  *      (2) Both @src and @sub must be defined, and @sub must have
01179  *          length of at least 1.
01180  *      (3) If the substring is not found and loc is returned, it has
01181  *          the value -1.
01182  */
01183 l_int32
01184 stringFindSubstr(const char  *src,
01185                  const char  *sub,
01186                  l_int32     *ploc)
01187 {
01188 char  *ptr;
01189
01190     PROCNAME("stringFindSubstr");
01191
01192     if (!src)
01193         return ERROR_INT("src not defined", procName, 0);
01194     if (!sub)
01195         return ERROR_INT("sub not defined", procName, 0);
01196     if (ploc) *ploc = -1;
01197     if (strlen(sub) == 0)
01198         return ERROR_INT("substring length 0", procName, 0);
01199     if (strlen(src) == 0)
01200         return 0;
01201
01202     if ((ptr = (char *)strstr(src, sub)) == NULL)  /* not found */
01203         return 0;
01204
01205     if (ploc)
01206         *ploc = ptr - src;
01207     return 1;
01208 }
01209
01210
01211 /*!
01212  *  stringReplaceSubstr()
01213  *
01214  *      Input:  src (input string; can be of zero length)
01215  *              sub1 (substring to be replaced)
01216  *              sub2 (substring to put in; can be "")
01217  *              &found (<return optional> 1 if sub1 is found; 0 otherwise)
01218  *              &loc (<return optional> location of ptr after replacement)
01219  *      Return: dest (string with substring replaced), or null if the
01220  *              substring not found or on error.
01221  *
01222  *  Notes:
01223  *      (1) Replaces the first instance.
01224  *      (2) To only remove sub1, use "" for sub2
01225  *      (3) Returns a new string if sub1 and sub2 are the same.
01226  *      (4) The optional loc is input as the byte offset within the src
01227  *          from which the search starts, and after the search it is the
01228  *          char position in the string of the next character after
01229  *          the substituted string.
01230  *      (5) N.B. If ploc is not null, loc must always be initialized.
01231  *          To search the string from the beginning, set loc = 0.
01232  */
01233 char *
01234 stringReplaceSubstr(const char  *src,
01235                     const char  *sub1,
01236                     const char  *sub2,
01237                     l_int32     *pfound,
01238                     l_int32     *ploc)
01239 {
01240 char    *ptr, *dest;
01241 l_int32  nsrc, nsub1, nsub2, len, npre, loc;
01242
01243     PROCNAME("stringReplaceSubstr");
01244
01245     if (!src)
01246         return (char *)ERROR_PTR("src not defined", procName, NULL);
01247     if (!sub1)
01248         return (char *)ERROR_PTR("sub1 not defined", procName, NULL);
01249     if (!sub2)
01250         return (char *)ERROR_PTR("sub2 not defined", procName, NULL);
01251
01252     if (pfound)
01253         *pfound = 0;
01254     if (ploc)
01255         loc = *ploc;
01256     else
01257         loc = 0;
01258     if ((ptr = (char *)strstr(src + loc, sub1)) == NULL) {
01259         return NULL;
01260     }
01261
01262     if (pfound)
01263         *pfound = 1;
01264     nsrc = strlen(src);
01265     nsub1 = strlen(sub1);
01266     nsub2 = strlen(sub2);
01267     len = nsrc + nsub2 - nsub1;
01268     if ((dest = (char *)CALLOC(len + 1, sizeof(char))) == NULL)
01269         return (char *)ERROR_PTR("dest not made", procName, NULL);
01270     npre = ptr - src;
01271     memcpy(dest, src, npre);
01272     strcpy(dest + npre, sub2);
01273     strcpy(dest + npre + nsub2, ptr + nsub1);
01274     if (ploc)
01275         *ploc = npre + nsub2;
01276
01277     return dest;
01278 }
01279
01280
01281 /*!
01282  *  stringReplaceEachSubstr()
01283  *
01284  *      Input:  src (input string; can be of zero length)
01285  *              sub1 (substring to be replaced)
01286  *              sub2 (substring to put in; can be "")
01287  *              &count (<optional return > the number of times that sub1
01288  *                      is found in src; 0 if not found)
01289  *      Return: dest (string with substring replaced), or null if the
01290  *              substring not found or on error.
01291  *
01292  *  Notes:
01293  *      (1) Replaces every instance.
01294  *      (2) To only remove each instance of sub1, use "" for sub2
01295  *      (3) Returns NULL if sub1 and sub2 are the same.
01296  */
01297 char *
01298 stringReplaceEachSubstr(const char  *src,
01299                         const char  *sub1,
01300                         const char  *sub2,
01301                         l_int32     *pcount)
01302 {
01303 char    *currstr, *newstr;
01304 l_int32  loc;
01305
01306     PROCNAME("stringReplaceEachSubstr");
01307
01308     if (!src)
01309         return (char *)ERROR_PTR("src not defined", procName, NULL);
01310     if (!sub1)
01311         return (char *)ERROR_PTR("sub1 not defined", procName, NULL);
01312     if (!sub2)
01313         return (char *)ERROR_PTR("sub2 not defined", procName, NULL);
01314
01315     if (pcount)
01316         *pcount = 0;
01317     loc = 0;
01318     if ((newstr = stringReplaceSubstr(src, sub1, sub2, NULL, &loc)) == NULL)
01319         return NULL;
01320
01321     if (pcount)
01322         (*pcount)++;
01323     while (1) {
01324         currstr = newstr;
01325         newstr = stringReplaceSubstr(currstr, sub1, sub2, NULL, &loc);
01326         if (!newstr)
01327             return currstr;
01328         FREE(currstr);
01329         if (pcount)
01330             (*pcount)++;
01331     }
01332 }
01333
01334
01335 /*!
01336  *  arrayFindEachSequence()
01337  *
01338  *      Input:  data (byte array)
01339  *              datalen (length of data, in bytes)
01340  *              sequence (subarray of bytes to find in data)
01341  *              seqlen (length of sequence, in bytes)
01342  *      Return: numa of offsets where the sequence is found, or null if
01343  *              none are found or on error
01344  *
01345  *  Notes:
01346  *      (1) The byte arrays @data and @sequence are not C strings,
01347  *          as they can contain null bytes.  Therefore, for each
01348  *          we must give the length of the array.
01349  *      (2) This finds every occurrence in @data of @sequence.
01350  */
01351 NUMA *
01352 arrayFindEachSequence(const l_uint8  *data,
01353                       l_int32         datalen,
01354                       const l_uint8  *sequence,
01355                       l_int32         seqlen)
01356 {
01357 l_int32  start, offset, realoffset, found;
01358 NUMA    *na;
01359
01360     PROCNAME("arrayFindEachSequence");
01361
01362     if (!data || !sequence)
01363         return (NUMA *)ERROR_PTR("data & sequence not both defined",
01364                                  procName, NULL);
01365
01366     na = numaCreate(0);
01367     start = 0;
01368     while (1) {
01369         arrayFindSequence(data + start, datalen - start, sequence, seqlen,
01370                           &offset, &found);
01371         if (found == TRUE) {
01372             realoffset = start + offset;
01373             numaAddNumber(na, realoffset);
01374             start = realoffset + seqlen;
01375             if (start >= datalen) break;
01376         }
01377         else  /* no more */
01378             break;
01379     }
01380
01381     if (numaGetCount(na) == 0)
01382         numaDestroy(&na);
01383     return na;
01384 }
01385
01386
01387 /*!
01388  *  arrayFindSequence()
01389  *
01390  *      Input:  data (byte array)
01391  *              datalen (length of data, in bytes)
01392  *              sequence (subarray of bytes to find in data)
01393  *              seqlen (length of sequence, in bytes)
01394  *              &offset (return> offset from beginning of
01395  *                       data where the sequence begins)
01396  *              &found (<optional return> 1 if sequence is found; 0 otherwise)
01397  *      Return: 0 if OK, 1 on error
01398  *
01399  *  Notes:
01400  *      (1) The byte arrays 'data' and 'sequence' are not C strings,
01401  *          as they can contain null bytes.  Therefore, for each
01402  *          we must give the length of the array.
01403  *      (2) This searches for the first occurrence in @data of @sequence,
01404  *          which consists of @seqlen bytes.  The parameter @seqlen
01405  *          must not exceed the actual length of the @sequence byte array.
01406  *      (3) If the sequence is not found, the offset will be set to -1.
01407  */
01408 l_int32
01409 arrayFindSequence(const l_uint8  *data,
01410                   l_int32         datalen,
01411                   const l_uint8  *sequence,
01412                   l_int32         seqlen,
01413                   l_int32        *poffset,
01414                   l_int32        *pfound)
01415 {
01416 l_int32  i, j, found, lastpos;
01417
01418     PROCNAME("arrayFindSequence");
01419
01420     if (!data || !sequence)
01421         return ERROR_INT("data & sequence not both defined", procName, 1);
01422     if (!poffset)
01423         return ERROR_INT("&offset not defined", procName, 1);
01424
01425     *poffset = -1;
01426     if (pfound) *pfound = 0;
01427     lastpos = datalen - seqlen + 1;
01428     found = 0;
01429     for (i = 0; i < lastpos; i++) {
01430         for (j = 0; j < seqlen; j++) {
01431             if (data[i + j] != sequence[j])
01432                  break;
01433             if (j == seqlen - 1)
01434                  found = 1;
01435         }
01436         if (found)
01437             break;
01438     }
01439
01440     if (found) {
01441         *poffset = i;
01442         if (pfound) *pfound = 1;
01443     }
01444
01445     return 0;
01446 }
01447
01448
01449 /*--------------------------------------------------------------------*
01450  *                             Safe realloc                           *
01451  *--------------------------------------------------------------------*/
01452 /*!
01453  *  reallocNew()
01454  *
01455  *      Input:  &indata (<optional>; nulls indata)
01456  *              size of input data to be copied (bytes)
01457  *              size of data to be reallocated (bytes)
01458  *      Return: ptr to new data, or null on error
01459  *
01460  *  Action: !N.B. (3) and (4)!
01461  *      (1) Allocates memory, initialized to 0
01462  *      (2) Copies as much of the input data as possible
01463  *          to the new block, truncating the copy if necessary
01464  *      (3) Frees the input data
01465  *      (4) Zeroes the input data ptr
01466  *
01467  *  Notes:
01468  *      (1) If newsize <=0, just frees input data and nulls ptr
01469  *      (2) If input ptr is null, just callocs new memory
01470  *      (3) This differs from realloc in that it always allocates
01471  *          new memory (if newsize > 0) and initializes it to 0,
01472  *          it requires the amount of old data to be copied,
01473  *          and it takes the address of the input ptr and
01474  *          nulls the handle.
01475  */
01476 void *
01477 reallocNew(void   **pindata,
01478            l_int32  oldsize,
01479            l_int32  newsize)
01480 {
01481 l_int32  minsize;
01482 void    *indata;
01483 void    *newdata;
01484
01485     PROCNAME("reallocNew");
01486
01487     if (!pindata)
01488         return ERROR_PTR("input data not defined", procName, NULL);
01489     indata = *pindata;
01490
01491     if (newsize <= 0) {   /* nonstandard usage */
01492         if (indata) {
01493             FREE(indata);
01494             *pindata = NULL;
01495         }
01496         return NULL;
01497     }
01498
01499     if (!indata)   /* nonstandard usage */
01500     {
01501         if ((newdata = (void *)CALLOC(1, newsize)) == NULL)
01502             return ERROR_PTR("newdata not made", procName, NULL);
01503         return newdata;
01504     }
01505
01506         /* Standard usage */
01507     if ((newdata = (void *)CALLOC(1, newsize)) == NULL)
01508         return ERROR_PTR("newdata not made", procName, NULL);
01509     minsize = L_MIN(oldsize, newsize);
01510     memcpy((char *)newdata, (char *)indata, minsize);
01511
01512     FREE(indata);
01513     *pindata = NULL;
01514
01515     return newdata;
01516 }
01517
01518
01519
01520 /*--------------------------------------------------------------------*
01521  *                 Read and write between file and memory             *
01522  *--------------------------------------------------------------------*/
01523 /*!
01524  *  l_binaryRead()
01525  *
01526  *      Input:  filename
01527  *              &nbytes (<return> number of bytes read)
01528  *      Return: data, or null on error
01529  */
01530 l_uint8 *
01531 l_binaryRead(const char  *filename,
01532              size_t      *pnbytes)
01533 {
01534 l_uint8  *data;
01535 FILE     *fp;
01536
01537     PROCNAME("l_binaryRead");
01538
01539     if (!filename)
01540         return (l_uint8 *)ERROR_PTR("filename not defined", procName, NULL);
01541     if (!pnbytes)
01542         return (l_uint8 *)ERROR_PTR("pnbytes not defined", procName, NULL);
01543     *pnbytes = 0;
01544
01545     if ((fp = fopenReadStream(filename)) == NULL)
01546         return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL);
01547
01548     data = l_binaryReadStream(fp, pnbytes);
01549     fclose(fp);
01550     return data;
01551 }
01552
01553
01554 /*!
01555  *  l_binaryReadStream()
01556  *
01557  *      Input:  stream
01558  *              &nbytes (<return> number of bytes read)
01559  *      Return: null-terminated array, or null on error
01560  *              (reading 0 bytes is not an error)
01561  *
01562  *  Notes:
01563  *      (1) The returned array is terminated with a null byte so that
01564  *          it can be used to read ascii data into a proper C string.
01565  *      (2) Side effect: this re-positions the stream ptr to the
01566  *          beginning of the file.
01567  */
01568 l_uint8 *
01569 l_binaryReadStream(FILE    *fp,
01570                    size_t  *pnbytes)
01571 {
01572 l_int32   ignore;
01573 l_uint8  *data;
01574
01575     PROCNAME("l_binaryReadStream");
01576
01577     if (!pnbytes)
01578         return (l_uint8 *)ERROR_PTR("&nbytes not defined", procName, NULL);
01579     *pnbytes = 0;
01580     if (!fp)
01581         return (l_uint8 *)ERROR_PTR("stream not defined", procName, NULL);
01582
01583     *pnbytes = fnbytesInFile(fp);
01584     if ((data = (l_uint8 *)CALLOC(1, *pnbytes + 1)) == NULL)
01585         return (l_uint8 *)ERROR_PTR("calloc fail for data", procName, NULL);
01586     ignore = fread(data, 1, *pnbytes, fp);
01587     return data;
01588 }
01589
01590
01591 /*!
01592  *  l_binaryWrite()
01593  *
01594  *      Input:  filename (output)
01595  *              operation  ("w" for write; "a" for append)
01596  *              data  (binary data to be written)
01597  *              nbytes  (size of data array)
01598  *      Return: 0 if OK; 1 on error
01599  */
01600 l_int32
01601 l_binaryWrite(const char  *filename,
01602               const char  *operation,
01603               void        *data,
01604               size_t       nbytes)
01605 {
01606 char   actualOperation[20];
01607 FILE  *fp;
01608
01609     PROCNAME("l_binaryWrite");
01610
01611     if (!filename)
01612         return ERROR_INT("filename not defined", procName, 1);
01613     if (!operation)
01614         return ERROR_INT("operation not defined", procName, 1);
01615     if (!data)
01616         return ERROR_INT("data not defined", procName, 1);
01617     if (nbytes <= 0)
01618         return ERROR_INT("nbytes must be > 0", procName, 1);
01619
01620     if (!strcmp(operation, "w") && !strcmp(operation, "a"))
01621         return ERROR_INT("operation not one of {'w','a'}", procName, 1);
01622
01623         /* The 'b' flag to fopen() is ignored for all POSIX
01624          * conforming systems.  However, Windows needs the 'b' flag. */
01625     stringCopy(actualOperation, operation, 2);
01626     strncat(actualOperation, "b", 2);
01627
01628     if ((fp = fopenWriteStream(filename, actualOperation)) == NULL)
01629         return ERROR_INT("stream not opened", procName, 1);
01630     fwrite(data, 1, nbytes, fp);
01631     fclose(fp);
01632     return 0;
01633 }
01634
01635
01636 /*!
01637  *  nbytesInFile()
01638  *
01639  *      Input:  filename
01640  *      Return: nbytes in file; 0 on error
01641  */
01642 size_t
01643 nbytesInFile(const char  *filename)
01644 {
01645 size_t  nbytes;
01646 FILE   *fp;
01647
01648     PROCNAME("nbytesInFile");
01649
01650     if (!filename)
01651         return ERROR_INT("filename not defined", procName, 0);
01652     if ((fp = fopenReadStream(filename)) == NULL)
01653         return ERROR_INT("stream not opened", procName, 0);
01654     nbytes = fnbytesInFile(fp);
01655     fclose(fp);
01656     return nbytes;
01657 }
01658
01659
01660 /*!
01661  *  fnbytesInFile()
01662  *
01663  *      Input:  file stream
01664  *      Return: nbytes in file; 0 on error
01665  */
01666 size_t
01667 fnbytesInFile(FILE  *fp)
01668 {
01669 size_t  nbytes, pos;
01670
01671     PROCNAME("fnbytesInFile");
01672
01673     if (!fp)
01674         return ERROR_INT("stream not open", procName, 0);
01675
01676     pos = ftell(fp);          /* initial position */
01677     fseek(fp, 0, SEEK_END);   /* EOF */
01678     nbytes = ftell(fp);
01679     fseek(fp, pos, SEEK_SET);        /* back to initial position */
01680     return nbytes;
01681 }
01682
01683
01684 /*--------------------------------------------------------------------*
01685  *                            Copy in memory                          *
01686  *--------------------------------------------------------------------*/
01687 /*!
01688  *  l_binaryCopy()
01689  *
01690  *      Input:  datas
01691  *              size (of data array)
01692  *      Return: datad (on heap), or null on error
01693  *
01694  *  Notes:
01695  *      (1) We add 4 bytes to the zeroed output because in some cases
01696  *          (e.g., string handling) it is important to have the data
01697  *          be null terminated.  This guarantees that after the memcpy,
01698  *          the result is automatically null terminated.
01699  */
01700 l_uint8 *
01701 l_binaryCopy(l_uint8  *datas,
01702              size_t    size)
01703 {
01704 l_uint8  *datad;
01705
01706     PROCNAME("l_binaryCopy");
01707
01708     if (!datas)
01709         return (l_uint8 *)ERROR_PTR("datas not defined", procName, NULL);
01710
01711     if ((datad = (l_uint8 *)CALLOC(size + 4, sizeof(l_uint8))) == NULL)
01712         return (l_uint8 *)ERROR_PTR("datad not made", procName, NULL);
01713     memcpy(datad, datas, size);
01714     return datad;
01715 }
01716
01717
01718 /*--------------------------------------------------------------------*
01719  *                         File copy operations                       *
01720  *--------------------------------------------------------------------*/
01721 /*!
01722  *  fileCopy()
01723  *
01724  *      Input:  srcfile (copy this file)
01725  *              newfile (to this file)
01726  *      Return: 0 if OK, 1 on error
01727  */
01728 l_int32
01729 fileCopy(const char  *srcfile,
01730          const char  *newfile)
01731 {
01732 l_int32   ret;
01733 size_t    nbytes;
01734 l_uint8  *data;
01735
01736     PROCNAME("fileCopy");
01737
01738     if (!srcfile)
01739         return ERROR_INT("srcfile not defined", procName, 1);
01740     if (!newfile)
01741         return ERROR_INT("newfile not defined", procName, 1);
01742
01743     if ((data = l_binaryRead(srcfile, &nbytes)) == NULL)
01744         return ERROR_INT("data not returned", procName, 1);
01745     ret = l_binaryWrite(newfile, "w", data, nbytes);
01746     FREE(data);
01747     return ret;
01748 }
01749
01750
01751 /*!
01752  *  fileConcatenate()
01753  *
01754  *      Input:  srcfile (file to append)
01755  *              destfile (file to add to)
01756  *      Return: 0 if OK, 1 on error
01757  */
01758 l_int32
01759 fileConcatenate(const char  *srcfile,
01760                 const char  *destfile)
01761 {
01762 size_t    nbytes;
01763 l_uint8  *data;
01764
01765     PROCNAME("fileConcatenate");
01766
01767     if (!srcfile)
01768         return ERROR_INT("srcfile not defined", procName, 1);
01769     if (!destfile)
01770         return ERROR_INT("destfile not defined", procName, 1);
01771
01772     data = l_binaryRead(srcfile, &nbytes);
01773     l_binaryWrite(destfile, "a", data, nbytes);
01774     FREE(data);
01775     return 0;
01776 }
01777
01778
01779 /*!
01780  *  fileAppendString()
01781  *
01782  *      Input:  filename
01783  *              str (string to append to file)
01784  *      Return: 0 if OK, 1 on error
01785  */
01786 l_int32
01787 fileAppendString(const char  *filename,
01788                  const char  *str)
01789 {
01790 FILE  *fp;
01791
01792     PROCNAME("fileAppendString");
01793
01794     if (!filename)
01795         return ERROR_INT("filename not defined", procName, 1);
01796     if (!str)
01797         return ERROR_INT("str not defined", procName, 1);
01798
01799     if ((fp = fopenWriteStream(filename, "a")) == NULL)
01800         return ERROR_INT("stream not opened", procName, 1);
01801     fprintf(fp, "%s", str);
01802     fclose(fp);
01803     return 0;
01804 }
01805
01806
01807 /*--------------------------------------------------------------------*
01808  *                      Test files for equivalence                    *
01809  *--------------------------------------------------------------------*/
01810 /*!
01811  *  filesAreIdentical()
01812  *
01813  *      Input:  fname1
01814  *              fname2
01815  *              &same (<return> 1 if identical; 0 if different)
01816  *      Return: 0 if OK, 1 on error
01817  */
01818 l_int32
01819 filesAreIdentical(const char  *fname1,
01820                   const char  *fname2,
01821                   l_int32     *psame)
01822 {
01823 l_int32   i, same;
01824 size_t    nbytes1, nbytes2;
01825 l_uint8  *array1, *array2;
01826
01827     PROCNAME("filesAreIdentical");
01828
01829     if (!psame)
01830         return ERROR_INT("&same not defined", procName, 1);
01831     *psame = 0;
01832     if (!fname1 || !fname2)
01833         return ERROR_INT("both names not defined", procName, 1);
01834
01835     nbytes1 = nbytesInFile(fname1);
01836     nbytes2 = nbytesInFile(fname2);
01837     if (nbytes1 != nbytes2)
01838         return 0;
01839
01840     if ((array1 = l_binaryRead(fname1, &nbytes1)) == NULL)
01841         return ERROR_INT("array1 not read", procName, 1);
01842     if ((array2 = l_binaryRead(fname2, &nbytes2)) == NULL)
01843         return ERROR_INT("array2 not read", procName, 1);
01844     same = 1;
01845     for (i = 0; i < nbytes1; i++) {
01846         if (array1[i] != array2[i]) {
01847             same = 0;
01848             break;
01849         }
01850     }
01851     FREE(array1);
01852     FREE(array2);
01853     *psame = same;
01854
01855     return 0;
01856 }
01857
01858
01859 /*--------------------------------------------------------------------------*
01860  *   16 and 32 bit byte-swapping on big endian and little  endian machines  *
01861  *                                                                          *
01862  *   These are typically used for I/O conversions:                          *
01863  *      (1) endian conversion for data that was read from a file            *
01864  *      (2) endian conversion on data before it is written to a file        *
01865  *--------------------------------------------------------------------------*/
01866
01867 /*--------------------------------------------------------------------*
01868  *                        16-bit byte swapping                        *
01869  *--------------------------------------------------------------------*/
01870 #ifdef L_BIG_ENDIAN
01871
01872 l_uint16
01873 convertOnBigEnd16(l_uint16  shortin)
01874 {
01875     return ((shortin << 8) | (shortin >> 8));
01876 }
01877
01878 l_uint16
01879 convertOnLittleEnd16(l_uint16  shortin)
01880 {
01881     return  shortin;
01882 }
01883
01884 #else     /* L_LITTLE_ENDIAN */
01885
01886 l_uint16
01887 convertOnLittleEnd16(l_uint16  shortin)
01888 {
01889     return ((shortin << 8) | (shortin >> 8));
01890 }
01891
01892 l_uint16
01893 convertOnBigEnd16(l_uint16  shortin)
01894 {
01895     return  shortin;
01896 }
01897
01898 #endif  /* L_BIG_ENDIAN */
01899
01900
01901 /*--------------------------------------------------------------------*
01902  *                        32-bit byte swapping                        *
01903  *--------------------------------------------------------------------*/
01904 #ifdef L_BIG_ENDIAN
01905
01906 l_uint32
01907 convertOnBigEnd32(l_uint32  wordin)
01908 {
01909     return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) |
01910             ((wordin >> 8) & 0x0000ff00) | (wordin >> 24));
01911 }
01912
01913 l_uint32
01914 convertOnLittleEnd32(l_uint32  wordin)
01915 {
01916     return wordin;
01917 }
01918
01919 #else  /*  L_LITTLE_ENDIAN */
01920
01921 l_uint32
01922 convertOnLittleEnd32(l_uint32  wordin)
01923 {
01924     return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) |
01925             ((wordin >> 8) & 0x0000ff00) | (wordin >> 24));
01926 }
01927
01928 l_uint32
01929 convertOnBigEnd32(l_uint32  wordin)
01930 {
01931     return wordin;
01932 }
01933
01934 #endif  /* L_BIG_ENDIAN */
01935
01936
01937
01938 /*--------------------------------------------------------------------*
01939  *                        Opening file streams                        *
01940  *--------------------------------------------------------------------*/
01941 /*!
01942  *  fopenReadStream()
01943  *
01944  *      Input:  filename
01945  *      Return: stream, or null on error
01946  *
01947  *  Notes:
01948  *      (1) This wrapper also handles pathname conversions for Windows.
01949  *          It should be used whenever you want to run fopen() to
01950  *          read from a stream.
01951  */
01952 FILE *
01953 fopenReadStream(const char  *filename)
01954 {
01955 char  *fname, *tail;
01956 FILE  *fp;
01957
01958     PROCNAME("fopenReadStream");
01959
01960     if (!filename)
01961         return (FILE *)ERROR_PTR("filename not defined", procName, NULL);
01962
01963         /* Try input filename */
01964     fname = genPathname(filename, NULL);
01965     fp = fopen(fname, "rb");
01966     FREE(fname);
01967     if (fp) return fp;
01968
01969         /* Else, strip directory and try locally */
01970     splitPathAtDirectory(filename, NULL, &tail);
01971     fp = fopen(tail, "rb");
01972     FREE(tail);
01973
01974     if (!fp)
01975         return (FILE *)ERROR_PTR("file not found", procName, NULL);
01976     return fp;
01977 }
01978
01979
01980 /*!
01981  *  fopenWriteStream()
01982  *
01983  *      Input:  filename
01984  *              modestring
01985  *      Return: stream, or null on error
01986  *
01987  *  Notes:
01988  *      (1) This wrapper also handles pathname conversions for Windows.
01989  *          It should be used whenever you want to run fopen() to
01990  *          write or append to a stream.
01991  */
01992 FILE *
01993 fopenWriteStream(const char  *filename,
01994                  const char  *modestring)
01995 {
01996 FILE  *fp;
01997
01998     PROCNAME("fopenWriteStream");
01999
02000     if (!filename)
02001         return (FILE *)ERROR_PTR("filename not defined", procName, NULL);
02002
02003 #ifdef _WIN32
02004     {
02005     char  *fname;
02006         fname = genPathname(filename, NULL);
02007         fp = fopen(fname, modestring);
02008         FREE(fname);
02009     }
02010 #else
02011     fp = fopen(filename, modestring);
02012 #endif  /* _WIN32 */
02013
02014     if (!fp)
02015         return (FILE *)ERROR_PTR("stream not opened", procName, NULL);
02016     return fp;
02017 }
02018
02019
02020 /*--------------------------------------------------------------------*
02021  *      Functions to avoid C-runtime boundary crossing with dlls      *
02022  *--------------------------------------------------------------------*/
02023 /*
02024  *  Problems arise when pointers to streams and data are passed
02025  *  between two Windows DLLs that have been generated with different
02026  *  C runtimes.  To avoid this, leptonica provides wrappers for
02027  *  several C library calls.
02028  */
02029 /*!
02030  *  lept_fopen()
02031  *
02032  *      Input:  filename
02033  *              mode (same as for fopen(); e.g., "rb")
02034  *      Return: stream or null on error
02035  *
02036  *  Notes:
02037  *      (1) This must be used by any application that passes
02038  *          a file handle to a leptonica Windows DLL.
02039  */
02040 FILE *
02041 lept_fopen(const char  *filename,
02042            const char  *mode)
02043 {
02044     PROCNAME("lept_fopen");
02045
02046     if (!filename)
02047         return (FILE *)ERROR_PTR("filename not defined", procName, NULL);
02048     if (!mode)
02049         return (FILE *)ERROR_PTR("mode not defined", procName, NULL);
02050
02051     if (stringFindSubstr(mode, "r", NULL))
02052         return fopenReadStream(filename);
02053     else
02054         return fopenWriteStream(filename, mode);
02055 }
02056
02057
02058 /*!
02059  *  lept_fclose()
02060  *
02061  *      Input:  fp (stream handle)
02062  *      Return: 0 if OK, 1 on error
02063  *
02064  *  Notes:
02065  *      (1) This should be used by any application that accepts
02066  *          a file handle generated by a leptonica Windows DLL.
02067  */
02068 l_int32
02069 lept_fclose(FILE *fp)
02070 {
02071     PROCNAME("lept_fclose");
02072
02073     if (!fp)
02074         return ERROR_INT("stream not defined", procName, 1);
02075
02076     return fclose(fp);
02077 }
02078
02079
/*!
 *  lept_calloc()
 *
 *      Input:  nmemb (number of members)
 *              size (of each member)
 *      Return: void ptr, or null on error
 *
 *  Notes:
 *      (1) For safety with windows DLLs, pair this with lept_free()
 *          throughout your application, to avoid C-runtime boundary
 *          problems.
 *      (2) Null is returned without allocating if either @nmemb or
 *          @size is 0.
 */
void *
lept_calloc(size_t  nmemb,
            size_t  size)
{
    if (nmemb == 0 || size == 0)
        return NULL;

    return CALLOC(nmemb, size);
}
02100
02101
/*!
 *  lept_free()
 *
 *      Input:  void ptr
 *      Return: void
 *
 *  Notes:
 *      (1) This should be used by any application that accepts
 *          heap data allocated by a leptonica Windows DLL.
 *      (2) A null @ptr is ignored.
 */
void
lept_free(void *ptr)
{
    if (ptr)
        FREE(ptr);
}
02119
02120
02121 /*--------------------------------------------------------------------*
02122  *                Cross-platform file system operations               *
02123  *         [ These only write to /tmp or its subdirectories ]         *
02124  *--------------------------------------------------------------------*/
02125 /*!
02126  *  lept_mkdir()
02127  *
02128  *      Input:  subdir
02129  *      Return: 0 on success, non-zero on failure
02130  *
02131  *  Notes:
02132  *      (1) This makes a subdirectory of /tmp/.
02133  *      (2) Use unix pathname separators.
02134  *      (3) On Windows, it makes a subdirectory of <Temp>/leptonica,
02135  *          where <Temp> is the Windows temp dir.  The name translation is:
02136  *                 /tmp  -->   <Temp>/leptonica
02137  */
02138 l_int32
02139 lept_mkdir(const char  *subdir)
02140 {
02141 char     *dir;
02142 l_int32   ret;
02143 #ifdef  _WIN32
02144 char     *newpath;
02145 l_uint32  attributes;
02146 #endif  /* !_WIN32 */
02147
02148     PROCNAME("lept_mkdir");
02149
02150     if (!subdir)
02151         return ERROR_INT("subdir not defined", procName, 1);
02152     if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/'))
02153         return ERROR_INT("subdir not an actual subdirectory", procName, 1);
02154
02155     dir = pathJoin("/tmp", subdir);
02156
02157 #ifndef _WIN32
02158     ret = mkdir(dir, 0777);
02159 #else
02160         /* Make sure the leptonica subdir exists in tmp dir */
02161     newpath = genPathname("/tmp", NULL);
02162     attributes = GetFileAttributes(newpath);
02163     if (attributes == INVALID_FILE_ATTRIBUTES) {
02164         ret = (CreateDirectory(newpath, NULL) ? 0 : 1);
02165     }
02166     FREE(newpath);
02167
02168     newpath = genPathname(dir, NULL);
02169     ret = (CreateDirectory(newpath, NULL) ? 0 : 1);
02170     FREE(newpath);
02171 #endif  /* !_WIN32 */
02172
02173     FREE(dir);
02174     return ret;
02175 }
02176
02177
02178 /*!
02179  *  lept_rmdir()
02180  *
02181  *      Input:  subdir (of /tmp or its equivalent on Windows)
02182  *      Return: 0 on success, non-zero on failure
02183  *
02184  *  Notes:
02185  *      (1) On unix, this removes all the files in the named
02186  *          subdirectory of /tmp.  It then removes the subdirectory.
02187  *      (2) Use unix pathname separators.
02188  *      (3) On Windows, the affected directory is a subdirectory
02189  *          of <Temp>/leptonica, where <Temp> is the Windows temp dir.
02190  *      (4) TODO: Use a new function lept_dirExists(path) to test
02191  *          if the directory exists, and if not, fail silently.
02192  */
02193 l_int32
02194 lept_rmdir(const char  *subdir)
02195 {
02196 char    *dir, *fname, *fullname;
02197 l_int32  ret, i, nfiles;
02198 SARRAY  *sa;
02199 #ifdef _WIN32
02200 char    *newpath;
02201 #endif  /* _WIN32 */
02202
02203     PROCNAME("lept_rmdir");
02204
02205     if (!subdir)
02206         return ERROR_INT("subdir not defined", procName, 1);
02207     if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/'))
02208         return ERROR_INT("subdir not an actual subdirectory", procName, 1);
02209
02210     if ((dir = pathJoin("/tmp", subdir)) == NULL)
02211         return ERROR_INT("dir not made", procName, 1);
02212
02213         /* List all the files in temp subdir */
02214     if ((sa = getFilenamesInDirectory(dir)) == NULL) {
02215         L_WARNING_STRING("directory %s does not exist", procName, dir);
02216         FREE(dir);
02217         return 1;
02218     }
02219     nfiles = sarrayGetCount(sa);
02220
02221 #ifndef _WIN32
02222     for (i = 0; i < nfiles; i++) {
02223         fname = sarrayGetString(sa, i, L_NOCOPY);
02224         fullname = genPathname(dir, fname);
02225         remove(fullname);
02226         FREE(fullname);
02227     }
02228     ret = remove(dir);
02229 #else
02230     for (i = 0; i < nfiles; i++) {
02231         fname = sarrayGetString(sa, i, L_NOCOPY);
02232         fullname = genPathname(dir, fname);
02233         ret = DeleteFile(fullname);
02234         FREE(fullname);
02235     }
02236     newpath = genPathname(dir, NULL);
02237     ret = (RemoveDirectory(newpath) ? 0 : 1);
02238     FREE(newpath);
02239 #endif  /* !_WIN32 */
02240
02241     sarrayDestroy(&sa);
02242     FREE(dir);
02243     return ret;
02244 }
02245
02246
02247 /*!
02248  *  lept_rm()
02249  *
02250  *      Input:  subdir (can be NULL, in which case the removed file is
02251  *                      in /tmp)
02252  *              filename (without the directory)
02253  *      Return: 0 on success, non-zero on failure
02254  *
02255  *  Notes:
02256  *      (1) This removes the named file in /tmp or a subdirectory of /tmp.
02257  *          If the file is in /tmp, use NULL for the subdir.
02258  *      (2) Use unix pathname separators.
02259  *      (3) On Windows, the file is in either <Temp>/leptonica, or
02260  *          a subdirectory of this, where <Temp> is the Windows temp dir.
02261  *          The name translation is: /tmp  -->   <Temp>/leptonica
02262  */
02263 l_int32
02264 lept_rm(const char  *subdir,
02265         const char  *filename)
02266 {
02267 char    *dir, *pathname;
02268 l_int32  ret;
02269 #ifdef _WIN32
02270 char    *newpath;
02271 #endif  /* _WIN32 */
02272
02273     PROCNAME("lept_rm");
02274
02275     if (!filename)
02276         return ERROR_INT("filename not defined", procName, 1);
02277     if ((strlen(filename) == 0) || (filename[0] == '.') || (filename[0] == '/'))
02278         return ERROR_INT("filename cannot contain a path", procName, 1);
02279
02280     if (subdir) {
02281         dir = pathJoin("/tmp", subdir);
02282         pathname = pathJoin(dir, filename);
02283         FREE(dir);
02284     }
02285     else
02286         pathname = pathJoin("/tmp", filename);
02287     if (!pathname)
02288         return ERROR_INT("pathname not made", procName, 1);
02289
02290 #ifndef _WIN32
02291     ret = remove(pathname);
02292 #else
02293     newpath = genPathname(pathname, NULL);
02294     if (!newpath) {
02295         FREE(pathname);
02296         return ERROR_INT("newpath not made", procName, 1);
02297     }
02298     ret = (DeleteFile(newpath) ? 0 : 1);
02299     FREE(newpath);
02300 #endif  /* !_WIN32 */
02301
02302     FREE(pathname);
02303     return ret;
02304 }
02305
02306
02307 /*!
02308  *  lept_mv()
02309  *
02310  *      Input:  srcfile, newfile
02311  *      Return: 0 on success, non-zero on failure
02312  *
02313  *  Notes:
02314  *      (1) This moves a srcfile to /tmp or to a subdirectory of /tmp.
02315  *      (2) The input srcfile name is the complete pathname.
02316  *          The input newfile is either in /tmp or a subdirectory
02317  *          of /tmp, and newfile can be specified either as the
02318  *          full path or without the leading '/tmp'.
02319  *      (3) Use unix pathname separators.
02320  *      (4) On Windows, the source and target filename are altered
02321  *          internally if necessary to conform to the Windows temp file.
02322  *          The name translation is: /tmp  -->   <Temp>/leptonica
02323  */
02324 l_int32
02325 lept_mv(const char  *srcfile,
02326         const char  *newfile)
02327 {
02328 char     *newfileplus;
02329 l_int32   ret;
02330 #ifndef _WIN32
02331 char     *command;
02332 l_int32   nbytes;
02333 #else
02334 char     *srcpath, *newpath, *tail;
02335 l_uint32  attributes;
02336 #endif  /* !_WIN32 */
02337
02338     PROCNAME("lept_mv");
02339
02340     if (!srcfile || !newfile)
02341         return ERROR_INT("srcfile and newfile not both defined", procName, 1);
02342     if (strncmp(newfile, "/tmp/", 5))
02343         newfileplus = pathJoin("/tmp", newfile);
02344     else
02345         newfileplus = stringNew(newfile);
02346
02347 #ifndef _WIN32
02348     nbytes = strlen(srcfile) + strlen(newfileplus) + 10;
02349     command = (char *)CALLOC(nbytes, sizeof(char));
02350     snprintf(command, nbytes, "mv %s %s", srcfile, newfileplus);
02351     ret = system(command);
02352     FREE(command);
02353 #else
02354     srcpath = genPathname(srcfile, NULL);
02355     newpath = genPathname(newfileplus, NULL);
02356     attributes = GetFileAttributes(newpath);
02357     if (attributes != INVALID_FILE_ATTRIBUTES &&
02358         (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
02359         if (splitPathAtDirectory(srcpath, NULL, &tail)) {
02360             FREE(srcpath);
02361             FREE(newpath);
02362             return ERROR_INT("Unable to split source filename into root & tail",
02363                              procName, 1);
02364         }
02365         FREE(newpath);
02366         newpath = genPathname(newfileplus, tail);
02367         FREE(tail);
02368     }
02369
02370         /* New file overwritten if it already exists */
02371     ret = (MoveFileEx(srcpath, newpath,
02372                       MOVEFILE_COPY_ALLOWED |
02373                       MOVEFILE_REPLACE_EXISTING) ? 0 : 1);
02374     FREE(srcpath);
02375     FREE(newpath);
02376 #endif  /* !_WIN32 */
02377
02378     FREE(newfileplus);
02379     return ret;
02380 }
02381
02382
02383 /*!
02384  *  lept_cp()
02385  *
02386  *      Input:  srcfile
02387  *              newfile
02388  *      Return: 0 on success, non-zero on failure
02389  *
02390  *  Notes:
02391  *      (1) This copies a file to /tmp or a subdirectory of /tmp.
02392  *      (2) The input srcfile name is the complete pathname.
02393  *          The input newfile is either in /tmp or a subdirectory
02394  *          of /tmp, and newfile can be specified either as the
02395  *          full path or without the leading '/tmp'.
02396  *      (3) Use unix pathname separators.
02397  *      (4) On Windows, the source and target filename are altered
02398  *          internally if necessary to conform to the Windows temp file.
02399  *      (5) Alternatively, you can use fileCopy().  This avoids
02400  *          forking a new process and has no restrictions on the
02401  *          destination directory.
02402  */
02403 l_int32
02404 lept_cp(const char  *srcfile,
02405         const char  *newfile)
02406 {
02407 char     *newfileplus;
02408 l_int32   ret;
02409 #ifndef _WIN32
02410 char     *command;
02411 l_int32   nbytes;
02412 #else
02413 char     *srcpath, *newpath, *tail;
02414 l_uint32  attributes;
02415 #endif  /* !_WIN32 */
02416
02417     PROCNAME("lept_cp");
02418
02419     if (!srcfile || !newfile)
02420         return ERROR_INT("srcfile and newfile not both defined", procName, 1);
02421     if (strncmp(newfile, "/tmp/", 5))
02422         newfileplus = pathJoin("/tmp", newfile);
02423     else
02424         newfileplus = stringNew(newfile);
02425
02426 #ifndef _WIN32
02427     nbytes = strlen(srcfile) + strlen(newfileplus) + 10;
02428     command = (char *)CALLOC(nbytes, sizeof(char));
02429     snprintf(command, nbytes, "cp %s %s", srcfile, newfile);
02430     ret = system(command);
02431     FREE(command);
02432 #else
02433     srcpath = genPathname(srcfile, NULL);
02434     newpath = genPathname(newfileplus, NULL);
02435     attributes = GetFileAttributes(newpath);
02436     if (attributes != INVALID_FILE_ATTRIBUTES &&
02437         (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
02438         if (splitPathAtDirectory(srcpath, NULL, &tail)) {
02439             FREE(srcpath);
02440             FREE(newpath);
02441             return ERROR_INT("Unable to split source filename into root & tail",
02442                              procName, 1);
02443         }
02444         FREE(newpath);
02445         newpath = genPathname(newfileplus, tail);
02446         FREE(tail);
02447     }
02448
02449         /* New file overwritten if it already exists */
02450     ret = (CopyFile(srcpath, newpath, FALSE) ? 0 : 1);
02451     FREE(srcpath);
02452     FREE(newpath);
02453 #endif  /* !_WIN32 */
02454
02455     FREE(newfileplus);
02456     return ret;
02457 }
02458
02459
02460 /*--------------------------------------------------------------------*
02461  *                         File name operations                       *
02462  *--------------------------------------------------------------------*/
02463 /*!
02464  *  splitPathAtDirectory()
02465  *
02466  *      Input:  pathname  (full path; can be a directory)
02467  *              &dir  (<optional return> root directory name of
02468  *                     input path, including trailing '/')
02469  *              &tail (<optional return> path tail, which is either
02470  *                     the file name within the root directory or
02471  *                     the last sub-directory in the path)
02472  *      Return: 0 if OK, 1 on error
02473  *
02474  *  Notes:
02475  *      (1) If you only want the tail, input null for the root directory ptr.
02476  *      (2) If you only want the root directory name, input null for the
02477  *          tail ptr.
02478  *      (3) This function makes decisions based only on the lexical
02479  *          structure of the input.  Examples:
02480  *            /usr/tmp/abc  -->  dir: /usr/tmp/   tail: abc
02481  *            /usr/tmp/  -->  dir: /usr/tmp/   tail: [empty string]
02482  *            /usr/tmp  -->  dir: /usr/   tail: tmp
02483  *      (4) N.B. The input pathname must have unix directory separators
02484  *          for unix and windows directory separators for windows.
02485  */
02486 l_int32
02487 splitPathAtDirectory(const char  *pathname,
02488                      char       **pdir,
02489                      char       **ptail)
02490 {
02491 char  *cpathname, *lastslash;
02492
02493     PROCNAME("splitPathAtDirectory");
02494
02495     if (!pdir && !ptail)
02496         return ERROR_INT("null input for both strings", procName, 1);
02497     if (pdir) *pdir = NULL;
02498     if (ptail) *ptail = NULL;
02499     if (!pathname)
02500         return ERROR_INT("pathname not defined", procName, 1);
02501
02502     cpathname = stringNew(pathname);
02503     if ((lastslash = strrchr(cpathname, sepchar))) {
02504         if (ptail)
02505             *ptail = stringNew(lastslash + 1);
02506         if (pdir) {
02507             *(lastslash + 1) = '\0';
02508             *pdir = cpathname;
02509         }
02510         else
02511             FREE(cpathname);
02512     }
02513     else {  /* no directory */
02514         if (pdir)
02515             *pdir = stringNew("");
02516         if (ptail)
02517             *ptail = cpathname;
02518         else
02519             FREE(cpathname);
02520     }
02521
02522     return 0;
02523 }
02524
02525
02526 /*!
02527  *  splitPathAtExtension()
02528  *
02529  *      Input:  pathname (full path; can be a directory)
02530  *              &basename (<optional return> pathname not including the
02531  *                        last dot and characters after that)
02532  *              &extension (<optional return> path extension, which is
02533  *                        the last dot and the characters after it.  If
02534  *                        there is no extension, it returns the empty string)
02535  *      Return: 0 if OK, 1 on error
02536  *
02537  *  Notes:
02538  *      (1) If you only want the extension, input null for the basename ptr.
02539  *      (2) If you only want the basename without extension, input null
02540  *          for the extension ptr.
02541  *      (3) This function makes decisions based only on the lexical
02542  *          structure of the input.  Examples:
02543  *            /usr/tmp/abc.jpg  -->  basename: /usr/tmp/abc   ext: .jpg
02544  *            /usr/tmp/.jpg  -->  basename: /usr/tmp/   tail: .jpg
02545  *            /usr/tmp.jpg/  -->  basename: /usr/tmp.jpg/   tail: [empty str]
02546  *            ./.jpg  -->  basename: ./   tail: .jpg
02547  *      (4) N.B. The input pathname must have unix directory separators
02548  *          for unix and windows directory separators for windows.
02549  */
02550 l_int32
02551 splitPathAtExtension(const char  *pathname,
02552                      char       **pbasename,
02553                      char       **pextension)
02554 {
02555 char  *tail, *dir, *lastdot;
02556 char   empty[4] = "";
02557
02558     PROCNAME("splitPathExtension");
02559
02560     if (!pbasename && !pextension)
02561         return ERROR_INT("null input for both strings", procName, 1);
02562     if (pbasename) *pbasename = NULL;
02563     if (pextension) *pextension = NULL;
02564     if (!pathname)
02565         return ERROR_INT("pathname not defined", procName, 1);
02566
02567         /* Split out the directory first */
02568     splitPathAtDirectory(pathname, &dir, &tail);
02569
02570         /* Then look for a "." in the tail part.
02571          * This way we ignore all "." in the directory. */
02572     if ((lastdot = strrchr(tail, '.'))) {
02573         if (pextension)
02574             *pextension = stringNew(lastdot);
02575         if (pbasename) {
02576             *lastdot = '\0';
02577             *pbasename = stringJoin(dir, tail);
02578         }
02579     }
02580     else {
02581         if (pextension)
02582             *pextension = stringNew(empty);
02583         if (pbasename)
02584             *pbasename = stringNew(pathname);
02585     }
02586     FREE(dir);
02587     FREE(tail);
02588     return 0;
02589 }
02590
02591
02592 /*!
02593  *  pathJoin()
02594  *
02595  *      Input:  dir (<optional> can be null)
02596  *              fname (<optional> can be null)
02597  *      Return: specially concatenated path, or null on error
02598  *
02599  *  Notes:
02600  *      (1) Use unix-style pathname separators ('/').
02601  *      (2) @fname can be the entire path, or part of the path containing
02602  *          at least one directory, or a tail without a directory, or NULL.
02603  *      (3) It produces a path that strips multiple slashes to a single
02604  *          slash, joins @dir and @fname by a slash, and has no trailing
02605  *          slashes (except in the cases where @dir == "/" and
02606  *          @fname == NULL, or v.v.).
02607  *      (4) If both @dir and @fname are null, produces an empty string.
02608  *      (5) The result is not canonicalized or tested  for correctness:
02609  *          garbage in (e.g., ...), garbage out.
02610  *      (6) Examples:
02611  *             //tmp// + //abc/  -->  /tmp/abc
02612  *             tmp/ + /abc/      -->  tmp/abc
02613  *             tmp/ + abc/       -->  tmp/abc
02614  *             /tmp/ + ///       -->  /tmp
02615  *             /tmp/ + NULL      -->  /tmp
02616  *             // + /abc//       -->  /abc
02617  *             // + NULL         -->  /
02618  *             NULL + /abc/def/  -->  /abc/def
02619  *             NULL + abc//      -->  abc
02620  *             NULL + //         -->  /
02621  *             NULL + NULL       -->  (empty string)
02622  *             "" + ""           -->  (empty string)
02623  *             "" + /            -->  /
02624  */
02625 char *
02626 pathJoin(const char  *dir,
02627          const char  *fname)
02628 {
02629 char     *slash = (char *)"/";
02630 char     *str, *dest;
02631 l_int32   i, n1, n2, emptydir;
02632 size_t    size;
02633 SARRAY   *sa1, *sa2;
02634 L_BYTEA  *ba;
02635
02636     if (!dir && !fname)
02637         return stringNew("");
02638
02639     sa1 = sarrayCreate(0);
02640     sa2 = sarrayCreate(0);
02641     ba = l_byteaCreate(4);
02642
02643         /* Process @dir */
02644     if (dir && strlen(dir) > 0) {
02645         if (dir[0] == '/')
02646             l_byteaAppendString(ba, slash);
02647         sarraySplitString(sa1, dir, "/");  /* removes all slashes */
02648         n1 = sarrayGetCount(sa1);
02649         for (i = 0; i < n1; i++) {
02650             str = sarrayGetString(sa1, i, L_NOCOPY);
02651             l_byteaAppendString(ba, str);
02652             l_byteaAppendString(ba, slash);
02653         }
02654     }
02655
02656         /* Special case to add leading slash: dir NULL or empty string  */
02657     emptydir = dir && strlen(dir) == 0;
02658     if ((!dir || emptydir) && fname && strlen(fname) > 0 && fname[0] == '/')
02659         l_byteaAppendString(ba, slash);
02660
02661         /* Process @fname */
02662     if (fname && strlen(fname) > 0) {
02663         sarraySplitString(sa2, fname, "/");
02664         n2 = sarrayGetCount(sa2);
02665         for (i = 0; i < n2; i++) {
02666             str = sarrayGetString(sa2, i, L_NOCOPY);
02667             l_byteaAppendString(ba, str);
02668             l_byteaAppendString(ba, slash);
02669         }
02670     }
02671
02672         /* Remove trailing slash */
02673     dest = (char *)l_byteaCopyData(ba, &size);
02674     if (size > 1 && dest[size - 1] == '/')
02675         dest[size - 1] = '\0';
02676
02677     sarrayDestroy(&sa1);
02678     sarrayDestroy(&sa2);
02679     l_byteaDestroy(&ba);
02680     return dest;
02681 }
02682
02683
02684 /*!
02685  *  genPathname()
02686  *
02687  *      Input:  dir (directory name, with or without trailing '/')
02688  *              fname (<optional> file name within the directory)
02689  *      Return: pathname (either a directory or full path), or null on error
02690  *
02691  *  Notes:
02692  *      (1) Use unix-style pathname separators ('/').
02693  *      (2) This function can be used in several ways:
02694  *            * to generate a full path from a directory and a file name
02695  *            * to convert a unix pathname to a windows pathname
02696  *            * to convert from the unix '/tmp' directory to the
02697  *              equivalent windows temp directory.
02698  *          The windows name translation is:
02699  *                   /tmp  -->   <Temp>/leptonica
02700  *      (3) There are three cases for the input:
02701  *          (a) @dir is a directory and @fname is null: result is a directory
02702  *          (b) @dir is a full path and @fname is null: result is a full path
02703  *          (c) @dir is a directory and @fname is defined: result is a full path
02704  *      (4) In all cases, the resulting pathname is not terminated with a slash
02705  *      (5) The caller is responsible for freeing the pathname.
02706  */
02707 char *
02708 genPathname(const char  *dir,
02709             const char  *fname)
02710 {
02711 char    *cdir, *pathout;
02712 l_int32  dirlen, namelen, size;
02713
02714     PROCNAME("genPathname");
02715
02716     if (!dir)
02717         return (char *)ERROR_PTR("dir not defined", procName, NULL);
02718
02719         /* Remove trailing slash in dir, except when dir == "/"  */
02720     cdir = stringNew(dir);
02721     dirlen = strlen(cdir);
02722     if (cdir[dirlen - 1] == '/' && dirlen != 1) {
02723         cdir[dirlen - 1] = '\0';
02724         dirlen--;
02725     }
02726
02727     namelen = (fname) ? strlen(fname) : 0;
02728     size = dirlen + namelen + 256;
02729     if ((pathout = (char *)CALLOC(size, sizeof(char))) == NULL)
02730         return (char *)ERROR_PTR("pathout not made", procName, NULL);
02731
02732 #ifdef _WIN32
02733     {
02734         char  dirt[MAX_PATH];
02735         if (stringFindSubstr(cdir, "/", NULL) > 0) {
02736             char    *tempdir;
02737             l_int32  tdirlen;
02738             tempdir = stringReplaceEachSubstr(cdir, "/", "\\", NULL);
02739             tdirlen = strlen(tempdir);
02740             if (strncmp(dir, "/tmp", 4) == 0) {  /* get temp directory */
02741                 GetTempPath(sizeof(dirt), dirt);
02742                 stringCopy(pathout, dirt, strlen(dirt) - 1);
02743                 stringCat(pathout, size, "\\leptonica");
02744                 if (tdirlen > 4)
02745                     stringCat(pathout, size, tempdir + 4);
02746
02747                     /* Set an extra null byte.  Otherwise, when setting
02748                        sepchar later, no trailing null byte remains. */
02749                 pathout[strlen(pathout) + 1] = '\0';
02750             }
02751             else {
02752                 stringCopy(pathout, tempdir, tdirlen);
02753             }
02754             FREE(tempdir);
02755         }
02756         else {  /* no '/' characters; OK as is */
02757             stringCopy(pathout, cdir, dirlen);
02758         }
02759     }
02760 #else
02761     stringCopy(pathout, cdir, dirlen);
02762 #endif  /* _WIN32 */
02763
02764     if (fname && strlen(fname) > 0) {
02765         dirlen = strlen(pathout);
02766         pathout[dirlen] = sepchar;  /* append sepchar */
02767         strncat(pathout, fname, namelen);
02768     }
02769     FREE(cdir);
02770     return pathout;
02771 }
02772
02773
02774 /*!
02775  *  genTempFilename()
02776  *
02777  *      Input:  dir (directory name; use '.' for local dir;
02778  *                   no trailing '/' and @dir == "/" is invalid)
02779  *              tail (<optional>  tailname, including extension if any;
02780  *                    can be null or empty but can't contain '/')
02781  *              usetime (1 to include current time in microseconds in
02782  *                       the filename; 0 to omit.
02783  *              usepid (1 to include pid in filename; 0 to omit.
02784  *      Return: temp filename, or null on error
02785  *
02786  *  Notes:
02787  *      (1) Use unix-style pathname separators ('/').
02788  *      (2) Specifying the root directory (@dir == "/") is invalid.
02789  *      (3) Specifying a @tail containing '/' is invalid.
02790  *      (4) The most general form (@usetime = @usepid = 1) is:
02791  *              <dir>/<usec>_<pid>_<tail>
02792  *          When @usetime = 1, @usepid = 0, the output filename is:
02793  *              <dir>/<usec>_<tail>
02794  *          When @usetime = 0, @usepid = 1, the output filename is:
02795  *              <dir>/<pid>_<tail>
02796  *          When @usetime = @usepid = 0, the output filename is:
02797  *              <dir>/<tail>
02798  *          Note: It is not valid to have @tail = null or empty and have
02799  *          both @usetime = @usepid = 0.  That is, there must be
02800  *          some non-empty tail name.
02801  *      (5) N.B. The caller is responsible for freeing the returned filename.
02802  *          For windows, to avoid C-runtime boundary crossing problems
02803  *          when using DLLs, you must use lept_free() to free the name.
02804  *      (6) For windows, if the caller requests the directory '/tmp',
02805  *          this uses GetTempPath() to select the actual directory,
02806  *          avoiding platform-conditional code in use.  The directory
02807  *          selected is <Temp>/leptonica, where <Temp> is the Windows
02808  *          temp directory.
02809  *      (7) Set @usetime = @usepid = 1 when
02810  *          (a) more than one process is writing and reading temp files, or
02811  *          (b) multiple threads from a single process call this function, or
02812  *          (c) there is the possiblity of an attack where the intruder
02813  *              is logged onto the server and might try to guess filenames.
02814  */
02815 char *
02816 genTempFilename(const char  *dir,
02817                 const char  *tail,
02818                 l_int32      usetime,
02819                 l_int32      usepid)
02820 {
02821 char     buf[256];
02822 l_int32  i, buflen, usec, pid, emptytail;
02823 #ifdef _WIN32
02824 char    *newpath;
02825 l_uint32 attributes;
02826 l_int32  ret;
02827 #endif  /* !_WIN32 */
02828
02829     PROCNAME("genTempFilename");
02830
02831     if (!dir)
02832         return (char *)ERROR_PTR("dir not defined", procName, NULL);
02833     if (dir && strlen(dir) == 1 && dir[0] == '/')
02834         return (char *)ERROR_PTR("dir == '/' not permitted", procName, NULL);
02835     if (tail && strlen(tail) > 0 && stringFindSubstr(tail, "/", NULL))
02836         return (char *)ERROR_PTR("tail can't contain '/'", procName, NULL);
02837     emptytail = tail && (strlen(tail) == 0);
02838     if (!usetime && !usepid && (!tail || emptytail))
02839         return (char *)ERROR_PTR("name can't be a directory", procName, NULL);
02840
02841     if (usepid) pid = getpid();
02842     buflen = sizeof(buf);
02843     for (i = 0; i < buflen; i++)
02844         buf[i] = 0;
02845     l_getCurrentTime(NULL, &usec);
02846
02847 #ifdef _WIN32
02848     {  /* do not assume /tmp exists */
02849     char  dirt[MAX_PATH];
02850     if (!strcmp(dir, "/tmp")) {
02851         GetTempPath(sizeof(dirt), dirt);
02852         stringCat(dirt, sizeof(dirt), "leptonica\\");
02853
02854             /* Make sure the leptonica subdir exists in tmp dir */
02855         newpath = genPathname("/tmp", NULL);
02856         attributes = GetFileAttributes(newpath);
02857         if (attributes == INVALID_FILE_ATTRIBUTES) {
02858             ret = (CreateDirectory(newpath, NULL) ? 0 : 1);
02859         }
02860         FREE(newpath);
02861     }
02862     else
02863         snprintf(dirt, sizeof(dirt), "%s\\", dir);  /* add trailing '\' */
02864
02865     if (usetime && usepid)
02866         snprintf(buf, buflen, "%s%d_%d_", dirt, usec, pid);
02867     else if (usetime)
02868         snprintf(buf, buflen, "%s%d_", dirt, usec);
02869     else if (usepid)
02870         snprintf(buf, buflen, "%s%d_", dirt, pid);
02871     else
02872         snprintf(buf, buflen, "%s", dirt);
02873     }
02874 #else
02875     if (usetime && usepid)
02876         snprintf(buf, buflen, "%s/%d_%d_", dir, usec, pid);
02877     else if (usetime)
02878         snprintf(buf, buflen, "%s/%d_", dir, usec);
02879     else if (usepid)
02880         snprintf(buf, buflen, "%s/%d_", dir, pid);
02881     else
02882         snprintf(buf, buflen, "%s/", dir);
02883 #endif
02884
02885     return stringJoin(buf, tail);
02886 }
02887
02888
02889 /*!
02890  *  extractNumberFromFilename()
02891  *
02892  *      Input:  fname
02893  *              numpre (number of characters before the digits to be found)
02894  *              numpost (number of characters after the digits to be found)
02895  *      Return: num (number embedded in the filename); -1 on error or if
02896  *                   not found
02897  *
02898  *  Notes:
02899  *      (1) Use unix-style pathname separators ('/').
02900  *      (2) The number is to be found in the basename, which is the
02901  *          filename without either the directory or the last extension.
02902  *      (3) When a number is found, it is non-negative.  If no number
02903  *          is found, this returns -1, without an error message.  The
02904  *          caller needs to check.
02905  */
02906 l_int32
02907 extractNumberFromFilename(const char  *fname,
02908                           l_int32      numpre,
02909                           l_int32      numpost)
02910 {
02911 char    *tail, *basename;
02912 l_int32  len, nret, num;
02913
02914     PROCNAME("extractNumberFromFilename");
02915
02916     if (!fname)
02917         return ERROR_INT("fname not defined", procName, -1);
02918
02919     splitPathAtDirectory(fname, NULL, &tail);
02920     splitPathAtExtension(tail, &basename, NULL);
02921     FREE(tail);
02922
02923     len = strlen(basename);
02924     if (numpre + numpost > len - 1) {
02925         FREE(basename);
02926         return ERROR_INT("numpre + numpost too big", procName, -1);
02927     }
02928
02929     basename[len - numpost] = '\0';
02930     nret = sscanf(basename + numpre, "%d", &num);
02931     FREE(basename);
02932
02933     if (nret == 1)
02934         return num;
02935     else
02936         return -1;  /* not found */
02937 }
02938
02939
02940 /*---------------------------------------------------------------------*
02941  *                Generate random integer in given range               *
02942  *---------------------------------------------------------------------*/
02943 /*!
02944  *  genRandomIntegerInRange()
02945  *
02946  *      Input:  range (size of range; must be >= 2)
02947  *              seed (use 0 to skip; otherwise call srand)
02948  *              val (<return> random integer in range {0 ... range-1}
02949  *      Return: 0 if OK, 1 on error
02950  *
02951  *  Notes:
02952  *      (1) For example, to choose a rand integer between 0 and 99,
02953  *          use @range = 100.
02954  */
02955 l_int32
02956 genRandomIntegerInRange(l_int32   range,
02957                         l_int32   seed,
02958                         l_int32  *pval)
02959 {
02960     PROCNAME("genRandomIntegerInRange");
02961
02962     if (!pval)
02963         return ERROR_INT("&val not defined", procName, 1);
02964     *pval = 0;
02965     if (range < 2)
02966         return ERROR_INT("range must be >= 2", procName, 1);
02967
02968     if (seed > 0) srand(seed);
02969     *pval = (l_int32)((l_float64)range *
02970                        ((l_float64)rand() / (l_float64)RAND_MAX));
02971     return 0;
02972 }
02973
02974
02975 /*---------------------------------------------------------------------*
02976  *                       Leptonica version number                      *
02977  *---------------------------------------------------------------------*/
02978 /*!
02979  *  getLeptonicaVersion()
02980  *
02981  *      Return: string of version number (e.g., 'leptonica-1.68')
02982  *
02983  *  Notes:
02984  *      (1) The caller has responsibility to free the memory.
02985  */
02986 char *
02987 getLeptonicaVersion()
02988 {
02989     char *version = (char *)CALLOC(100, sizeof(char));
02990
02991 #ifdef _MSC_VER
02992   #ifdef _USRDLL
02993     char dllStr[] = "DLL";
02994   #else
02995     char dllStr[] = "LIB";
02996   #endif
02997   #ifdef _DEBUG
02998     char debugStr[] = "Debug";
02999   #else
03000     char debugStr[] = "Release";
03001   #endif
03002   #ifdef _M_IX86
03003     char bitStr[] = " 32 bit";
03004   #elif _M_X64
03005     char bitStr[] = " 64 bit";
03006   #else
03007     char bitStr[] = ""
03008   #endif
03009     snprintf(version, 100, "leptonica-%d.%d (%s, %s) [MSC v.%d %s %s%s]",
03010              LIBLEPT_MAJOR_VERSION, LIBLEPT_MINOR_VERSION,
03011              __DATE__, __TIME__, _MSC_VER, dllStr, debugStr, bitStr);
03012
03013 #else
03014
03015     snprintf(version, 100, "leptonica-%d.%d", LIBLEPT_MAJOR_VERSION,
03016              LIBLEPT_MINOR_VERSION);
03017
03018 #endif   /* _MSC_VER */
03019     return version;
03020 }
03021
03022
03023 /*---------------------------------------------------------------------*
03024  *                           Timing procs                              *
03025  *---------------------------------------------------------------------*/
03026 #ifndef _WIN32
03027
03028 #include <sys/time.h>
03029 #include <sys/resource.h>
03030
03031 static struct rusage rusage_before;
03032 static struct rusage rusage_after;
03033
03034 /*!
03035  *  startTimer(), stopTimer()
03036  *
03037  *  Example of usage:
03038  *
03039  *      startTimer();
03040  *      ....
03041  *      fprintf(stderr, "Elapsed time = %7.3f sec\n", stopTimer());
03042  */
03043 void
03044 startTimer(void)
03045 {
03046     getrusage(RUSAGE_SELF, &rusage_before);
03047 }
03048
03049 l_float32
03050 stopTimer(void)
03051 {
03052 l_int32  tsec, tusec;
03053
03054     getrusage(RUSAGE_SELF, &rusage_after);
03055
03056     tsec = rusage_after.ru_utime.tv_sec - rusage_before.ru_utime.tv_sec;
03057     tusec = rusage_after.ru_utime.tv_usec - rusage_before.ru_utime.tv_usec;
03058     return (tsec + ((l_float32)tusec) / 1000000.0);
03059 }
03060
03061
03062 /*!
03063  *  startTimerNested(), stopTimerNested()
03064  *
03065  *  Example of usage:
03066  *
03067  *      L_TIMER  t1 = startTimerNested();
03068  *      ....
03069  *      L_TIMER  t2 = startTimerNested();
03070  *      ....
03071  *      fprintf(stderr, "Elapsed time 2 = %7.3f sec\n", stopTimerNested(t2));
03072  *      ....
03073  *      fprintf(stderr, "Elapsed time 1 = %7.3f sec\n", stopTimerNested(t1));
03074  */
03075 L_TIMER
03076 startTimerNested(void)
03077 {
03078 struct rusage  *rusage_start;
03079
03080     rusage_start = (struct rusage *)CALLOC(1, sizeof(struct rusage));
03081     getrusage(RUSAGE_SELF, rusage_start);
03082     return rusage_start;
03083 }
03084
03085 l_float32
03086 stopTimerNested(L_TIMER  rusage_start)
03087 {
03088 l_int32        tsec, tusec;
03089 struct rusage  rusage_stop;
03090
03091     getrusage(RUSAGE_SELF, &rusage_stop);
03092
03093     tsec = rusage_stop.ru_utime.tv_sec -
03094            ((struct rusage *)rusage_start)->ru_utime.tv_sec;
03095     tusec = rusage_stop.ru_utime.tv_usec -
03096            ((struct rusage *)rusage_start)->ru_utime.tv_usec;
03097     FREE(rusage_start);
03098     return (tsec + ((l_float32)tusec) / 1000000.0);
03099 }
03100
03101
03102 /*!
03103  *  l_getCurrentTime()
03104  *
03105  *      Input:  &sec (<optional return> in seconds since birth of Unix)
03106  *              &usec (<optional return> in microseconds since birth of Unix)
03107  *      Return: void
03108  */
03109 void
03110 l_getCurrentTime(l_int32  *sec,
03111                  l_int32  *usec)
03112 {
03113 struct timeval tv;
03114
03115     gettimeofday(&tv, NULL);
03116     if (sec) *sec = (l_int32)tv.tv_sec;
03117     if (usec) *usec = (l_int32)tv.tv_usec;
03118     return;
03119 }
03120
03121
03122 #else   /* _WIN32 : resource.h not implemented under Windows */
03123
03124     /* Note: if division by 10^7 seems strange, the time is expressed
03125      * as the number of 100-nanosecond intervals that have elapsed
03126      * since 12:00 A.M. January 1, 1601.  */
03127
03128 static ULARGE_INTEGER utime_before;
03129 static ULARGE_INTEGER utime_after;
03130
03131 void
03132 startTimer(void)
03133 {
03134 HANDLE    this_process;
03135 FILETIME  start, stop, kernel, user;
03136
03137     this_process = GetCurrentProcess();
03138
03139     GetProcessTimes(this_process, &start, &stop, &kernel, &user);
03140
03141     utime_before.LowPart  = user.dwLowDateTime;
03142     utime_before.HighPart = user.dwHighDateTime;
03143 }
03144
03145 l_float32
03146 stopTimer(void)
03147 {
03148 HANDLE     this_process;
03149 FILETIME   start, stop, kernel, user;
03150 ULONGLONG  hnsec;  /* in units of hecto-nanosecond (100 ns) intervals */
03151
03152     this_process = GetCurrentProcess();
03153
03154     GetProcessTimes(this_process, &start, &stop, &kernel, &user);
03155
03156     utime_after.LowPart  = user.dwLowDateTime;
03157     utime_after.HighPart = user.dwHighDateTime;
03158     hnsec = utime_after.QuadPart - utime_before.QuadPart;
03159     return (l_float32)(signed)hnsec / 10000000.0;
03160 }
03161
03162 L_TIMER
03163 startTimerNested(void)
03164 {
03165 HANDLE           this_process;
03166 FILETIME         start, stop, kernel, user;
03167 ULARGE_INTEGER  *utime_start;
03168
03169     this_process = GetCurrentProcess();
03170
03171     GetProcessTimes (this_process, &start, &stop, &kernel, &user);
03172
03173     utime_start = (ULARGE_INTEGER *)CALLOC(1, sizeof(ULARGE_INTEGER));
03174     utime_start->LowPart  = user.dwLowDateTime;
03175     utime_start->HighPart = user.dwHighDateTime;
03176     return utime_start;
03177 }
03178
03179 l_float32
03180 stopTimerNested(L_TIMER  utime_start)
03181 {
03182 HANDLE          this_process;
03183 FILETIME        start, stop, kernel, user;
03184 ULARGE_INTEGER  utime_stop;
03185 ULONGLONG       hnsec;  /* in units of 100 ns intervals */
03186
03187     this_process = GetCurrentProcess ();
03188
03189     GetProcessTimes (this_process, &start, &stop, &kernel, &user);
03190
03191     utime_stop.LowPart  = user.dwLowDateTime;
03192     utime_stop.HighPart = user.dwHighDateTime;
03193     hnsec = utime_stop.QuadPart - ((ULARGE_INTEGER *)utime_start)->QuadPart;
03194     FREE(utime_start);
03195     return (l_float32)(signed)hnsec / 10000000.0;
03196 }
03197
03198 void
03199 l_getCurrentTime(l_int32  *sec,
03200                  l_int32  *usec)
03201 {
03202 ULARGE_INTEGER  utime, birthunix;
03203 FILETIME        systemtime;
03204 LONGLONG        birthunixhnsec = 116444736000000000;  /*in units of 100 ns */
03205 LONGLONG        usecs;
03206
03207     GetSystemTimeAsFileTime(&systemtime);
03208     utime.LowPart  = systemtime.dwLowDateTime;
03209     utime.HighPart = systemtime.dwHighDateTime;
03210
03211     birthunix.LowPart = (DWORD) birthunixhnsec;
03212     birthunix.HighPart = birthunixhnsec >> 32;
03213
03214     usecs = (LONGLONG) ((utime.QuadPart - birthunix.QuadPart) / 10);
03215
03216     if (sec) *sec = (l_int32) (usecs / 1000000);
03217     if (usec) *usec = (l_int32) (usecs % 1000000);
03218     return;
03219 }
03220
03221 #endif
03222
03223
/*!
 *  l_getFormattedDate()
 *
 *      Input:  (none)
 *      Return: formatted date string, or null on error
 *
 *  Notes:
 *      (1) The string is the current local time, written as 12 digits
 *          with strftime format "%y%m%d%H%M%S".
 *      (2) The returned string is made with stringNew().
 *      (3) This returns null if the time cannot be read, converted to
 *          local time, or formatted.
 */
char *
l_getFormattedDate()
{
char        buf[64];
time_t      now;
struct tm  *local;

    now = time(NULL);
    if (now == (time_t)-1)
        return NULL;
    if ((local = localtime(&now)) == NULL)
        return NULL;

        /* strftime returns 0 when nothing usable was written to buf */
    if (strftime(buf, sizeof(buf), "%y%m%d%H%M%S", local) == 0)
        return NULL;
    return stringNew(buf);
}
03242
03243
03244 /*--------------------------------------------------------------------*
03245  *                  Deprecated binary read functions                  *
03246  *--------------------------------------------------------------------*/
03247 /*   Don't use these: they use l_int32 instead of size_t              */
03248 /*!
03249  *  arrayRead()
03250  *
03251  *      Input:  filename
03252  *              &nbytes (<return> number of bytes read)
03253  *      Return: array, or null on error
03254  */
03255 l_uint8 *
03256 arrayRead(const char  *fname,
03257           l_int32     *pnbytes)
03258 {
03259 l_uint8  *data;
03260 FILE     *fp;
03261
03262     PROCNAME("arrayRead");
03263
03264     if (!fname)
03265         return (l_uint8 *)ERROR_PTR("fname not defined", procName, NULL);
03266     if (!pnbytes)
03267         return (l_uint8 *)ERROR_PTR("pnbytes not defined", procName, NULL);
03268     *pnbytes = 0;
03269
03270     if ((fp = fopenReadStream(fname)) == NULL)
03271         return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL);
03272
03273     data = arrayReadStream(fp, pnbytes);
03274     fclose(fp);
03275
03276     return data;
03277 }
03278
03279
03280 /*!
03281  *  arrayReadStream()
03282  *
03283  *      Input:  stream
03284  *              &nbytes (<return> number of bytes read)
03285  *      Return: null-terminated array, or null on error
03286  *              (reading 0 bytes is not an error)
03287  *
03288  *  Notes:
03289  *      (1) N.B.: as a side effect, this always re-positions the
03290  *          stream ptr to the beginning of the file.
03291  */
03292 l_uint8 *
03293 arrayReadStream(FILE     *fp,
03294                 l_int32  *pnbytes)
03295 {
03296 l_int32   ignore;
03297 l_uint8  *data;
03298
03299     PROCNAME("arrayReadStream");
03300
03301     if (!fp)
03302         return (l_uint8 *)ERROR_PTR("stream not defined", procName, NULL);
03303     if (!pnbytes)
03304         return (l_uint8 *)ERROR_PTR("ptr to nbytes not defined",
03305                                     procName, NULL);
03306
03307     *pnbytes = fnbytesInFile(fp);
03308     if ((data = (l_uint8 *)CALLOC(1, *pnbytes + 1)) == NULL)
03309         return (l_uint8 *)ERROR_PTR("CALLOC fail for data", procName, NULL);
03310     ignore = fread(data, 1, *pnbytes, fp);
03311     return data;
03312 }
03313