Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 00017 /* 00018 * utils.c 00019 * 00020 * Error, warning and info procs; all invoked by macros 00021 * l_int32 returnErrorInt() 00022 * l_float32 returnErrorFloat() 00023 * void *returnErrorPtr() 00024 * void l_error() 00025 * void l_errorString() 00026 * void l_errorInt() 00027 * void l_errorFloat() 00028 * void l_warning() 00029 * void l_warningString() 00030 * void l_warningInt() 00031 * void l_warningInt2() 00032 * void l_warningFloat() 00033 * void l_warningFloat2() 00034 * void l_info() 00035 * void l_infoString() 00036 * void l_infoInt() 00037 * void l_infoInt2() 00038 * void l_infoFloat() 00039 * void l_infoFloat2() 00040 * 00041 * Safe string procs 00042 * char *stringNew() 00043 * l_int32 stringCopy() 00044 * l_int32 stringReplace() 00045 * l_int32 stringLength() 00046 * l_int32 stringCat() 00047 * char *stringJoin() 00048 * char *stringReverse() 00049 * char *strtokSafe() 00050 * l_int32 stringSplitOnToken() 00051 * 00052 * Find and replace string and array procs 00053 * char *stringRemoveChars() 00054 * l_int32 stringFindSubstr() 00055 * char *stringReplaceSubstr() 00056 * char *stringReplaceEachSubstr() 00057 * NUMA *arrayFindEachSequence() 00058 * l_int32 arrayFindSequence() 00059 * 00060 * Safe realloc 00061 * void *reallocNew() 00062 * 00063 * Read and write between file and memory 00064 * l_uint8 *l_binaryRead() 00065 * l_uint8 *l_binaryReadStream() 00066 * l_int32 l_binaryWrite() 00067 * l_int32 nbytesInFile() 00068 * l_int32 fnbytesInFile() 00069 * 00070 * Copy in memory 00071 * l_uint8 *l_binaryCopy() 00072 * 00073 * File copy operations 00074 * l_int32 fileCopy() 00075 * l_int32 fileConcatenate() 00076 * l_int32 fileAppendString() 00077 * 00078 * Test files for equivalence 00079 * l_int32 filesAreIdentical() 00080 * 00081 * Byte-swapping data conversion 00082 * l_uint16 convertOnBigEnd16() 00083 * l_uint32 convertOnBigEnd32() 00084 * l_uint16 convertOnLittleEnd16() 00085 * l_uint32 convertOnLittleEnd32() 00086 * 00087 * Opening file streams 00088 * FILE *fopenReadStream() 00089 * FILE *fopenWriteStream() 00090 * 00091 * Functions to avoid C-runtime boundary crossing with Windows DLLs 00092 * FILE *lept_fopen() 00093 * l_int32 lept_fclose() 00094 * void lept_calloc() 00095 * void lept_free() 00096 * 00097 * Cross-platform file system operations 00098 * l_int32 lept_mkdir() 00099 * l_int32 lept_rmdir() 00100 * l_int32 lept_mv() 00101 * l_int32 lept_rm() 00102 * l_int32 lept_cp() 00103 * 00104 * File name operations 00105 * l_int32 splitPathAtDirectory() 00106 * l_int32 splitPathAtExtension() 00107 * char *pathJoin() 00108 * char *genPathname() 00109 * char *genTempFilename() 00110 * l_int32 extractNumberFromFilename() 00111 * 00112 * Generate random integer in given range 00113 * l_int32 genRandomIntegerInRange() 00114 * 00115 * Leptonica version number 00116 * char *getLeptonicaVersion() 00117 * 00118 * Timing 00119 * void startTimer() 00120 * l_float32 stopTimer() 00121 * L_TIMER startTimerNested() 00122 * l_float32 stopTimerNested() 00123 * void l_getCurrentTime() 00124 * void l_getFormattedDate() 00125 * 00126 * Deprecated binary read functions (don't use these!) 00127 * l_uint8 *arrayRead() 00128 * l_uint8 *arrayReadStream() 00129 * 00130 * 00131 * Notes on cross-platform development 00132 * ----------------------------------- 00133 * (1) With the exception of splitPathAtDirectory() and 00134 * splitPathAtExtension(), all input pathnames must have unix separators. 00135 * (2) The conversion from unix to windows pathnames happens in genPathname(). 00136 * (3) Use fopenReadStream() and fopenWriteStream() to open files, 00137 * because these use genPathname() to find the platform-dependent 00138 * filenames. Likewise for l_binaryRead() and l_binaryWrite(). 00139 * (4) For moving, copying and removing files and directories, 00140 * use the lept_*() file system shell wrappers: 00141 * lept_mkdir(), lept_rmdir(), lept_mv(), lept_rm() and lept_cp(). 00142 * (5) Use the lept_*() C library wrappers: 00143 * lept_fopen(), lept_fclose(), lept_calloc() and lept_free(). 00144 */ 00145 00146 #include <string.h> 00147 #include <time.h> 00148 #ifdef _MSC_VER 00149 #include <process.h> 00150 #else 00151 #include <unistd.h> 00152 #endif /* _MSC_VER */ 00153 #include "allheaders.h" 00154 00155 #ifdef _WIN32 00156 #include <windows.h> 00157 static const char sepchar = '\\'; 00158 #else 00159 #include <sys/stat.h> /* for mkdir(2) */ 00160 #include <sys/types.h> 00161 static const char sepchar = '/'; 00162 #endif 00163 00164 00165 /*----------------------------------------------------------------------* 00166 * Error, warning and info message procs * 00167 * * 00168 * --------------------- N.B. --------------------- * 00169 * * 00170 * (1) These functions all print messages to stderr. * 00171 * * 00172 * (2) They must be invoked only by macros, which are in * 00173 * environ.h, so that the print output can be disabled * 00174 * at compile time, using -DNO_CONSOLE_IO. * 00175 * * 00176 *----------------------------------------------------------------------*/ 00177 /*! 00178 * returnErrorInt() 00179 * 00180 * Input: msg (error message) 00181 * procname 00182 * ival (return val) 00183 * Return: ival (typically 1) 00184 */ 00185 l_int32 00186 returnErrorInt(const char *msg, 00187 const char *procname, 00188 l_int32 ival) 00189 { 00190 fprintf(stderr, "Error in %s: %s\n", procname, msg); 00191 return ival; 00192 } 00193 00194 00195 /*! 00196 * returnErrorFloat() 00197 * 00198 * Input: msg (error message) 00199 * procname 00200 * fval (return val) 00201 * Return: fval 00202 */ 00203 l_float32 00204 returnErrorFloat(const char *msg, 00205 const char *procname, 00206 l_float32 fval) 00207 { 00208 fprintf(stderr, "Error in %s: %s\n", procname, msg); 00209 return fval; 00210 } 00211 00212 00213 /*! 00214 * returnErrorPtr() 00215 * 00216 * Input: msg (error message) 00217 * procname 00218 * pval (return val) 00219 * Return: pval (typically null) 00220 */ 00221 void * 00222 returnErrorPtr(const char *msg, 00223 const char *procname, 00224 void *pval) 00225 { 00226 fprintf(stderr, "Error in %s: %s\n", procname, msg); 00227 return pval; 00228 } 00229 00230 00231 /*! 00232 * l_error() 00233 * 00234 * Input: msg (error message) 00235 * procname 00236 */ 00237 void 00238 l_error(const char *msg, 00239 const char *procname) 00240 { 00241 fprintf(stderr, "Error in %s: %s\n", procname, msg); 00242 return; 00243 } 00244 00245 00246 /*! 00247 * l_errorString() 00248 * 00249 * Input: msg (error message; must include '%s') 00250 * procname 00251 * str (embedded in error message via %s) 00252 */ 00253 void 00254 l_errorString(const char *msg, 00255 const char *procname, 00256 const char *str) 00257 { 00258 l_int32 bufsize; 00259 char *charbuf; 00260 00261 if (!msg || !procname || !str) { 00262 L_ERROR("msg, procname or str not defined in l_errorString()", 00263 procname); 00264 return; 00265 } 00266 00267 bufsize = strlen(msg) + strlen(procname) + 128; 00268 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00269 L_ERROR("charbuf not made in l_errorString()", procname); 00270 return; 00271 } 00272 00273 sprintf(charbuf, "Error in %s: %s\n", procname, msg); 00274 fprintf(stderr, charbuf, str); 00275 00276 FREE(charbuf); 00277 return; 00278 } 00279 00280 00281 /*! 00282 * l_errorInt() 00283 * 00284 * Input: msg (error message; must include '%d') 00285 * procname 00286 * ival (embedded in error message via %d) 00287 */ 00288 void 00289 l_errorInt(const char *msg, 00290 const char *procname, 00291 l_int32 ival) 00292 { 00293 l_int32 bufsize; 00294 char *charbuf; 00295 00296 if (!msg || !procname) { 00297 L_ERROR("msg or procname not defined in l_errorInt()", procname); 00298 return; 00299 } 00300 00301 bufsize = strlen(msg) + strlen(procname) + 128; 00302 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00303 L_ERROR("charbuf not made in l_errorInt()", procname); 00304 return; 00305 } 00306 00307 sprintf(charbuf, "Error in %s: %s\n", procname, msg); 00308 fprintf(stderr, charbuf, ival); 00309 00310 FREE(charbuf); 00311 return; 00312 } 00313 00314 00315 /*! 00316 * l_errorFloat() 00317 * 00318 * Input: msg (error message; must include '%f') 00319 * procname 00320 * fval (embedded in error message via %f) 00321 */ 00322 void 00323 l_errorFloat(const char *msg, 00324 const char *procname, 00325 l_float32 fval) 00326 { 00327 l_int32 bufsize; 00328 char *charbuf; 00329 00330 if (!msg || !procname) { 00331 L_ERROR("msg or procname not defined in l_errorFloat()", procname); 00332 return; 00333 } 00334 00335 bufsize = strlen(msg) + strlen(procname) + 128; 00336 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00337 L_ERROR("charbuf not made in l_errorFloat()", procname); 00338 return; 00339 } 00340 00341 sprintf(charbuf, "Error in %s: %s\n", procname, msg); 00342 fprintf(stderr, charbuf, fval); 00343 00344 FREE(charbuf); 00345 return; 00346 } 00347 00348 00349 /*! 00350 * l_warning() 00351 * 00352 * Input: msg (warning message) 00353 * procname 00354 */ 00355 void 00356 l_warning(const char *msg, 00357 const char *procname) 00358 { 00359 fprintf(stderr, "Warning in %s: %s\n", procname, msg); 00360 return; 00361 } 00362 00363 00364 /*! 00365 * l_warningString() 00366 * 00367 * Input: msg (warning message; must include '%s') 00368 * procname 00369 * str (embedded in warning message via %s) 00370 */ 00371 void 00372 l_warningString(const char *msg, 00373 const char *procname, 00374 const char *str) 00375 { 00376 l_int32 bufsize; 00377 char *charbuf; 00378 00379 if (!msg || !procname || !str) { 00380 L_ERROR("msg, procname or str not defined in l_warningString()", 00381 procname); 00382 return; 00383 } 00384 00385 bufsize = strlen(msg) + strlen(procname) + 128; 00386 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00387 L_ERROR("charbuf not made in l_warningString()", procname); 00388 return; 00389 } 00390 00391 sprintf(charbuf, "Warning in %s: %s\n", procname, msg); 00392 fprintf(stderr, charbuf, str); 00393 00394 FREE(charbuf); 00395 return; 00396 } 00397 00398 00399 /*! 00400 * l_warningInt() 00401 * 00402 * Input: msg (warning message; must include '%d') 00403 * procname 00404 * ival (embedded in warning message via %d) 00405 */ 00406 void 00407 l_warningInt(const char *msg, 00408 const char *procname, 00409 l_int32 ival) 00410 { 00411 l_int32 bufsize; 00412 char *charbuf; 00413 00414 if (!msg || !procname) { 00415 L_ERROR("msg or procname not defined in l_warningInt()", procname); 00416 return; 00417 } 00418 00419 bufsize = strlen(msg) + strlen(procname) + 128; 00420 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00421 L_ERROR("charbuf not made in l_warningInt()", procname); 00422 return; 00423 } 00424 00425 sprintf(charbuf, "Warning in %s: %s\n", procname, msg); 00426 fprintf(stderr, charbuf, ival); 00427 00428 FREE(charbuf); 00429 return; 00430 } 00431 00432 00433 /*! 00434 * l_warningInt2() 00435 * 00436 * Input: msg (warning message; must include '%d') 00437 * procname 00438 * ival1, ival2 (two args, embedded in message via %d) 00439 */ 00440 void 00441 l_warningInt2(const char *msg, 00442 const char *procname, 00443 l_int32 ival1, 00444 l_int32 ival2) 00445 { 00446 l_int32 bufsize; 00447 char *charbuf; 00448 00449 if (!msg || !procname) { 00450 L_ERROR("msg or procname not defined in l_warningInt2()", procname); 00451 return; 00452 } 00453 00454 bufsize = strlen(msg) + strlen(procname) + 128; 00455 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00456 L_ERROR("charbuf not made in l_warningInt()", procname); 00457 return; 00458 } 00459 00460 sprintf(charbuf, "Warning in %s: %s\n", procname, msg); 00461 fprintf(stderr, charbuf, ival1, ival2); 00462 00463 FREE(charbuf); 00464 return; 00465 } 00466 00467 00468 /*! 00469 * l_warningFloat() 00470 * 00471 * Input: msg (warning message; must include '%f') 00472 * procname 00473 * fval (embedded in warning message via %f) 00474 */ 00475 void 00476 l_warningFloat(const char *msg, 00477 const char *procname, 00478 l_float32 fval) 00479 { 00480 l_int32 bufsize; 00481 char *charbuf; 00482 00483 if (!msg || !procname) { 00484 L_ERROR("msg or procname not defined in l_warningFloat()", procname); 00485 return; 00486 } 00487 00488 bufsize = strlen(msg) + strlen(procname) + 128; 00489 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00490 L_ERROR("charbuf not made in l_warningFloat()", procname); 00491 return; 00492 } 00493 00494 sprintf(charbuf, "Warning in %s: %s\n", procname, msg); 00495 fprintf(stderr, charbuf, fval); 00496 00497 FREE(charbuf); 00498 return; 00499 } 00500 00501 00502 /*! 00503 * l_warningFloat2() 00504 * 00505 * Input: msg (warning message; must include '%f') 00506 * procname 00507 * fval1, fval2 (two args, embedded in message via %f) 00508 */ 00509 void 00510 l_warningFloat2(const char *msg, 00511 const char *procname, 00512 l_float32 fval1, 00513 l_float32 fval2) 00514 { 00515 l_int32 bufsize; 00516 char *charbuf; 00517 00518 if (!msg || !procname) { 00519 L_ERROR("msg or procname not defined in l_warningFloat2()", procname); 00520 return; 00521 } 00522 00523 bufsize = strlen(msg) + strlen(procname) + 128; 00524 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00525 L_ERROR("charbuf not made in l_warningFloat()", procname); 00526 return; 00527 } 00528 00529 sprintf(charbuf, "Warning in %s: %s\n", procname, msg); 00530 fprintf(stderr, charbuf, fval1, fval2); 00531 00532 FREE(charbuf); 00533 return; 00534 } 00535 00536 00537 /*! 00538 * l_info() 00539 * 00540 * Input: msg (info message) 00541 * procname 00542 */ 00543 void 00544 l_info(const char *msg, 00545 const char *procname) 00546 { 00547 fprintf(stderr, "Info in %s: %s\n", procname, msg); 00548 return; 00549 } 00550 00551 00552 /*! 00553 * l_infoString() 00554 * 00555 * Input: msg (info message; must include '%s') 00556 * procname 00557 * str (embedded in warning message via %s) 00558 */ 00559 void 00560 l_infoString(const char *msg, 00561 const char *procname, 00562 const char *str) 00563 { 00564 l_int32 bufsize; 00565 char *charbuf; 00566 00567 if (!msg || !procname || !str) { 00568 L_ERROR("msg, procname or str not defined in l_infoString()", procname); 00569 return; 00570 } 00571 00572 bufsize = strlen(msg) + strlen(procname) + 128; 00573 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00574 L_ERROR("charbuf not made in l_infoString()", procname); 00575 return; 00576 } 00577 00578 sprintf(charbuf, "Info in %s: %s\n", procname, msg); 00579 fprintf(stderr, charbuf, str); 00580 00581 FREE(charbuf); 00582 return; 00583 } 00584 00585 00586 /*! 00587 * l_infoInt() 00588 * 00589 * Input: msg (info message; must include '%d') 00590 * procname 00591 * ival (embedded in info message via %d) 00592 */ 00593 void 00594 l_infoInt(const char *msg, 00595 const char *procname, 00596 l_int32 ival) 00597 { 00598 l_int32 bufsize; 00599 char *charbuf; 00600 00601 if (!msg || !procname) { 00602 L_ERROR("msg or procname not defined in l_infoInt()", procname); 00603 return; 00604 } 00605 00606 bufsize = strlen(msg) + strlen(procname) + 128; 00607 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00608 L_ERROR("charbuf not made in l_infoInt()", procname); 00609 return; 00610 } 00611 00612 sprintf(charbuf, "Info in %s: %s\n", procname, msg); 00613 fprintf(stderr, charbuf, ival); 00614 00615 FREE(charbuf); 00616 return; 00617 } 00618 00619 00620 /*! 00621 * l_infoInt2() 00622 * 00623 * Input: msg (info message; must include two '%d') 00624 * procname 00625 * ival1, ival2 (two args, embedded in info message via %d) 00626 */ 00627 void 00628 l_infoInt2(const char *msg, 00629 const char *procname, 00630 l_int32 ival1, 00631 l_int32 ival2) 00632 { 00633 l_int32 bufsize; 00634 char *charbuf; 00635 00636 if (!msg || !procname) { 00637 L_ERROR("msg or procname not defined in l_infoInt2()", procname); 00638 return; 00639 } 00640 00641 bufsize = strlen(msg) + strlen(procname) + 128; 00642 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00643 L_ERROR("charbuf not made in l_infoInt2()", procname); 00644 return; 00645 } 00646 00647 sprintf(charbuf, "Info in %s: %s\n", procname, msg); 00648 fprintf(stderr, charbuf, ival1, ival2); 00649 00650 FREE(charbuf); 00651 return; 00652 } 00653 00654 00655 /*! 00656 * l_infoFloat() 00657 * 00658 * Input: msg (info message; must include '%f') 00659 * procname 00660 * fval (embedded in info message via %f) 00661 */ 00662 void 00663 l_infoFloat(const char *msg, 00664 const char *procname, 00665 l_float32 fval) 00666 { 00667 l_int32 bufsize; 00668 char *charbuf; 00669 00670 if (!msg || !procname) { 00671 L_ERROR("msg or procname not defined in l_infoFloat()", procname); 00672 return; 00673 } 00674 00675 bufsize = strlen(msg) + strlen(procname) + 128; 00676 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00677 L_ERROR("charbuf not made in l_infoFloat()", procname); 00678 return; 00679 } 00680 00681 sprintf(charbuf, "Info in %s: %s\n", procname, msg); 00682 fprintf(stderr, charbuf, fval); 00683 00684 FREE(charbuf); 00685 return; 00686 } 00687 00688 00689 /*! 00690 * l_infoFloat2() 00691 * 00692 * Input: msg (info message; must include two '%f') 00693 * procname 00694 * fval1, fval2 (two args, embedded in info message via %f) 00695 */ 00696 void 00697 l_infoFloat2(const char *msg, 00698 const char *procname, 00699 l_float32 fval1, 00700 l_float32 fval2) 00701 { 00702 l_int32 bufsize; 00703 char *charbuf; 00704 00705 if (!msg || !procname) { 00706 L_ERROR("msg or procname not defined in l_infoFloat2()", procname); 00707 return; 00708 } 00709 00710 bufsize = strlen(msg) + strlen(procname) + 128; 00711 if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) { 00712 L_ERROR("charbuf not made in l_infoFloat()", procname); 00713 return; 00714 } 00715 00716 sprintf(charbuf, "Info in %s: %s\n", procname, msg); 00717 fprintf(stderr, charbuf, fval1, fval2); 00718 00719 FREE(charbuf); 00720 return; 00721 } 00722 00723 00724 00725 /*--------------------------------------------------------------------* 00726 * Safe string operations * 00727 *--------------------------------------------------------------------*/ 00728 /*! 00729 * stringNew() 00730 * 00731 * Input: src string 00732 * Return: dest copy of src string, or null on error 00733 */ 00734 char * 00735 stringNew(const char *src) 00736 { 00737 l_int32 len; 00738 char *dest; 00739 00740 PROCNAME("stringNew"); 00741 00742 if (!src) 00743 return (char *)ERROR_PTR("src not defined", procName, NULL); 00744 00745 len = strlen(src); 00746 if ((dest = (char *)CALLOC(len + 1, sizeof(char))) == NULL) 00747 return (char *)ERROR_PTR("dest not made", procName, NULL); 00748 00749 stringCopy(dest, src, len); 00750 return dest; 00751 } 00752 00753 00754 /*! 00755 * stringCopy() 00756 * 00757 * Input: dest (existing byte buffer) 00758 * src string (can be null) 00759 * n (max number of characters to copy) 00760 * Return: 0 if OK, 1 on error 00761 * 00762 * Notes: 00763 * (1) Relatively safe wrapper for strncpy, that checks the input, 00764 * and does not complain if @src is null or @n < 1. 00765 * If @n < 1, this is a no-op. 00766 * (2) @dest needs to be at least @n bytes in size. 00767 * (3) We don't call strncpy() because valgrind complains about 00768 * use of uninitialized values. 00769 */ 00770 l_int32 00771 stringCopy(char *dest, 00772 const char *src, 00773 l_int32 n) 00774 { 00775 l_int32 i; 00776 00777 PROCNAME("stringCopy"); 00778 00779 if (!dest) 00780 return ERROR_INT("dest not defined", procName, 1); 00781 if (!src || n < 1) 00782 return 0; 00783 00784 /* Implementation of strncpy that valgrind doesn't complain about */ 00785 for (i = 0; i < n && src[i] != '\0'; i++) 00786 dest[i] = src[i]; 00787 for (; i < n; i++) 00788 dest[i] = '\0'; 00789 return 0; 00790 } 00791 00792 00793 /*! 00794 * stringReplace() 00795 * 00796 * Input: &dest string (<return> copy) 00797 * src string 00798 * Return: 0 if OK; 1 on error 00799 * 00800 * Notes: 00801 * (1) Frees any existing dest string 00802 * (2) Puts a copy of src string in the dest 00803 * (3) If either or both strings are null, does something reasonable. 00804 */ 00805 l_int32 00806 stringReplace(char **pdest, 00807 const char *src) 00808 { 00809 char *scopy; 00810 l_int32 len; 00811 00812 PROCNAME("stringReplace"); 00813 00814 if (!pdest) 00815 return ERROR_INT("pdest not defined", procName, 1); 00816 00817 if (*pdest) 00818 FREE(*pdest); 00819 00820 if (src) { 00821 len = strlen(src); 00822 if ((scopy = (char *)CALLOC(len + 1, sizeof(char))) == NULL) 00823 return ERROR_INT("scopy not made", procName, 1); 00824 stringCopy(scopy, src, len); 00825 *pdest = scopy; 00826 } 00827 else 00828 *pdest = NULL; 00829 00830 return 0; 00831 } 00832 00833 00834 /*! 00835 * stringLength() 00836 * 00837 * Input: src string (can be null or null-terminated string) 00838 * size (size of src buffer) 00839 * Return: length of src in bytes. 00840 * 00841 * Notes: 00842 * (1) Safe implementation of strlen that only checks size bytes 00843 * for trailing NUL. 00844 * (2) Valid returned string lengths are between 0 and size - 1. 00845 * If size bytes are checked without finding a NUL byte, then 00846 * an error is indicated by returning size. 00847 */ 00848 l_int32 00849 stringLength(const char *src, 00850 size_t size) 00851 { 00852 l_int32 i; 00853 00854 PROCNAME("stringLength"); 00855 00856 if (!src) 00857 return ERROR_INT("src not defined", procName, 0); 00858 if (size < 1) 00859 return 0; 00860 00861 for (i = 0; i < size; i++) { 00862 if (src[i] == '\0') 00863 return i; 00864 } 00865 return size; 00866 } 00867 00868 00869 /*! 00870 * stringCat() 00871 * 00872 * Input: dest (null-terminated byte buffer) 00873 * size (size of dest) 00874 * src string (can be null or null-terminated string) 00875 * Return: number of bytes added to dest; -1 on error 00876 * 00877 * Notes: 00878 * (1) Alternative implementation of strncat, that checks the input, 00879 * is easier to use (since the size of the dest buffer is specified 00880 * rather than the number of bytes to copy), and does not complain 00881 * if @src is null. 00882 * (2) Never writes past end of dest. 00883 * (3) If it can't append src (an error), it does nothing. 00884 * (4) N.B. The order of 2nd and 3rd args is reversed from that in 00885 * strncat, as in the Windows function strcat_s(). 00886 */ 00887 l_int32 00888 stringCat(char *dest, 00889 size_t size, 00890 const char *src) 00891 { 00892 l_int32 i, n; 00893 l_int32 lendest, lensrc; 00894 00895 PROCNAME("stringCat"); 00896 00897 if (!dest) 00898 return ERROR_INT("dest not defined", procName, -1); 00899 if (size < 1) 00900 return ERROR_INT("size < 1; too small", procName, -1); 00901 if (!src) 00902 return 0; 00903 00904 lendest = stringLength(dest, size); 00905 if (lendest == size) 00906 return ERROR_INT("no terminating nul byte", procName, -1); 00907 lensrc = stringLength(src, size); 00908 if (lensrc == 0) 00909 return 0; 00910 n = (lendest + lensrc > size - 1 ? size - lendest - 1 : lensrc); 00911 if (n < 1) 00912 return ERROR_INT("dest too small for append", procName, -1); 00913 00914 for (i = 0; i < n; i++) 00915 dest[lendest + i] = src[i]; 00916 dest[lendest + n] = '\0'; 00917 return n; 00918 } 00919 00920 00921 /*! 00922 * stringJoin() 00923 * 00924 * Input: src1 string (<optional> can be null) 00925 * src2 string (<optional> can be null) 00926 * Return: concatenated string, or null on error 00927 * 00928 * Notes: 00929 * (1) This is a safe version of strcat; it makes a new string. 00930 * (2) It is not an error if either or both of the strings 00931 * are empty, or if either or both of the pointers are null. 00932 */ 00933 char * 00934 stringJoin(const char *src1, 00935 const char *src2) 00936 { 00937 char *dest; 00938 l_int32 srclen1, srclen2, destlen; 00939 00940 PROCNAME("stringJoin"); 00941 00942 srclen1 = (src1) ? strlen(src1) : 0; 00943 srclen2 = (src2) ? strlen(src2) : 0; 00944 destlen = srclen1 + srclen2 + 3; 00945 00946 if ((dest = (char *)CALLOC(destlen, sizeof(char))) == NULL) 00947 return (char *)ERROR_PTR("calloc fail for dest", procName, NULL); 00948 00949 if (src1) 00950 stringCopy(dest, src1, srclen1); 00951 if (src2) 00952 strncat(dest, src2, srclen2); 00953 return dest; 00954 } 00955 00956 00957 /*! 00958 * stringReverse() 00959 * 00960 * Input: src (string) 00961 * Return: dest (newly-allocated reversed string) 00962 */ 00963 char * 00964 stringReverse(const char *src) 00965 { 00966 char *dest; 00967 l_int32 i, len; 00968 00969 PROCNAME("stringReverse"); 00970 00971 if (!src) 00972 return (char *)ERROR_PTR("src not defined", procName, NULL); 00973 len = strlen(src); 00974 if ((dest = (char *)CALLOC(len + 1, sizeof(char))) == NULL) 00975 return (char *)ERROR_PTR("calloc fail for dest", procName, NULL); 00976 for (i = 0; i < len; i++) 00977 dest[i] = src[len - 1 - i]; 00978 00979 return dest; 00980 } 00981 00982 00983 /*! 00984 * strtokSafe() 00985 * 00986 * Input: cstr (input string to be sequentially parsed; 00987 * use NULL after the first call) 00988 * seps (a string of character separators) 00989 * &saveptr (<return> ptr to the next char after 00990 * the last encountered separator) 00991 * Return: substr (a new string that is copied from the previous 00992 * saveptr up to but not including the next 00993 * separator character), or NULL if end of cstr. 00994 * 00995 * Notes: 00996 * (1) This is a thread-safe implementation of strtok. 00997 * (2) It has the same interface as strtok_r. 00998 * (3) It differs from strtok_r in usage in two respects: 00999 * (a) the input string is not altered 01000 * (b) each returned substring is newly allocated and must 01001 * be freed after use. 01002 * (4) Let me repeat that. This is "safe" because the input 01003 * string is not altered and because each returned string 01004 * is newly allocated on the heap. 01005 * (5) It is here because, surprisingly, some C libraries don't 01006 * include strtok_r. 01007 * (6) Important usage points: 01008 * - Input the string to be parsed on the first invocation. 01009 * - Then input NULL after that; the value returned in saveptr 01010 * is used in all subsequent calls. 01011 * (7) This is only slightly slower than strtok_k. 01012 */ 01013 char * 01014 strtokSafe(char *cstr, 01015 const char *seps, 01016 char **psaveptr) 01017 { 01018 char nextc; 01019 char *start, *substr; 01020 l_int32 istart, i, j, nchars; 01021 01022 PROCNAME("strtokSafe"); 01023 01024 if (!seps) 01025 return (char *)ERROR_PTR("seps not defined", procName, NULL); 01026 if (!psaveptr) 01027 return (char *)ERROR_PTR("&saveptr not defined", procName, NULL); 01028 01029 if (!cstr) 01030 start = *psaveptr; 01031 else 01032 start = cstr; 01033 if (!start) /* nothing to do */ 01034 return NULL; 01035 01036 /* First time, scan for the first non-sep character */ 01037 istart = 0; 01038 if (cstr) { 01039 for (istart = 0;; istart++) { 01040 if ((nextc = start[istart]) == '\0') { 01041 *psaveptr = NULL; /* in case caller doesn't check ret value */ 01042 return NULL; 01043 } 01044 if (!strchr(seps, nextc)) 01045 break; 01046 } 01047 } 01048 01049 /* Scan through, looking for a sep character; if none is 01050 * found, 'i' will be at the end of the string. */ 01051 for (i = istart;; i++) { 01052 if ((nextc = start[i]) == '\0') 01053 break; 01054 if (strchr(seps, nextc)) 01055 break; 01056 } 01057 01058 /* Save the substring */ 01059 nchars = i - istart; 01060 substr = (char *)CALLOC(nchars + 1, sizeof(char)); 01061 stringCopy(substr, start + istart, nchars); 01062 01063 /* Look for the next non-sep character. 01064 * If this is the last substring, return a null saveptr. */ 01065 for (j = i;; j++) { 01066 if ((nextc = start[j]) == '\0') { 01067 *psaveptr = NULL; /* no more non-sep characters */ 01068 break; 01069 } 01070 if (!strchr(seps, nextc)) { 01071 *psaveptr = start + j; /* start here on next call */ 01072 break; 01073 } 01074 } 01075 01076 return substr; 01077 } 01078 01079 01080 /*! 01081 * stringSplitOnToken() 01082 * 01083 * Input: cstr (input string to be split; not altered) 01084 * seps (a string of character separators) 01085 * &head (<return> ptr to copy of the input string, up to 01086 * the first separator token encountered) 01087 * &tail (<return> ptr to copy of the part of the input string 01088 * starting with the first non-separator character 01089 * that occurs after the first separator is found) 01090 * Return: 0 if OK, 1 on error 01091 * 01092 * Notes: 01093 * (1) The input string is not altered; all split parts are new strings. 01094 * (2) The split occurs around the first consecutive sequence of 01095 * tokens encountered. 01096 * (3) The head goes from the beginning of the string up to 01097 * but not including the first token found. 01098 * (4) The tail contains the second part of the string, starting 01099 * with the first char in that part that is NOT a token. 01100 * (5) If no separator token is found, 'head' contains a copy 01101 * of the input string and 'tail' is null. 01102 */ 01103 l_int32 01104 stringSplitOnToken(char *cstr, 01105 const char *seps, 01106 char **phead, 01107 char **ptail) 01108 { 01109 char *saveptr; 01110 01111 PROCNAME("stringSplitOnToken"); 01112 01113 if (!phead) 01114 return ERROR_INT("&head not defined", procName, 1); 01115 if (!ptail) 01116 return ERROR_INT("&tail not defined", procName, 1); 01117 *phead = *ptail = NULL; 01118 if (!cstr) 01119 return ERROR_INT("cstr not defined", procName, 1); 01120 if (!seps) 01121 return ERROR_INT("seps not defined", procName, 1); 01122 01123 *phead = strtokSafe(cstr, seps, &saveptr); 01124 if (saveptr) 01125 *ptail = stringNew(saveptr); 01126 return 0; 01127 } 01128 01129 01130 /*--------------------------------------------------------------------* 01131 * Find and replace procs * 01132 *--------------------------------------------------------------------*/ 01133 /*! 01134 * stringRemoveChars() 01135 * 01136 * Input: src (input string; can be of zero length) 01137 * remchars (string of chars to be removed from src) 01138 * Return: dest (string with specified chars removed), or null on error 01139 */ 01140 char * 01141 stringRemoveChars(const char *src, 01142 const char *remchars) 01143 { 01144 char ch; 01145 char *dest; 01146 l_int32 nsrc, i, k; 01147 01148 PROCNAME("stringRemoveChars"); 01149 01150 if (!src) 01151 return (char *)ERROR_PTR("src not defined", procName, NULL); 01152 if (!remchars) 01153 return stringNew(src); 01154 01155 if ((dest = (char *)CALLOC(strlen(src) + 1, sizeof(char))) == NULL) 01156 return (char *)ERROR_PTR("dest not made", procName, NULL); 01157 nsrc = strlen(src); 01158 for (i = 0, k = 0; i < nsrc; i++) { 01159 ch = src[i]; 01160 if (!strchr(remchars, ch)) 01161 dest[k++] = ch; 01162 } 01163 01164 return dest; 01165 } 01166 01167 01168 /*! 01169 * stringFindSubstr() 01170 * 01171 * Input: src (input string; can be of zero length) 01172 * sub (substring to be searched for) 01173 * &loc (<return optional> location of substring in src) 01174 * Return: 1 if found; 0 if not found or on error 01175 * 01176 * Notes: 01177 * (1) This is a wrapper around strstr(). 01178 * (2) Both @src and @sub must be defined, and @sub must have 01179 * length of at least 1. 01180 * (3) If the substring is not found and loc is returned, it has 01181 * the value -1. 01182 */ 01183 l_int32 01184 stringFindSubstr(const char *src, 01185 const char *sub, 01186 l_int32 *ploc) 01187 { 01188 char *ptr; 01189 01190 PROCNAME("stringFindSubstr"); 01191 01192 if (!src) 01193 return ERROR_INT("src not defined", procName, 0); 01194 if (!sub) 01195 return ERROR_INT("sub not defined", procName, 0); 01196 if (ploc) *ploc = -1; 01197 if (strlen(sub) == 0) 01198 return ERROR_INT("substring length 0", procName, 0); 01199 if (strlen(src) == 0) 01200 return 0; 01201 01202 if ((ptr = (char *)strstr(src, sub)) == NULL) /* not found */ 01203 return 0; 01204 01205 if (ploc) 01206 *ploc = ptr - src; 01207 return 1; 01208 } 01209 01210 01211 /*! 01212 * stringReplaceSubstr() 01213 * 01214 * Input: src (input string; can be of zero length) 01215 * sub1 (substring to be replaced) 01216 * sub2 (substring to put in; can be "") 01217 * &found (<return optional> 1 if sub1 is found; 0 otherwise) 01218 * &loc (<return optional> location of ptr after replacement) 01219 * Return: dest (string with substring replaced), or null if the 01220 * substring not found or on error. 01221 * 01222 * Notes: 01223 * (1) Replaces the first instance. 01224 * (2) To only remove sub1, use "" for sub2 01225 * (3) Returns a new string if sub1 and sub2 are the same. 01226 * (4) The optional loc is input as the byte offset within the src 01227 * from which the search starts, and after the search it is the 01228 * char position in the string of the next character after 01229 * the substituted string. 01230 * (5) N.B. If ploc is not null, loc must always be initialized. 01231 * To search the string from the beginning, set loc = 0. 01232 */ 01233 char * 01234 stringReplaceSubstr(const char *src, 01235 const char *sub1, 01236 const char *sub2, 01237 l_int32 *pfound, 01238 l_int32 *ploc) 01239 { 01240 char *ptr, *dest; 01241 l_int32 nsrc, nsub1, nsub2, len, npre, loc; 01242 01243 PROCNAME("stringReplaceSubstr"); 01244 01245 if (!src) 01246 return (char *)ERROR_PTR("src not defined", procName, NULL); 01247 if (!sub1) 01248 return (char *)ERROR_PTR("sub1 not defined", procName, NULL); 01249 if (!sub2) 01250 return (char *)ERROR_PTR("sub2 not defined", procName, NULL); 01251 01252 if (pfound) 01253 *pfound = 0; 01254 if (ploc) 01255 loc = *ploc; 01256 else 01257 loc = 0; 01258 if ((ptr = (char *)strstr(src + loc, sub1)) == NULL) { 01259 return NULL; 01260 } 01261 01262 if (pfound) 01263 *pfound = 1; 01264 nsrc = strlen(src); 01265 nsub1 = strlen(sub1); 01266 nsub2 = strlen(sub2); 01267 len = nsrc + nsub2 - nsub1; 01268 if ((dest = (char *)CALLOC(len + 1, sizeof(char))) == NULL) 01269 return (char *)ERROR_PTR("dest not made", procName, NULL); 01270 npre = ptr - src; 01271 memcpy(dest, src, npre); 01272 strcpy(dest + npre, sub2); 01273 strcpy(dest + npre + nsub2, ptr + nsub1); 01274 if (ploc) 01275 *ploc = npre + nsub2; 01276 01277 return dest; 01278 } 01279 01280 01281 /*! 01282 * stringReplaceEachSubstr() 01283 * 01284 * Input: src (input string; can be of zero length) 01285 * sub1 (substring to be replaced) 01286 * sub2 (substring to put in; can be "") 01287 * &count (<optional return > the number of times that sub1 01288 * is found in src; 0 if not found) 01289 * Return: dest (string with substring replaced), or null if the 01290 * substring not found or on error. 01291 * 01292 * Notes: 01293 * (1) Replaces every instance. 01294 * (2) To only remove each instance of sub1, use "" for sub2 01295 * (3) Returns NULL if sub1 and sub2 are the same. 01296 */ 01297 char * 01298 stringReplaceEachSubstr(const char *src, 01299 const char *sub1, 01300 const char *sub2, 01301 l_int32 *pcount) 01302 { 01303 char *currstr, *newstr; 01304 l_int32 loc; 01305 01306 PROCNAME("stringReplaceEachSubstr"); 01307 01308 if (!src) 01309 return (char *)ERROR_PTR("src not defined", procName, NULL); 01310 if (!sub1) 01311 return (char *)ERROR_PTR("sub1 not defined", procName, NULL); 01312 if (!sub2) 01313 return (char *)ERROR_PTR("sub2 not defined", procName, NULL); 01314 01315 if (pcount) 01316 *pcount = 0; 01317 loc = 0; 01318 if ((newstr = stringReplaceSubstr(src, sub1, sub2, NULL, &loc)) == NULL) 01319 return NULL; 01320 01321 if (pcount) 01322 (*pcount)++; 01323 while (1) { 01324 currstr = newstr; 01325 newstr = stringReplaceSubstr(currstr, sub1, sub2, NULL, &loc); 01326 if (!newstr) 01327 return currstr; 01328 FREE(currstr); 01329 if (pcount) 01330 (*pcount)++; 01331 } 01332 } 01333 01334 01335 /*! 01336 * arrayFindEachSequence() 01337 * 01338 * Input: data (byte array) 01339 * datalen (length of data, in bytes) 01340 * sequence (subarray of bytes to find in data) 01341 * seqlen (length of sequence, in bytes) 01342 * Return: numa of offsets where the sequence is found, or null if 01343 * none are found or on error 01344 * 01345 * Notes: 01346 * (1) The byte arrays @data and @sequence are not C strings, 01347 * as they can contain null bytes. Therefore, for each 01348 * we must give the length of the array. 01349 * (2) This finds every occurrence in @data of @sequence. 01350 */ 01351 NUMA * 01352 arrayFindEachSequence(const l_uint8 *data, 01353 l_int32 datalen, 01354 const l_uint8 *sequence, 01355 l_int32 seqlen) 01356 { 01357 l_int32 start, offset, realoffset, found; 01358 NUMA *na; 01359 01360 PROCNAME("arrayFindEachSequence"); 01361 01362 if (!data || !sequence) 01363 return (NUMA *)ERROR_PTR("data & sequence not both defined", 01364 procName, NULL); 01365 01366 na = numaCreate(0); 01367 start = 0; 01368 while (1) { 01369 arrayFindSequence(data + start, datalen - start, sequence, seqlen, 01370 &offset, &found); 01371 if (found == TRUE) { 01372 realoffset = start + offset; 01373 numaAddNumber(na, realoffset); 01374 start = realoffset + seqlen; 01375 if (start >= datalen) break; 01376 } 01377 else /* no more */ 01378 break; 01379 } 01380 01381 if (numaGetCount(na) == 0) 01382 numaDestroy(&na); 01383 return na; 01384 } 01385 01386 01387 /*! 01388 * arrayFindSequence() 01389 * 01390 * Input: data (byte array) 01391 * datalen (length of data, in bytes) 01392 * sequence (subarray of bytes to find in data) 01393 * seqlen (length of sequence, in bytes) 01394 * &offset (return> offset from beginning of 01395 * data where the sequence begins) 01396 * &found (<optional return> 1 if sequence is found; 0 otherwise) 01397 * Return: 0 if OK, 1 on error 01398 * 01399 * Notes: 01400 * (1) The byte arrays 'data' and 'sequence' are not C strings, 01401 * as they can contain null bytes. Therefore, for each 01402 * we must give the length of the array. 01403 * (2) This searches for the first occurrence in @data of @sequence, 01404 * which consists of @seqlen bytes. The parameter @seqlen 01405 * must not exceed the actual length of the @sequence byte array. 01406 * (3) If the sequence is not found, the offset will be set to -1. 01407 */ 01408 l_int32 01409 arrayFindSequence(const l_uint8 *data, 01410 l_int32 datalen, 01411 const l_uint8 *sequence, 01412 l_int32 seqlen, 01413 l_int32 *poffset, 01414 l_int32 *pfound) 01415 { 01416 l_int32 i, j, found, lastpos; 01417 01418 PROCNAME("arrayFindSequence"); 01419 01420 if (!data || !sequence) 01421 return ERROR_INT("data & sequence not both defined", procName, 1); 01422 if (!poffset) 01423 return ERROR_INT("&offset not defined", procName, 1); 01424 01425 *poffset = -1; 01426 if (pfound) *pfound = 0; 01427 lastpos = datalen - seqlen + 1; 01428 found = 0; 01429 for (i = 0; i < lastpos; i++) { 01430 for (j = 0; j < seqlen; j++) { 01431 if (data[i + j] != sequence[j]) 01432 break; 01433 if (j == seqlen - 1) 01434 found = 1; 01435 } 01436 if (found) 01437 break; 01438 } 01439 01440 if (found) { 01441 *poffset = i; 01442 if (pfound) *pfound = 1; 01443 } 01444 01445 return 0; 01446 } 01447 01448 01449 /*--------------------------------------------------------------------* 01450 * Safe realloc * 01451 *--------------------------------------------------------------------*/ 01452 /*! 01453 * reallocNew() 01454 * 01455 * Input: &indata (<optional>; nulls indata) 01456 * size of input data to be copied (bytes) 01457 * size of data to be reallocated (bytes) 01458 * Return: ptr to new data, or null on error 01459 * 01460 * Action: !N.B. (3) and (4)! 01461 * (1) Allocates memory, initialized to 0 01462 * (2) Copies as much of the input data as possible 01463 * to the new block, truncating the copy if necessary 01464 * (3) Frees the input data 01465 * (4) Zeroes the input data ptr 01466 * 01467 * Notes: 01468 * (1) If newsize <=0, just frees input data and nulls ptr 01469 * (2) If input ptr is null, just callocs new memory 01470 * (3) This differs from realloc in that it always allocates 01471 * new memory (if newsize > 0) and initializes it to 0, 01472 * it requires the amount of old data to be copied, 01473 * and it takes the address of the input ptr and 01474 * nulls the handle. 01475 */ 01476 void * 01477 reallocNew(void **pindata, 01478 l_int32 oldsize, 01479 l_int32 newsize) 01480 { 01481 l_int32 minsize; 01482 void *indata; 01483 void *newdata; 01484 01485 PROCNAME("reallocNew"); 01486 01487 if (!pindata) 01488 return ERROR_PTR("input data not defined", procName, NULL); 01489 indata = *pindata; 01490 01491 if (newsize <= 0) { /* nonstandard usage */ 01492 if (indata) { 01493 FREE(indata); 01494 *pindata = NULL; 01495 } 01496 return NULL; 01497 } 01498 01499 if (!indata) /* nonstandard usage */ 01500 { 01501 if ((newdata = (void *)CALLOC(1, newsize)) == NULL) 01502 return ERROR_PTR("newdata not made", procName, NULL); 01503 return newdata; 01504 } 01505 01506 /* Standard usage */ 01507 if ((newdata = (void *)CALLOC(1, newsize)) == NULL) 01508 return ERROR_PTR("newdata not made", procName, NULL); 01509 minsize = L_MIN(oldsize, newsize); 01510 memcpy((char *)newdata, (char *)indata, minsize); 01511 01512 FREE(indata); 01513 *pindata = NULL; 01514 01515 return newdata; 01516 } 01517 01518 01519 01520 /*--------------------------------------------------------------------* 01521 * Read and write between file and memory * 01522 *--------------------------------------------------------------------*/ 01523 /*! 01524 * l_binaryRead() 01525 * 01526 * Input: filename 01527 * &nbytes (<return> number of bytes read) 01528 * Return: data, or null on error 01529 */ 01530 l_uint8 * 01531 l_binaryRead(const char *filename, 01532 size_t *pnbytes) 01533 { 01534 l_uint8 *data; 01535 FILE *fp; 01536 01537 PROCNAME("l_binaryRead"); 01538 01539 if (!filename) 01540 return (l_uint8 *)ERROR_PTR("filename not defined", procName, NULL); 01541 if (!pnbytes) 01542 return (l_uint8 *)ERROR_PTR("pnbytes not defined", procName, NULL); 01543 *pnbytes = 0; 01544 01545 if ((fp = fopenReadStream(filename)) == NULL) 01546 return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL); 01547 01548 data = l_binaryReadStream(fp, pnbytes); 01549 fclose(fp); 01550 return data; 01551 } 01552 01553 01554 /*! 01555 * l_binaryReadStream() 01556 * 01557 * Input: stream 01558 * &nbytes (<return> number of bytes read) 01559 * Return: null-terminated array, or null on error 01560 * (reading 0 bytes is not an error) 01561 * 01562 * Notes: 01563 * (1) The returned array is terminated with a null byte so that 01564 * it can be used to read ascii data into a proper C string. 01565 * (2) Side effect: this re-positions the stream ptr to the 01566 * beginning of the file. 01567 */ 01568 l_uint8 * 01569 l_binaryReadStream(FILE *fp, 01570 size_t *pnbytes) 01571 { 01572 l_int32 ignore; 01573 l_uint8 *data; 01574 01575 PROCNAME("l_binaryReadStream"); 01576 01577 if (!pnbytes) 01578 return (l_uint8 *)ERROR_PTR("&nbytes not defined", procName, NULL); 01579 *pnbytes = 0; 01580 if (!fp) 01581 return (l_uint8 *)ERROR_PTR("stream not defined", procName, NULL); 01582 01583 *pnbytes = fnbytesInFile(fp); 01584 if ((data = (l_uint8 *)CALLOC(1, *pnbytes + 1)) == NULL) 01585 return (l_uint8 *)ERROR_PTR("calloc fail for data", procName, NULL); 01586 ignore = fread(data, 1, *pnbytes, fp); 01587 return data; 01588 } 01589 01590 01591 /*! 01592 * l_binaryWrite() 01593 * 01594 * Input: filename (output) 01595 * operation ("w" for write; "a" for append) 01596 * data (binary data to be written) 01597 * nbytes (size of data array) 01598 * Return: 0 if OK; 1 on error 01599 */ 01600 l_int32 01601 l_binaryWrite(const char *filename, 01602 const char *operation, 01603 void *data, 01604 size_t nbytes) 01605 { 01606 char actualOperation[20]; 01607 FILE *fp; 01608 01609 PROCNAME("l_binaryWrite"); 01610 01611 if (!filename) 01612 return ERROR_INT("filename not defined", procName, 1); 01613 if (!operation) 01614 return ERROR_INT("operation not defined", procName, 1); 01615 if (!data) 01616 return ERROR_INT("data not defined", procName, 1); 01617 if (nbytes <= 0) 01618 return ERROR_INT("nbytes must be > 0", procName, 1); 01619 01620 if (!strcmp(operation, "w") && !strcmp(operation, "a")) 01621 return ERROR_INT("operation not one of {'w','a'}", procName, 1); 01622 01623 /* The 'b' flag to fopen() is ignored for all POSIX 01624 * conforming systems. However, Windows needs the 'b' flag. */ 01625 stringCopy(actualOperation, operation, 2); 01626 strncat(actualOperation, "b", 2); 01627 01628 if ((fp = fopenWriteStream(filename, actualOperation)) == NULL) 01629 return ERROR_INT("stream not opened", procName, 1); 01630 fwrite(data, 1, nbytes, fp); 01631 fclose(fp); 01632 return 0; 01633 } 01634 01635 01636 /*! 01637 * nbytesInFile() 01638 * 01639 * Input: filename 01640 * Return: nbytes in file; 0 on error 01641 */ 01642 size_t 01643 nbytesInFile(const char *filename) 01644 { 01645 size_t nbytes; 01646 FILE *fp; 01647 01648 PROCNAME("nbytesInFile"); 01649 01650 if (!filename) 01651 return ERROR_INT("filename not defined", procName, 0); 01652 if ((fp = fopenReadStream(filename)) == NULL) 01653 return ERROR_INT("stream not opened", procName, 0); 01654 nbytes = fnbytesInFile(fp); 01655 fclose(fp); 01656 return nbytes; 01657 } 01658 01659 01660 /*! 01661 * fnbytesInFile() 01662 * 01663 * Input: file stream 01664 * Return: nbytes in file; 0 on error 01665 */ 01666 size_t 01667 fnbytesInFile(FILE *fp) 01668 { 01669 size_t nbytes, pos; 01670 01671 PROCNAME("fnbytesInFile"); 01672 01673 if (!fp) 01674 return ERROR_INT("stream not open", procName, 0); 01675 01676 pos = ftell(fp); /* initial position */ 01677 fseek(fp, 0, SEEK_END); /* EOF */ 01678 nbytes = ftell(fp); 01679 fseek(fp, pos, SEEK_SET); /* back to initial position */ 01680 return nbytes; 01681 } 01682 01683 01684 /*--------------------------------------------------------------------* 01685 * Copy in memory * 01686 *--------------------------------------------------------------------*/ 01687 /*! 01688 * l_binaryCopy() 01689 * 01690 * Input: datas 01691 * size (of data array) 01692 * Return: datad (on heap), or null on error 01693 * 01694 * Notes: 01695 * (1) We add 4 bytes to the zeroed output because in some cases 01696 * (e.g., string handling) it is important to have the data 01697 * be null terminated. This guarantees that after the memcpy, 01698 * the result is automatically null terminated. 01699 */ 01700 l_uint8 * 01701 l_binaryCopy(l_uint8 *datas, 01702 size_t size) 01703 { 01704 l_uint8 *datad; 01705 01706 PROCNAME("l_binaryCopy"); 01707 01708 if (!datas) 01709 return (l_uint8 *)ERROR_PTR("datas not defined", procName, NULL); 01710 01711 if ((datad = (l_uint8 *)CALLOC(size + 4, sizeof(l_uint8))) == NULL) 01712 return (l_uint8 *)ERROR_PTR("datad not made", procName, NULL); 01713 memcpy(datad, datas, size); 01714 return datad; 01715 } 01716 01717 01718 /*--------------------------------------------------------------------* 01719 * File copy operations * 01720 *--------------------------------------------------------------------*/ 01721 /*! 01722 * fileCopy() 01723 * 01724 * Input: srcfile (copy this file) 01725 * newfile (to this file) 01726 * Return: 0 if OK, 1 on error 01727 */ 01728 l_int32 01729 fileCopy(const char *srcfile, 01730 const char *newfile) 01731 { 01732 l_int32 ret; 01733 size_t nbytes; 01734 l_uint8 *data; 01735 01736 PROCNAME("fileCopy"); 01737 01738 if (!srcfile) 01739 return ERROR_INT("srcfile not defined", procName, 1); 01740 if (!newfile) 01741 return ERROR_INT("newfile not defined", procName, 1); 01742 01743 if ((data = l_binaryRead(srcfile, &nbytes)) == NULL) 01744 return ERROR_INT("data not returned", procName, 1); 01745 ret = l_binaryWrite(newfile, "w", data, nbytes); 01746 FREE(data); 01747 return ret; 01748 } 01749 01750 01751 /*! 01752 * fileConcatenate() 01753 * 01754 * Input: srcfile (file to append) 01755 * destfile (file to add to) 01756 * Return: 0 if OK, 1 on error 01757 */ 01758 l_int32 01759 fileConcatenate(const char *srcfile, 01760 const char *destfile) 01761 { 01762 size_t nbytes; 01763 l_uint8 *data; 01764 01765 PROCNAME("fileConcatenate"); 01766 01767 if (!srcfile) 01768 return ERROR_INT("srcfile not defined", procName, 1); 01769 if (!destfile) 01770 return ERROR_INT("destfile not defined", procName, 1); 01771 01772 data = l_binaryRead(srcfile, &nbytes); 01773 l_binaryWrite(destfile, "a", data, nbytes); 01774 FREE(data); 01775 return 0; 01776 } 01777 01778 01779 /*! 01780 * fileAppendString() 01781 * 01782 * Input: filename 01783 * str (string to append to file) 01784 * Return: 0 if OK, 1 on error 01785 */ 01786 l_int32 01787 fileAppendString(const char *filename, 01788 const char *str) 01789 { 01790 FILE *fp; 01791 01792 PROCNAME("fileAppendString"); 01793 01794 if (!filename) 01795 return ERROR_INT("filename not defined", procName, 1); 01796 if (!str) 01797 return ERROR_INT("str not defined", procName, 1); 01798 01799 if ((fp = fopenWriteStream(filename, "a")) == NULL) 01800 return ERROR_INT("stream not opened", procName, 1); 01801 fprintf(fp, "%s", str); 01802 fclose(fp); 01803 return 0; 01804 } 01805 01806 01807 /*--------------------------------------------------------------------* 01808 * Test files for equivalence * 01809 *--------------------------------------------------------------------*/ 01810 /*! 01811 * filesAreIdentical() 01812 * 01813 * Input: fname1 01814 * fname2 01815 * &same (<return> 1 if identical; 0 if different) 01816 * Return: 0 if OK, 1 on error 01817 */ 01818 l_int32 01819 filesAreIdentical(const char *fname1, 01820 const char *fname2, 01821 l_int32 *psame) 01822 { 01823 l_int32 i, same; 01824 size_t nbytes1, nbytes2; 01825 l_uint8 *array1, *array2; 01826 01827 PROCNAME("filesAreIdentical"); 01828 01829 if (!psame) 01830 return ERROR_INT("&same not defined", procName, 1); 01831 *psame = 0; 01832 if (!fname1 || !fname2) 01833 return ERROR_INT("both names not defined", procName, 1); 01834 01835 nbytes1 = nbytesInFile(fname1); 01836 nbytes2 = nbytesInFile(fname2); 01837 if (nbytes1 != nbytes2) 01838 return 0; 01839 01840 if ((array1 = l_binaryRead(fname1, &nbytes1)) == NULL) 01841 return ERROR_INT("array1 not read", procName, 1); 01842 if ((array2 = l_binaryRead(fname2, &nbytes2)) == NULL) 01843 return ERROR_INT("array2 not read", procName, 1); 01844 same = 1; 01845 for (i = 0; i < nbytes1; i++) { 01846 if (array1[i] != array2[i]) { 01847 same = 0; 01848 break; 01849 } 01850 } 01851 FREE(array1); 01852 FREE(array2); 01853 *psame = same; 01854 01855 return 0; 01856 } 01857 01858 01859 /*--------------------------------------------------------------------------* 01860 * 16 and 32 bit byte-swapping on big endian and little endian machines * 01861 * * 01862 * These are typically used for I/O conversions: * 01863 * (1) endian conversion for data that was read from a file * 01864 * (2) endian conversion on data before it is written to a file * 01865 *--------------------------------------------------------------------------*/ 01866 01867 /*--------------------------------------------------------------------* 01868 * 16-bit byte swapping * 01869 *--------------------------------------------------------------------*/ 01870 #ifdef L_BIG_ENDIAN 01871 01872 l_uint16 01873 convertOnBigEnd16(l_uint16 shortin) 01874 { 01875 return ((shortin << 8) | (shortin >> 8)); 01876 } 01877 01878 l_uint16 01879 convertOnLittleEnd16(l_uint16 shortin) 01880 { 01881 return shortin; 01882 } 01883 01884 #else /* L_LITTLE_ENDIAN */ 01885 01886 l_uint16 01887 convertOnLittleEnd16(l_uint16 shortin) 01888 { 01889 return ((shortin << 8) | (shortin >> 8)); 01890 } 01891 01892 l_uint16 01893 convertOnBigEnd16(l_uint16 shortin) 01894 { 01895 return shortin; 01896 } 01897 01898 #endif /* L_BIG_ENDIAN */ 01899 01900 01901 /*--------------------------------------------------------------------* 01902 * 32-bit byte swapping * 01903 *--------------------------------------------------------------------*/ 01904 #ifdef L_BIG_ENDIAN 01905 01906 l_uint32 01907 convertOnBigEnd32(l_uint32 wordin) 01908 { 01909 return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) | 01910 ((wordin >> 8) & 0x0000ff00) | (wordin >> 24)); 01911 } 01912 01913 l_uint32 01914 convertOnLittleEnd32(l_uint32 wordin) 01915 { 01916 return wordin; 01917 } 01918 01919 #else /* L_LITTLE_ENDIAN */ 01920 01921 l_uint32 01922 convertOnLittleEnd32(l_uint32 wordin) 01923 { 01924 return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) | 01925 ((wordin >> 8) & 0x0000ff00) | (wordin >> 24)); 01926 } 01927 01928 l_uint32 01929 convertOnBigEnd32(l_uint32 wordin) 01930 { 01931 return wordin; 01932 } 01933 01934 #endif /* L_BIG_ENDIAN */ 01935 01936 01937 01938 /*--------------------------------------------------------------------* 01939 * Opening file streams * 01940 *--------------------------------------------------------------------*/ 01941 /*! 01942 * fopenReadStream() 01943 * 01944 * Input: filename 01945 * Return: stream, or null on error 01946 * 01947 * Notes: 01948 * (1) This wrapper also handles pathname conversions for Windows. 01949 * It should be used whenever you want to run fopen() to 01950 * read from a stream. 01951 */ 01952 FILE * 01953 fopenReadStream(const char *filename) 01954 { 01955 char *fname, *tail; 01956 FILE *fp; 01957 01958 PROCNAME("fopenReadStream"); 01959 01960 if (!filename) 01961 return (FILE *)ERROR_PTR("filename not defined", procName, NULL); 01962 01963 /* Try input filename */ 01964 fname = genPathname(filename, NULL); 01965 fp = fopen(fname, "rb"); 01966 FREE(fname); 01967 if (fp) return fp; 01968 01969 /* Else, strip directory and try locally */ 01970 splitPathAtDirectory(filename, NULL, &tail); 01971 fp = fopen(tail, "rb"); 01972 FREE(tail); 01973 01974 if (!fp) 01975 return (FILE *)ERROR_PTR("file not found", procName, NULL); 01976 return fp; 01977 } 01978 01979 01980 /*! 01981 * fopenWriteStream() 01982 * 01983 * Input: filename 01984 * modestring 01985 * Return: stream, or null on error 01986 * 01987 * Notes: 01988 * (1) This wrapper also handles pathname conversions for Windows. 01989 * It should be used whenever you want to run fopen() to 01990 * write or append to a stream. 01991 */ 01992 FILE * 01993 fopenWriteStream(const char *filename, 01994 const char *modestring) 01995 { 01996 FILE *fp; 01997 01998 PROCNAME("fopenWriteStream"); 01999 02000 if (!filename) 02001 return (FILE *)ERROR_PTR("filename not defined", procName, NULL); 02002 02003 #ifdef _WIN32 02004 { 02005 char *fname; 02006 fname = genPathname(filename, NULL); 02007 fp = fopen(fname, modestring); 02008 FREE(fname); 02009 } 02010 #else 02011 fp = fopen(filename, modestring); 02012 #endif /* _WIN32 */ 02013 02014 if (!fp) 02015 return (FILE *)ERROR_PTR("stream not opened", procName, NULL); 02016 return fp; 02017 } 02018 02019 02020 /*--------------------------------------------------------------------* 02021 * Functions to avoid C-runtime boundary crossing with dlls * 02022 *--------------------------------------------------------------------*/ 02023 /* 02024 * Problems arise when pointers to streams and data are passed 02025 * between two Windows DLLs that have been generated with different 02026 * C runtimes. To avoid this, leptonica provides wrappers for 02027 * several C library calls. 02028 */ 02029 /*! 02030 * lept_fopen() 02031 * 02032 * Input: filename 02033 * mode (same as for fopen(); e.g., "rb") 02034 * Return: stream or null on error 02035 * 02036 * Notes: 02037 * (1) This must be used by any application that passes 02038 * a file handle to a leptonica Windows DLL. 02039 */ 02040 FILE * 02041 lept_fopen(const char *filename, 02042 const char *mode) 02043 { 02044 PROCNAME("lept_fopen"); 02045 02046 if (!filename) 02047 return (FILE *)ERROR_PTR("filename not defined", procName, NULL); 02048 if (!mode) 02049 return (FILE *)ERROR_PTR("mode not defined", procName, NULL); 02050 02051 if (stringFindSubstr(mode, "r", NULL)) 02052 return fopenReadStream(filename); 02053 else 02054 return fopenWriteStream(filename, mode); 02055 } 02056 02057 02058 /*! 02059 * lept_fclose() 02060 * 02061 * Input: fp (stream handle) 02062 * Return: 0 if OK, 1 on error 02063 * 02064 * Notes: 02065 * (1) This should be used by any application that accepts 02066 * a file handle generated by a leptonica Windows DLL. 02067 */ 02068 l_int32 02069 lept_fclose(FILE *fp) 02070 { 02071 PROCNAME("lept_fclose"); 02072 02073 if (!fp) 02074 return ERROR_INT("stream not defined", procName, 1); 02075 02076 return fclose(fp); 02077 } 02078 02079 02080 /*! 02081 * lept_calloc() 02082 * 02083 * Input: nmemb (number of members) 02084 * size (of each member) 02085 * Return: void ptr, or null on error 02086 * 02087 * Notes: 02088 * (1) For safety with windows DLLs, this can be used in conjunction 02089 * with lept_free() to avoid C-runtime boundary problems. 02090 * Just use these two functions throughout your application. 02091 */ 02092 void * 02093 lept_calloc(size_t nmemb, 02094 size_t size) 02095 { 02096 if (nmemb <= 0 || size <= 0) 02097 return NULL; 02098 return CALLOC(nmemb, size); 02099 } 02100 02101 02102 /*! 02103 * lept_free() 02104 * 02105 * Input: void ptr 02106 * Return: 0 if OK, 1 on error 02107 * 02108 * Notes: 02109 * (1) This should be used by any application that accepts 02110 * heap data allocated by a leptonica Windows DLL. 02111 */ 02112 void 02113 lept_free(void *ptr) 02114 { 02115 if (!ptr) return; 02116 FREE(ptr); 02117 return; 02118 } 02119 02120 02121 /*--------------------------------------------------------------------* 02122 * Cross-platform file system operations * 02123 * [ These only write to /tmp or its subdirectories ] * 02124 *--------------------------------------------------------------------*/ 02125 /*! 02126 * lept_mkdir() 02127 * 02128 * Input: subdir 02129 * Return: 0 on success, non-zero on failure 02130 * 02131 * Notes: 02132 * (1) This makes a subdirectory of /tmp/. 02133 * (2) Use unix pathname separators. 02134 * (3) On Windows, it makes a subdirectory of <Temp>/leptonica, 02135 * where <Temp> is the Windows temp dir. The name translation is: 02136 * /tmp --> <Temp>/leptonica 02137 */ 02138 l_int32 02139 lept_mkdir(const char *subdir) 02140 { 02141 char *dir; 02142 l_int32 ret; 02143 #ifdef _WIN32 02144 char *newpath; 02145 l_uint32 attributes; 02146 #endif /* !_WIN32 */ 02147 02148 PROCNAME("lept_mkdir"); 02149 02150 if (!subdir) 02151 return ERROR_INT("subdir not defined", procName, 1); 02152 if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/')) 02153 return ERROR_INT("subdir not an actual subdirectory", procName, 1); 02154 02155 dir = pathJoin("/tmp", subdir); 02156 02157 #ifndef _WIN32 02158 ret = mkdir(dir, 0777); 02159 #else 02160 /* Make sure the leptonica subdir exists in tmp dir */ 02161 newpath = genPathname("/tmp", NULL); 02162 attributes = GetFileAttributes(newpath); 02163 if (attributes == INVALID_FILE_ATTRIBUTES) { 02164 ret = (CreateDirectory(newpath, NULL) ? 0 : 1); 02165 } 02166 FREE(newpath); 02167 02168 newpath = genPathname(dir, NULL); 02169 ret = (CreateDirectory(newpath, NULL) ? 0 : 1); 02170 FREE(newpath); 02171 #endif /* !_WIN32 */ 02172 02173 FREE(dir); 02174 return ret; 02175 } 02176 02177 02178 /*! 02179 * lept_rmdir() 02180 * 02181 * Input: subdir (of /tmp or its equivalent on Windows) 02182 * Return: 0 on success, non-zero on failure 02183 * 02184 * Notes: 02185 * (1) On unix, this removes all the files in the named 02186 * subdirectory of /tmp. It then removes the subdirectory. 02187 * (2) Use unix pathname separators. 02188 * (3) On Windows, the affected directory is a subdirectory 02189 * of <Temp>/leptonica, where <Temp> is the Windows temp dir. 02190 * (4) TODO: Use a new function lept_dirExists(path) to test 02191 * if the directory exists, and if not, fail silently. 02192 */ 02193 l_int32 02194 lept_rmdir(const char *subdir) 02195 { 02196 char *dir, *fname, *fullname; 02197 l_int32 ret, i, nfiles; 02198 SARRAY *sa; 02199 #ifdef _WIN32 02200 char *newpath; 02201 #endif /* _WIN32 */ 02202 02203 PROCNAME("lept_rmdir"); 02204 02205 if (!subdir) 02206 return ERROR_INT("subdir not defined", procName, 1); 02207 if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/')) 02208 return ERROR_INT("subdir not an actual subdirectory", procName, 1); 02209 02210 if ((dir = pathJoin("/tmp", subdir)) == NULL) 02211 return ERROR_INT("dir not made", procName, 1); 02212 02213 /* List all the files in temp subdir */ 02214 if ((sa = getFilenamesInDirectory(dir)) == NULL) { 02215 L_WARNING_STRING("directory %s does not exist", procName, dir); 02216 FREE(dir); 02217 return 1; 02218 } 02219 nfiles = sarrayGetCount(sa); 02220 02221 #ifndef _WIN32 02222 for (i = 0; i < nfiles; i++) { 02223 fname = sarrayGetString(sa, i, L_NOCOPY); 02224 fullname = genPathname(dir, fname); 02225 remove(fullname); 02226 FREE(fullname); 02227 } 02228 ret = remove(dir); 02229 #else 02230 for (i = 0; i < nfiles; i++) { 02231 fname = sarrayGetString(sa, i, L_NOCOPY); 02232 fullname = genPathname(dir, fname); 02233 ret = DeleteFile(fullname); 02234 FREE(fullname); 02235 } 02236 newpath = genPathname(dir, NULL); 02237 ret = (RemoveDirectory(newpath) ? 0 : 1); 02238 FREE(newpath); 02239 #endif /* !_WIN32 */ 02240 02241 sarrayDestroy(&sa); 02242 FREE(dir); 02243 return ret; 02244 } 02245 02246 02247 /*! 02248 * lept_rm() 02249 * 02250 * Input: subdir (can be NULL, in which case the removed file is 02251 * in /tmp) 02252 * filename (without the directory) 02253 * Return: 0 on success, non-zero on failure 02254 * 02255 * Notes: 02256 * (1) This removes the named file in /tmp or a subdirectory of /tmp. 02257 * If the file is in /tmp, use NULL for the subdir. 02258 * (2) Use unix pathname separators. 02259 * (3) On Windows, the file is in either <Temp>/leptonica, or 02260 * a subdirectory of this, where <Temp> is the Windows temp dir. 02261 * The name translation is: /tmp --> <Temp>/leptonica 02262 */ 02263 l_int32 02264 lept_rm(const char *subdir, 02265 const char *filename) 02266 { 02267 char *dir, *pathname; 02268 l_int32 ret; 02269 #ifdef _WIN32 02270 char *newpath; 02271 #endif /* _WIN32 */ 02272 02273 PROCNAME("lept_rm"); 02274 02275 if (!filename) 02276 return ERROR_INT("filename not defined", procName, 1); 02277 if ((strlen(filename) == 0) || (filename[0] == '.') || (filename[0] == '/')) 02278 return ERROR_INT("filename cannot contain a path", procName, 1); 02279 02280 if (subdir) { 02281 dir = pathJoin("/tmp", subdir); 02282 pathname = pathJoin(dir, filename); 02283 FREE(dir); 02284 } 02285 else 02286 pathname = pathJoin("/tmp", filename); 02287 if (!pathname) 02288 return ERROR_INT("pathname not made", procName, 1); 02289 02290 #ifndef _WIN32 02291 ret = remove(pathname); 02292 #else 02293 newpath = genPathname(pathname, NULL); 02294 if (!newpath) { 02295 FREE(pathname); 02296 return ERROR_INT("newpath not made", procName, 1); 02297 } 02298 ret = (DeleteFile(newpath) ? 0 : 1); 02299 FREE(newpath); 02300 #endif /* !_WIN32 */ 02301 02302 FREE(pathname); 02303 return ret; 02304 } 02305 02306 02307 /*! 02308 * lept_mv() 02309 * 02310 * Input: srcfile, newfile 02311 * Return: 0 on success, non-zero on failure 02312 * 02313 * Notes: 02314 * (1) This moves a srcfile to /tmp or to a subdirectory of /tmp. 02315 * (2) The input srcfile name is the complete pathname. 02316 * The input newfile is either in /tmp or a subdirectory 02317 * of /tmp, and newfile can be specified either as the 02318 * full path or without the leading '/tmp'. 02319 * (3) Use unix pathname separators. 02320 * (4) On Windows, the source and target filename are altered 02321 * internally if necessary to conform to the Windows temp file. 02322 * The name translation is: /tmp --> <Temp>/leptonica 02323 */ 02324 l_int32 02325 lept_mv(const char *srcfile, 02326 const char *newfile) 02327 { 02328 char *newfileplus; 02329 l_int32 ret; 02330 #ifndef _WIN32 02331 char *command; 02332 l_int32 nbytes; 02333 #else 02334 char *srcpath, *newpath, *tail; 02335 l_uint32 attributes; 02336 #endif /* !_WIN32 */ 02337 02338 PROCNAME("lept_mv"); 02339 02340 if (!srcfile || !newfile) 02341 return ERROR_INT("srcfile and newfile not both defined", procName, 1); 02342 if (strncmp(newfile, "/tmp/", 5)) 02343 newfileplus = pathJoin("/tmp", newfile); 02344 else 02345 newfileplus = stringNew(newfile); 02346 02347 #ifndef _WIN32 02348 nbytes = strlen(srcfile) + strlen(newfileplus) + 10; 02349 command = (char *)CALLOC(nbytes, sizeof(char)); 02350 snprintf(command, nbytes, "mv %s %s", srcfile, newfileplus); 02351 ret = system(command); 02352 FREE(command); 02353 #else 02354 srcpath = genPathname(srcfile, NULL); 02355 newpath = genPathname(newfileplus, NULL); 02356 attributes = GetFileAttributes(newpath); 02357 if (attributes != INVALID_FILE_ATTRIBUTES && 02358 (attributes & FILE_ATTRIBUTE_DIRECTORY)) { 02359 if (splitPathAtDirectory(srcpath, NULL, &tail)) { 02360 FREE(srcpath); 02361 FREE(newpath); 02362 return ERROR_INT("Unable to split source filename into root & tail", 02363 procName, 1); 02364 } 02365 FREE(newpath); 02366 newpath = genPathname(newfileplus, tail); 02367 FREE(tail); 02368 } 02369 02370 /* New file overwritten if it already exists */ 02371 ret = (MoveFileEx(srcpath, newpath, 02372 MOVEFILE_COPY_ALLOWED | 02373 MOVEFILE_REPLACE_EXISTING) ? 0 : 1); 02374 FREE(srcpath); 02375 FREE(newpath); 02376 #endif /* !_WIN32 */ 02377 02378 FREE(newfileplus); 02379 return ret; 02380 } 02381 02382 02383 /*! 02384 * lept_cp() 02385 * 02386 * Input: srcfile 02387 * newfile 02388 * Return: 0 on success, non-zero on failure 02389 * 02390 * Notes: 02391 * (1) This copies a file to /tmp or a subdirectory of /tmp. 02392 * (2) The input srcfile name is the complete pathname. 02393 * The input newfile is either in /tmp or a subdirectory 02394 * of /tmp, and newfile can be specified either as the 02395 * full path or without the leading '/tmp'. 02396 * (3) Use unix pathname separators. 02397 * (4) On Windows, the source and target filename are altered 02398 * internally if necessary to conform to the Windows temp file. 02399 * (5) Alternatively, you can use fileCopy(). This avoids 02400 * forking a new process and has no restrictions on the 02401 * destination directory. 02402 */ 02403 l_int32 02404 lept_cp(const char *srcfile, 02405 const char *newfile) 02406 { 02407 char *newfileplus; 02408 l_int32 ret; 02409 #ifndef _WIN32 02410 char *command; 02411 l_int32 nbytes; 02412 #else 02413 char *srcpath, *newpath, *tail; 02414 l_uint32 attributes; 02415 #endif /* !_WIN32 */ 02416 02417 PROCNAME("lept_cp"); 02418 02419 if (!srcfile || !newfile) 02420 return ERROR_INT("srcfile and newfile not both defined", procName, 1); 02421 if (strncmp(newfile, "/tmp/", 5)) 02422 newfileplus = pathJoin("/tmp", newfile); 02423 else 02424 newfileplus = stringNew(newfile); 02425 02426 #ifndef _WIN32 02427 nbytes = strlen(srcfile) + strlen(newfileplus) + 10; 02428 command = (char *)CALLOC(nbytes, sizeof(char)); 02429 snprintf(command, nbytes, "cp %s %s", srcfile, newfile); 02430 ret = system(command); 02431 FREE(command); 02432 #else 02433 srcpath = genPathname(srcfile, NULL); 02434 newpath = genPathname(newfileplus, NULL); 02435 attributes = GetFileAttributes(newpath); 02436 if (attributes != INVALID_FILE_ATTRIBUTES && 02437 (attributes & FILE_ATTRIBUTE_DIRECTORY)) { 02438 if (splitPathAtDirectory(srcpath, NULL, &tail)) { 02439 FREE(srcpath); 02440 FREE(newpath); 02441 return ERROR_INT("Unable to split source filename into root & tail", 02442 procName, 1); 02443 } 02444 FREE(newpath); 02445 newpath = genPathname(newfileplus, tail); 02446 FREE(tail); 02447 } 02448 02449 /* New file overwritten if it already exists */ 02450 ret = (CopyFile(srcpath, newpath, FALSE) ? 0 : 1); 02451 FREE(srcpath); 02452 FREE(newpath); 02453 #endif /* !_WIN32 */ 02454 02455 FREE(newfileplus); 02456 return ret; 02457 } 02458 02459 02460 /*--------------------------------------------------------------------* 02461 * File name operations * 02462 *--------------------------------------------------------------------*/ 02463 /*! 02464 * splitPathAtDirectory() 02465 * 02466 * Input: pathname (full path; can be a directory) 02467 * &dir (<optional return> root directory name of 02468 * input path, including trailing '/') 02469 * &tail (<optional return> path tail, which is either 02470 * the file name within the root directory or 02471 * the last sub-directory in the path) 02472 * Return: 0 if OK, 1 on error 02473 * 02474 * Notes: 02475 * (1) If you only want the tail, input null for the root directory ptr. 02476 * (2) If you only want the root directory name, input null for the 02477 * tail ptr. 02478 * (3) This function makes decisions based only on the lexical 02479 * structure of the input. Examples: 02480 * /usr/tmp/abc --> dir: /usr/tmp/ tail: abc 02481 * /usr/tmp/ --> dir: /usr/tmp/ tail: [empty string] 02482 * /usr/tmp --> dir: /usr/ tail: tmp 02483 * (4) N.B. The input pathname must have unix directory separators 02484 * for unix and windows directory separators for windows. 02485 */ 02486 l_int32 02487 splitPathAtDirectory(const char *pathname, 02488 char **pdir, 02489 char **ptail) 02490 { 02491 char *cpathname, *lastslash; 02492 02493 PROCNAME("splitPathAtDirectory"); 02494 02495 if (!pdir && !ptail) 02496 return ERROR_INT("null input for both strings", procName, 1); 02497 if (pdir) *pdir = NULL; 02498 if (ptail) *ptail = NULL; 02499 if (!pathname) 02500 return ERROR_INT("pathname not defined", procName, 1); 02501 02502 cpathname = stringNew(pathname); 02503 if ((lastslash = strrchr(cpathname, sepchar))) { 02504 if (ptail) 02505 *ptail = stringNew(lastslash + 1); 02506 if (pdir) { 02507 *(lastslash + 1) = '\0'; 02508 *pdir = cpathname; 02509 } 02510 else 02511 FREE(cpathname); 02512 } 02513 else { /* no directory */ 02514 if (pdir) 02515 *pdir = stringNew(""); 02516 if (ptail) 02517 *ptail = cpathname; 02518 else 02519 FREE(cpathname); 02520 } 02521 02522 return 0; 02523 } 02524 02525 02526 /*! 02527 * splitPathAtExtension() 02528 * 02529 * Input: pathname (full path; can be a directory) 02530 * &basename (<optional return> pathname not including the 02531 * last dot and characters after that) 02532 * &extension (<optional return> path extension, which is 02533 * the last dot and the characters after it. If 02534 * there is no extension, it returns the empty string) 02535 * Return: 0 if OK, 1 on error 02536 * 02537 * Notes: 02538 * (1) If you only want the extension, input null for the basename ptr. 02539 * (2) If you only want the basename without extension, input null 02540 * for the extension ptr. 02541 * (3) This function makes decisions based only on the lexical 02542 * structure of the input. Examples: 02543 * /usr/tmp/abc.jpg --> basename: /usr/tmp/abc ext: .jpg 02544 * /usr/tmp/.jpg --> basename: /usr/tmp/ tail: .jpg 02545 * /usr/tmp.jpg/ --> basename: /usr/tmp.jpg/ tail: [empty str] 02546 * ./.jpg --> basename: ./ tail: .jpg 02547 * (4) N.B. The input pathname must have unix directory separators 02548 * for unix and windows directory separators for windows. 02549 */ 02550 l_int32 02551 splitPathAtExtension(const char *pathname, 02552 char **pbasename, 02553 char **pextension) 02554 { 02555 char *tail, *dir, *lastdot; 02556 char empty[4] = ""; 02557 02558 PROCNAME("splitPathExtension"); 02559 02560 if (!pbasename && !pextension) 02561 return ERROR_INT("null input for both strings", procName, 1); 02562 if (pbasename) *pbasename = NULL; 02563 if (pextension) *pextension = NULL; 02564 if (!pathname) 02565 return ERROR_INT("pathname not defined", procName, 1); 02566 02567 /* Split out the directory first */ 02568 splitPathAtDirectory(pathname, &dir, &tail); 02569 02570 /* Then look for a "." in the tail part. 02571 * This way we ignore all "." in the directory. */ 02572 if ((lastdot = strrchr(tail, '.'))) { 02573 if (pextension) 02574 *pextension = stringNew(lastdot); 02575 if (pbasename) { 02576 *lastdot = '\0'; 02577 *pbasename = stringJoin(dir, tail); 02578 } 02579 } 02580 else { 02581 if (pextension) 02582 *pextension = stringNew(empty); 02583 if (pbasename) 02584 *pbasename = stringNew(pathname); 02585 } 02586 FREE(dir); 02587 FREE(tail); 02588 return 0; 02589 } 02590 02591 02592 /*! 02593 * pathJoin() 02594 * 02595 * Input: dir (<optional> can be null) 02596 * fname (<optional> can be null) 02597 * Return: specially concatenated path, or null on error 02598 * 02599 * Notes: 02600 * (1) Use unix-style pathname separators ('/'). 02601 * (2) @fname can be the entire path, or part of the path containing 02602 * at least one directory, or a tail without a directory, or NULL. 02603 * (3) It produces a path that strips multiple slashes to a single 02604 * slash, joins @dir and @fname by a slash, and has no trailing 02605 * slashes (except in the cases where @dir == "/" and 02606 * @fname == NULL, or v.v.). 02607 * (4) If both @dir and @fname are null, produces an empty string. 02608 * (5) The result is not canonicalized or tested for correctness: 02609 * garbage in (e.g., ...), garbage out. 02610 * (6) Examples: 02611 * //tmp// + //abc/ --> /tmp/abc 02612 * tmp/ + /abc/ --> tmp/abc 02613 * tmp/ + abc/ --> tmp/abc 02614 * /tmp/ + /// --> /tmp 02615 * /tmp/ + NULL --> /tmp 02616 * // + /abc// --> /abc 02617 * // + NULL --> / 02618 * NULL + /abc/def/ --> /abc/def 02619 * NULL + abc// --> abc 02620 * NULL + // --> / 02621 * NULL + NULL --> (empty string) 02622 * "" + "" --> (empty string) 02623 * "" + / --> / 02624 */ 02625 char * 02626 pathJoin(const char *dir, 02627 const char *fname) 02628 { 02629 char *slash = (char *)"/"; 02630 char *str, *dest; 02631 l_int32 i, n1, n2, emptydir; 02632 size_t size; 02633 SARRAY *sa1, *sa2; 02634 L_BYTEA *ba; 02635 02636 if (!dir && !fname) 02637 return stringNew(""); 02638 02639 sa1 = sarrayCreate(0); 02640 sa2 = sarrayCreate(0); 02641 ba = l_byteaCreate(4); 02642 02643 /* Process @dir */ 02644 if (dir && strlen(dir) > 0) { 02645 if (dir[0] == '/') 02646 l_byteaAppendString(ba, slash); 02647 sarraySplitString(sa1, dir, "/"); /* removes all slashes */ 02648 n1 = sarrayGetCount(sa1); 02649 for (i = 0; i < n1; i++) { 02650 str = sarrayGetString(sa1, i, L_NOCOPY); 02651 l_byteaAppendString(ba, str); 02652 l_byteaAppendString(ba, slash); 02653 } 02654 } 02655 02656 /* Special case to add leading slash: dir NULL or empty string */ 02657 emptydir = dir && strlen(dir) == 0; 02658 if ((!dir || emptydir) && fname && strlen(fname) > 0 && fname[0] == '/') 02659 l_byteaAppendString(ba, slash); 02660 02661 /* Process @fname */ 02662 if (fname && strlen(fname) > 0) { 02663 sarraySplitString(sa2, fname, "/"); 02664 n2 = sarrayGetCount(sa2); 02665 for (i = 0; i < n2; i++) { 02666 str = sarrayGetString(sa2, i, L_NOCOPY); 02667 l_byteaAppendString(ba, str); 02668 l_byteaAppendString(ba, slash); 02669 } 02670 } 02671 02672 /* Remove trailing slash */ 02673 dest = (char *)l_byteaCopyData(ba, &size); 02674 if (size > 1 && dest[size - 1] == '/') 02675 dest[size - 1] = '\0'; 02676 02677 sarrayDestroy(&sa1); 02678 sarrayDestroy(&sa2); 02679 l_byteaDestroy(&ba); 02680 return dest; 02681 } 02682 02683 02684 /*! 02685 * genPathname() 02686 * 02687 * Input: dir (directory name, with or without trailing '/') 02688 * fname (<optional> file name within the directory) 02689 * Return: pathname (either a directory or full path), or null on error 02690 * 02691 * Notes: 02692 * (1) Use unix-style pathname separators ('/'). 02693 * (2) This function can be used in several ways: 02694 * * to generate a full path from a directory and a file name 02695 * * to convert a unix pathname to a windows pathname 02696 * * to convert from the unix '/tmp' directory to the 02697 * equivalent windows temp directory. 02698 * The windows name translation is: 02699 * /tmp --> <Temp>/leptonica 02700 * (3) There are three cases for the input: 02701 * (a) @dir is a directory and @fname is null: result is a directory 02702 * (b) @dir is a full path and @fname is null: result is a full path 02703 * (c) @dir is a directory and @fname is defined: result is a full path 02704 * (4) In all cases, the resulting pathname is not terminated with a slash 02705 * (5) The caller is responsible for freeing the pathname. 02706 */ 02707 char * 02708 genPathname(const char *dir, 02709 const char *fname) 02710 { 02711 char *cdir, *pathout; 02712 l_int32 dirlen, namelen, size; 02713 02714 PROCNAME("genPathname"); 02715 02716 if (!dir) 02717 return (char *)ERROR_PTR("dir not defined", procName, NULL); 02718 02719 /* Remove trailing slash in dir, except when dir == "/" */ 02720 cdir = stringNew(dir); 02721 dirlen = strlen(cdir); 02722 if (cdir[dirlen - 1] == '/' && dirlen != 1) { 02723 cdir[dirlen - 1] = '\0'; 02724 dirlen--; 02725 } 02726 02727 namelen = (fname) ? strlen(fname) : 0; 02728 size = dirlen + namelen + 256; 02729 if ((pathout = (char *)CALLOC(size, sizeof(char))) == NULL) 02730 return (char *)ERROR_PTR("pathout not made", procName, NULL); 02731 02732 #ifdef _WIN32 02733 { 02734 char dirt[MAX_PATH]; 02735 if (stringFindSubstr(cdir, "/", NULL) > 0) { 02736 char *tempdir; 02737 l_int32 tdirlen; 02738 tempdir = stringReplaceEachSubstr(cdir, "/", "\\", NULL); 02739 tdirlen = strlen(tempdir); 02740 if (strncmp(dir, "/tmp", 4) == 0) { /* get temp directory */ 02741 GetTempPath(sizeof(dirt), dirt); 02742 stringCopy(pathout, dirt, strlen(dirt) - 1); 02743 stringCat(pathout, size, "\\leptonica"); 02744 if (tdirlen > 4) 02745 stringCat(pathout, size, tempdir + 4); 02746 02747 /* Set an extra null byte. Otherwise, when setting 02748 sepchar later, no trailing null byte remains. */ 02749 pathout[strlen(pathout) + 1] = '\0'; 02750 } 02751 else { 02752 stringCopy(pathout, tempdir, tdirlen); 02753 } 02754 FREE(tempdir); 02755 } 02756 else { /* no '/' characters; OK as is */ 02757 stringCopy(pathout, cdir, dirlen); 02758 } 02759 } 02760 #else 02761 stringCopy(pathout, cdir, dirlen); 02762 #endif /* _WIN32 */ 02763 02764 if (fname && strlen(fname) > 0) { 02765 dirlen = strlen(pathout); 02766 pathout[dirlen] = sepchar; /* append sepchar */ 02767 strncat(pathout, fname, namelen); 02768 } 02769 FREE(cdir); 02770 return pathout; 02771 } 02772 02773 02774 /*! 02775 * genTempFilename() 02776 * 02777 * Input: dir (directory name; use '.' for local dir; 02778 * no trailing '/' and @dir == "/" is invalid) 02779 * tail (<optional> tailname, including extension if any; 02780 * can be null or empty but can't contain '/') 02781 * usetime (1 to include current time in microseconds in 02782 * the filename; 0 to omit. 02783 * usepid (1 to include pid in filename; 0 to omit. 02784 * Return: temp filename, or null on error 02785 * 02786 * Notes: 02787 * (1) Use unix-style pathname separators ('/'). 02788 * (2) Specifying the root directory (@dir == "/") is invalid. 02789 * (3) Specifying a @tail containing '/' is invalid. 02790 * (4) The most general form (@usetime = @usepid = 1) is: 02791 * <dir>/<usec>_<pid>_<tail> 02792 * When @usetime = 1, @usepid = 0, the output filename is: 02793 * <dir>/<usec>_<tail> 02794 * When @usepid = 0, @usepid = 1, the output filename is: 02795 * <dir>/<pid>_<tail> 02796 * When @usetime = @usepid = 0, the output filename is: 02797 * <dir>/<tail> 02798 * Note: It is not valid to have @tail = null or empty and have 02799 * both @usetime = @usepid = 0. That is, there must be 02800 * some non-empty tail name. 02801 * (5) N.B. The caller is responsible for freeing the returned filename. 02802 * For windows, to avoid C-runtime boundary crossing problems 02803 * when using DLLs, you must use lept_free() to free the name. 02804 * (6) For windows, if the caller requests the directory '/tmp', 02805 * this uses GetTempPath() to select the actual directory, 02806 * avoiding platform-conditional code in use. The directory 02807 * selected is <Temp>/leptonica, where <Temp> is the Windows 02808 * temp directory. 02809 * (7) Set @usetime = @usepid = 1 when 02810 * (a) more than one process is writing and reading temp files, or 02811 * (b) multiple threads from a single process call this function, or 02812 * (c) there is the possiblity of an attack where the intruder 02813 * is logged onto the server and might try to guess filenames. 02814 */ 02815 char * 02816 genTempFilename(const char *dir, 02817 const char *tail, 02818 l_int32 usetime, 02819 l_int32 usepid) 02820 { 02821 char buf[256]; 02822 l_int32 i, buflen, usec, pid, emptytail; 02823 #ifdef _WIN32 02824 char *newpath; 02825 l_uint32 attributes; 02826 l_int32 ret; 02827 #endif /* !_WIN32 */ 02828 02829 PROCNAME("genTempFilename"); 02830 02831 if (!dir) 02832 return (char *)ERROR_PTR("dir not defined", procName, NULL); 02833 if (dir && strlen(dir) == 1 && dir[0] == '/') 02834 return (char *)ERROR_PTR("dir == '/' not permitted", procName, NULL); 02835 if (tail && strlen(tail) > 0 && stringFindSubstr(tail, "/", NULL)) 02836 return (char *)ERROR_PTR("tail can't contain '/'", procName, NULL); 02837 emptytail = tail && (strlen(tail) == 0); 02838 if (!usetime && !usepid && (!tail || emptytail)) 02839 return (char *)ERROR_PTR("name can't be a directory", procName, NULL); 02840 02841 if (usepid) pid = getpid(); 02842 buflen = sizeof(buf); 02843 for (i = 0; i < buflen; i++) 02844 buf[i] = 0; 02845 l_getCurrentTime(NULL, &usec); 02846 02847 #ifdef _WIN32 02848 { /* do not assume /tmp exists */ 02849 char dirt[MAX_PATH]; 02850 if (!strcmp(dir, "/tmp")) { 02851 GetTempPath(sizeof(dirt), dirt); 02852 stringCat(dirt, sizeof(dirt), "leptonica\\"); 02853 02854 /* Make sure the leptonica subdir exists in tmp dir */ 02855 newpath = genPathname("/tmp", NULL); 02856 attributes = GetFileAttributes(newpath); 02857 if (attributes == INVALID_FILE_ATTRIBUTES) { 02858 ret = (CreateDirectory(newpath, NULL) ? 0 : 1); 02859 } 02860 FREE(newpath); 02861 } 02862 else 02863 snprintf(dirt, sizeof(dirt), "%s\\", dir); /* add trailing '\' */ 02864 02865 if (usetime && usepid) 02866 snprintf(buf, buflen, "%s%d_%d_", dirt, usec, pid); 02867 else if (usetime) 02868 snprintf(buf, buflen, "%s%d_", dirt, usec); 02869 else if (usepid) 02870 snprintf(buf, buflen, "%s%d_", dirt, pid); 02871 else 02872 snprintf(buf, buflen, "%s", dirt); 02873 } 02874 #else 02875 if (usetime && usepid) 02876 snprintf(buf, buflen, "%s/%d_%d_", dir, usec, pid); 02877 else if (usetime) 02878 snprintf(buf, buflen, "%s/%d_", dir, usec); 02879 else if (usepid) 02880 snprintf(buf, buflen, "%s/%d_", dir, pid); 02881 else 02882 snprintf(buf, buflen, "%s/", dir); 02883 #endif 02884 02885 return stringJoin(buf, tail); 02886 } 02887 02888 02889 /*! 02890 * extractNumberFromFilename() 02891 * 02892 * Input: fname 02893 * numpre (number of characters before the digits to be found) 02894 * numpost (number of characters after the digits to be found) 02895 * Return: num (number embedded in the filename); -1 on error or if 02896 * not found 02897 * 02898 * Notes: 02899 * (1) Use unix-style pathname separators ('/'). 02900 * (2) The number is to be found in the basename, which is the 02901 * filename without either the directory or the last extension. 02902 * (3) When a number is found, it is non-negative. If no number 02903 * is found, this returns -1, without an error message. The 02904 * caller needs to check. 02905 */ 02906 l_int32 02907 extractNumberFromFilename(const char *fname, 02908 l_int32 numpre, 02909 l_int32 numpost) 02910 { 02911 char *tail, *basename; 02912 l_int32 len, nret, num; 02913 02914 PROCNAME("extractNumberFromFilename"); 02915 02916 if (!fname) 02917 return ERROR_INT("fname not defined", procName, -1); 02918 02919 splitPathAtDirectory(fname, NULL, &tail); 02920 splitPathAtExtension(tail, &basename, NULL); 02921 FREE(tail); 02922 02923 len = strlen(basename); 02924 if (numpre + numpost > len - 1) { 02925 FREE(basename); 02926 return ERROR_INT("numpre + numpost too big", procName, -1); 02927 } 02928 02929 basename[len - numpost] = '\0'; 02930 nret = sscanf(basename + numpre, "%d", &num); 02931 FREE(basename); 02932 02933 if (nret == 1) 02934 return num; 02935 else 02936 return -1; /* not found */ 02937 } 02938 02939 02940 /*---------------------------------------------------------------------* 02941 * Generate random integer in given range * 02942 *---------------------------------------------------------------------*/ 02943 /*! 02944 * genRandomIntegerInRange() 02945 * 02946 * Input: range (size of range; must be >= 2) 02947 * seed (use 0 to skip; otherwise call srand) 02948 * val (<return> random integer in range {0 ... range-1} 02949 * Return: 0 if OK, 1 on error 02950 * 02951 * Notes: 02952 * (1) For example, to choose a rand integer between 0 and 99, 02953 * use @range = 100. 02954 */ 02955 l_int32 02956 genRandomIntegerInRange(l_int32 range, 02957 l_int32 seed, 02958 l_int32 *pval) 02959 { 02960 PROCNAME("genRandomIntegerInRange"); 02961 02962 if (!pval) 02963 return ERROR_INT("&val not defined", procName, 1); 02964 *pval = 0; 02965 if (range < 2) 02966 return ERROR_INT("range must be >= 2", procName, 1); 02967 02968 if (seed > 0) srand(seed); 02969 *pval = (l_int32)((l_float64)range * 02970 ((l_float64)rand() / (l_float64)RAND_MAX)); 02971 return 0; 02972 } 02973 02974 02975 /*---------------------------------------------------------------------* 02976 * Leptonica version number * 02977 *---------------------------------------------------------------------*/ 02978 /*! 02979 * getLeptonicaVersion() 02980 * 02981 * Return: string of version number (e.g., 'leptonica-1.68') 02982 * 02983 * Notes: 02984 * (1) The caller has responsibility to free the memory. 02985 */ 02986 char * 02987 getLeptonicaVersion() 02988 { 02989 char *version = (char *)CALLOC(100, sizeof(char)); 02990 02991 #ifdef _MSC_VER 02992 #ifdef _USRDLL 02993 char dllStr[] = "DLL"; 02994 #else 02995 char dllStr[] = "LIB"; 02996 #endif 02997 #ifdef _DEBUG 02998 char debugStr[] = "Debug"; 02999 #else 03000 char debugStr[] = "Release"; 03001 #endif 03002 #ifdef _M_IX86 03003 char bitStr[] = " 32 bit"; 03004 #elif _M_X64 03005 char bitStr[] = " 64 bit"; 03006 #else 03007 char bitStr[] = "" 03008 #endif 03009 snprintf(version, 100, "leptonica-%d.%d (%s, %s) [MSC v.%d %s %s%s]", 03010 LIBLEPT_MAJOR_VERSION, LIBLEPT_MINOR_VERSION, 03011 __DATE__, __TIME__, _MSC_VER, dllStr, debugStr, bitStr); 03012 03013 #else 03014 03015 snprintf(version, 100, "leptonica-%d.%d", LIBLEPT_MAJOR_VERSION, 03016 LIBLEPT_MINOR_VERSION); 03017 03018 #endif /* _MSC_VER */ 03019 return version; 03020 } 03021 03022 03023 /*---------------------------------------------------------------------* 03024 * Timing procs * 03025 *---------------------------------------------------------------------*/ 03026 #ifndef _WIN32 03027 03028 #include <sys/time.h> 03029 #include <sys/resource.h> 03030 03031 static struct rusage rusage_before; 03032 static struct rusage rusage_after; 03033 03034 /*! 03035 * startTimer(), stopTimer() 03036 * 03037 * Example of usage: 03038 * 03039 * startTimer(); 03040 * .... 03041 * fprintf(stderr, "Elapsed time = %7.3f sec\n", stopTimer()); 03042 */ 03043 void 03044 startTimer(void) 03045 { 03046 getrusage(RUSAGE_SELF, &rusage_before); 03047 } 03048 03049 l_float32 03050 stopTimer(void) 03051 { 03052 l_int32 tsec, tusec; 03053 03054 getrusage(RUSAGE_SELF, &rusage_after); 03055 03056 tsec = rusage_after.ru_utime.tv_sec - rusage_before.ru_utime.tv_sec; 03057 tusec = rusage_after.ru_utime.tv_usec - rusage_before.ru_utime.tv_usec; 03058 return (tsec + ((l_float32)tusec) / 1000000.0); 03059 } 03060 03061 03062 /*! 03063 * startTimerNested(), stopTimerNested() 03064 * 03065 * Example of usage: 03066 * 03067 * L_TIMER t1 = startTimerNested(); 03068 * .... 03069 * L_TIMER t2 = startTimerNested(); 03070 * .... 03071 * fprintf(stderr, "Elapsed time 2 = %7.3f sec\n", stopTimerNested(t2)); 03072 * .... 03073 * fprintf(stderr, "Elapsed time 1 = %7.3f sec\n", stopTimerNested(t1)); 03074 */ 03075 L_TIMER 03076 startTimerNested(void) 03077 { 03078 struct rusage *rusage_start; 03079 03080 rusage_start = (struct rusage *)CALLOC(1, sizeof(struct rusage)); 03081 getrusage(RUSAGE_SELF, rusage_start); 03082 return rusage_start; 03083 } 03084 03085 l_float32 03086 stopTimerNested(L_TIMER rusage_start) 03087 { 03088 l_int32 tsec, tusec; 03089 struct rusage rusage_stop; 03090 03091 getrusage(RUSAGE_SELF, &rusage_stop); 03092 03093 tsec = rusage_stop.ru_utime.tv_sec - 03094 ((struct rusage *)rusage_start)->ru_utime.tv_sec; 03095 tusec = rusage_stop.ru_utime.tv_usec - 03096 ((struct rusage *)rusage_start)->ru_utime.tv_usec; 03097 FREE(rusage_start); 03098 return (tsec + ((l_float32)tusec) / 1000000.0); 03099 } 03100 03101 03102 /*! 03103 * l_getCurrentTime() 03104 * 03105 * Input: &sec (<optional return> in seconds since birth of Unix) 03106 * &usec (<optional return> in microseconds since birth of Unix) 03107 * Return: void 03108 */ 03109 void 03110 l_getCurrentTime(l_int32 *sec, 03111 l_int32 *usec) 03112 { 03113 struct timeval tv; 03114 03115 gettimeofday(&tv, NULL); 03116 if (sec) *sec = (l_int32)tv.tv_sec; 03117 if (usec) *usec = (l_int32)tv.tv_usec; 03118 return; 03119 } 03120 03121 03122 #else /* _WIN32 : resource.h not implemented under Windows */ 03123 03124 /* Note: if division by 10^7 seems strange, the time is expressed 03125 * as the number of 100-nanosecond intervals that have elapsed 03126 * since 12:00 A.M. January 1, 1601. */ 03127 03128 static ULARGE_INTEGER utime_before; 03129 static ULARGE_INTEGER utime_after; 03130 03131 void 03132 startTimer(void) 03133 { 03134 HANDLE this_process; 03135 FILETIME start, stop, kernel, user; 03136 03137 this_process = GetCurrentProcess(); 03138 03139 GetProcessTimes(this_process, &start, &stop, &kernel, &user); 03140 03141 utime_before.LowPart = user.dwLowDateTime; 03142 utime_before.HighPart = user.dwHighDateTime; 03143 } 03144 03145 l_float32 03146 stopTimer(void) 03147 { 03148 HANDLE this_process; 03149 FILETIME start, stop, kernel, user; 03150 ULONGLONG hnsec; /* in units of hecto-nanosecond (100 ns) intervals */ 03151 03152 this_process = GetCurrentProcess(); 03153 03154 GetProcessTimes(this_process, &start, &stop, &kernel, &user); 03155 03156 utime_after.LowPart = user.dwLowDateTime; 03157 utime_after.HighPart = user.dwHighDateTime; 03158 hnsec = utime_after.QuadPart - utime_before.QuadPart; 03159 return (l_float32)(signed)hnsec / 10000000.0; 03160 } 03161 03162 L_TIMER 03163 startTimerNested(void) 03164 { 03165 HANDLE this_process; 03166 FILETIME start, stop, kernel, user; 03167 ULARGE_INTEGER *utime_start; 03168 03169 this_process = GetCurrentProcess(); 03170 03171 GetProcessTimes (this_process, &start, &stop, &kernel, &user); 03172 03173 utime_start = (ULARGE_INTEGER *)CALLOC(1, sizeof(ULARGE_INTEGER)); 03174 utime_start->LowPart = user.dwLowDateTime; 03175 utime_start->HighPart = user.dwHighDateTime; 03176 return utime_start; 03177 } 03178 03179 l_float32 03180 stopTimerNested(L_TIMER utime_start) 03181 { 03182 HANDLE this_process; 03183 FILETIME start, stop, kernel, user; 03184 ULARGE_INTEGER utime_stop; 03185 ULONGLONG hnsec; /* in units of 100 ns intervals */ 03186 03187 this_process = GetCurrentProcess (); 03188 03189 GetProcessTimes (this_process, &start, &stop, &kernel, &user); 03190 03191 utime_stop.LowPart = user.dwLowDateTime; 03192 utime_stop.HighPart = user.dwHighDateTime; 03193 hnsec = utime_stop.QuadPart - ((ULARGE_INTEGER *)utime_start)->QuadPart; 03194 FREE(utime_start); 03195 return (l_float32)(signed)hnsec / 10000000.0; 03196 } 03197 03198 void 03199 l_getCurrentTime(l_int32 *sec, 03200 l_int32 *usec) 03201 { 03202 ULARGE_INTEGER utime, birthunix; 03203 FILETIME systemtime; 03204 LONGLONG birthunixhnsec = 116444736000000000; /*in units of 100 ns */ 03205 LONGLONG usecs; 03206 03207 GetSystemTimeAsFileTime(&systemtime); 03208 utime.LowPart = systemtime.dwLowDateTime; 03209 utime.HighPart = systemtime.dwHighDateTime; 03210 03211 birthunix.LowPart = (DWORD) birthunixhnsec; 03212 birthunix.HighPart = birthunixhnsec >> 32; 03213 03214 usecs = (LONGLONG) ((utime.QuadPart - birthunix.QuadPart) / 10); 03215 03216 if (sec) *sec = (l_int32) (usecs / 1000000); 03217 if (usec) *usec = (l_int32) (usecs % 1000000); 03218 return; 03219 } 03220 03221 #endif 03222 03223 03224 /*! 03225 * l_getFormattedDate() 03226 * 03227 * Input: (none) 03228 * Return: formatted date string, or null on error 03229 */ 03230 char * 03231 l_getFormattedDate() 03232 { 03233 char buf[64]; 03234 time_t tmp1; 03235 struct tm *tmp2; 03236 03237 tmp1 = time(NULL); 03238 tmp2 = localtime(&tmp1); 03239 strftime(buf, sizeof(buf), "%y%m%d%H%M%S", tmp2); 03240 return stringNew(buf); 03241 } 03242 03243 03244 /*--------------------------------------------------------------------* 03245 * Deprecated binary read functions * 03246 *--------------------------------------------------------------------*/ 03247 /* Don't use these: they use l_int32 instead of size_t */ 03248 /*! 03249 * arrayRead() 03250 * 03251 * Input: filename 03252 * &nbytes (<return> number of bytes read) 03253 * Return: array, or null on error 03254 */ 03255 l_uint8 * 03256 arrayRead(const char *fname, 03257 l_int32 *pnbytes) 03258 { 03259 l_uint8 *data; 03260 FILE *fp; 03261 03262 PROCNAME("arrayRead"); 03263 03264 if (!fname) 03265 return (l_uint8 *)ERROR_PTR("fname not defined", procName, NULL); 03266 if (!pnbytes) 03267 return (l_uint8 *)ERROR_PTR("pnbytes not defined", procName, NULL); 03268 *pnbytes = 0; 03269 03270 if ((fp = fopenReadStream(fname)) == NULL) 03271 return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL); 03272 03273 data = arrayReadStream(fp, pnbytes); 03274 fclose(fp); 03275 03276 return data; 03277 } 03278 03279 03280 /*! 03281 * arrayReadStream() 03282 * 03283 * Input: stream 03284 * &nbytes (<return> number of bytes read) 03285 * Return: null-terminated array, or null on error 03286 * (reading 0 bytes is not an error) 03287 * 03288 * Notes: 03289 * (1) N.B.: as a side effect, this always re-positions the 03290 * stream ptr to the beginning of the file. 03291 */ 03292 l_uint8 * 03293 arrayReadStream(FILE *fp, 03294 l_int32 *pnbytes) 03295 { 03296 l_int32 ignore; 03297 l_uint8 *data; 03298 03299 PROCNAME("arrayReadStream"); 03300 03301 if (!fp) 03302 return (l_uint8 *)ERROR_PTR("stream not defined", procName, NULL); 03303 if (!pnbytes) 03304 return (l_uint8 *)ERROR_PTR("ptr to nbytes not defined", 03305 procName, NULL); 03306 03307 *pnbytes = fnbytesInFile(fp); 03308 if ((data = (l_uint8 *)CALLOC(1, *pnbytes + 1)) == NULL) 03309 return (l_uint8 *)ERROR_PTR("CALLOC fail for data", procName, NULL); 03310 ignore = fread(data, 1, *pnbytes, fp); 03311 return data; 03312 } 03313