Leptonica 1.68
C Image Processing Library

bytearray.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  *   bytearray.c
00018  *
00019  *   Functions for handling byte arrays, in analogy with C++ 'strings'
00020  *
00021  *      Creation, copy, clone, destruction
00022  *           L_BYTEA      *l_byteaCreate()
00023  *           L_BYTEA      *l_byteaInitFromMem()
00024  *           L_BYTEA      *l_byteaInitFromFile()
00025  *           L_BYTEA      *l_byteaInitFromStream()
00026  *           L_BYTEA      *l_byteaCopy()
00027  *           L_BYTEA      *l_byteaClone()
00028  *           void          l_byteaDestroy()
00029  *
00030  *      Accessors
00031  *           size_t        l_byteaGetSize()
00032  *           l_uint8      *l_byteaGetData()
00033  *           l_uint8      *l_byteaCopyData()
00034  *
00035  *      Appending
00036  *           l_int32       l_byteaAppendData()
00037  *           l_int32       l_byteaAppendString()
00038  *           l_int32       l_byteaExtendArrayToSize()
00039  *
00040  *      Join/Split
00041  *           l_int32       l_byteaJoin()
00042  *           l_int32       l_byteaSplit()
00043  *
00044  *      Search
00045  *           l_int32       l_byteaFindEachSequence()
00046  *
00047  *      Output to file
00048  *           l_int32       l_byteaWrite()
00049  *           l_int32       l_byteaWriteStream()
00050  *
00051  *   The internal data array is always null-terminated, for ease of use
00052  *   in the event that it is an ascii string without null bytes.
00053  */
00054 
00055 #include <string.h>
00056 #include "allheaders.h"
00057 
static const l_int32  INITIAL_ARRAYSIZE = 200;   /* default data array size used by l_byteaCreate() when 0 is requested; the value is arbitrary */
00059 
00060 
00061 /*---------------------------------------------------------------------*
00062  *                  Creation, copy, clone, destruction                 *
00063  *---------------------------------------------------------------------*/
00064 /*!
00065  *  l_byteaCreate()
00066  *
00067  *      Input:  n (determines initial size of data array)
00068  *      Return: l_bytea, or null on error
00069  *
00070  *  Notes:
00071  *      (1) The allocated array is n + 1 bytes.  This allows room
00072  *          for null termination.
00073  */
00074 L_BYTEA *
00075 l_byteaCreate(size_t  nbytes)
00076 {
00077 L_BYTEA  *ba;
00078 
00079     PROCNAME("l_byteaCreate");
00080 
00081     if (nbytes <= 0)
00082         nbytes = INITIAL_ARRAYSIZE;
00083 
00084     if ((ba = (L_BYTEA *)CALLOC(1, sizeof(L_BYTEA))) == NULL)
00085         return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
00086 
00087     if ((ba->data = (l_uint8 *)CALLOC(nbytes + 1, sizeof(l_uint8))) == NULL)
00088         return (L_BYTEA *)ERROR_PTR("ba array not made", procName, NULL);
00089     ba->nalloc = nbytes + 1;
00090     ba->refcount = 1;
00091 
00092     return ba;
00093 }
00094 
00095 
00096 /*!
00097  *  l_byteaInitFromMem()
00098  *
00099  *      Input:  data (to be copied to the array)
00100  *              size (amount of data)
00101  *      Return: l_bytea, or null on error
00102  */
00103 L_BYTEA *
00104 l_byteaInitFromMem(l_uint8  *data,
00105                    size_t    size)
00106 {
00107 L_BYTEA  *ba;
00108 
00109     PROCNAME("l_byteaInitFromMem");
00110 
00111     if (!data)
00112         return (L_BYTEA *)ERROR_PTR("data not defined", procName, NULL);
00113     if (size <= 0)
00114         return (L_BYTEA *)ERROR_PTR("no bytes to initialize", procName, NULL);
00115 
00116     if ((ba = l_byteaCreate(size)) == NULL)
00117         return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
00118     memcpy(ba->data, data, size);
00119     ba->size = size;
00120     return ba;
00121 }
00122 
00123 
00124 /*!
00125  *  l_byteaInitFromFile()
00126  *
00127  *      Input:  fname
00128  *      Return: l_bytea, or null on error
00129  */
00130 L_BYTEA *
00131 l_byteaInitFromFile(const char  *fname)
00132 {
00133 FILE     *fp;
00134 L_BYTEA  *ba;
00135 
00136     PROCNAME("l_byteaInitFromFile");
00137 
00138     if (!fname)
00139         return (L_BYTEA *)ERROR_PTR("fname not defined", procName, NULL);
00140 
00141     if ((fp = fopenReadStream(fname)) == NULL)
00142         return (L_BYTEA *)ERROR_PTR("file stream not opened", procName, NULL);
00143     if ((ba = l_byteaInitFromStream(fp)) == NULL)
00144         return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
00145     fclose(fp);
00146     return ba;
00147 }
00148 
00149 
00150 /*!
00151  *  l_byteaInitFromStream()
00152  *
00153  *      Input:  stream
00154  *      Return: l_bytea, or null on error
00155  */
00156 L_BYTEA *
00157 l_byteaInitFromStream(FILE  *fp)
00158 {
00159 l_uint8  *data;
00160 size_t    nbytes;
00161 L_BYTEA  *ba;
00162 
00163     PROCNAME("l_byteaInitFromStream");
00164 
00165     if (!fp)
00166         return (L_BYTEA *)ERROR_PTR("stream not defined", procName, NULL);
00167 
00168     if ((data = l_binaryReadStream(fp, &nbytes)) == NULL)
00169         return (L_BYTEA *)ERROR_PTR("data not read", procName, NULL);
00170     if ((ba = l_byteaCreate(nbytes)) == NULL)
00171         return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
00172     memcpy(ba->data, data, nbytes);
00173     ba->size = nbytes;
00174     FREE(data);
00175     return ba;
00176 }
00177 
00178 
00179 /*!
00180  *  l_byteaCopy()
00181  *
00182  *      Input:  bas  (source lba)
00183  *              copyflag (L_COPY, L_CLONE)
00184  *      Return: clone or copy of bas, or null on error
00185  *
00186  *  Notes:
00187  *      (1) If cloning, up the refcount and return a ptr to @bas.
00188  */
00189 L_BYTEA *
00190 l_byteaCopy(L_BYTEA  *bas,
00191             l_int32   copyflag)
00192 {
00193     PROCNAME("l_byteaCopy");
00194 
00195     if (!bas)
00196         return (L_BYTEA *)ERROR_PTR("bas not defined", procName, NULL);
00197 
00198     if (copyflag == L_CLONE) {
00199         bas->refcount++;
00200         return bas;
00201     }
00202 
00203     return l_byteaInitFromMem(bas->data, bas->size);
00204 }
00205 
00206 
00207 /*!
00208  *  l_byteaDestroy()
00209  *
00210  *      Input:  &ba (<will be set to null before returning>)
00211  *      Return: void
00212  *
00213  *  Notes:
00214  *      (1) Decrements the ref count and, if 0, destroys the lba.
00215  *      (2) Always nulls the input ptr.
00216  *      (3) If the data has been previously removed, the lba will
00217  *          have been nulled, so this will do nothing.
00218  */
00219 void
00220 l_byteaDestroy(L_BYTEA  **pba)
00221 {
00222 L_BYTEA  *ba;
00223 
00224     PROCNAME("l_byteaDestroy");
00225 
00226     if (pba == NULL) {
00227         L_WARNING("ptr address is null!", procName);
00228         return;
00229     }
00230 
00231     if ((ba = *pba) == NULL)
00232         return;
00233 
00234         /* Decrement the ref count.  If it is 0, destroy the lba. */
00235     ba->refcount--;
00236     if (ba->refcount <= 0) {
00237         if (ba->data) FREE(ba->data);
00238         FREE(ba);
00239     }
00240 
00241     *pba = NULL;
00242     return;
00243 }
00244 
00245 
00246 /*---------------------------------------------------------------------*
00247  *                               Accessors                             *
00248  *---------------------------------------------------------------------*/
00249 /*!
00250  *  l_byteaGetSize()
00251  *
00252  *      Input:  ba
00253  *      Return: size of stored byte array, or 0 on error
00254  */
00255 size_t
00256 l_byteaGetSize(L_BYTEA  *ba)
00257 {
00258     PROCNAME("l_byteaGetSize");
00259 
00260     if (!ba)
00261         return ERROR_INT("ba not defined", procName, 0);
00262     return ba->size;
00263 }
00264 
00265 
00266 /*!
00267  *  l_byteaGetData()
00268  *
00269  *      Input:  ba
00270  *              &size (<returned> size of data in lba)
00271  *      Return: ptr to existing data array, or NULL on error
00272  *
00273  *  Notes:
00274  *      (1) The returned ptr is owned by @ba.  Do not free it!
00275  */
00276 l_uint8 *
00277 l_byteaGetData(L_BYTEA  *ba,
00278                size_t   *psize)
00279 {
00280     PROCNAME("l_byteaGetData");
00281 
00282     if (!ba)
00283         return (l_uint8 *)ERROR_PTR("ba not defined", procName, NULL);
00284     if (!psize)
00285         return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL);
00286 
00287     *psize = ba->size;
00288     return ba->data;
00289 }
00290 
00291 
00292 /*!
00293  *  l_byteaCopyData()
00294  *
00295  *      Input:  ba
00296  *              &size (<returned> size of data in lba)
00297  *      Return: copy of data in use in the data array, or null on error.
00298  *
00299  *  Notes:
00300  *      (1) The returned data is owned by the caller.  The input @ba
00301  *          still owns the original data array.
00302  */
00303 l_uint8 *
00304 l_byteaCopyData(L_BYTEA  *ba,
00305                 size_t   *psize)
00306 {
00307 l_uint8  *data;
00308 
00309     PROCNAME("l_byteaCopyData");
00310 
00311     if (!psize)
00312         return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL);
00313     *psize = 0;
00314     if (!ba)
00315         return (l_uint8 *)ERROR_PTR("ba not defined", procName, NULL);
00316 
00317     data = l_byteaGetData(ba, psize);
00318     return l_binaryCopy(data, *psize);
00319 }
00320 
00321 
00322 /*---------------------------------------------------------------------*
00323  *                               Appending                             *
00324  *---------------------------------------------------------------------*/
00325 /*!
00326  *  l_byteaAppendData()
00327  *
00328  *      Input:  ba
00329  *              newdata (byte array to be appended)
00330  *              size (size of data array)
00331  *      Return: 0 if OK, 1 on error
00332  */
00333 l_int32
00334 l_byteaAppendData(L_BYTEA  *ba,
00335                   l_uint8  *newdata,
00336                   size_t    newbytes)
00337 {
00338 size_t  size, nalloc, reqsize;
00339 
00340     PROCNAME("l_byteaAppendData");
00341 
00342     if (!ba)
00343         return ERROR_INT("ba not defined", procName, 1);
00344     if (!newdata)
00345         return ERROR_INT("newdata not defined", procName, 1);
00346 
00347     size = l_byteaGetSize(ba);
00348     reqsize = size + newbytes + 1;
00349     nalloc = ba->nalloc;
00350     if (nalloc < reqsize)
00351         l_byteaExtendArrayToSize(ba, 2 * reqsize);
00352 
00353     memcpy((char *)(ba->data + size), (char *)newdata, newbytes);
00354     ba->size += newbytes;
00355     return 0;
00356 }
00357 
00358 
00359 /*!
00360  *  l_byteaAppendString()
00361  *
00362  *      Input:  ba
00363  *              str (null-terminated string to be appended)
00364  *      Return: 0 if OK, 1 on error
00365  */
00366 l_int32
00367 l_byteaAppendString(L_BYTEA  *ba,
00368                     char     *str)
00369 {
00370 size_t  size, len, nalloc, reqsize;
00371 
00372     PROCNAME("l_byteaAppendString");
00373 
00374     if (!ba)
00375         return ERROR_INT("ba not defined", procName, 1);
00376     if (!str)
00377         return ERROR_INT("str not defined", procName, 1);
00378 
00379     size = l_byteaGetSize(ba);
00380     len = strlen(str);
00381     reqsize = size + len + 1;
00382     nalloc = ba->nalloc;
00383     if (nalloc < reqsize)
00384         l_byteaExtendArrayToSize(ba, 2 * reqsize);
00385 
00386     memcpy(ba->data + size, str, len);
00387     ba->size += len;
00388     return 0;
00389 }
00390 
00391 
00392 /*!
00393  *  l_byteaExtendArrayToSize()
00394  *
00395  *      Input:  ba
00396  *              size (new size of lba data array)
00397  *      Return: 0 if OK; 1 on error
00398  */
00399 l_int32
00400 l_byteaExtendArrayToSize(L_BYTEA  *ba,
00401                          size_t    size)
00402 {
00403     PROCNAME("l_byteaExtendArrayToSize");
00404 
00405     if (!ba)
00406         return ERROR_INT("ba not defined", procName, 1);
00407 
00408     if (size > ba->nalloc) {
00409         if ((ba->data =
00410             (l_uint8 *)reallocNew((void **)&ba->data, ba->nalloc, size))
00411                  == NULL)
00412             return ERROR_INT("new array not returned", procName, 1);
00413         ba->nalloc = size;
00414     }
00415     return 0;
00416 }
00417 
00418 
00419 /*---------------------------------------------------------------------*
00420  *                        String join/split                            *
00421  *---------------------------------------------------------------------*/
00422 /*!
00423  *  l_byteaJoin()
00424  *
00425  *      Input:  ba1
00426  *              &ba2 (data array is added to the one in ba1, and
00427  *                     then ba2 is destroyed)
00428  *      Return: 0 if OK, 1 on error
00429  *
00430  *  Notes:
00431  *      (1) It is a no-op, not an error, for @ba2 to be null.
00432  */
00433 l_int32
00434 l_byteaJoin(L_BYTEA   *ba1,
00435             L_BYTEA  **pba2)
00436 {
00437 l_uint8  *data2;
00438 size_t    nbytes2;
00439 L_BYTEA  *ba2;
00440 
00441     PROCNAME("l_byteaJoin");
00442 
00443     if (!ba1)
00444         return ERROR_INT("ba1 not defined", procName, 1);
00445     if (!pba2)
00446         return ERROR_INT("&ba2 not defined", procName, 1);
00447     if ((ba2 = *pba2) == NULL) return 0;
00448 
00449     data2 = l_byteaGetData(ba2, &nbytes2);
00450     l_byteaAppendData(ba1, data2, nbytes2);
00451 
00452     l_byteaDestroy(pba2);
00453     return 0;
00454 }
00455 
00456 
00457 /*!
00458  *  l_byteaSplit()
00459  *
00460  *      Input:  ba1 (lba to split; array bytes nulled beyond the split loc)
00461  *              splitloc (location in ba1 to split; ba2 begins there)
00462  *              &ba2 (<return> with data starting at splitloc)
00463  *      Return: 0 if OK, 1 on error
00464  */
00465 l_int32
00466 l_byteaSplit(L_BYTEA   *ba1,
00467              size_t     splitloc,
00468              L_BYTEA  **pba2)
00469 {
00470 l_uint8  *data1;
00471 size_t    nbytes1, nbytes2;
00472 
00473     PROCNAME("l_byteaSplit");
00474 
00475     if (!pba2)
00476         return ERROR_INT("&ba2 not defined", procName, 1);
00477     *pba2 = NULL;
00478     if (!ba1)
00479         return ERROR_INT("ba1 not defined", procName, 1);
00480 
00481     data1 = l_byteaGetData(ba1, &nbytes1);
00482     if (splitloc < 0 || splitloc >= nbytes1)
00483         return ERROR_INT("splitloc invalid", procName, 1);
00484     nbytes2 = nbytes1 - splitloc;
00485 
00486         /* Make the new lba */
00487     *pba2 = l_byteaInitFromMem(data1 + splitloc, nbytes2);
00488 
00489         /* Null the removed bytes in the input lba */
00490     memset(data1 + splitloc, 0, nbytes2);
00491     ba1->size = splitloc;
00492     return 0;
00493 }
00494 
00495 
00496 /*---------------------------------------------------------------------*
00497  *                                Search                               *
00498  *---------------------------------------------------------------------*/
00499 /*!
00500  *  l_byteaFindEachSequence()
00501  *
00502  *      Input:  ba
00503  *              sequence (subarray of bytes to find in data)
00504  *              seqlen (length of sequence, in bytes)
00505  *              &na (<return> byte positions of each occurrence of @sequence)
00506  *      Return: 0 if OK, 1 on error
00507  */
00508 l_int32
00509 l_byteaFindEachSequence(L_BYTEA   *ba,
00510                         l_uint8   *sequence,
00511                         l_int32    seqlen,
00512                         NUMA     **pna)
00513 {
00514 l_uint8  *data;
00515 size_t    size;
00516 
00517     PROCNAME("l_byteaFindEachSequence");
00518 
00519     if (!pna)
00520         return ERROR_INT("&na not defined", procName, 1);
00521     *pna = NULL;
00522     if (!ba)
00523         return ERROR_INT("ba not defined", procName, 1);
00524     if (!sequence)
00525         return ERROR_INT("sequence not defined", procName, 1);
00526 
00527     data = l_byteaGetData(ba, &size);
00528     *pna = arrayFindEachSequence(data, size, sequence, seqlen);
00529     return 0;
00530 }
00531 
00532 
00533 /*---------------------------------------------------------------------*
00534  *                              Output to file                         *
00535  *---------------------------------------------------------------------*/
00536 /*!
00537  *  l_byteaWrite()
00538  *
00539  *      Input:  fname (output file)
00540  *              ba
00541  *              startloc (first byte to output)
00542  *              endloc (last byte to output; use 0 to write to the
00543  *                      end of the data array)
00544  *      Return: 0 if OK, 1 on error
00545  */
00546 l_int32
00547 l_byteaWrite(const char  *fname,
00548              L_BYTEA     *ba,
00549              size_t       startloc,
00550              size_t       endloc)
00551 {
00552 l_int32  ret;
00553 FILE    *fp;
00554 
00555     PROCNAME("l_byteaWrite");
00556 
00557     if (!fname)
00558         return ERROR_INT("fname not defined", procName, 1);
00559     if (!ba)
00560         return ERROR_INT("ba not defined", procName, 1);
00561 
00562     if ((fp = fopenWriteStream(fname, "wb")) == NULL)
00563         return ERROR_INT("stream not opened", procName, 1);
00564     ret = l_byteaWriteStream(fp, ba, startloc, endloc);
00565     fclose(fp);
00566     return ret;
00567 }
00568 
00569 
00570 /*!
00571  *  l_byteaWriteStream()
00572  *
00573  *      Input:  stream (opened for binary write)
00574  *              ba
00575  *              startloc (first byte to output)
00576  *              endloc (last byte to output; use 0 to write to the
00577  *                      end of the data array)
00578  *      Return: 0 if OK, 1 on error
00579  */
00580 l_int32
00581 l_byteaWriteStream(FILE     *fp,
00582                    L_BYTEA  *ba,
00583                    size_t    startloc,
00584                    size_t    endloc)
00585 {
00586 l_uint8  *data;
00587 size_t    size, nbytes;
00588 
00589     PROCNAME("l_byteaWriteStream");
00590 
00591     if (!fp)
00592         return ERROR_INT("stream not defined", procName, 1);
00593     if (!ba)
00594         return ERROR_INT("ba not defined", procName, 1);
00595 
00596     data = l_byteaGetData(ba, &size);
00597     if (startloc >= size)
00598         return ERROR_INT("invalid startloc", procName, 1);
00599     if (endloc == 0) endloc = size - 1;
00600     nbytes = endloc - startloc + 1;
00601     if (nbytes < 1)
00602         return ERROR_INT("endloc must be >= startloc", procName, 1);
00603 
00604     fwrite(data + startloc, 1, nbytes, fp);
00605     return 0;
00606 }
00607 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines