Leptonica 1.68
C Image Processing Library

parseprotos.c

Go to the documentation of this file.
00001 /*====================================================================*
00002  -  Copyright (C) 2001 Leptonica.  All rights reserved.
00003  -  This software is distributed in the hope that it will be
00004  -  useful, but with NO WARRANTY OF ANY KIND.
00005  -  No author or distributor accepts responsibility to anyone for the
00006  -  consequences of using this software, or for whether it serves any
00007  -  particular purpose or works at all, unless he or she says so in
00008  -  writing.  Everyone is granted permission to copy, modify and
00009  -  redistribute this source code, for commercial or non-commercial
00010  -  purposes, with the following restrictions: (1) the origin of this
00011  -  source code must not be misrepresented; (2) modified versions must
00012  -  be plainly marked as such; and (3) this notice may not be removed
00013  -  or altered from any source or modified source distribution.
00014  *====================================================================*/
00015 
00016 /*
00017  * parseprotos.c
00018  *
00019  *       char             *parseForProtos()
00020  *
00021  *    Static helpers
00022  *       static l_int32    getNextNonCommentLine()
00023  *       static l_int32    getNextNonBlankLine()
00024  *       static l_int32    getNextNonDoubleSlashLine()
00025  *       static l_int32    searchForProtoSignature()
00026  *       static char      *captureProtoSignature()
00027  *       static char      *cleanProtoSignature()
00028  *       static l_int32    skipToEndOfFunction()
00029  *       static l_int32    skipToMatchingBrace()
00030  *       static l_int32    skipToSemicolon()
00031  *       static l_int32    getOffsetForCharacter()
00032  *       static l_int32    getOffsetForMatchingRP()
00033  */
00034 
00035 #include <string.h>
00036 #include "allheaders.h"
00037 
00038     /* MS VC++ can't handle array initialization with static consts ! */
00039 #define L_BUF_SIZE      512    /* max token size */
00040 
00041 
00042 static l_int32 getNextNonCommentLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
00043 static l_int32 getNextNonBlankLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
00044 static l_int32 getNextNonDoubleSlashLine(SARRAY *sa, l_int32 start,
00045             l_int32 *pnext);
00046 static l_int32 searchForProtoSignature(SARRAY *sa, l_int32 begin,
00047             l_int32 *pstart, l_int32 *pstop, l_int32 *pcharindex,
00048             l_int32 *pfound);
00049 static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop,
00050             l_int32 charindex);
00051 static char * cleanProtoSignature(char *str);
00052 static l_int32 skipToEndOfFunction(SARRAY *sa, l_int32 start,
00053             l_int32 charindex, l_int32 *pnext);
00054 static l_int32 skipToMatchingBrace(SARRAY *sa, l_int32 start,
00055             l_int32 lbindex, l_int32 *prbline, l_int32 *prbindex);
00056 static l_int32 skipToSemicolon(SARRAY *sa, l_int32 start,
00057             l_int32 charindex, l_int32 *pnext);
00058 static l_int32 getOffsetForCharacter(SARRAY *sa, l_int32 start, char tchar,
00059             l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
00060 static l_int32 getOffsetForMatchingRP(SARRAY *sa, l_int32 start,
00061             l_int32 soffsetlp, l_int32 boffsetlp, l_int32 toffsetlp,
00062             l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
00063 
00064 
00065 /*
00066  *  parseForProtos()
00067  *
00068  *      Input:  filein (output of cpp)
00069  *              prestring (<optional> string that prefaces each decl;
00070  *                        use NULL to omit)
00071  *      Return: parsestr (string of function prototypes), or NULL on error
00072  *
00073  *  Notes:
00074  *      (1) We parse the output of cpp:
00075  *              cpp -ansi <filein> 
00076  *          Three plans were attempted, with success on the third. 
00077  *      (2) Plan 1.  A cursory examination of the cpp output indicated that
00078  *          every function was preceded by a cpp comment statement.
00079  *          So we just need to look at statements beginning after comments.
00080  *          Unfortunately, this is NOT the case.  Some functions start
00081  *          without cpp comment lines, typically when there are no
00082  *          comments in the source that immediately precede the function.
00083  *      (3) Plan 2.  Consider the keywords in the language that start
00084  *          parts of the cpp file.  Some, like 'typedef', 'enum',
00085  *          'union' and 'struct', are followed after a while by '{',
00086  *          and eventually end with '}', plus an optional token and a
00087  *          final ';'  Others, like 'extern' and 'static', are never
00088  *          the beginnings of global function definitions.   Function
00089  *          prototypes have one or more sets of '(' followed eventually
00090  *          by a ')', and end with ';'.  But function definitions have
00091  *          tokens, followed by '(', more tokens, ')' and then
00092  *          immediately a '{'.  We would generate a prototype from this
00093  *          by adding a ';' to all tokens up to the ')'.  So we use
00094  *          these special tokens to decide what we are parsing.  And
00095  *          whenever a function definition is found and the prototype
00096  *          extracted, we skip through the rest of the function
00097  *          past the corresponding '}'.  This token ends a line, and
00098  *          is often on a line of its own.  But as it turns out,
00099  *          the only keyword we need to consider is 'static'.
00100  *      (4) Plan 3.  Consider the parentheses and braces for various
00101  *          declarations.  A struct, enum, or union has a pair of
00102  *          braces followed by a semicolon.  They cannot have parentheses
00103  *          before the left brace, but a struct can have lots of parentheses
00104  *          within the brace set.  A function prototype has no braces.
00105  *          A function definition can have sets of left and right
00106  *          parentheses, but these are followed by a left brace.
00107  *          So plan 3 looks at the way parentheses and braces are
00108  *          organized.  Once the beginning of a function definition
00109  *          is found, the prototype is extracted and we search for
00110  *          the ending right brace.
00111  *      (5) To find the ending right brace, it is necessary to do some
00112  *          careful parsing.  For example, in this file, we have
00113  *          left and right braces as characters, and these must not
00114  *          be counted.  Somewhat more tricky, the file fhmtauto.c
00115  *          generates code, and includes a right brace in a string.
00116  *          So we must not include braces that are in strings.  But how
00117  *          do we know if something is inside a string?  Keep state,
00118  *          starting with not-inside, and every time you hit a double quote
00119  *          that is not escaped, toggle the condition.  Any brace
00120  *          found in the state of being within a string is ignored.
00121  *      (6) When a prototype is extracted, it is put in a canonical
00122  *          form (i.e., cleaned up).  Finally, we check that it is
00123  *          not static and save it.  (If static, it is ignored).
00124  *      (7) The @prestring for unix is NULL; it is included here so that
00125  *          you can use Microsoft's declaration for importing or
00126  *          exporting to a dll.  See environ.h for examples of use.
00127  *          Here, we set: @prestring = "LEPT_DLL ".  Note in particular
00128  *          the space character that will separate 'LEPT_DLL' from
00129  *          the standard unix prototype that follows.
00130  */
00131 char *
00132 parseForProtos(const char *filein,
00133                const char *prestring)
00134 {
00135 char    *strdata, *str, *newstr, *parsestr, *secondword;
00136 l_int32  start, next, stop, charindex, found;
00137 size_t   nbytes;
00138 SARRAY  *sa, *saout, *satest;
00139 
00140     PROCNAME("parseForProtos");
00141 
00142     if (!filein)
00143         return (char *)ERROR_PTR("filein not defined", procName, NULL);
00144 
00145         /* Read in the cpp output into memory, one string for each
00146          * line in the file, omitting blank lines.  */
00147     strdata = (char *)l_binaryRead(filein, &nbytes);
00148     sa = sarrayCreateLinesFromString(strdata, 0);
00149 
00150     saout = sarrayCreate(0);
00151     next = 0;
00152     while (1) {  /* repeat after each non-static prototype is extracted */
00153         searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
00154         if (!found)
00155             break;
00156 /*        fprintf(stderr, "  start = %d, stop = %d, charindex = %d\n",
00157                 start, stop, charindex); */
00158         str = captureProtoSignature(sa, start, stop, charindex);
00159 
00160             /* Make sure that the signature found by cpp is neither
00161              * static nor extern.  We get 'extern' declarations from
00162              * header files, and with some versions of cpp running on
00163              * #include <sys/stat.h> we get something of the form:
00164              *    extern ... (( ... )) ... ( ... ) { ...
00165              * For this, the 1st '(' is the lp, the 2nd ')' is the rp,
00166              * and there is a lot of garbage between the rp and the lb.
00167              * It is easiest to simply reject any signature that starts
00168              * with 'extern'.  Note also that an 'extern' token has been
00169              * prepended to each prototype, so the 'static' or
00170              * 'extern' keywords we are looking for, if they exist,
00171              * would be the second word. */
00172         satest = sarrayCreateWordsFromString(str);
00173         secondword = sarrayGetString(satest, 1, 0);
00174         if (strcmp(secondword, "static") &&  /* not static */
00175             strcmp(secondword, "extern")) {  /* not extern */
00176             if (prestring) {  /* prepend it to the prototype */
00177                 newstr = stringJoin(prestring, str);
00178                 sarrayAddString(saout, newstr, L_INSERT);
00179                 FREE(str);
00180             }
00181             else
00182                 sarrayAddString(saout, str, L_INSERT);
00183         }
00184         else
00185             FREE(str);
00186         sarrayDestroy(&satest);
00187 
00188         skipToEndOfFunction(sa, stop, charindex, &next);
00189         if (next == -1) break;
00190     }
00191 
00192         /* Flatten into a string with newlines between prototypes */
00193     parsestr = sarrayToString(saout, 1);
00194     FREE(strdata);
00195     sarrayDestroy(&sa);
00196     sarrayDestroy(&saout);
00197 
00198     return parsestr;
00199 }
00200 
00201 
00202 /* 
00203  *  getNextNonCommentLine()
00204  *
00205  *      Input:  sa (output from cpp, by line)
00206  *              start (starting index to search)
00207  *              &next (<return> index of first uncommented line after
00208  *                     the start line)
00209  *      Return: 0 if OK, 1 on error
00210  *
00211  *  Notes:
00212  *      (1) Skips over all consecutive comment lines, beginning at 'start'
00213  *      (2) If all lines to the end are '#' comments, return next = -1
00214  */
00215 static l_int32
00216 getNextNonCommentLine(SARRAY  *sa,
00217                       l_int32  start,
00218                       l_int32 *pnext)
00219 {
00220 char    *str;
00221 l_int32  i, n;
00222 
00223     PROCNAME("getNextNonCommentLine");
00224 
00225     if (!sa)
00226         return ERROR_INT("sa not defined", procName, 1);
00227     if (!pnext)
00228         return ERROR_INT("&pnext not defined", procName, 1);
00229 
00230         /* Init for situation where this line and all following are comments */
00231     *pnext = -1;
00232 
00233     n = sarrayGetCount(sa);
00234     for (i = start; i < n; i++) {
00235         if ((str = sarrayGetString(sa, i, 0)) == NULL)
00236             return ERROR_INT("str not returned; shouldn't happen", procName, 1);
00237         if (str[0] != '#') {
00238             *pnext = i;
00239             return 0;
00240         }
00241     }
00242 
00243     return 0;
00244 }
00245 
00246 
00247 /* 
00248  *  getNextNonBlankLine()
00249  *
00250  *      Input:  sa (output from cpp, by line)
00251  *              start (starting index to search)
00252  *              &next (<return> index of first nonblank line after
00253  *                     the start line)
00254  *      Return: 0 if OK, 1 on error
00255  *
00256  *  Notes:
00257  *      (1) Skips over all consecutive blank lines, beginning at 'start'
00258  *      (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r')
00259  *      (3) If all lines to the end are blank, return next = -1
00260  */
00261 static l_int32
00262 getNextNonBlankLine(SARRAY  *sa,
00263                     l_int32  start,
00264                     l_int32 *pnext)
00265 {
00266 char    *str;
00267 l_int32  i, j, n, len;
00268 
00269     PROCNAME("getNextNonBlankLine");
00270 
00271     if (!sa)
00272         return ERROR_INT("sa not defined", procName, 1);
00273     if (!pnext)
00274         return ERROR_INT("&pnext not defined", procName, 1);
00275 
00276         /* Init for situation where this line and all following are blank */
00277     *pnext = -1;
00278 
00279     n = sarrayGetCount(sa);
00280     for (i = start; i < n; i++) {
00281         if ((str = sarrayGetString(sa, i, 0)) == NULL)
00282             return ERROR_INT("str not returned; shouldn't happen", procName, 1);
00283         len = strlen(str);
00284         for (j = 0; j < len; j++) {
00285             if (str[j] != ' ' && str[j] != '\t'
00286                 && str[j] != '\n' && str[j] != '\r') {  /* non-blank */
00287                 *pnext = i;
00288                 return 0;
00289             }
00290         }
00291     }
00292 
00293     return 0;
00294 }
00295 
00296 
00297 /* 
00298  *  getNextNonDoubleSlashLine()
00299  *
00300  *      Input:  sa (output from cpp, by line)
00301  *              start (starting index to search)
00302  *              &next (<return> index of first uncommented line after
00303  *                     the start line)
00304  *      Return: 0 if OK, 1 on error
00305  *
00306  *  Notes:
00307  *      (1) Skips over all consecutive '//' lines, beginning at 'start'
00308  *      (2) If all lines to the end start with '//', return next = -1
00309  */
00310 static l_int32
00311 getNextNonDoubleSlashLine(SARRAY  *sa,
00312                           l_int32  start,
00313                           l_int32 *pnext)
00314 {
00315 char    *str;
00316 l_int32  i, n, len;
00317 
00318     PROCNAME("getNextNonDoubleSlashLine");
00319 
00320     if (!sa)
00321         return ERROR_INT("sa not defined", procName, 1);
00322     if (!pnext)
00323         return ERROR_INT("&pnext not defined", procName, 1);
00324 
00325         /* Init for situation where this line and all following
00326          * start with '//' */
00327     *pnext = -1;
00328 
00329     n = sarrayGetCount(sa);
00330     for (i = start; i < n; i++) {
00331         if ((str = sarrayGetString(sa, i, 0)) == NULL)
00332             return ERROR_INT("str not returned; shouldn't happen", procName, 1);
00333         len = strlen(str);
00334         if (len < 2 || str[0] != '/' || str[1] != '/') {
00335             *pnext = i;
00336             return 0;
00337         }
00338     }
00339 
00340     return 0;
00341 }
00342 
00343 
00344 /*
00345  *  searchForProtoSignature()
00346  *
00347  *      Input:  sa (output from cpp, by line)
00348  *              begin (beginning index to search)
00349  *              &start (<return> starting index for function definition)
00350  *              &stop (<return> index of line on which proto is completed)
00351  *              &charindex (<return> char index of completing ')' character)
00352  *              &found (<return> 1 if valid signature is found; 0 otherwise)
00353  *      Return: 0 if OK, 1 on error
00354  *
00355  *  Notes:
00356  *      (1) If this returns found == 0, it means that there are no
00357  *          more function definitions in the file.  Caller must check
00358  *          this value and exit the loop over the entire cpp file.
00359  *      (2) This follows plan 3 (see above).  We skip comment and blank
00360  *          lines at the beginning.  Then we don't check for keywords.
00361  *          Instead, find the relative locations of the first occurrences
00362  *          of these four tokens: left parenthesis (lp), right
00363  *          parenthesis (rp), left brace (lb) and semicolon (sc).
00364  *      (3) The signature of a function definition looks like this:
00365  *               .... '(' .... ')' '{'
00366  *          where the lp and rp must both precede the lb, with only
00367  *          whitespace between the rp and the lb.  The '....'
00368  *          are sets of tokens that have no braces.
00369  *      (4) If a function definition is found, this returns found = 1,
00370  *          with 'start' being the first line of the definition and
00371  *          'charindex' being the position of the ')' in line 'stop'
00372  *          at the end of the arg list.
00373  */
00374 static l_int32
00375 searchForProtoSignature(SARRAY   *sa,
00376                         l_int32   begin,
00377                         l_int32  *pstart,
00378                         l_int32  *pstop,
00379                         l_int32  *pcharindex,
00380                         l_int32  *pfound)
00381 {
00382 l_int32  next, rbline, rbindex, scline;
00383 l_int32  soffsetlp, soffsetrp, soffsetlb, soffsetsc;
00384 l_int32  boffsetlp, boffsetrp, boffsetlb, boffsetsc;
00385 l_int32  toffsetlp, toffsetrp, toffsetlb, toffsetsc;
00386 
00387     PROCNAME("searchForProtoSignature");
00388 
00389     if (!sa)
00390         return ERROR_INT("sa not defined", procName, 1);
00391     if (!pstart)
00392         return ERROR_INT("&start not defined", procName, 1);
00393     if (!pstop)
00394         return ERROR_INT("&stop not defined", procName, 1);
00395     if (!pcharindex)
00396         return ERROR_INT("&charindex not defined", procName, 1);
00397     if (!pfound)
00398         return ERROR_INT("&found not defined", procName, 1);
00399 
00400     *pfound = FALSE;
00401 
00402     while (1) {
00403 
00404             /* Skip over sequential '#' comment lines */
00405         getNextNonCommentLine(sa, begin, &next);
00406         if (next == -1) return 0;
00407         if (next != begin) {
00408             begin = next;
00409             continue;
00410         }
00411 
00412             /* Skip over sequential blank lines */
00413         getNextNonBlankLine(sa, begin, &next);
00414         if (next == -1) return 0;
00415         if (next != begin) {
00416             begin = next;
00417             continue;
00418         }
00419 
00420             /* Skip over sequential lines starting with '//' */
00421         getNextNonDoubleSlashLine(sa, begin, &next);
00422         if (next == -1) return 0;
00423         if (next != begin) {
00424             begin = next;
00425             continue;
00426         }
00427 
00428             /* Search for specific character sequence patterns; namely
00429              * a lp, a matching rp, a lb and a semicolon.
00430              * Abort the search if no lp is found. */
00431         getOffsetForCharacter(sa, next, '(', &soffsetlp, &boffsetlp,
00432                               &toffsetlp);
00433         if (soffsetlp == -1)
00434             break;
00435         getOffsetForMatchingRP(sa, next, soffsetlp, boffsetlp, toffsetlp,
00436                                &soffsetrp, &boffsetrp, &toffsetrp);
00437         getOffsetForCharacter(sa, next, '{', &soffsetlb, &boffsetlb,
00438                               &toffsetlb);
00439         getOffsetForCharacter(sa, next, ';', &soffsetsc, &boffsetsc,
00440                               &toffsetsc);
00441 
00442             /* We've found a lp.  Now weed out the case where a matching
00443              * rp and a lb are not both found. */
00444         if (soffsetrp == -1 || soffsetlb == -1)
00445             break;
00446 
00447             /* Check if a left brace occurs before a left parenthesis;
00448              * if so, skip it */
00449         if (toffsetlb < toffsetlp) {  
00450             skipToMatchingBrace(sa, next + soffsetlb, boffsetlb,
00451                 &rbline, &rbindex);
00452             skipToSemicolon(sa, rbline, rbindex, &scline);
00453             begin = scline + 1;
00454             continue;
00455         }
00456 
00457             /* Check if a semicolon occurs before a left brace or
00458              * a left parenthesis; if so, skip it */
00459         if ((soffsetsc != -1) &&
00460             (toffsetsc < toffsetlb || toffsetsc < toffsetlp)) {  
00461             skipToSemicolon(sa, next, 0, &scline);
00462             begin = scline + 1;
00463             continue;
00464         }
00465 
00466             /* OK, it should be a function definition.  We haven't
00467              * checked that there is only white space between the
00468              * rp and lb, but we've only seen problems with two
00469              * extern inlines in sys/stat.h, and this is handled
00470              * later by eliminating any prototype beginning with 'extern'. */
00471         *pstart = next;
00472         *pstop = next + soffsetrp;
00473         *pcharindex = boffsetrp;
00474         *pfound = TRUE;
00475         break;
00476     }
00477 
00478     return 0;
00479 }
00480 
00481 
00482 /*
00483  *  captureProtoSignature()
00484  *
00485  *      Input:  sa (output from cpp, by line)
00486  *              start (starting index to search; never a comment line)
00487  *              stop (index of line on which pattern is completed)
00488  *              charindex (char index of completing ')' character)
00489  *      Return: cleanstr (prototype string), or NULL on error
00490  *
00491  *  Notes:
00492  *      (1) Return all characters, ending with a ';' after the ')'
00493  */
00494 static char *
00495 captureProtoSignature(SARRAY  *sa,
00496                       l_int32  start,
00497                       l_int32  stop,
00498                       l_int32  charindex)
00499 {
00500 char    *str, *newstr, *protostr, *cleanstr;
00501 SARRAY  *sap;
00502 l_int32  i;
00503 
00504     PROCNAME("captureProtoSignature");
00505 
00506     if (!sa)
00507         return (char *)ERROR_PTR("sa not defined", procName, NULL);
00508 
00509     sap = sarrayCreate(0);
00510     for (i = start; i < stop; i++) {
00511         str = sarrayGetString(sa, i, 1);
00512         sarrayAddString(sap, str, 0);
00513     }
00514     str = sarrayGetString(sa, stop, 1);
00515     str[charindex + 1] = '\0';
00516     newstr = stringJoin(str, ";");
00517     sarrayAddString(sap, newstr, 0);
00518     FREE(str);
00519     protostr = sarrayToString(sap, 2);
00520     sarrayDestroy(&sap);
00521     cleanstr = cleanProtoSignature(protostr);
00522     FREE(protostr);
00523 
00524     return cleanstr;
00525 }
00526 
00527 
00528 /*
00529  *  cleanProtoSignature()
00530  *
00531  *      Input:  instr (input prototype string)
00532  *      Return: cleanstr (clean prototype string), or NULL on error
00533  *
00534  *  Notes:
00535  *      (1) Adds 'extern' at beginning and regularizes spaces
00536  *          between tokens.
00537  */
00538 static char *
00539 cleanProtoSignature(char *instr)
00540 {
00541 char    *str, *cleanstr;
00542 char     buf[L_BUF_SIZE];
00543 char     externstring[] = "extern";
00544 l_int32  i, j, nwords, nchars, index, len;
00545 SARRAY  *sa, *saout;
00546 
00547     PROCNAME("cleanProtoSignature");
00548 
00549     if (!instr)
00550         return (char *)ERROR_PTR("instr not defined", procName, NULL);
00551 
00552     sa = sarrayCreateWordsFromString(instr);
00553     nwords = sarrayGetCount(sa);
00554     saout = sarrayCreate(0);
00555     sarrayAddString(saout, externstring, 1);
00556     for (i = 0; i < nwords; i++) {
00557         str = sarrayGetString(sa, i, 0);
00558         nchars = strlen(str);
00559         index = 0;
00560         for (j = 0; j < nchars; j++) {
00561             if (index > L_BUF_SIZE - 6)
00562                 return (char *)ERROR_PTR("token too large", procName, NULL);
00563             if (str[j] == '(') {
00564                 buf[index++] = ' ';
00565                 buf[index++] = '(';
00566                 buf[index++] = ' ';
00567             }
00568             else if (str[j] == ')') {
00569                 buf[index++] = ' ';
00570                 buf[index++] = ')';
00571             }
00572             else 
00573                 buf[index++] = str[j];
00574         }
00575         buf[index] = '\0';
00576         sarrayAddString(saout, buf, 1);
00577     }
00578 
00579         /* Flatten to a prototype string with spaces added after
00580          * each word, and remove the last space */
00581     cleanstr = sarrayToString(saout, 2);
00582     len = strlen(cleanstr);
00583     cleanstr[len - 1] = '\0';
00584 
00585     sarrayDestroy(&sa);
00586     sarrayDestroy(&saout);
00587     return cleanstr;
00588 }
00589 
00590 
00591 /*
00592  *  skipToEndOfFunction()
00593  *
00594  *      Input:  sa (output from cpp, by line)
00595  *              start (index of starting line with left bracket to search)
00596  *              lbindex (starting char index for left bracket)
00597  *              &next (index of line following the ending '}' for function
00598  *      Return: 0 if OK, 1 on error
00599  */
00600 static l_int32
00601 skipToEndOfFunction(SARRAY   *sa,
00602                     l_int32   start,
00603                     l_int32   lbindex,
00604                     l_int32  *pnext)
00605 {
00606 l_int32  end, rbindex;
00607 l_int32 soffsetlb, boffsetlb, toffsetlb;
00608 
00609     PROCNAME("skipToEndOfFunction");
00610 
00611     if (!sa)
00612         return ERROR_INT("sa not defined", procName, 1);
00613     if (!pnext)
00614         return ERROR_INT("&next not defined", procName, 1);
00615 
00616     getOffsetForCharacter(sa, start, '{', &soffsetlb, &boffsetlb,
00617                 &toffsetlb);
00618     skipToMatchingBrace(sa, start + soffsetlb, boffsetlb, &end, &rbindex);
00619     if (end == -1) {  /* shouldn't happen! */
00620         *pnext = -1;
00621         return 1;
00622     }
00623 
00624     *pnext = end + 1;
00625     return 0;
00626 }
00627 
00628 
00629 /*
00630  *  skipToMatchingBrace()
00631  *
00632  *      Input:  sa (output from cpp, by line)
00633  *              start (index of starting line with left bracket to search)
00634  *              lbindex (starting char index for left bracket)
00635  *              &stop (index of line with the matching right bracket)
00636  *              &rbindex (char index of matching right bracket)
00637  *      Return: 0 if OK, 1 on error
00638  *
00639  *  Notes:
00640  *      (1) If the matching right brace is not found, returns
00641  *          stop = -1.  This shouldn't happen.
00642  */
00643 static l_int32
00644 skipToMatchingBrace(SARRAY   *sa,
00645                     l_int32   start,
00646                     l_int32   lbindex,
00647                     l_int32  *pstop,
00648                     l_int32  *prbindex)
00649 {
00650 char    *str;
00651 l_int32  i, j, jstart, n, sumbrace, found, instring, nchars;
00652 
00653     PROCNAME("skipToMatchingBrace");
00654 
00655     if (!sa)
00656         return ERROR_INT("sa not defined", procName, 1);
00657     if (!pstop)
00658         return ERROR_INT("&stop not defined", procName, 1);
00659     if (!prbindex)
00660         return ERROR_INT("&rbindex not defined", procName, 1);
00661 
00662     instring = 0;  /* init to FALSE; toggle on double quotes */
00663     *pstop = -1;
00664     n = sarrayGetCount(sa);
00665     sumbrace = 1;
00666     found = FALSE;
00667     for (i = start; i < n; i++) {
00668         str = sarrayGetString(sa, i, 0);
00669         jstart = 0;
00670         if (i == start)
00671             jstart = lbindex + 1;
00672         nchars = strlen(str);
00673         for (j = jstart; j < nchars; j++) {
00674                 /* Toggle the instring state every time you encounter
00675                  * a double quote that is NOT escaped. */
00676             if (j == jstart && str[j] == '\"')
00677                 instring = 1 - instring;
00678             if (j > jstart && str[j] == '\"' && str[j-1] != '\\')
00679                 instring = 1 - instring;
00680                 /* Record the braces if they are neither a literal character
00681                  * nor within a string. */
00682             if (str[j] == '{' && str[j+1] != '\'' && !instring)
00683                 sumbrace++;
00684             else if (str[j] == '}' && str[j+1] != '\'' && !instring) {
00685                 sumbrace--;
00686                 if (sumbrace == 0) {
00687                     found = TRUE;
00688                     *prbindex = j;
00689                     break;
00690                 }
00691             }
00692         }
00693         if (found) {
00694             *pstop = i;
00695             return 0;
00696         }
00697     }
00698 
00699     return ERROR_INT("matching right brace not found", procName, 1);
00700 }
00701 
00702 
00703 /*
00704  *  skipToSemicolon()
00705  *
00706  *      Input:  sa (output from cpp, by line)
00707  *              start (index of starting line to search)
00708  *              charindex (starting char index for search)
00709  *              &next (index of line containing the next ';')
00710  *      Return: 0 if OK, 1 on error
00711  *
00712  *  Notes:
00713  *      (1) If the semicolon isn't found, returns next = -1.
00714  *          This shouldn't happen.
00715  *      (2) This is only used in contexts where the semicolon is
00716  *          not within a string.
00717  */
00718 static l_int32
00719 skipToSemicolon(SARRAY   *sa,
00720                 l_int32   start,
00721                 l_int32   charindex,
00722                 l_int32  *pnext)
00723 {
00724 char    *str;
00725 l_int32  i, j, n, jstart, nchars, found;
00726 
00727     PROCNAME("skipToSemicolon");
00728 
00729     if (!sa)
00730         return ERROR_INT("sa not defined", procName, 1);
00731     if (!pnext)
00732         return ERROR_INT("&next not defined", procName, 1);
00733 
00734     *pnext = -1;
00735     n = sarrayGetCount(sa);
00736     found = FALSE;
00737     for (i = start; i < n; i++) {
00738         str = sarrayGetString(sa, i, 0);
00739         jstart = 0;
00740         if (i == start)
00741             jstart = charindex + 1;
00742         nchars = strlen(str);
00743         for (j = jstart; j < nchars; j++) {
00744             if (str[j] == ';') {
00745                 found = TRUE;;
00746                 break;
00747             }
00748         }
00749         if (found) {
00750             *pnext = i;
00751             return 0;
00752         }
00753     }
00754 
00755     return ERROR_INT("semicolon not found", procName, 1);
00756 }
00757 
00758 
00759 /*
00760  *  getOffsetForCharacter()
00761  *
00762  *      Input:  sa (output from cpp, by line)
00763  *              start (starting index in sa to search; never a comment line)
00764  *              tchar (we are searching for the first instance of this)
00765  *              &soffset (<return> offset in strings from start index)
00766  *              &boffset (<return> offset in bytes within string in which
00767  *                        the character is first found)
00768  *              &toffset (<return> offset in total bytes from beginning of
00769  *                        string indexed by 'start' to the location where
00770  *                        the character is first found)
00771  *      Return: 0 if OK, 1 on error
00772  *
00773  *  Notes:
00774  *      (1) We are searching for the first instance of 'tchar', starting
00775  *          at the beginning of the string indexed by start.
00776  *      (2) If the character is not found, soffset is returned as -1,
00777  *          and the other offsets are set to very large numbers.  The
00778  *          caller must check the value of soffset.
00779  *      (3) This is only used in contexts where it is not necessary to
00780  *          consider if the character is inside a string.
00781  */
00782 static l_int32
00783 getOffsetForCharacter(SARRAY   *sa,
00784                       l_int32   start,
00785                       char      tchar,
00786                       l_int32  *psoffset,
00787                       l_int32  *pboffset,
00788                       l_int32  *ptoffset)
00789 {
00790 char    *str;
00791 l_int32  i, j, n, nchars, totchars, found;
00792 
00793     PROCNAME("getOffsetForCharacter");
00794 
00795     if (!sa)
00796         return ERROR_INT("sa not defined", procName, 1);
00797     if (!psoffset)
00798         return ERROR_INT("&soffset not defined", procName, 1);
00799     if (!pboffset)
00800         return ERROR_INT("&boffset not defined", procName, 1);
00801     if (!ptoffset)
00802         return ERROR_INT("&toffset not defined", procName, 1);
00803 
00804     *psoffset = -1;  /* init to not found */
00805     *pboffset = 100000000;
00806     *ptoffset = 100000000;
00807 
00808     n = sarrayGetCount(sa);
00809     found = FALSE; 
00810     totchars = 0;
00811     for (i = start; i < n; i++) {
00812         if ((str = sarrayGetString(sa, i, 0)) == NULL)
00813             return ERROR_INT("str not returned; shouldn't happen", procName, 1);
00814         nchars = strlen(str);
00815         for (j = 0; j < nchars; j++) {
00816             if (str[j] == tchar) {
00817                 found = TRUE;
00818                 break;
00819             }
00820         }
00821         if (found)
00822             break;
00823         totchars += nchars;
00824     }
00825 
00826     if (found) {
00827         *psoffset = i - start;
00828         *pboffset = j;
00829         *ptoffset = totchars + j;
00830     }
00831 
00832     return 0;
00833 }
00834 
00835 
00836 /*
00837  *  getOffsetForMatchingRP()
00838  *
00839  *      Input:  sa (output from cpp, by line)
00840  *              start (starting index in sa to search; never a comment line)
00841  *              soffsetlp (string offset to first LP)
00842  *              boffsetlp (byte offset within string to first LP)
00843  *              toffsetlp (total byte offset to first LP)
00844  *              &soffset (<return> offset in strings from start index)
00845  *              &boffset (<return> offset in bytes within string in which
00846  *                        the matching RP is found)
00847  *              &toffset (<return> offset in total bytes from beginning of
00848  *                        string indexed by 'start' to the location where
00849  *                        the matching RP is found);
00850  *      Return: 0 if OK, 1 on error
00851  *
00852  *  Notes:
00853  *      (1) We are searching for the matching right parenthesis (RP) that
00854  *          corresponds to the first LP found beginning at the string
00855  *          indexed by start.
00856  *      (2) If the matching RP is not found, soffset is returned as -1,
00857  *          and the other offsets are set to very large numbers.  The
00858  *          caller must check the value of soffset.
00859  *      (3) This is only used in contexts where it is not necessary to
00860  *          consider if the character is inside a string.
00861  *      (4) We must do this because although most arg lists have a single
00862  *          left and right parenthesis, it is possible to construct
00863  *          more complicated prototype declarations, such as those
00864  *          where functions are passed in.  The C++ rules for prototypes
00865  *          are strict, and require that for functions passed in as args,
00866  *          the function name arg be placed in parenthesis, as well
00867  *          as its arg list, thus incurring two extra levels of parentheses.
00868  */
00869 static l_int32
00870 getOffsetForMatchingRP(SARRAY   *sa,
00871                        l_int32   start,
00872                        l_int32   soffsetlp,
00873                        l_int32   boffsetlp,
00874                        l_int32   toffsetlp,
00875                        l_int32  *psoffset,
00876                        l_int32  *pboffset,
00877                        l_int32  *ptoffset)
00878 {
00879 char    *str;
00880 l_int32  i, j, n, nchars, totchars, leftmatch, firstline, jstart, found;
00881 
00882     PROCNAME("getOffsetForMatchingRP");
00883 
00884     if (!sa)
00885         return ERROR_INT("sa not defined", procName, 1);
00886     if (!psoffset)
00887         return ERROR_INT("&soffset not defined", procName, 1);
00888     if (!pboffset)
00889         return ERROR_INT("&boffset not defined", procName, 1);
00890     if (!ptoffset)
00891         return ERROR_INT("&toffset not defined", procName, 1);
00892 
00893     *psoffset = -1;  /* init to not found */
00894     *pboffset = 100000000;
00895     *ptoffset = 100000000;
00896 
00897     n = sarrayGetCount(sa);
00898     found = FALSE; 
00899     totchars = toffsetlp;
00900     leftmatch = 1;  /* count of (LP - RP); we're finished when it goes to 0. */
00901     firstline = start + soffsetlp;
00902     for (i = firstline; i < n; i++) {
00903         if ((str = sarrayGetString(sa, i, 0)) == NULL)
00904             return ERROR_INT("str not returned; shouldn't happen", procName, 1);
00905         nchars = strlen(str);
00906         jstart = 0;
00907         if (i == firstline)
00908             jstart = boffsetlp + 1;
00909         for (j = jstart; j < nchars; j++) {
00910             if (str[j] == '(')
00911                 leftmatch++;
00912             else if (str[j] == ')')
00913                 leftmatch--;
00914             if (leftmatch == 0) {
00915                 found = TRUE;
00916                 break;
00917             }
00918         }
00919         if (found)
00920             break;
00921         if (i == firstline)
00922             totchars += nchars - boffsetlp;
00923         else
00924             totchars += nchars;
00925     }
00926 
00927     if (found) {
00928         *psoffset = i - start;
00929         *pboffset = j;
00930         *ptoffset = totchars + j;
00931     }
00932 
00933     return 0;
00934 }
00935 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines