Leptonica 1.68
C Image Processing Library
|
00001 /*====================================================================* 00002 - Copyright (C) 2001 Leptonica. All rights reserved. 00003 - This software is distributed in the hope that it will be 00004 - useful, but with NO WARRANTY OF ANY KIND. 00005 - No author or distributor accepts responsibility to anyone for the 00006 - consequences of using this software, or for whether it serves any 00007 - particular purpose or works at all, unless he or she says so in 00008 - writing. Everyone is granted permission to copy, modify and 00009 - redistribute this source code, for commercial or non-commercial 00010 - purposes, with the following restrictions: (1) the origin of this 00011 - source code must not be misrepresented; (2) modified versions must 00012 - be plainly marked as such; and (3) this notice may not be removed 00013 - or altered from any source or modified source distribution. 00014 *====================================================================*/ 00015 00016 /* 00017 * parseprotos.c 00018 * 00019 * char *parseForProtos() 00020 * 00021 * Static helpers 00022 * static l_int32 getNextNonCommentLine() 00023 * static l_int32 getNextNonBlankLine() 00024 * static l_int32 getNextNonDoubleSlashLine() 00025 * static l_int32 searchForProtoSignature() 00026 * static char *captureProtoSignature() 00027 * static char *cleanProtoSignature() 00028 * static l_int32 skipToEndOfFunction() 00029 * static l_int32 skipToMatchingBrace() 00030 * static l_int32 skipToSemicolon() 00031 * static l_int32 getOffsetForCharacter() 00032 * static l_int32 getOffsetForMatchingRP() 00033 */ 00034 00035 #include <string.h> 00036 #include "allheaders.h" 00037 00038 /* MS VC++ can't handle array initialization with static consts ! */ 00039 #define L_BUF_SIZE 512 /* max token size */ 00040 00041 00042 static l_int32 getNextNonCommentLine(SARRAY *sa, l_int32 start, l_int32 *pnext); 00043 static l_int32 getNextNonBlankLine(SARRAY *sa, l_int32 start, l_int32 *pnext); 00044 static l_int32 getNextNonDoubleSlashLine(SARRAY *sa, l_int32 start, 00045 l_int32 *pnext); 00046 static l_int32 searchForProtoSignature(SARRAY *sa, l_int32 begin, 00047 l_int32 *pstart, l_int32 *pstop, l_int32 *pcharindex, 00048 l_int32 *pfound); 00049 static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop, 00050 l_int32 charindex); 00051 static char * cleanProtoSignature(char *str); 00052 static l_int32 skipToEndOfFunction(SARRAY *sa, l_int32 start, 00053 l_int32 charindex, l_int32 *pnext); 00054 static l_int32 skipToMatchingBrace(SARRAY *sa, l_int32 start, 00055 l_int32 lbindex, l_int32 *prbline, l_int32 *prbindex); 00056 static l_int32 skipToSemicolon(SARRAY *sa, l_int32 start, 00057 l_int32 charindex, l_int32 *pnext); 00058 static l_int32 getOffsetForCharacter(SARRAY *sa, l_int32 start, char tchar, 00059 l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset); 00060 static l_int32 getOffsetForMatchingRP(SARRAY *sa, l_int32 start, 00061 l_int32 soffsetlp, l_int32 boffsetlp, l_int32 toffsetlp, 00062 l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset); 00063 00064 00065 /* 00066 * parseForProtos() 00067 * 00068 * Input: filein (output of cpp) 00069 * prestring (<optional> string that prefaces each decl; 00070 * use NULL to omit) 00071 * Return: parsestr (string of function prototypes), or NULL on error 00072 * 00073 * Notes: 00074 * (1) We parse the output of cpp: 00075 * cpp -ansi <filein> 00076 * Three plans were attempted, with success on the third. 00077 * (2) Plan 1. A cursory examination of the cpp output indicated that 00078 * every function was preceeded by a cpp comment statement. 00079 * So we just need to look at statements beginning after comments. 00080 * Unfortunately, this is NOT the case. Some functions start 00081 * without cpp comment lines, typically when there are no 00082 * comments in the source that immediately precede the function. 00083 * (3) Plan 2. Consider the keywords in the language that start 00084 * parts of the cpp file. Some, like 'typedef', 'enum', 00085 * 'union' and 'struct', are followed after a while by '{', 00086 * and eventually end with '}, plus an optional token and a 00087 * final ';' Others, like 'extern' and 'static', are never 00088 * the beginnings of global function definitions. Function 00089 * prototypes have one or more sets of '(' followed eventually 00090 * by a ')', and end with ';'. But function definitions have 00091 * tokens, followed by '(', more tokens, ')' and then 00092 * immediately a '{'. We would generate a prototype from this 00093 * by adding a ';' to all tokens up to the ')'. So we use 00094 * these special tokens to decide what we are parsing. And 00095 * whenever a function definition is found and the prototype 00096 * extracted, we skip through the rest of the function 00097 * past the corresponding '}'. This token ends a line, and 00098 * is often on a line of its own. But as it turns out, 00099 * the only keyword we need to consider is 'static'. 00100 * (4) Plan 3. Consider the parentheses and braces for various 00101 * declarations. A struct, enum, or union has a pair of 00102 * braces followed by a semicolon. They cannot have parentheses 00103 * before the left brace, but a struct can have lots of parentheses 00104 * within the brace set. A function prototype has no braces. 00105 * A function declaration can have sets of left and right 00106 * parentheses, but these are followed by a left brace. 00107 * So plan 3 looks at the way parentheses and braces are 00108 * organized. Once the beginning of a function definition 00109 * is found, the prototype is extracted and we search for 00110 * the ending right brace. 00111 * (5) To find the ending right brace, it is necessary to do some 00112 * careful parsing. For example, in this file, we have 00113 * left and right braces as characters, and these must not 00114 * be counted. Somewhat more tricky, the file fhmtauto.c 00115 * generates code, and includes a right brace in a string. 00116 * So we must not include braces that are in strings. But how 00117 * do we know if something is inside a string? Keep state, 00118 * starting with not-inside, and every time you hit a double quote 00119 * that is not escaped, toggle the condition. Any brace 00120 * found in the state of being within a string is ignored. 00121 * (6) When a prototype is extracted, it is put in a canonical 00122 * form (i.e., cleaned up). Finally, we check that it is 00123 * not static and save it. (If static, it is ignored). 00124 * (7) The @prestring for unix is NULL; it is included here so that 00125 * you can use Microsoft's declaration for importing or 00126 * exporting to a dll. See environ.h for examples of use. 00127 * Here, we set: @prestring = "LEPT_DLL ". Note in particular 00128 * the space character that will separate 'LEPT_DLL' from 00129 * the standard unix prototype that follows. 00130 */ 00131 char * 00132 parseForProtos(const char *filein, 00133 const char *prestring) 00134 { 00135 char *strdata, *str, *newstr, *parsestr, *secondword; 00136 l_int32 start, next, stop, charindex, found; 00137 size_t nbytes; 00138 SARRAY *sa, *saout, *satest; 00139 00140 PROCNAME("parseForProtos"); 00141 00142 if (!filein) 00143 return (char *)ERROR_PTR("filein not defined", procName, NULL); 00144 00145 /* Read in the cpp output into memory, one string for each 00146 * line in the file, omitting blank lines. */ 00147 strdata = (char *)l_binaryRead(filein, &nbytes); 00148 sa = sarrayCreateLinesFromString(strdata, 0); 00149 00150 saout = sarrayCreate(0); 00151 next = 0; 00152 while (1) { /* repeat after each non-static prototype is extracted */ 00153 searchForProtoSignature(sa, next, &start, &stop, &charindex, &found); 00154 if (!found) 00155 break; 00156 /* fprintf(stderr, " start = %d, stop = %d, charindex = %d\n", 00157 start, stop, charindex); */ 00158 str = captureProtoSignature(sa, start, stop, charindex); 00159 00160 /* Make sure that the signature found by cpp is neither 00161 * static nor extern. We get 'extern' declarations from 00162 * header files, and with some versions of cpp running on 00163 * #include <sys/stat.h> we get something of the form: 00164 * extern ... (( ... )) ... ( ... ) { ... 00165 * For this, the 1st '(' is the lp, the 2nd ')' is the rp, 00166 * and there is a lot of garbage between the rp and the lb. 00167 * It is easiest to simply reject any signature that starts 00168 * with 'extern'. Note also that an 'extern' token has been 00169 * prepended to each prototype, so the 'static' or 00170 * 'extern' keywords we are looking for, if they exist, 00171 * would be the second word. */ 00172 satest = sarrayCreateWordsFromString(str); 00173 secondword = sarrayGetString(satest, 1, 0); 00174 if (strcmp(secondword, "static") && /* not static */ 00175 strcmp(secondword, "extern")) { /* not extern */ 00176 if (prestring) { /* prepend it to the prototype */ 00177 newstr = stringJoin(prestring, str); 00178 sarrayAddString(saout, newstr, L_INSERT); 00179 FREE(str); 00180 } 00181 else 00182 sarrayAddString(saout, str, L_INSERT); 00183 } 00184 else 00185 FREE(str); 00186 sarrayDestroy(&satest); 00187 00188 skipToEndOfFunction(sa, stop, charindex, &next); 00189 if (next == -1) break; 00190 } 00191 00192 /* Flatten into a string with newlines between prototypes */ 00193 parsestr = sarrayToString(saout, 1); 00194 FREE(strdata); 00195 sarrayDestroy(&sa); 00196 sarrayDestroy(&saout); 00197 00198 return parsestr; 00199 } 00200 00201 00202 /* 00203 * getNextNonCommentLine() 00204 * 00205 * Input: sa (output from cpp, by line) 00206 * start (starting index to search) 00207 * &next (<return> index of first uncommented line after 00208 * the start line) 00209 * Return: 0 if OK, 1 on error 00210 * 00211 * Notes: 00212 * (1) Skips over all consecutive comment lines, beginning at 'start' 00213 * (2) If all lines to the end are '#' comments, return next = -1 00214 */ 00215 static l_int32 00216 getNextNonCommentLine(SARRAY *sa, 00217 l_int32 start, 00218 l_int32 *pnext) 00219 { 00220 char *str; 00221 l_int32 i, n; 00222 00223 PROCNAME("getNextNonCommentLine"); 00224 00225 if (!sa) 00226 return ERROR_INT("sa not defined", procName, 1); 00227 if (!pnext) 00228 return ERROR_INT("&pnext not defined", procName, 1); 00229 00230 /* Init for situation where this line and all following are comments */ 00231 *pnext = -1; 00232 00233 n = sarrayGetCount(sa); 00234 for (i = start; i < n; i++) { 00235 if ((str = sarrayGetString(sa, i, 0)) == NULL) 00236 return ERROR_INT("str not returned; shouldn't happen", procName, 1); 00237 if (str[0] != '#') { 00238 *pnext = i; 00239 return 0; 00240 } 00241 } 00242 00243 return 0; 00244 } 00245 00246 00247 /* 00248 * getNextNonBlankLine() 00249 * 00250 * Input: sa (output from cpp, by line) 00251 * start (starting index to search) 00252 * &next (<return> index of first nonblank line after 00253 * the start line) 00254 * Return: 0 if OK, 1 on error 00255 * 00256 * Notes: 00257 * (1) Skips over all consecutive blank lines, beginning at 'start' 00258 * (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r') 00259 * (3) If all lines to the end are blank, return next = -1 00260 */ 00261 static l_int32 00262 getNextNonBlankLine(SARRAY *sa, 00263 l_int32 start, 00264 l_int32 *pnext) 00265 { 00266 char *str; 00267 l_int32 i, j, n, len; 00268 00269 PROCNAME("getNextNonBlankLine"); 00270 00271 if (!sa) 00272 return ERROR_INT("sa not defined", procName, 1); 00273 if (!pnext) 00274 return ERROR_INT("&pnext not defined", procName, 1); 00275 00276 /* Init for situation where this line and all following are blank */ 00277 *pnext = -1; 00278 00279 n = sarrayGetCount(sa); 00280 for (i = start; i < n; i++) { 00281 if ((str = sarrayGetString(sa, i, 0)) == NULL) 00282 return ERROR_INT("str not returned; shouldn't happen", procName, 1); 00283 len = strlen(str); 00284 for (j = 0; j < len; j++) { 00285 if (str[j] != ' ' && str[j] != '\t' 00286 && str[j] != '\n' && str[j] != '\r') { /* non-blank */ 00287 *pnext = i; 00288 return 0; 00289 } 00290 } 00291 } 00292 00293 return 0; 00294 } 00295 00296 00297 /* 00298 * getNextNonDoubleSlashLine() 00299 * 00300 * Input: sa (output from cpp, by line) 00301 * start (starting index to search) 00302 * &next (<return> index of first uncommented line after 00303 * the start line) 00304 * Return: 0 if OK, 1 on error 00305 * 00306 * Notes: 00307 * (1) Skips over all consecutive '//' lines, beginning at 'start' 00308 * (2) If all lines to the end start with '//', return next = -1 00309 */ 00310 static l_int32 00311 getNextNonDoubleSlashLine(SARRAY *sa, 00312 l_int32 start, 00313 l_int32 *pnext) 00314 { 00315 char *str; 00316 l_int32 i, n, len; 00317 00318 PROCNAME("getNextNonDoubleSlashLine"); 00319 00320 if (!sa) 00321 return ERROR_INT("sa not defined", procName, 1); 00322 if (!pnext) 00323 return ERROR_INT("&pnext not defined", procName, 1); 00324 00325 /* Init for situation where this line and all following 00326 * start with '//' */ 00327 *pnext = -1; 00328 00329 n = sarrayGetCount(sa); 00330 for (i = start; i < n; i++) { 00331 if ((str = sarrayGetString(sa, i, 0)) == NULL) 00332 return ERROR_INT("str not returned; shouldn't happen", procName, 1); 00333 len = strlen(str); 00334 if (len < 2 || str[0] != '/' || str[1] != '/') { 00335 *pnext = i; 00336 return 0; 00337 } 00338 } 00339 00340 return 0; 00341 } 00342 00343 00344 /* 00345 * searchForProtoSignature() 00346 * 00347 * Input: sa (output from cpp, by line) 00348 * begin (beginning index to search) 00349 * &start (<return> starting index for function definition) 00350 * &stop (<return> index of line on which proto is completed) 00351 * &charindex (<return> char index of completing ')' character) 00352 * &found (<return> 1 if valid signature is found; 0 otherwise) 00353 * Return: 0 if OK, 1 on error 00354 * 00355 * Notes: 00356 * (1) If this returns found == 0, it means that there are no 00357 * more function definitions in the file. Caller must check 00358 * this value and exit the loop over the entire cpp file. 00359 * (2) This follows plan 3 (see above). We skip comment and blank 00360 * lines at the beginning. Then we don't check for keywords. 00361 * Instead, find the relative locations of the first occurrences 00362 * of these four tokens: left parenthesis (lp), right 00363 * parenthesis (rp), left brace (lb) and semicolon (sc). 00364 * (3) The signature of a function definition looks like this: 00365 * .... '(' .... ')' '{' 00366 * where the lp and rp must both precede the lb, with only 00367 * whitespace between the rp and the lb. The '....' 00368 * are sets of tokens that have no braces. 00369 * (4) If a function definition is found, this returns found = 1, 00370 * with 'start' being the first line of the definition and 00371 * 'charindex' being the position of the ')' in line 'stop' 00372 * at the end of the arg list. 00373 */ 00374 static l_int32 00375 searchForProtoSignature(SARRAY *sa, 00376 l_int32 begin, 00377 l_int32 *pstart, 00378 l_int32 *pstop, 00379 l_int32 *pcharindex, 00380 l_int32 *pfound) 00381 { 00382 l_int32 next, rbline, rbindex, scline; 00383 l_int32 soffsetlp, soffsetrp, soffsetlb, soffsetsc; 00384 l_int32 boffsetlp, boffsetrp, boffsetlb, boffsetsc; 00385 l_int32 toffsetlp, toffsetrp, toffsetlb, toffsetsc; 00386 00387 PROCNAME("searchForProtoSignature"); 00388 00389 if (!sa) 00390 return ERROR_INT("sa not defined", procName, 1); 00391 if (!pstart) 00392 return ERROR_INT("&start not defined", procName, 1); 00393 if (!pstop) 00394 return ERROR_INT("&stop not defined", procName, 1); 00395 if (!pcharindex) 00396 return ERROR_INT("&charindex not defined", procName, 1); 00397 if (!pfound) 00398 return ERROR_INT("&found not defined", procName, 1); 00399 00400 *pfound = FALSE; 00401 00402 while (1) { 00403 00404 /* Skip over sequential '#' comment lines */ 00405 getNextNonCommentLine(sa, begin, &next); 00406 if (next == -1) return 0; 00407 if (next != begin) { 00408 begin = next; 00409 continue; 00410 } 00411 00412 /* Skip over sequential blank lines */ 00413 getNextNonBlankLine(sa, begin, &next); 00414 if (next == -1) return 0; 00415 if (next != begin) { 00416 begin = next; 00417 continue; 00418 } 00419 00420 /* Skip over sequential lines starting with '//' */ 00421 getNextNonDoubleSlashLine(sa, begin, &next); 00422 if (next == -1) return 0; 00423 if (next != begin) { 00424 begin = next; 00425 continue; 00426 } 00427 00428 /* Search for specific character sequence patterns; namely 00429 * a lp, a matching rp, a lb and a semicolon. 00430 * Abort the search if no lp is found. */ 00431 getOffsetForCharacter(sa, next, '(', &soffsetlp, &boffsetlp, 00432 &toffsetlp); 00433 if (soffsetlp == -1) 00434 break; 00435 getOffsetForMatchingRP(sa, next, soffsetlp, boffsetlp, toffsetlp, 00436 &soffsetrp, &boffsetrp, &toffsetrp); 00437 getOffsetForCharacter(sa, next, '{', &soffsetlb, &boffsetlb, 00438 &toffsetlb); 00439 getOffsetForCharacter(sa, next, ';', &soffsetsc, &boffsetsc, 00440 &toffsetsc); 00441 00442 /* We've found a lp. Now weed out the case where a matching 00443 * rp and a lb are not both found. */ 00444 if (soffsetrp == -1 || soffsetlb == -1) 00445 break; 00446 00447 /* Check if a left brace occurs before a left parenthesis; 00448 * if so, skip it */ 00449 if (toffsetlb < toffsetlp) { 00450 skipToMatchingBrace(sa, next + soffsetlb, boffsetlb, 00451 &rbline, &rbindex); 00452 skipToSemicolon(sa, rbline, rbindex, &scline); 00453 begin = scline + 1; 00454 continue; 00455 } 00456 00457 /* Check if a semicolon occurs before a left brace or 00458 * a left parenthesis; if so, skip it */ 00459 if ((soffsetsc != -1) && 00460 (toffsetsc < toffsetlb || toffsetsc < toffsetlp)) { 00461 skipToSemicolon(sa, next, 0, &scline); 00462 begin = scline + 1; 00463 continue; 00464 } 00465 00466 /* OK, it should be a function definition. We haven't 00467 * checked that there is only white space between the 00468 * rp and lb, but we've only seen problems with two 00469 * extern inlines in sys/stat.h, and this is handled 00470 * later by eliminating any prototype beginning with 'extern'. */ 00471 *pstart = next; 00472 *pstop = next + soffsetrp; 00473 *pcharindex = boffsetrp; 00474 *pfound = TRUE; 00475 break; 00476 } 00477 00478 return 0; 00479 } 00480 00481 00482 /* 00483 * captureProtoSignature() 00484 * 00485 * Input: sa (output from cpp, by line) 00486 * start (starting index to search; never a comment line) 00487 * stop (index of line on which pattern is completed) 00488 * charindex (char index of completing ')' character) 00489 * Return: cleanstr (prototype string), or NULL on error 00490 * 00491 * Notes: 00492 * (1) Return all characters, ending with a ';' after the ')' 00493 */ 00494 static char * 00495 captureProtoSignature(SARRAY *sa, 00496 l_int32 start, 00497 l_int32 stop, 00498 l_int32 charindex) 00499 { 00500 char *str, *newstr, *protostr, *cleanstr; 00501 SARRAY *sap; 00502 l_int32 i; 00503 00504 PROCNAME("captureProtoSignature"); 00505 00506 if (!sa) 00507 return (char *)ERROR_PTR("sa not defined", procName, NULL); 00508 00509 sap = sarrayCreate(0); 00510 for (i = start; i < stop; i++) { 00511 str = sarrayGetString(sa, i, 1); 00512 sarrayAddString(sap, str, 0); 00513 } 00514 str = sarrayGetString(sa, stop, 1); 00515 str[charindex + 1] = '\0'; 00516 newstr = stringJoin(str, ";"); 00517 sarrayAddString(sap, newstr, 0); 00518 FREE(str); 00519 protostr = sarrayToString(sap, 2); 00520 sarrayDestroy(&sap); 00521 cleanstr = cleanProtoSignature(protostr); 00522 FREE(protostr); 00523 00524 return cleanstr; 00525 } 00526 00527 00528 /* 00529 * cleanProtoSignature() 00530 * 00531 * Input: instr (input prototype string) 00532 * Return: cleanstr (clean prototype string), or NULL on error 00533 * 00534 * Notes: 00535 * (1) Adds 'extern' at beginning and regularizes spaces 00536 * between tokens. 00537 */ 00538 static char * 00539 cleanProtoSignature(char *instr) 00540 { 00541 char *str, *cleanstr; 00542 char buf[L_BUF_SIZE]; 00543 char externstring[] = "extern"; 00544 l_int32 i, j, nwords, nchars, index, len; 00545 SARRAY *sa, *saout; 00546 00547 PROCNAME("cleanProtoSignature"); 00548 00549 if (!instr) 00550 return (char *)ERROR_PTR("instr not defined", procName, NULL); 00551 00552 sa = sarrayCreateWordsFromString(instr); 00553 nwords = sarrayGetCount(sa); 00554 saout = sarrayCreate(0); 00555 sarrayAddString(saout, externstring, 1); 00556 for (i = 0; i < nwords; i++) { 00557 str = sarrayGetString(sa, i, 0); 00558 nchars = strlen(str); 00559 index = 0; 00560 for (j = 0; j < nchars; j++) { 00561 if (index > L_BUF_SIZE - 6) 00562 return (char *)ERROR_PTR("token too large", procName, NULL); 00563 if (str[j] == '(') { 00564 buf[index++] = ' '; 00565 buf[index++] = '('; 00566 buf[index++] = ' '; 00567 } 00568 else if (str[j] == ')') { 00569 buf[index++] = ' '; 00570 buf[index++] = ')'; 00571 } 00572 else 00573 buf[index++] = str[j]; 00574 } 00575 buf[index] = '\0'; 00576 sarrayAddString(saout, buf, 1); 00577 } 00578 00579 /* Flatten to a prototype string with spaces added after 00580 * each word, and remove the last space */ 00581 cleanstr = sarrayToString(saout, 2); 00582 len = strlen(cleanstr); 00583 cleanstr[len - 1] = '\0'; 00584 00585 sarrayDestroy(&sa); 00586 sarrayDestroy(&saout); 00587 return cleanstr; 00588 } 00589 00590 00591 /* 00592 * skipToEndOfFunction() 00593 * 00594 * Input: sa (output from cpp, by line) 00595 * start (index of starting line with left bracket to search) 00596 * lbindex (starting char index for left bracket) 00597 * &next (index of line following the ending '}' for function 00598 * Return: 0 if OK, 1 on error 00599 */ 00600 static l_int32 00601 skipToEndOfFunction(SARRAY *sa, 00602 l_int32 start, 00603 l_int32 lbindex, 00604 l_int32 *pnext) 00605 { 00606 l_int32 end, rbindex; 00607 l_int32 soffsetlb, boffsetlb, toffsetlb; 00608 00609 PROCNAME("skipToEndOfFunction"); 00610 00611 if (!sa) 00612 return ERROR_INT("sa not defined", procName, 1); 00613 if (!pnext) 00614 return ERROR_INT("&next not defined", procName, 1); 00615 00616 getOffsetForCharacter(sa, start, '{', &soffsetlb, &boffsetlb, 00617 &toffsetlb); 00618 skipToMatchingBrace(sa, start + soffsetlb, boffsetlb, &end, &rbindex); 00619 if (end == -1) { /* shouldn't happen! */ 00620 *pnext = -1; 00621 return 1; 00622 } 00623 00624 *pnext = end + 1; 00625 return 0; 00626 } 00627 00628 00629 /* 00630 * skipToMatchingBrace() 00631 * 00632 * Input: sa (output from cpp, by line) 00633 * start (index of starting line with left bracket to search) 00634 * lbindex (starting char index for left bracket) 00635 * &stop (index of line with the matching right bracket) 00636 * &rbindex (char index of matching right bracket) 00637 * Return: 0 if OK, 1 on error 00638 * 00639 * Notes: 00640 * (1) If the matching right brace is not found, returns 00641 * stop = -1. This shouldn't happen. 00642 */ 00643 static l_int32 00644 skipToMatchingBrace(SARRAY *sa, 00645 l_int32 start, 00646 l_int32 lbindex, 00647 l_int32 *pstop, 00648 l_int32 *prbindex) 00649 { 00650 char *str; 00651 l_int32 i, j, jstart, n, sumbrace, found, instring, nchars; 00652 00653 PROCNAME("skipToMatchingBrace"); 00654 00655 if (!sa) 00656 return ERROR_INT("sa not defined", procName, 1); 00657 if (!pstop) 00658 return ERROR_INT("&stop not defined", procName, 1); 00659 if (!prbindex) 00660 return ERROR_INT("&rbindex not defined", procName, 1); 00661 00662 instring = 0; /* init to FALSE; toggle on double quotes */ 00663 *pstop = -1; 00664 n = sarrayGetCount(sa); 00665 sumbrace = 1; 00666 found = FALSE; 00667 for (i = start; i < n; i++) { 00668 str = sarrayGetString(sa, i, 0); 00669 jstart = 0; 00670 if (i == start) 00671 jstart = lbindex + 1; 00672 nchars = strlen(str); 00673 for (j = jstart; j < nchars; j++) { 00674 /* Toggle the instring state every time you encounter 00675 * a double quote that is NOT escaped. */ 00676 if (j == jstart && str[j] == '\"') 00677 instring = 1 - instring; 00678 if (j > jstart && str[j] == '\"' && str[j-1] != '\\') 00679 instring = 1 - instring; 00680 /* Record the braces if they are neither a literal character 00681 * nor within a string. */ 00682 if (str[j] == '{' && str[j+1] != '\'' && !instring) 00683 sumbrace++; 00684 else if (str[j] == '}' && str[j+1] != '\'' && !instring) { 00685 sumbrace--; 00686 if (sumbrace == 0) { 00687 found = TRUE; 00688 *prbindex = j; 00689 break; 00690 } 00691 } 00692 } 00693 if (found) { 00694 *pstop = i; 00695 return 0; 00696 } 00697 } 00698 00699 return ERROR_INT("matching right brace not found", procName, 1); 00700 } 00701 00702 00703 /* 00704 * skipToSemicolon() 00705 * 00706 * Input: sa (output from cpp, by line) 00707 * start (index of starting line to search) 00708 * charindex (starting char index for search) 00709 * &next (index of line containing the next ';') 00710 * Return: 0 if OK, 1 on error 00711 * 00712 * Notes: 00713 * (1) If the semicolon isn't found, returns next = -1. 00714 * This shouldn't happen. 00715 * (2) This is only used in contexts where the semicolon is 00716 * not within a string. 00717 */ 00718 static l_int32 00719 skipToSemicolon(SARRAY *sa, 00720 l_int32 start, 00721 l_int32 charindex, 00722 l_int32 *pnext) 00723 { 00724 char *str; 00725 l_int32 i, j, n, jstart, nchars, found; 00726 00727 PROCNAME("skipToSemicolon"); 00728 00729 if (!sa) 00730 return ERROR_INT("sa not defined", procName, 1); 00731 if (!pnext) 00732 return ERROR_INT("&next not defined", procName, 1); 00733 00734 *pnext = -1; 00735 n = sarrayGetCount(sa); 00736 found = FALSE; 00737 for (i = start; i < n; i++) { 00738 str = sarrayGetString(sa, i, 0); 00739 jstart = 0; 00740 if (i == start) 00741 jstart = charindex + 1; 00742 nchars = strlen(str); 00743 for (j = jstart; j < nchars; j++) { 00744 if (str[j] == ';') { 00745 found = TRUE;; 00746 break; 00747 } 00748 } 00749 if (found) { 00750 *pnext = i; 00751 return 0; 00752 } 00753 } 00754 00755 return ERROR_INT("semicolon not found", procName, 1); 00756 } 00757 00758 00759 /* 00760 * getOffsetForCharacter() 00761 * 00762 * Input: sa (output from cpp, by line) 00763 * start (starting index in sa to search; never a comment line) 00764 * tchar (we are searching for the first instance of this) 00765 * &soffset (<return> offset in strings from start index) 00766 * &boffset (<return> offset in bytes within string in which 00767 * the character is first found) 00768 * &toffset (<return> offset in total bytes from beginning of 00769 * string indexed by 'start' to the location where 00770 * the character is first found) 00771 * Return: 0 if OK, 1 on error 00772 * 00773 * Notes: 00774 * (1) We are searching for the first instance of 'tchar', starting 00775 * at the beginning of the string indexed by start. 00776 * (2) If the character is not found, soffset is returned as -1, 00777 * and the other offsets are set to very large numbers. The 00778 * caller must check the value of soffset. 00779 * (3) This is only used in contexts where it is not necessary to 00780 * consider if the character is inside a string. 00781 */ 00782 static l_int32 00783 getOffsetForCharacter(SARRAY *sa, 00784 l_int32 start, 00785 char tchar, 00786 l_int32 *psoffset, 00787 l_int32 *pboffset, 00788 l_int32 *ptoffset) 00789 { 00790 char *str; 00791 l_int32 i, j, n, nchars, totchars, found; 00792 00793 PROCNAME("getOffsetForCharacter"); 00794 00795 if (!sa) 00796 return ERROR_INT("sa not defined", procName, 1); 00797 if (!psoffset) 00798 return ERROR_INT("&soffset not defined", procName, 1); 00799 if (!pboffset) 00800 return ERROR_INT("&boffset not defined", procName, 1); 00801 if (!ptoffset) 00802 return ERROR_INT("&toffset not defined", procName, 1); 00803 00804 *psoffset = -1; /* init to not found */ 00805 *pboffset = 100000000; 00806 *ptoffset = 100000000; 00807 00808 n = sarrayGetCount(sa); 00809 found = FALSE; 00810 totchars = 0; 00811 for (i = start; i < n; i++) { 00812 if ((str = sarrayGetString(sa, i, 0)) == NULL) 00813 return ERROR_INT("str not returned; shouldn't happen", procName, 1); 00814 nchars = strlen(str); 00815 for (j = 0; j < nchars; j++) { 00816 if (str[j] == tchar) { 00817 found = TRUE; 00818 break; 00819 } 00820 } 00821 if (found) 00822 break; 00823 totchars += nchars; 00824 } 00825 00826 if (found) { 00827 *psoffset = i - start; 00828 *pboffset = j; 00829 *ptoffset = totchars + j; 00830 } 00831 00832 return 0; 00833 } 00834 00835 00836 /* 00837 * getOffsetForMatchingRP() 00838 * 00839 * Input: sa (output from cpp, by line) 00840 * start (starting index in sa to search; never a comment line) 00841 * soffsetlp (string offset to first LP) 00842 * boffsetlp (byte offset within string to first LP) 00843 * toffsetlp (total byte offset to first LP) 00844 * &soffset (<return> offset in strings from start index) 00845 * &boffset (<return> offset in bytes within string in which 00846 * the matching RP is found) 00847 * &toffset (<return> offset in total bytes from beginning of 00848 * string indexed by 'start' to the location where 00849 * the matching RP is found); 00850 * Return: 0 if OK, 1 on error 00851 * 00852 * Notes: 00853 * (1) We are searching for the matching right parenthesis (RP) that 00854 * corresponds to the first LP found beginning at the string 00855 * indexed by start. 00856 * (2) If the matching RP is not found, soffset is returned as -1, 00857 * and the other offsets are set to very large numbers. The 00858 * caller must check the value of soffset. 00859 * (3) This is only used in contexts where it is not necessary to 00860 * consider if the character is inside a string. 00861 * (4) We must do this because although most arg lists have a single 00862 * left and right parenthesis, it is possible to construct 00863 * more complicated prototype declarations, such as those 00864 * where functions are passed in. The C++ rules for prototypes 00865 * are strict, and require that for functions passed in as args, 00866 * the function name arg be placed in parenthesis, as well 00867 * as its arg list, thus incurring two extra levels of parentheses. 00868 */ 00869 static l_int32 00870 getOffsetForMatchingRP(SARRAY *sa, 00871 l_int32 start, 00872 l_int32 soffsetlp, 00873 l_int32 boffsetlp, 00874 l_int32 toffsetlp, 00875 l_int32 *psoffset, 00876 l_int32 *pboffset, 00877 l_int32 *ptoffset) 00878 { 00879 char *str; 00880 l_int32 i, j, n, nchars, totchars, leftmatch, firstline, jstart, found; 00881 00882 PROCNAME("getOffsetForMatchingRP"); 00883 00884 if (!sa) 00885 return ERROR_INT("sa not defined", procName, 1); 00886 if (!psoffset) 00887 return ERROR_INT("&soffset not defined", procName, 1); 00888 if (!pboffset) 00889 return ERROR_INT("&boffset not defined", procName, 1); 00890 if (!ptoffset) 00891 return ERROR_INT("&toffset not defined", procName, 1); 00892 00893 *psoffset = -1; /* init to not found */ 00894 *pboffset = 100000000; 00895 *ptoffset = 100000000; 00896 00897 n = sarrayGetCount(sa); 00898 found = FALSE; 00899 totchars = toffsetlp; 00900 leftmatch = 1; /* count of (LP - RP); we're finished when it goes to 0. */ 00901 firstline = start + soffsetlp; 00902 for (i = firstline; i < n; i++) { 00903 if ((str = sarrayGetString(sa, i, 0)) == NULL) 00904 return ERROR_INT("str not returned; shouldn't happen", procName, 1); 00905 nchars = strlen(str); 00906 jstart = 0; 00907 if (i == firstline) 00908 jstart = boffsetlp + 1; 00909 for (j = jstart; j < nchars; j++) { 00910 if (str[j] == '(') 00911 leftmatch++; 00912 else if (str[j] == ')') 00913 leftmatch--; 00914 if (leftmatch == 0) { 00915 found = TRUE; 00916 break; 00917 } 00918 } 00919 if (found) 00920 break; 00921 if (i == firstline) 00922 totchars += nchars - boffsetlp; 00923 else 00924 totchars += nchars; 00925 } 00926 00927 if (found) { 00928 *psoffset = i - start; 00929 *pboffset = j; 00930 *ptoffset = totchars + j; 00931 } 00932 00933 return 0; 00934 } 00935