Leptonica  1.77.0
Image processing and image analysis suite
parseprotos.c
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*
28  * parseprotos.c
29  *
30  * char *parseForProtos()
31  *
32  * Static helpers
33  * static l_int32 getNextNonCommentLine()
34  * static l_int32 getNextNonBlankLine()
35  * static l_int32 getNextNonDoubleSlashLine()
36  * static l_int32 searchForProtoSignature()
37  * static char *captureProtoSignature()
38  * static char *cleanProtoSignature()
39  * static l_int32 skipToEndOfFunction()
40  * static l_int32 skipToMatchingBrace()
41  * static l_int32 skipToSemicolon()
42  * static l_int32 getOffsetForCharacter()
43  * static l_int32 getOffsetForMatchingRP()
44  */
45 
46 #include <string.h>
47 #include "allheaders.h"
48 
49 static const l_int32 L_BUF_SIZE = 2048; /* max token size */
50 
51 static l_int32 getNextNonCommentLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
52 static l_int32 getNextNonBlankLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
53 static l_int32 getNextNonDoubleSlashLine(SARRAY *sa, l_int32 start,
54  l_int32 *pnext);
55 static l_int32 searchForProtoSignature(SARRAY *sa, l_int32 begin,
56  l_int32 *pstart, l_int32 *pstop, l_int32 *pcharindex,
57  l_int32 *pfound);
58 static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop,
59  l_int32 charindex);
60 static char * cleanProtoSignature(char *str);
61 static l_int32 skipToEndOfFunction(SARRAY *sa, l_int32 start,
62  l_int32 charindex, l_int32 *pnext);
63 static l_int32 skipToMatchingBrace(SARRAY *sa, l_int32 start,
64  l_int32 lbindex, l_int32 *prbline, l_int32 *prbindex);
65 static l_int32 skipToSemicolon(SARRAY *sa, l_int32 start,
66  l_int32 charindex, l_int32 *pnext);
67 static l_int32 getOffsetForCharacter(SARRAY *sa, l_int32 start, char tchar,
68  l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
69 static l_int32 getOffsetForMatchingRP(SARRAY *sa, l_int32 start,
70  l_int32 soffsetlp, l_int32 boffsetlp, l_int32 toffsetlp,
71  l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
72 
73 
74 /*
75  * parseForProtos()
76  *
77  * Input: filein (output of cpp)
78  * prestring (<optional> string that prefaces each decl;
79  * use NULL to omit)
80  * Return: parsestr (string of function prototypes), or NULL on error
81  *
82  * Notes:
83  * (1) We parse the output of cpp:
84  * cpp -ansi <filein>
85  * Three plans were attempted, with success on the third.
86  * (2) Plan 1. A cursory examination of the cpp output indicated that
87  * every function was preceded by a cpp comment statement.
88  * So we just need to look at statements beginning after comments.
89  * Unfortunately, this is NOT the case. Some functions start
90  * without cpp comment lines, typically when there are no
91  * comments in the source that immediately precede the function.
92  * (3) Plan 2. Consider the keywords in the language that start
93  * parts of the cpp file. Some, like 'enum', 'union' and
94  * 'struct', are followed after a while by '{', and eventually
95  * end with '}', plus an optional token and a final ';'.
96  * Others, like 'extern', 'static' and 'typedef', are never
97  * the beginnings of global function definitions. Function
98  * prototypes have one or more sets of '(' followed eventually
99  * by a ')', and end with ';'. But function definitions have
100  * tokens, followed by '(', more tokens, ')' and then
101  * immediately a '{'. We would generate a prototype from this
102  * by adding a ';' to all tokens up to the ')'. So we use
103  * these special tokens to decide what we are parsing. And
104  * whenever a function definition is found and the prototype
105  * extracted, we skip through the rest of the function
106  * past the corresponding '}'. This token ends a line, and
107  * is often on a line of its own. But as it turns out,
108  * the only keyword we need to consider is 'static'.
109  * (4) Plan 3. Consider the parentheses and braces for various
110  * declarations. A struct, enum, or union has a pair of
111  * braces followed by a semicolon. With the exception of an
112  * __attribute__ declaration for a struct, they cannot have parentheses
113  * before the left brace, but a struct can have lots of parentheses
114  * within the brace set. A function prototype has no braces.
115  * A function definition can have sets of left and right
116  * parentheses, but these are followed by a left brace.
117  * So plan 3 looks at the way parentheses and braces are
118  * organized. Once the beginning of a function definition
119  * is found, the prototype is extracted and we search for
120  * the ending right brace.
121  * (5) To find the ending right brace, it is necessary to do some
122  * careful parsing. For example, in this file, we have
123  * left and right braces as characters, and these must not
124  * be counted. Somewhat more tricky, the file fhmtauto.c
125  * generates code, and includes a right brace in a string.
126  * So we must not include braces that are in strings. But how
127  * do we know if something is inside a string? Keep state,
128  * starting with not-inside, and every time you hit a double quote
129  * that is not escaped, toggle the condition. Any brace
130  * found in the state of being within a string is ignored.
131  * (6) When a prototype is extracted, it is put in a canonical
132  * form (i.e., cleaned up). Finally, we check that it is
133  * not static and save it. (If static, it is ignored).
134  * (7) The %prestring for unix is NULL; it is included here so that
135  * you can use Microsoft's declaration for importing or
136  * exporting to a dll. See environ.h for examples of use.
137  * Here, we set: %prestring = "LEPT_DLL ". Note in particular
138  * the space character that will separate 'LEPT_DLL' from
139  * the standard unix prototype that follows.
140  */
141 char *
142 parseForProtos(const char *filein,
143  const char *prestring)
144 {
145 char *strdata, *str, *newstr, *parsestr, *secondword;
146 l_int32 start, next, stop, charindex, found;
147 size_t nbytes;
148 SARRAY *sa, *saout, *satest;
149 
150  PROCNAME("parseForProtos");
151 
152  if (!filein)
153  return (char *)ERROR_PTR("filein not defined", procName, NULL);
154 
155  /* Read in the cpp output into memory, one string for each
156  * line in the file, omitting blank lines. */
157  strdata = (char *)l_binaryRead(filein, &nbytes);
158  sa = sarrayCreateLinesFromString(strdata, 0);
159 
160  saout = sarrayCreate(0);
161  next = 0;
162  while (1) { /* repeat after each non-static prototype is extracted */
163  searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
164  if (!found)
165  break;
166 /* fprintf(stderr, " start = %d, stop = %d, charindex = %d\n",
167  start, stop, charindex); */
168  str = captureProtoSignature(sa, start, stop, charindex);
169 
170  /* Make sure that the signature found by cpp does not begin with
171  * static, extern or typedef. We get 'extern' declarations
172  * from header files, and with some versions of cpp running on
173  * #include <sys/stat.h> we get something of the form:
174  * extern ... (( ... )) ... ( ... ) { ...
175  * For this, the 1st '(' is the lp, the 2nd ')' is the rp,
176  * and there is a lot of garbage between the rp and the lp.
177  * It is easiest to simply reject any signature that starts
178  * with 'extern'. Note also that an 'extern' token has been
179  * prepended to each prototype, so the 'static' or
180  * 'extern' keywords we are looking for, if they exist,
181  * would be the second word. We also have a typedef in
182  * bmpio.c that has the form:
183  * typedef struct __attribute__((....)) { ...} ... ;
184  * This is avoided by blacklisting 'typedef' along with 'extern'
185  * and 'static'. */
186  satest = sarrayCreateWordsFromString(str);
187  secondword = sarrayGetString(satest, 1, L_NOCOPY);
188  if (strcmp(secondword, "static") && /* not static */
189  strcmp(secondword, "extern") && /* not extern */
190  strcmp(secondword, "typedef")) { /* not typedef */
191  if (prestring) { /* prepend it to the prototype */
192  newstr = stringJoin(prestring, str);
193  sarrayAddString(saout, newstr, L_INSERT);
194  LEPT_FREE(str);
195  } else {
196  sarrayAddString(saout, str, L_INSERT);
197  }
198  } else {
199  LEPT_FREE(str);
200  }
201  sarrayDestroy(&satest);
202 
203  skipToEndOfFunction(sa, stop, charindex, &next);
204  if (next == -1) break;
205  }
206 
207  /* Flatten into a string with newlines between prototypes */
208  parsestr = sarrayToString(saout, 1);
209  LEPT_FREE(strdata);
210  sarrayDestroy(&sa);
211  sarrayDestroy(&saout);
212 
213  return parsestr;
214 }
215 
216 
217 /*
218  * getNextNonCommentLine()
219  *
220  * Input: sa (output from cpp, by line)
221  * start (starting index to search)
222  * &next (<return> index of first uncommented line after
223  * the start line)
224  * Return: 0 if OK, 1 on error
225  *
226  * Notes:
227  * (1) Skips over all consecutive comment lines, beginning at 'start'
228  * (2) If all lines to the end are '#' comments, return next = -1
229  */
230 static l_int32
231 getNextNonCommentLine(SARRAY *sa,
232  l_int32 start,
233  l_int32 *pnext)
234 {
235 char *str;
236 l_int32 i, n;
237 
238  PROCNAME("getNextNonCommentLine");
239 
240  if (!sa)
241  return ERROR_INT("sa not defined", procName, 1);
242  if (!pnext)
243  return ERROR_INT("&pnext not defined", procName, 1);
244 
245  /* Init for situation where this line and all following are comments */
246  *pnext = -1;
247 
248  n = sarrayGetCount(sa);
249  for (i = start; i < n; i++) {
250  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
251  return ERROR_INT("str not returned; shouldn't happen", procName, 1);
252  if (str[0] != '#') {
253  *pnext = i;
254  return 0;
255  }
256  }
257 
258  return 0;
259 }
260 
261 
262 /*
263  * getNextNonBlankLine()
264  *
265  * Input: sa (output from cpp, by line)
266  * start (starting index to search)
267  * &next (<return> index of first nonblank line after
268  * the start line)
269  * Return: 0 if OK, 1 on error
270  *
271  * Notes:
272  * (1) Skips over all consecutive blank lines, beginning at 'start'
273  * (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r')
274  * (3) If all lines to the end are blank, return next = -1
275  */
276 static l_int32
277 getNextNonBlankLine(SARRAY *sa,
278  l_int32 start,
279  l_int32 *pnext)
280 {
281 char *str;
282 l_int32 i, j, n, len;
283 
284  PROCNAME("getNextNonBlankLine");
285 
286  if (!sa)
287  return ERROR_INT("sa not defined", procName, 1);
288  if (!pnext)
289  return ERROR_INT("&pnext not defined", procName, 1);
290 
291  /* Init for situation where this line and all following are blank */
292  *pnext = -1;
293 
294  n = sarrayGetCount(sa);
295  for (i = start; i < n; i++) {
296  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
297  return ERROR_INT("str not returned; shouldn't happen", procName, 1);
298  len = strlen(str);
299  for (j = 0; j < len; j++) {
300  if (str[j] != ' ' && str[j] != '\t'
301  && str[j] != '\n' && str[j] != '\r') { /* non-blank */
302  *pnext = i;
303  return 0;
304  }
305  }
306  }
307 
308  return 0;
309 }
310 
311 
312 /*
313  * getNextNonDoubleSlashLine()
314  *
315  * Input: sa (output from cpp, by line)
316  * start (starting index to search)
317  * &next (<return> index of first uncommented line after
318  * the start line)
319  * Return: 0 if OK, 1 on error
320  *
321  * Notes:
322  * (1) Skips over all consecutive '//' lines, beginning at 'start'
323  * (2) If all lines to the end start with '//', return next = -1
324  */
325 static l_int32
326 getNextNonDoubleSlashLine(SARRAY *sa,
327  l_int32 start,
328  l_int32 *pnext)
329 {
330 char *str;
331 l_int32 i, n, len;
332 
333  PROCNAME("getNextNonDoubleSlashLine");
334 
335  if (!sa)
336  return ERROR_INT("sa not defined", procName, 1);
337  if (!pnext)
338  return ERROR_INT("&pnext not defined", procName, 1);
339 
340  /* Init for situation where this line and all following
341  * start with '//' */
342  *pnext = -1;
343 
344  n = sarrayGetCount(sa);
345  for (i = start; i < n; i++) {
346  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
347  return ERROR_INT("str not returned; shouldn't happen", procName, 1);
348  len = strlen(str);
349  if (len < 2 || str[0] != '/' || str[1] != '/') {
350  *pnext = i;
351  return 0;
352  }
353  }
354 
355  return 0;
356 }
357 
358 
359 /*
360  * searchForProtoSignature()
361  *
362  * Input: sa (output from cpp, by line)
363  * begin (beginning index to search)
364  * &start (<return> starting index for function definition)
365  * &stop (<return> index of line on which proto is completed)
366  * &charindex (<return> char index of completing ')' character)
367  * &found (<return> 1 if valid signature is found; 0 otherwise)
368  * Return: 0 if OK, 1 on error
369  *
370  * Notes:
371  * (1) If this returns found == 0, it means that there are no
372  * more function definitions in the file. Caller must check
373  * this value and exit the loop over the entire cpp file.
374  * (2) This follows plan 3 (see above). We skip comment and blank
375  * lines at the beginning. Then we don't check for keywords.
376  * Instead, find the relative locations of the first occurrences
377  * of these four tokens: left parenthesis (lp), right
378  * parenthesis (rp), left brace (lb) and semicolon (sc).
379  * (3) The signature of a function definition looks like this:
380  * .... '(' .... ')' '{'
381  * where the lp and rp must both precede the lb, with only
382  * whitespace between the rp and the lb. The '....'
383  * are sets of tokens that have no braces.
384  * (4) If a function definition is found, this returns found = 1,
385  * with 'start' being the first line of the definition and
386  * 'charindex' being the position of the ')' in line 'stop'
387  * at the end of the arg list.
388  */
389 static l_int32
390 searchForProtoSignature(SARRAY *sa,
391  l_int32 begin,
392  l_int32 *pstart,
393  l_int32 *pstop,
394  l_int32 *pcharindex,
395  l_int32 *pfound)
396 {
397 l_int32 next, rbline, rbindex, scline;
398 l_int32 soffsetlp, soffsetrp, soffsetlb, soffsetsc;
399 l_int32 boffsetlp, boffsetrp, boffsetlb, boffsetsc;
400 l_int32 toffsetlp, toffsetrp, toffsetlb, toffsetsc;
401 
402  PROCNAME("searchForProtoSignature");
403 
404  if (!sa)
405  return ERROR_INT("sa not defined", procName, 1);
406  if (!pstart)
407  return ERROR_INT("&start not defined", procName, 1);
408  if (!pstop)
409  return ERROR_INT("&stop not defined", procName, 1);
410  if (!pcharindex)
411  return ERROR_INT("&charindex not defined", procName, 1);
412  if (!pfound)
413  return ERROR_INT("&found not defined", procName, 1);
414 
415  *pfound = FALSE;
416 
417  while (1) {
418 
419  /* Skip over sequential '#' comment lines */
420  getNextNonCommentLine(sa, begin, &next);
421  if (next == -1) return 0;
422  if (next != begin) {
423  begin = next;
424  continue;
425  }
426 
427  /* Skip over sequential blank lines */
428  getNextNonBlankLine(sa, begin, &next);
429  if (next == -1) return 0;
430  if (next != begin) {
431  begin = next;
432  continue;
433  }
434 
435  /* Skip over sequential lines starting with '//' */
436  getNextNonDoubleSlashLine(sa, begin, &next);
437  if (next == -1) return 0;
438  if (next != begin) {
439  begin = next;
440  continue;
441  }
442 
443  /* Search for specific character sequence patterns; namely
444  * a lp, a matching rp, a lb and a semicolon.
445  * Abort the search if no lp is found. */
446  getOffsetForCharacter(sa, next, '(', &soffsetlp, &boffsetlp,
447  &toffsetlp);
448  if (soffsetlp == -1)
449  break;
450  getOffsetForMatchingRP(sa, next, soffsetlp, boffsetlp, toffsetlp,
451  &soffsetrp, &boffsetrp, &toffsetrp);
452  getOffsetForCharacter(sa, next, '{', &soffsetlb, &boffsetlb,
453  &toffsetlb);
454  getOffsetForCharacter(sa, next, ';', &soffsetsc, &boffsetsc,
455  &toffsetsc);
456 
457  /* We've found a lp. Now weed out the case where a matching
458  * rp and a lb are not both found. */
459  if (soffsetrp == -1 || soffsetlb == -1)
460  break;
461 
462  /* Check if a left brace occurs before a left parenthesis;
463  * if so, skip it */
464  if (toffsetlb < toffsetlp) {
465  skipToMatchingBrace(sa, next + soffsetlb, boffsetlb,
466  &rbline, &rbindex);
467  skipToSemicolon(sa, rbline, rbindex, &scline);
468  begin = scline + 1;
469  continue;
470  }
471 
472  /* Check if a semicolon occurs before a left brace or
473  * a left parenthesis; if so, skip it */
474  if ((soffsetsc != -1) &&
475  (toffsetsc < toffsetlb || toffsetsc < toffsetlp)) {
476  skipToSemicolon(sa, next, 0, &scline);
477  begin = scline + 1;
478  continue;
479  }
480 
481  /* OK, it should be a function definition. We haven't
482  * checked that there is only white space between the
483  * rp and lb, but we've only seen problems with two
484  * extern inlines in sys/stat.h, and this is handled
485  * later by eliminating any prototype beginning with 'extern'. */
486  *pstart = next;
487  *pstop = next + soffsetrp;
488  *pcharindex = boffsetrp;
489  *pfound = TRUE;
490  break;
491  }
492 
493  return 0;
494 }
495 
496 
497 /*
498  * captureProtoSignature()
499  *
500  * Input: sa (output from cpp, by line)
501  * start (starting index to search; never a comment line)
502  * stop (index of line on which pattern is completed)
503  * charindex (char index of completing ')' character)
504  * Return: cleanstr (prototype string), or NULL on error
505  *
506  * Notes:
507  * (1) Return all characters, ending with a ';' after the ')'
508  */
509 static char *
510 captureProtoSignature(SARRAY *sa,
511  l_int32 start,
512  l_int32 stop,
513  l_int32 charindex)
514 {
515 char *str, *newstr, *protostr, *cleanstr;
516 SARRAY *sap;
517 l_int32 i;
518 
519  PROCNAME("captureProtoSignature");
520 
521  if (!sa)
522  return (char *)ERROR_PTR("sa not defined", procName, NULL);
523 
524  sap = sarrayCreate(0);
525  for (i = start; i < stop; i++) {
526  str = sarrayGetString(sa, i, L_COPY);
527  sarrayAddString(sap, str, L_INSERT);
528  }
529  str = sarrayGetString(sa, stop, L_COPY);
530  str[charindex + 1] = '\0';
531  newstr = stringJoin(str, ";");
532  sarrayAddString(sap, newstr, L_INSERT);
533  LEPT_FREE(str);
534  protostr = sarrayToString(sap, 2);
535  sarrayDestroy(&sap);
536  cleanstr = cleanProtoSignature(protostr);
537  LEPT_FREE(protostr);
538 
539  return cleanstr;
540 }
541 
542 
543 /*
544  * cleanProtoSignature()
545  *
546  * Input: instr (input prototype string)
547  * Return: cleanstr (clean prototype string), or NULL on error
548  *
549  * Notes:
550  * (1) Adds 'extern' at beginning and regularizes spaces
551  * between tokens.
552  */
553 static char *
554 cleanProtoSignature(char *instr)
555 {
556 char *str, *cleanstr;
557 char buf[L_BUF_SIZE];
558 char externstring[] = "extern";
559 l_int32 i, j, nwords, nchars, index, len;
560 SARRAY *sa, *saout;
561 
562  PROCNAME("cleanProtoSignature");
563 
564  if (!instr)
565  return (char *)ERROR_PTR("instr not defined", procName, NULL);
566 
567  sa = sarrayCreateWordsFromString(instr);
568  nwords = sarrayGetCount(sa);
569  saout = sarrayCreate(0);
570  sarrayAddString(saout, externstring, L_COPY);
571  for (i = 0; i < nwords; i++) {
572  str = sarrayGetString(sa, i, L_NOCOPY);
573  nchars = strlen(str);
574  index = 0;
575  for (j = 0; j < nchars; j++) {
576  if (index > L_BUF_SIZE - 6) {
577  sarrayDestroy(&sa);
578  sarrayDestroy(&saout);
579  return (char *)ERROR_PTR("token too large", procName, NULL);
580  }
581  if (str[j] == '(') {
582  buf[index++] = ' ';
583  buf[index++] = '(';
584  buf[index++] = ' ';
585  } else if (str[j] == ')') {
586  buf[index++] = ' ';
587  buf[index++] = ')';
588  } else {
589  buf[index++] = str[j];
590  }
591  }
592  buf[index] = '\0';
593  sarrayAddString(saout, buf, L_COPY);
594  }
595 
596  /* Flatten to a prototype string with spaces added after
597  * each word, and remove the last space */
598  cleanstr = sarrayToString(saout, 2);
599  len = strlen(cleanstr);
600  cleanstr[len - 1] = '\0';
601 
602  sarrayDestroy(&sa);
603  sarrayDestroy(&saout);
604  return cleanstr;
605 }
606 
607 
608 /*
609  * skipToEndOfFunction()
610  *
611  * Input: sa (output from cpp, by line)
612  * start (index of starting line with left bracket to search)
613  * lbindex (starting char index for left bracket)
614  * &next (index of line following the ending '}' for function
615  * Return: 0 if OK, 1 on error
616  */
617 static l_int32
618 skipToEndOfFunction(SARRAY *sa,
619  l_int32 start,
620  l_int32 lbindex,
621  l_int32 *pnext)
622 {
623 l_int32 end, rbindex;
624 l_int32 soffsetlb, boffsetlb, toffsetlb;
625 
626  PROCNAME("skipToEndOfFunction");
627 
628  if (!sa)
629  return ERROR_INT("sa not defined", procName, 1);
630  if (!pnext)
631  return ERROR_INT("&next not defined", procName, 1);
632 
633  getOffsetForCharacter(sa, start, '{', &soffsetlb, &boffsetlb,
634  &toffsetlb);
635  skipToMatchingBrace(sa, start + soffsetlb, boffsetlb, &end, &rbindex);
636  if (end == -1) { /* shouldn't happen! */
637  *pnext = -1;
638  return 1;
639  }
640 
641  *pnext = end + 1;
642  return 0;
643 }
644 
645 
646 /*
647  * skipToMatchingBrace()
648  *
649  * Input: sa (output from cpp, by line)
650  * start (index of starting line with left bracket to search)
651  * lbindex (starting char index for left bracket)
652  * &stop (index of line with the matching right bracket)
653  * &rbindex (char index of matching right bracket)
654  * Return: 0 if OK, 1 on error
655  *
656  * Notes:
657  * (1) If the matching right brace is not found, returns
658  * stop = -1. This shouldn't happen.
659  */
660 static l_int32
661 skipToMatchingBrace(SARRAY *sa,
662  l_int32 start,
663  l_int32 lbindex,
664  l_int32 *pstop,
665  l_int32 *prbindex)
666 {
667 char *str;
668 l_int32 i, j, jstart, n, sumbrace, found, instring, nchars;
669 
670  PROCNAME("skipToMatchingBrace");
671 
672  if (!sa)
673  return ERROR_INT("sa not defined", procName, 1);
674  if (!pstop)
675  return ERROR_INT("&stop not defined", procName, 1);
676  if (!prbindex)
677  return ERROR_INT("&rbindex not defined", procName, 1);
678 
679  instring = 0; /* init to FALSE; toggle on double quotes */
680  *pstop = -1;
681  n = sarrayGetCount(sa);
682  sumbrace = 1;
683  found = FALSE;
684  for (i = start; i < n; i++) {
685  str = sarrayGetString(sa, i, L_NOCOPY);
686  jstart = 0;
687  if (i == start)
688  jstart = lbindex + 1;
689  nchars = strlen(str);
690  for (j = jstart; j < nchars; j++) {
691  /* Toggle the instring state every time you encounter
692  * a double quote that is NOT escaped. */
693  if (j == jstart && str[j] == '\"')
694  instring = 1 - instring;
695  if (j > jstart && str[j] == '\"' && str[j-1] != '\\')
696  instring = 1 - instring;
697  /* Record the braces if they are neither a literal character
698  * nor within a string. */
699  if (str[j] == '{' && str[j+1] != '\'' && !instring) {
700  sumbrace++;
701  } else if (str[j] == '}' && str[j+1] != '\'' && !instring) {
702  sumbrace--;
703  if (sumbrace == 0) {
704  found = TRUE;
705  *prbindex = j;
706  break;
707  }
708  }
709  }
710  if (found) {
711  *pstop = i;
712  return 0;
713  }
714  }
715 
716  return ERROR_INT("matching right brace not found", procName, 1);
717 }
718 
719 
720 /*
721  * skipToSemicolon()
722  *
723  * Input: sa (output from cpp, by line)
724  * start (index of starting line to search)
725  * charindex (starting char index for search)
726  * &next (index of line containing the next ';')
727  * Return: 0 if OK, 1 on error
728  *
729  * Notes:
730  * (1) If the semicolon isn't found, returns next = -1.
731  * This shouldn't happen.
732  * (2) This is only used in contexts where the semicolon is
733  * not within a string.
734  */
735 static l_int32
736 skipToSemicolon(SARRAY *sa,
737  l_int32 start,
738  l_int32 charindex,
739  l_int32 *pnext)
740 {
741 char *str;
742 l_int32 i, j, n, jstart, nchars, found;
743 
744  PROCNAME("skipToSemicolon");
745 
746  if (!sa)
747  return ERROR_INT("sa not defined", procName, 1);
748  if (!pnext)
749  return ERROR_INT("&next not defined", procName, 1);
750 
751  *pnext = -1;
752  n = sarrayGetCount(sa);
753  found = FALSE;
754  for (i = start; i < n; i++) {
755  str = sarrayGetString(sa, i, L_NOCOPY);
756  jstart = 0;
757  if (i == start)
758  jstart = charindex + 1;
759  nchars = strlen(str);
760  for (j = jstart; j < nchars; j++) {
761  if (str[j] == ';') {
762  found = TRUE;;
763  break;
764  }
765  }
766  if (found) {
767  *pnext = i;
768  return 0;
769  }
770  }
771 
772  return ERROR_INT("semicolon not found", procName, 1);
773 }
774 
775 
776 /*
777  * getOffsetForCharacter()
778  *
779  * Input: sa (output from cpp, by line)
780  * start (starting index in sa to search; never a comment line)
781  * tchar (we are searching for the first instance of this)
782  * &soffset (<return> offset in strings from start index)
783  * &boffset (<return> offset in bytes within string in which
784  * the character is first found)
785  * &toffset (<return> offset in total bytes from beginning of
786  * string indexed by 'start' to the location where
787  * the character is first found)
788  * Return: 0 if OK, 1 on error
789  *
790  * Notes:
791  * (1) We are searching for the first instance of 'tchar', starting
792  * at the beginning of the string indexed by start.
793  * (2) If the character is not found, soffset is returned as -1,
794  * and the other offsets are set to very large numbers. The
795  * caller must check the value of soffset.
796  * (3) This is only used in contexts where it is not necessary to
797  * consider if the character is inside a string.
798  */
799 static l_int32
800 getOffsetForCharacter(SARRAY *sa,
801  l_int32 start,
802  char tchar,
803  l_int32 *psoffset,
804  l_int32 *pboffset,
805  l_int32 *ptoffset)
806 {
807 char *str;
808 l_int32 i, j, n, nchars, totchars, found;
809 
810  PROCNAME("getOffsetForCharacter");
811 
812  if (!sa)
813  return ERROR_INT("sa not defined", procName, 1);
814  if (!psoffset)
815  return ERROR_INT("&soffset not defined", procName, 1);
816  if (!pboffset)
817  return ERROR_INT("&boffset not defined", procName, 1);
818  if (!ptoffset)
819  return ERROR_INT("&toffset not defined", procName, 1);
820 
821  *psoffset = -1; /* init to not found */
822  *pboffset = 100000000;
823  *ptoffset = 100000000;
824 
825  n = sarrayGetCount(sa);
826  found = FALSE;
827  totchars = 0;
828  for (i = start; i < n; i++) {
829  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
830  return ERROR_INT("str not returned; shouldn't happen", procName, 1);
831  nchars = strlen(str);
832  for (j = 0; j < nchars; j++) {
833  if (str[j] == tchar) {
834  found = TRUE;
835  break;
836  }
837  }
838  if (found)
839  break;
840  totchars += nchars;
841  }
842 
843  if (found) {
844  *psoffset = i - start;
845  *pboffset = j;
846  *ptoffset = totchars + j;
847  }
848 
849  return 0;
850 }
851 
852 
853 /*
854  * getOffsetForMatchingRP()
855  *
856  * Input: sa (output from cpp, by line)
857  * start (starting index in sa to search; never a comment line)
858  * soffsetlp (string offset to first LP)
859  * boffsetlp (byte offset within string to first LP)
860  * toffsetlp (total byte offset to first LP)
861  * &soffset (<return> offset in strings from start index)
862  * &boffset (<return> offset in bytes within string in which
863  * the matching RP is found)
864  * &toffset (<return> offset in total bytes from beginning of
865  * string indexed by 'start' to the location where
866  * the matching RP is found);
867  * Return: 0 if OK, 1 on error
868  *
869  * Notes:
870  * (1) We are searching for the matching right parenthesis (RP) that
871  * corresponds to the first LP found beginning at the string
872  * indexed by start.
873  * (2) If the matching RP is not found, soffset is returned as -1,
874  * and the other offsets are set to very large numbers. The
875  * caller must check the value of soffset.
876  * (3) This is only used in contexts where it is not necessary to
877  * consider if the character is inside a string.
878  * (4) We must do this because although most arg lists have a single
879  * left and right parenthesis, it is possible to construct
880  * more complicated prototype declarations, such as those
881  * where functions are passed in. The C++ rules for prototypes
882  * are strict, and require that for functions passed in as args,
883  * the function name arg be placed in parenthesis, as well
884  * as its arg list, thus incurring two extra levels of parentheses.
885  */
886 static l_int32
887 getOffsetForMatchingRP(SARRAY *sa,
888  l_int32 start,
889  l_int32 soffsetlp,
890  l_int32 boffsetlp,
891  l_int32 toffsetlp,
892  l_int32 *psoffset,
893  l_int32 *pboffset,
894  l_int32 *ptoffset)
895 {
896 char *str;
897 l_int32 i, j, n, nchars, totchars, leftmatch, firstline, jstart, found;
898 
899  PROCNAME("getOffsetForMatchingRP");
900 
901  if (!sa)
902  return ERROR_INT("sa not defined", procName, 1);
903  if (!psoffset)
904  return ERROR_INT("&soffset not defined", procName, 1);
905  if (!pboffset)
906  return ERROR_INT("&boffset not defined", procName, 1);
907  if (!ptoffset)
908  return ERROR_INT("&toffset not defined", procName, 1);
909 
910  *psoffset = -1; /* init to not found */
911  *pboffset = 100000000;
912  *ptoffset = 100000000;
913 
914  n = sarrayGetCount(sa);
915  found = FALSE;
916  totchars = toffsetlp;
917  leftmatch = 1; /* count of (LP - RP); we're finished when it goes to 0. */
918  firstline = start + soffsetlp;
919  for (i = firstline; i < n; i++) {
920  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
921  return ERROR_INT("str not returned; shouldn't happen", procName, 1);
922  nchars = strlen(str);
923  jstart = 0;
924  if (i == firstline)
925  jstart = boffsetlp + 1;
926  for (j = jstart; j < nchars; j++) {
927  if (str[j] == '(')
928  leftmatch++;
929  else if (str[j] == ')')
930  leftmatch--;
931  if (leftmatch == 0) {
932  found = TRUE;
933  break;
934  }
935  }
936  if (found)
937  break;
938  if (i == firstline)
939  totchars += nchars - boffsetlp;
940  else
941  totchars += nchars;
942  }
943 
944  if (found) {
945  *psoffset = i - start;
946  *pboffset = j;
947  *ptoffset = totchars + j;
948  }
949 
950  return 0;
951 }
char * sarrayToString(SARRAY *sa, l_int32 addnlflag)
sarrayToString()
Definition: sarray1.c:763
Definition: pix.h:717
Definition: pix.h:716
SARRAY * sarrayCreate(l_int32 n)
sarrayCreate()
Definition: sarray1.c:163
Definition: array.h:116
l_uint8 * l_binaryRead(const char *filename, size_t *pnbytes)
l_binaryRead()
Definition: utils2.c:1212
l_ok sarrayAddString(SARRAY *sa, const char *string, l_int32 copyflag)
sarrayAddString()
Definition: sarray1.c:446
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:681
SARRAY * sarrayCreateLinesFromString(const char *string, l_int32 blankflag)
sarrayCreateLinesFromString()
Definition: sarray1.c:276
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:621
Definition: pix.h:718
char * stringJoin(const char *src1, const char *src2)
stringJoin()
Definition: utils2.c:509
SARRAY * sarrayCreateWordsFromString(const char *string)
sarrayCreateWordsFromString()
Definition: sarray1.c:226
static const l_int32 L_BUF_SIZE
Definition: classapp.c:55
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:355