1: #include <ctype.h>
2:
typedef int boolean;
#define TRUE    1
#define FALSE   0
#define NIL     0

boolean l_onecase;      /* true if upper and lower equivalent */

/*
 * makelower - fold an upper-case character to lower case and leave every
 * other character unchanged.
 *
 * The argument is converted to unsigned char before it is handed to the
 * <ctype.h> routines, because passing a negative plain char to them is
 * undefined.  The argument is evaluated more than once, so it must not
 * have side effects.
 */
#define makelower(c) \
    (isupper((unsigned char)(c)) ? tolower((unsigned char)(c)) : (c))

/*
 * STRNCMP - like strncmp except that the first string is folded to
 * lower case before comparing when l_onecase is set.
 *
 *  s1   string that is case-folded when l_onecase is set
 *  s2   string compared against s1 as-is
 *  len  maximum number of characters to compare; zero or a negative
 *       value compares nothing and reports equality
 *
 * Returns 0 when the compared characters are equal, otherwise the
 * difference (character of s2) - (character of s1) at the first
 * mismatch.  The comparison stops early at a NUL shared by both strings
 * so that it never runs past the end of either one.
 */
int
STRNCMP(const char *s1, const char *s2, int len)
{
    int diff;

    for (; len > 0; len--, s1++, s2++) {
        diff = *s2 - (l_onecase ? makelower(*s1) : *s1);
        if (diff != 0)
            return (diff);

        /* equal and NUL: both strings end here */
        if (*s2 == '\0')
            break;
    }
    return (0);
}
42:
/*
 * The following routine converts an irregular expression to
 * internal format.
 *
 * Either meta symbols (\a \d or \p) or character strings or
 * operations (alternation or parenthesizing) can be
 * specified.  Each starts with a descriptor byte.  The descriptor
 * byte has STR set for strings, META set for meta symbols
 * and OPER set for operations.
 * The descriptor byte can also have the OPT bit set if the object
 * defined is optional.  Also ALT can be set to indicate an alternation.
 *
 * For meta symbols the byte following the descriptor byte identifies
 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '(').  For
 * strings the byte after the descriptor is a character count for
 * the string:
 *
 *      meta symbols := descriptor
 *                      symbol
 *
 *      strings      := descriptor
 *                      character count
 *                      the string
 *
 *      operations   := descriptor
 *                      symbol
 *                      character count
 */
70:
/*
 * handy macros for accessing parts of match blocks
 *
 * A is a char pointer to the descriptor byte of a block.  Every use of
 * the parameter is parenthesised so that an argument containing a
 * low-precedence operator (for example a conditional expression) still
 * expands correctly.  A may be evaluated more than once.
 */
#define MSYM(A)   (*((A) + 1))            /* symbol in a meta symbol block */
#define MNEXT(A)  ((A) + 2)               /* character following a metasymbol block */

#define OSYM(A)   (*((A) + 1))            /* symbol in an operation block */
#define OCNT(A)   (*((A) + 2))            /* character count */
#define ONEXT(A)  ((A) + 3)               /* next character after the operation */
#define OPTR(A)   ((A) + *((A) + 2))      /* place pointed to by the operator */

#define SCNT(A)   (*((A) + 1))            /* byte count of a string */
#define SSTR(A)   ((A) + 2)               /* address of the string */
#define SNEXT(A)  ((A) + 2 + *((A) + 1))  /* character following the string */

/*
 * bit flags in the descriptor
 */
#define OPT   1     /* the object is optional */
#define STR   2     /* block is a character string */
#define META  4     /* block is a meta symbol */
#define ALT   8     /* an alternative follows this object */
#define OPER  16    /* block is an operation */
94:
95: char *ure; /* pointer current position in unconverted exp */
96: char *ccre; /* pointer to current position in converted exp*/
97: char *malloc();
98:
99: char *
100: convexp(re)
101: char *re; /* unconverted irregular expression */
102: {
103: register char *cre; /* pointer to converted regular expression */
104:
105: /* allocate room for the converted expression */
106: if (re == NIL)
107: return (NIL);
108: if (*re == '\0')
109: return (NIL);
110: cre = malloc (4 * strlen(re) + 3);
111: ccre = cre;
112: ure = re;
113:
114: /* start the conversion with a \a */
115: *cre = META | OPT;
116: MSYM(cre) = 'a';
117: ccre = MNEXT(cre);
118:
119: /* start the conversion (its recursive) */
120: expconv ();
121: *ccre = 0;
122: return (cre);
123: }
124:
125: expconv()
126: {
127: register char *cs; /* pointer to current symbol in converted exp */
128: register char c; /* character being processed */
129: register char *acs; /* pinter to last alternate */
130: register int temp;
131:
132: /* let the conversion begin */
133: acs = NIL;
134: while (*ure != NIL) {
135: switch (c = *ure++) {
136:
137: case '\\':
138: switch (c = *ure++) {
139:
140: /* escaped characters are just characters */
141: default:
142: if ((*cs & STR) == 0) {
143: cs = ccre;
144: *cs = STR;
145: SCNT(cs) = 1;
146: ccre += 2;
147: } else
148: SCNT(cs)++;
149: *ccre++ = c;
150: break;
151:
152: /* normal(?) metacharacters */
153: case 'a':
154: case 'd':
155: case 'e':
156: case 'p':
157: if (acs != NIL && acs != cs) {
158: do {
159: temp = OCNT(acs);
160: OCNT(acs) = ccre - acs;
161: acs -= temp;
162: } while (temp != 0);
163: acs = NIL;
164: }
165: cs = ccre;
166: *cs = META;
167: MSYM(cs) = c;
168: ccre = MNEXT(cs);
169: break;
170: }
171: break;
172:
173: /* just put the symbol in */
174: case '^':
175: case '$':
176: if (acs != NIL && acs != cs) {
177: do {
178: temp = OCNT(acs);
179: OCNT(acs) = ccre - acs;
180: acs -= temp;
181: } while (temp != 0);
182: acs = NIL;
183: }
184: cs = ccre;
185: *cs = META;
186: MSYM(cs) = c;
187: ccre = MNEXT(cs);
188: break;
189:
190: /* mark the last match sequence as optional */
191: case '?':
192: *cs = *cs | OPT;
193: break;
194:
195: /* recurse and define a subexpression */
196: case '(':
197: if (acs != NIL && acs != cs) {
198: do {
199: temp = OCNT(acs);
200: OCNT(acs) = ccre - acs;
201: acs -= temp;
202: } while (temp != 0);
203: acs = NIL;
204: }
205: cs = ccre;
206: *cs = OPER;
207: OSYM(cs) = '(';
208: ccre = ONEXT(cs);
209: expconv ();
210: OCNT(cs) = ccre - cs; /* offset to next symbol */
211: break;
212:
213: /* return from a recursion */
214: case ')':
215: if (acs != NIL) {
216: do {
217: temp = OCNT(acs);
218: OCNT(acs) = ccre - acs;
219: acs -= temp;
220: } while (temp != 0);
221: acs = NIL;
222: }
223: cs = ccre;
224: *cs = META;
225: MSYM(cs) = c;
226: ccre = MNEXT(cs);
227: return;
228:
229: /* mark the last match sequence as having an alternate */
230: /* the third byte will contain an offset to jump over the */
231: /* alternate match in case the first did not fail */
232: case '|':
233: if (acs != NIL && acs != cs)
234: OCNT(ccre) = ccre - acs; /* make a back pointer */
235: else
236: OCNT(ccre) = 0;
237: *cs |= ALT;
238: cs = ccre;
239: *cs = OPER;
240: OSYM(cs) = '|';
241: ccre = ONEXT(cs);
242: acs = cs; /* remember that the pointer is to be filles */
243: break;
244:
245: /* if its not a metasymbol just build a scharacter string */
246: default:
247: if ((*cs & STR) == 0) {
248: cs = ccre;
249: *cs = STR;
250: SCNT(cs) = 1;
251: ccre = SSTR(cs);
252: } else
253: SCNT(cs)++;
254: *ccre++ = c;
255: break;
256: }
257: }
258: if (acs != NIL) {
259: do {
260: temp = OCNT(acs);
261: OCNT(acs) = ccre - acs;
262: acs -= temp;
263: } while (temp != 0);
264: acs = NIL;
265: }
266: return;
267: }
268: /* end of convertre */
269:
270:
/*
 * The following routine recognises an irregular expression
 * with the following special characters:
 *
 *      \?      -       means last match was optional
 *      \a      -       matches any number of characters
 *      \d      -       matches any number of spaces and tabs
 *      \p      -       matches any number of alphanumeric
 *                      characters.  The characters matched
 *                      will be copied into the area pointed
 *                      to by 'mstring'.
 *      \|      -       alternation
 *      \( \)   -       grouping used mostly for alternation and
 *                      optionality
 *
 * The irregular expression must be translated to internal form
 * prior to calling this routine.
 *
 * The value returned is the pointer to the first non \a
 * character matched.
 */
292:
293: boolean _escaped; /* true if we are currently _escaped */
294: char *_start; /* start of string */
295:
296: char *
297: expmatch (s, re, mstring)
298: register char *s; /* string to check for a match in */
299: register char *re; /* a converted irregular expression */
300: register char *mstring; /* where to put whatever matches a \p */
301: {
302: register char *cs; /* the current symbol */
303: register char *ptr,*s1; /* temporary pointer */
304: boolean matched; /* a temporary boolean */
305:
306: /* initial conditions */
307: if (re == NIL)
308: return (NIL);
309: cs = re;
310: matched = FALSE;
311:
312: /* loop till expression string is exhausted (or at least pretty tired) */
313: while (*cs) {
314: switch (*cs & (OPER | STR | META)) {
315:
316: /* try to match a string */
317: case STR:
318: matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
319: if (matched) {
320:
321: /* hoorah it matches */
322: s += SCNT(cs);
323: cs = SNEXT(cs);
324: } else if (*cs & ALT) {
325:
326: /* alternation, skip to next expression */
327: cs = SNEXT(cs);
328: } else if (*cs & OPT) {
329:
330: /* the match is optional */
331: cs = SNEXT(cs);
332: matched = 1; /* indicate a successful match */
333: } else {
334:
335: /* no match, error return */
336: return (NIL);
337: }
338: break;
339:
340: /* an operator, do something fancy */
341: case OPER:
342: switch (OSYM(cs)) {
343:
344: /* this is an alternation */
345: case '|':
346: if (matched)
347:
348: /* last thing in the alternation was a match, skip ahead */
349: cs = OPTR(cs);
350: else
351:
352: /* no match, keep trying */
353: cs = ONEXT(cs);
354: break;
355:
356: /* this is a grouping, recurse */
357: case '(':
358: ptr = expmatch (s, ONEXT(cs), mstring);
359: if (ptr != NIL) {
360:
361: /* the subexpression matched */
362: matched = 1;
363: s = ptr;
364: } else if (*cs & ALT) {
365:
366: /* alternation, skip to next expression */
367: matched = 0;
368: } else if (*cs & OPT) {
369:
370: /* the match is optional */
371: matched = 1; /* indicate a successful match */
372: } else {
373:
374: /* no match, error return */
375: return (NIL);
376: }
377: cs = OPTR(cs);
378: break;
379: }
380: break;
381:
382: /* try to match a metasymbol */
383: case META:
384: switch (MSYM(cs)) {
385:
386: /* try to match anything and remember what was matched */
387: case 'p':
388: /*
389: * This is really the same as trying the match the
390: * remaining parts of the expression to any subset
391: * of the string.
392: */
393: s1 = s;
394: do {
395: ptr = expmatch (s1, MNEXT(cs), mstring);
396: if (ptr != NIL && s1 != s) {
397:
398: /* we have a match, remember the match */
399: strncpy (mstring, s, s1 - s);
400: mstring[s1 - s] = '\0';
401: return (ptr);
402: } else if (ptr != NIL && (*cs & OPT)) {
403:
404: /* it was aoptional so no match is ok */
405: return (ptr);
406: } else if (ptr != NIL) {
407:
408: /* not optional and we still matched */
409: return (NIL);
410: }
411: if (!isalnum(*s1) && *s1 != '_')
412: return (NIL);
413: if (*s1 == '\\')
414: _escaped = _escaped ? FALSE : TRUE;
415: else
416: _escaped = FALSE;
417: } while (*s1++);
418: return (NIL);
419:
420: /* try to match anything */
421: case 'a':
422: /*
423: * This is really the same as trying the match the
424: * remaining parts of the expression to any subset
425: * of the string.
426: */
427: s1 = s;
428: do {
429: ptr = expmatch (s1, MNEXT(cs), mstring);
430: if (ptr != NIL && s1 != s) {
431:
432: /* we have a match */
433: return (ptr);
434: } else if (ptr != NIL && (*cs & OPT)) {
435:
436: /* it was aoptional so no match is ok */
437: return (ptr);
438: } else if (ptr != NIL) {
439:
440: /* not optional and we still matched */
441: return (NIL);
442: }
443: if (*s1 == '\\')
444: _escaped = _escaped ? FALSE : TRUE;
445: else
446: _escaped = FALSE;
447: } while (*s1++);
448: return (NIL);
449:
450: /* fail if we are currently _escaped */
451: case 'e':
452: if (_escaped)
453: return(NIL);
454: cs = MNEXT(cs);
455: break;
456:
457: /* match any number of tabs and spaces */
458: case 'd':
459: ptr = s;
460: while (*s == ' ' || *s == '\t')
461: s++;
462: if (s != ptr || s == _start) {
463:
464: /* match, be happy */
465: matched = 1;
466: cs = MNEXT(cs);
467: } else if (*s == '\n' || *s == '\0') {
468:
469: /* match, be happy */
470: matched = 1;
471: cs = MNEXT(cs);
472: } else if (*cs & ALT) {
473:
474: /* try the next part */
475: matched = 0;
476: cs = MNEXT(cs);
477: } else if (*cs & OPT) {
478:
479: /* doesn't matter */
480: matched = 1;
481: cs = MNEXT(cs);
482: } else
483:
484: /* no match, error return */
485: return (NIL);
486: break;
487:
488: /* check for end of line */
489: case '$':
490: if (*s == '\0' || *s == '\n') {
491:
492: /* match, be happy */
493: s++;
494: matched = 1;
495: cs = MNEXT(cs);
496: } else if (*cs & ALT) {
497:
498: /* try the next part */
499: matched = 0;
500: cs = MNEXT(cs);
501: } else if (*cs & OPT) {
502:
503: /* doesn't matter */
504: matched = 1;
505: cs = MNEXT(cs);
506: } else
507:
508: /* no match, error return */
509: return (NIL);
510: break;
511:
512: /* check for start of line */
513: case '^':
514: if (s == _start) {
515:
516: /* match, be happy */
517: matched = 1;
518: cs = MNEXT(cs);
519: } else if (*cs & ALT) {
520:
521: /* try the next part */
522: matched = 0;
523: cs = MNEXT(cs);
524: } else if (*cs & OPT) {
525:
526: /* doesn't matter */
527: matched = 1;
528: cs = MNEXT(cs);
529: } else
530:
531: /* no match, error return */
532: return (NIL);
533: break;
534:
535: /* end of a subexpression, return success */
536: case ')':
537: return (s);
538: }
539: break;
540: }
541: }
542: return (s);
543: }
Defined functions
Defined variables
ccre
defined in line
96; used 28 times
ure
defined in line
95; used 4 times
Defined typedef's
Defined macros
ALT
defined in line
92; used 6 times
FALSE
defined in line
5; used 5 times
META
defined in line
91; used 5 times
MNEXT
defined in line
75; used 17 times
MSYM
defined in line
74; used 5 times
NIL
defined in line
6; used 36 times
- in line 106-109(3),
133-134(2),
157,
163,
176,
182,
197,
203,
215,
221,
233,
258,
264,
307-308(2),
336,
359,
375,
396,
402-412(4),
418,
430-441(4),
448-453(2),
485,
509,
532
OCNT
defined in line
78; used 13 times
ONEXT
defined in line
79; used 4 times
OPER
defined in line
93; used 3 times
OPT
defined in line
89; used 9 times
OPTR
defined in line
80; used 2 times
OSYM
defined in line
77; used 3 times
SCNT
defined in line
82; used 6 times
SNEXT
defined in line
84; used 3 times
SSTR
defined in line
83; used 2 times
STR
defined in line
90; used 5 times
TRUE
defined in line
4; used 2 times