1: #include <ctype.h>
   2: 
   3: typedef int boolean;
   4: #define TRUE    1
   5: #define FALSE   0
   6: #define NIL 0
   7: 
   8: boolean l_onecase;  /* true if upper and lower equivalent */
   9: 
  10: #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
  11: 
  12: /*  STRNCMP -	like strncmp except that we convert the
  13:  *	 	first string to lower case before comparing
  14:  *		if l_onecase is set.
  15:  */
  16: 
  17: STRNCMP(s1, s2, len)
  18:     register char *s1,*s2;
  19:     register int len;
  20: {
  21:     if (l_onecase) {
  22:         do
  23:         if (*s2 - makelower(*s1))
  24:             return (*s2 - makelower(*s1));
  25:         else {
  26:             s2++;
  27:             s1++;
  28:         }
  29:         while (--len);
  30:     } else {
  31:         do
  32:         if (*s2 - *s1)
  33:             return (*s2 - *s1);
  34:         else {
  35:             s2++;
  36:             s1++;
  37:         }
  38:         while (--len);
  39:     }
  40:     return(0);
  41: }
  42: 
  43: /*	The following routine converts an irregular expression to
  44:  *	internal format.
  45:  *
  46:  *	Either meta symbols (\a \d or \p) or character strings or
   47:  *	operations ( alternation or parenthesizing ) can be
  48:  *	specified.  Each starts with a descriptor byte.  The descriptor
  49:  *	byte has STR set for strings, META set for meta symbols
  50:  *	and OPER set for operations.
  51:  *	The descriptor byte can also have the OPT bit set if the object
  52:  *	defined is optional.  Also ALT can be set to indicate an alternation.
  53:  *
   54:  *	For metasymbols the byte following the descriptor byte identifies
  55:  *	the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '(').  For
  56:  *	strings the byte after the descriptor is a character count for
  57:  *	the string:
  58:  *
  59:  *		meta symbols := descriptor
  60:  *				symbol
  61:  *
  62:  *		strings :=	descriptor
  63:  *				character count
  64:  *				the string
  65:  *
   66:  *		operations :=	descriptor
  67:  *				symbol
  68:  *				character count
  69:  */
  70: 
  71: /*
  72:  *  handy macros for accessing parts of match blocks
  73:  */
  74: #define MSYM(A) (*(A+1))    /* symbol in a meta symbol block */
  75: #define MNEXT(A) (A+2)      /* character following a metasymbol block */
  76: 
  77: #define OSYM(A) (*(A+1))    /* symbol in an operation block */
  78: #define OCNT(A) (*(A+2))    /* character count */
  79: #define ONEXT(A) (A+3)      /* next character after the operation */
  80: #define OPTR(A) (A+*(A+2))  /* place pointed to by the operator */
  81: 
  82: #define SCNT(A) (*(A+1))    /* byte count of a string */
  83: #define SSTR(A) (A+2)       /* address of the string */
  84: #define SNEXT(A) (A+2+*(A+1))   /* character following the string */
  85: 
  86: /*
  87:  *  bit flags in the descriptor
  88:  */
  89: #define OPT 1
  90: #define STR 2
  91: #define META 4
  92: #define ALT 8
  93: #define OPER 16
  94: 
  95: char *ure;      /* pointer current position in unconverted exp */
  96: char *ccre;     /* pointer to current position in converted exp*/
  97: char *malloc();
  98: 
  99: char *
 100: convexp(re)
 101:     char *re;       /* unconverted irregular expression */
 102: {
 103:     register char *cre;     /* pointer to converted regular expression */
 104: 
 105:     /* allocate room for the converted expression */
 106:     if (re == NIL)
 107:     return (NIL);
 108:     if (*re == '\0')
 109:     return (NIL);
 110:     cre = malloc (4 * strlen(re) + 3);
 111:     ccre = cre;
 112:     ure = re;
 113: 
 114:     /* start the conversion with a \a */
 115:     *cre = META | OPT;
 116:     MSYM(cre) = 'a';
 117:     ccre = MNEXT(cre);
 118: 
 119:     /* start the conversion (its recursive) */
 120:     expconv ();
 121:     *ccre = 0;
 122:     return (cre);
 123: }
 124: 
 125: expconv()
 126: {
 127:     register char *cs;      /* pointer to current symbol in converted exp */
 128:     register char c;        /* character being processed */
 129:     register char *acs;     /* pinter to last alternate */
 130:     register int temp;
 131: 
 132:     /* let the conversion begin */
 133:     acs = NIL;
 134:     while (*ure != NIL) {
 135:     switch (c = *ure++) {
 136: 
 137:     case '\\':
 138:         switch (c = *ure++) {
 139: 
 140:         /* escaped characters are just characters */
 141:         default:
 142:         if ((*cs & STR) == 0) {
 143:             cs = ccre;
 144:             *cs = STR;
 145:             SCNT(cs) = 1;
 146:             ccre += 2;
 147:         } else
 148:             SCNT(cs)++;
 149:         *ccre++ = c;
 150:         break;
 151: 
 152:         /* normal(?) metacharacters */
 153:         case 'a':
 154:         case 'd':
 155:         case 'e':
 156:         case 'p':
 157:         if (acs != NIL && acs != cs) {
 158:             do {
 159:             temp = OCNT(acs);
 160:             OCNT(acs) = ccre - acs;
 161:             acs -= temp;
 162:             } while (temp != 0);
 163:             acs = NIL;
 164:         }
 165:         cs = ccre;
 166:         *cs = META;
 167:         MSYM(cs) = c;
 168:         ccre = MNEXT(cs);
 169:         break;
 170:         }
 171:         break;
 172: 
 173:     /* just put the symbol in */
 174:     case '^':
 175:     case '$':
 176:         if (acs != NIL && acs != cs) {
 177:         do {
 178:             temp = OCNT(acs);
 179:             OCNT(acs) = ccre - acs;
 180:             acs -= temp;
 181:         } while (temp != 0);
 182:         acs = NIL;
 183:         }
 184:         cs = ccre;
 185:         *cs = META;
 186:         MSYM(cs) = c;
 187:         ccre = MNEXT(cs);
 188:         break;
 189: 
 190:     /* mark the last match sequence as optional */
 191:     case '?':
 192:         *cs = *cs | OPT;
 193:         break;
 194: 
 195:     /* recurse and define a subexpression */
 196:     case '(':
 197:         if (acs != NIL && acs != cs) {
 198:         do {
 199:             temp = OCNT(acs);
 200:             OCNT(acs) = ccre - acs;
 201:             acs -= temp;
 202:         } while (temp != 0);
 203:         acs = NIL;
 204:         }
 205:         cs = ccre;
 206:         *cs = OPER;
 207:         OSYM(cs) = '(';
 208:         ccre = ONEXT(cs);
 209:         expconv ();
 210:         OCNT(cs) = ccre - cs;       /* offset to next symbol */
 211:         break;
 212: 
 213:     /* return from a recursion */
 214:     case ')':
 215:         if (acs != NIL) {
 216:         do {
 217:             temp = OCNT(acs);
 218:             OCNT(acs) = ccre - acs;
 219:             acs -= temp;
 220:         } while (temp != 0);
 221:         acs = NIL;
 222:         }
 223:         cs = ccre;
 224:         *cs = META;
 225:         MSYM(cs) = c;
 226:         ccre = MNEXT(cs);
 227:         return;
 228: 
 229:     /* mark the last match sequence as having an alternate */
 230:     /* the third byte will contain an offset to jump over the */
 231:     /* alternate match in case the first did not fail */
 232:     case '|':
 233:         if (acs != NIL && acs != cs)
 234:         OCNT(ccre) = ccre - acs;    /* make a back pointer */
 235:         else
 236:         OCNT(ccre) = 0;
 237:         *cs |= ALT;
 238:         cs = ccre;
 239:         *cs = OPER;
 240:         OSYM(cs) = '|';
 241:         ccre = ONEXT(cs);
 242:         acs = cs;   /* remember that the pointer is to be filles */
 243:         break;
 244: 
 245:     /* if its not a metasymbol just build a scharacter string */
 246:     default:
 247:         if ((*cs & STR) == 0) {
 248:         cs = ccre;
 249:         *cs = STR;
 250:         SCNT(cs) = 1;
 251:         ccre = SSTR(cs);
 252:         } else
 253:         SCNT(cs)++;
 254:         *ccre++ = c;
 255:         break;
 256:     }
 257:     }
 258:     if (acs != NIL) {
 259:     do {
 260:         temp = OCNT(acs);
 261:         OCNT(acs) = ccre - acs;
 262:         acs -= temp;
 263:     } while (temp != 0);
 264:     acs = NIL;
 265:     }
 266:     return;
 267: }
 268: /* end of convertre */
 269: 
 270: 
 271: /*
  272:  *	The following routine recognises an irregular expression
 273:  *	with the following special characters:
 274:  *
  275:  *		?	-	means last match was optional
 276:  *		\a	-	matches any number of characters
 277:  *		\d	-	matches any number of spaces and tabs
 278:  *		\p	-	matches any number of alphanumeric
 279:  *				characters. The
 280:  *				characters matched will be copied into
  281:  *				the area pointed to by 'mstring'.
  282:  *		|	-	alternation
  283:  *		( )	-	grouping used mostly for alternation and
  284:  *				optionality
 285:  *
 286:  *	The irregular expression must be translated to internal form
 287:  *	prior to calling this routine
 288:  *
 289:  *	The value returned is the pointer to the first non \a
 290:  *	character matched.
 291:  */
 292: 
 293: boolean _escaped;       /* true if we are currently _escaped */
 294: char *_start;           /* start of string */
 295: 
 296: char *
 297: expmatch (s, re, mstring)
 298:     register char *s;       /* string to check for a match in */
 299:     register char *re;      /* a converted irregular expression */
 300:     register char *mstring; /* where to put whatever matches a \p */
 301: {
 302:     register char *cs;      /* the current symbol */
 303:     register char *ptr,*s1; /* temporary pointer */
 304:     boolean matched;        /* a temporary boolean */
 305: 
 306:     /* initial conditions */
 307:     if (re == NIL)
 308:     return (NIL);
 309:     cs = re;
 310:     matched = FALSE;
 311: 
 312:     /* loop till expression string is exhausted (or at least pretty tired) */
 313:     while (*cs) {
 314:     switch (*cs & (OPER | STR | META)) {
 315: 
 316:     /* try to match a string */
 317:     case STR:
 318:         matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
 319:         if (matched) {
 320: 
 321:         /* hoorah it matches */
 322:         s += SCNT(cs);
 323:         cs = SNEXT(cs);
 324:         } else if (*cs & ALT) {
 325: 
 326:         /* alternation, skip to next expression */
 327:         cs = SNEXT(cs);
 328:         } else if (*cs & OPT) {
 329: 
 330:         /* the match is optional */
 331:         cs = SNEXT(cs);
 332:         matched = 1;        /* indicate a successful match */
 333:         } else {
 334: 
 335:         /* no match, error return */
 336:         return (NIL);
 337:         }
 338:         break;
 339: 
 340:     /* an operator, do something fancy */
 341:     case OPER:
 342:         switch (OSYM(cs)) {
 343: 
 344:         /* this is an alternation */
 345:         case '|':
 346:         if (matched)
 347: 
 348:             /* last thing in the alternation was a match, skip ahead */
 349:             cs = OPTR(cs);
 350:         else
 351: 
 352:             /* no match, keep trying */
 353:             cs = ONEXT(cs);
 354:         break;
 355: 
 356:         /* this is a grouping, recurse */
 357:         case '(':
 358:         ptr = expmatch (s, ONEXT(cs), mstring);
 359:         if (ptr != NIL) {
 360: 
 361:             /* the subexpression matched */
 362:             matched = 1;
 363:             s = ptr;
 364:         } else if (*cs & ALT) {
 365: 
 366:             /* alternation, skip to next expression */
 367:             matched = 0;
 368:         } else if (*cs & OPT) {
 369: 
 370:             /* the match is optional */
 371:             matched = 1;    /* indicate a successful match */
 372:         } else {
 373: 
 374:             /* no match, error return */
 375:             return (NIL);
 376:         }
 377:         cs = OPTR(cs);
 378:         break;
 379:         }
 380:         break;
 381: 
 382:     /* try to match a metasymbol */
 383:     case META:
 384:         switch (MSYM(cs)) {
 385: 
 386:         /* try to match anything and remember what was matched */
 387:         case 'p':
 388:         /*
 389: 		 *  This is really the same as trying the match the
 390: 		 *  remaining parts of the expression to any subset
 391: 		 *  of the string.
 392: 		 */
 393:         s1 = s;
 394:         do {
 395:             ptr = expmatch (s1, MNEXT(cs), mstring);
 396:             if (ptr != NIL && s1 != s) {
 397: 
 398:             /* we have a match, remember the match */
 399:             strncpy (mstring, s, s1 - s);
 400:             mstring[s1 - s] = '\0';
 401:             return (ptr);
 402:             } else if (ptr != NIL && (*cs & OPT)) {
 403: 
 404:             /* it was aoptional so no match is ok */
 405:             return (ptr);
 406:             } else if (ptr != NIL) {
 407: 
 408:             /* not optional and we still matched */
 409:             return (NIL);
 410:             }
 411:             if (!isalnum(*s1) && *s1 != '_')
 412:             return (NIL);
 413:             if (*s1 == '\\')
 414:             _escaped = _escaped ? FALSE : TRUE;
 415:             else
 416:             _escaped = FALSE;
 417:         } while (*s1++);
 418:         return (NIL);
 419: 
 420:         /* try to match anything */
 421:         case 'a':
 422:         /*
 423: 		 *  This is really the same as trying the match the
 424: 		 *  remaining parts of the expression to any subset
 425: 		 *  of the string.
 426: 		 */
 427:         s1 = s;
 428:         do {
 429:             ptr = expmatch (s1, MNEXT(cs), mstring);
 430:             if (ptr != NIL && s1 != s) {
 431: 
 432:             /* we have a match */
 433:             return (ptr);
 434:             } else if (ptr != NIL && (*cs & OPT)) {
 435: 
 436:             /* it was aoptional so no match is ok */
 437:             return (ptr);
 438:             } else if (ptr != NIL) {
 439: 
 440:             /* not optional and we still matched */
 441:             return (NIL);
 442:             }
 443:             if (*s1 == '\\')
 444:             _escaped = _escaped ? FALSE : TRUE;
 445:             else
 446:             _escaped = FALSE;
 447:         } while (*s1++);
 448:         return (NIL);
 449: 
 450:         /* fail if we are currently _escaped */
 451:         case 'e':
 452:         if (_escaped)
 453:             return(NIL);
 454:         cs = MNEXT(cs);
 455:         break;
 456: 
 457:         /* match any number of tabs and spaces */
 458:         case 'd':
 459:         ptr = s;
 460:         while (*s == ' ' || *s == '\t')
 461:             s++;
 462:         if (s != ptr || s == _start) {
 463: 
 464:             /* match, be happy */
 465:             matched = 1;
 466:             cs = MNEXT(cs);
 467:         } else if (*s == '\n' || *s == '\0') {
 468: 
 469:             /* match, be happy */
 470:             matched = 1;
 471:             cs = MNEXT(cs);
 472:         } else if (*cs & ALT) {
 473: 
 474:             /* try the next part */
 475:             matched = 0;
 476:             cs = MNEXT(cs);
 477:         } else if (*cs & OPT) {
 478: 
 479:             /* doesn't matter */
 480:             matched = 1;
 481:             cs = MNEXT(cs);
 482:         } else
 483: 
 484:             /* no match, error return */
 485:             return (NIL);
 486:         break;
 487: 
 488:         /* check for end of line */
 489:         case '$':
 490:         if (*s == '\0' || *s == '\n') {
 491: 
 492:             /* match, be happy */
 493:             s++;
 494:             matched = 1;
 495:             cs = MNEXT(cs);
 496:         } else if (*cs & ALT) {
 497: 
 498:             /* try the next part */
 499:             matched = 0;
 500:             cs = MNEXT(cs);
 501:         } else if (*cs & OPT) {
 502: 
 503:             /* doesn't matter */
 504:             matched = 1;
 505:             cs = MNEXT(cs);
 506:         } else
 507: 
 508:             /* no match, error return */
 509:             return (NIL);
 510:         break;
 511: 
 512:         /* check for start of line */
 513:         case '^':
 514:         if (s == _start) {
 515: 
 516:             /* match, be happy */
 517:             matched = 1;
 518:             cs = MNEXT(cs);
 519:         } else if (*cs & ALT) {
 520: 
 521:             /* try the next part */
 522:             matched = 0;
 523:             cs = MNEXT(cs);
 524:         } else if (*cs & OPT) {
 525: 
 526:             /* doesn't matter */
 527:             matched = 1;
 528:             cs = MNEXT(cs);
 529:         } else
 530: 
 531:             /* no match, error return */
 532:             return (NIL);
 533:         break;
 534: 
 535:         /* end of a subexpression, return success */
 536:         case ')':
 537:         return (s);
 538:         }
 539:         break;
 540:     }
 541:     }
 542:     return (s);
 543: }

Defined functions

STRNCMP defined in line 17; used 3 times
convexp defined in line 99; used 12 times
expconv defined in line 125; used 2 times
expmatch defined in line 296; used 15 times

Defined variables

_escaped defined in line 293; used 7 times
_start defined in line 294; used 2 times
ccre defined in line 96; used 28 times
l_onecase defined in line 8; used 1 times
  • in line 21
ure defined in line 95; used 4 times

Defined typedef's

boolean defined in line 3; used 3 times

Defined macros

ALT defined in line 92; used 6 times
FALSE defined in line 5; used 5 times
META defined in line 91; used 5 times
MNEXT defined in line 75; used 17 times
MSYM defined in line 74; used 5 times
NIL defined in line 6; used 36 times
OCNT defined in line 78; used 13 times
ONEXT defined in line 79; used 4 times
OPER defined in line 93; used 3 times
OPT defined in line 89; used 9 times
OPTR defined in line 80; used 2 times
OSYM defined in line 77; used 3 times
SCNT defined in line 82; used 6 times
SNEXT defined in line 84; used 3 times
SSTR defined in line 83; used 2 times
STR defined in line 90; used 5 times
TRUE defined in line 4; used 2 times
makelower defined in line 10; used 2 times
Last modified: 1982-09-30
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 1194
Valid CSS Valid XHTML 1.0 Strict