1: #include "awk.def"
   2: #include "stdio.h"
   3: #include "awk.h"
   4: 
   5: extern node *op2();
   6: extern struct fa *cgotofn();
   7: #define MAXLIN 256
   8: #define NCHARS 128
   9: #define NSTATES 256
  10: 
  11: #define type(v) v->nobj
  12: #define left(v) v->narg[0]
  13: #define right(v)    v->narg[1]
  14: #define parent(v)   v->nnext
  15: 
  16: #define LEAF    case CCL: case NCCL: case CHAR: case DOT:
  17: #define UNARY   case FINAL: case STAR: case PLUS: case QUEST:
  18: 
  19: /* encoding in tree nodes:
  20: 	leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
  21: 	unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
  22: 	binary (CAT, OR): left and right are children
  23: 	parent contains pointer to parent
  24: */
  25: 
  26: struct fa {
  27:     int cch;
  28:     struct fa *st;
  29: };
  30: 
  31: int *state[NSTATES];
  32: int *foll[MAXLIN];
  33: char    chars[MAXLIN];
  34: int setvec[MAXLIN];
  35: node    *point[MAXLIN];
  36: 
  37: int setcnt;
  38: int line;
  39: 
  40: 
  41: struct fa *makedfa(p)   /* returns dfa for tree pointed to by p */
  42: node *p;
  43: {
  44:     node *p1;
  45:     struct fa *fap;
  46:     p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
  47:         /* put DOT STAR in front of reg. exp. */
  48:     p1 = op2(FINAL, p1, (node *) 0);        /* install FINAL node */
  49: 
  50:     line = 0;
  51:     penter(p1); /* enter parent pointers and leaf indices */
  52:     point[line] = p1;   /* FINAL node */
  53:     setvec[0] = 1;      /* for initial DOT STAR */
  54:     cfoll(p1);  /* set up follow sets */
  55:     fap = cgotofn();
  56:     freetr(p1); /* add this when alloc works */
  57:     return(fap);
  58: }
  59: 
  60: penter(p)   /* set up parent pointers and leaf indices */
  61: node *p;
  62: {
  63:     switch(type(p)) {
  64:         LEAF
  65:             left(p) = (node *) line;
  66:             point[line++] = p;
  67:             break;
  68:         UNARY
  69:             penter(left(p));
  70:             parent(left(p)) = p;
  71:             break;
  72:         case CAT:
  73:         case OR:
  74:             penter(left(p));
  75:             penter(right(p));
  76:             parent(left(p)) = p;
  77:             parent(right(p)) = p;
  78:             break;
  79:         default:
  80:             error(FATAL, "unknown type %d in penter\n", type(p));
  81:             break;
  82:     }
  83: }
  84: 
  85: freetr(p)   /* free parse tree and follow sets */
  86: node *p;
  87: {
  88:     switch(type(p)) {
  89:         LEAF
  90:             xfree(foll[(int) left(p)]);
  91:             xfree(p);
  92:             break;
  93:         UNARY
  94:             freetr(left(p));
  95:             xfree(p);
  96:             break;
  97:         case CAT:
  98:         case OR:
  99:             freetr(left(p));
 100:             freetr(right(p));
 101:             xfree(p);
 102:             break;
 103:         default:
 104:             error(FATAL, "unknown type %d in freetr", type(p));
 105:             break;
 106:     }
 107: }
 108: char *cclenter(p)
 109: register char *p;
 110: {
 111:     register i, c;
 112:     char *op;
 113: 
 114:     op = p;
 115:     i = 0;
 116:     while ((c = *p++) != 0) {
 117:         if (c == '-' && i > 0 && chars[i-1] != 0) {
 118:             if (*p != 0) {
 119:                 c = chars[i-1];
 120:                 while (c < *p) {
 121:                     if (i >= MAXLIN)
 122:                         overflo();
 123:                     chars[i++] = ++c;
 124:                 }
 125:                 p++;
 126:                 continue;
 127:             }
 128:         }
 129:         if (i >= MAXLIN)
 130:             overflo();
 131:         chars[i++] = c;
 132:     }
 133:     chars[i++] = '\0';
 134:     dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
 135:     xfree(op);
 136:     return(tostring(chars));
 137: }
 138: 
 139: overflo()
 140: {
 141:     error(FATAL, "regular expression too long\n");
 142: }
 143: 
 144: cfoll(v)        /* enter follow set of each leaf of vertex v into foll[leaf] */
 145: register node *v;
 146: {
 147:     register i;
 148:     int prev;
 149:     int *add();
 150: 
 151:     switch(type(v)) {
 152:         LEAF
 153:             setcnt = 0;
 154:             for (i=1; i<=line; i++)
 155:                 setvec[i] = 0;
 156:             follow(v);
 157:             if (notin(foll, ( (int) left(v))-1, &prev)) {
 158:                 foll[(int) left(v)] = add(setcnt);
 159:             }
 160:             else
 161:                 foll[ (int) left(v)] = foll[prev];
 162:             break;
 163:         UNARY
 164:             cfoll(left(v));
 165:             break;
 166:         case CAT:
 167:         case OR:
 168:             cfoll(left(v));
 169:             cfoll(right(v));
 170:             break;
 171:         default:
 172:             error(FATAL, "unknown type %d in cfoll", type(v));
 173:     }
 174: }
 175: 
 176: first(p)            /* collects initially active leaves of p into setvec */
 177: register node *p;       /* returns 0 or 1 depending on whether p matches empty string */
 178: {
 179:     register b;
 180: 
 181:     switch(type(p)) {
 182:         LEAF
 183:             if (setvec[(int) left(p)] != 1) {
 184:                 setvec[(int) left(p)] = 1;
 185:                 setcnt++;
 186:             }
 187:             if (type(p) == CCL && (*(char *) right(p)) == '\0')
 188:                 return(0);      /* empty CCL */
 189:             else return(1);
 190:         case FINAL:
 191:         case PLUS:
 192:             if (first(left(p)) == 0) return(0);
 193:             return(1);
 194:         case STAR:
 195:         case QUEST:
 196:             first(left(p));
 197:             return(0);
 198:         case CAT:
 199:             if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
 200:             return(1);
 201:         case OR:
 202:             b = first(right(p));
 203:             if (first(left(p)) == 0 || b == 0) return(0);
 204:             return(1);
 205:     }
 206:     error(FATAL, "unknown type %d in first\n", type(p));
 207:     return(-1);
 208: }
 209: 
 210: follow(v)
 211: node *v;        /* collects leaves that can follow v into setvec */
 212: {
 213:     node *p;
 214: 
 215:     if (type(v) == FINAL)
 216:         return;
 217:     p = parent(v);
 218:     switch (type(p)) {
 219:         case STAR:
 220:         case PLUS:  first(v);
 221:                 follow(p);
 222:                 return;
 223: 
 224:         case OR:
 225:         case QUEST: follow(p);
 226:                 return;
 227: 
 228:         case CAT:   if (v == left(p)) { /* v is left child of p */
 229:                     if (first(right(p)) == 0) {
 230:                         follow(p);
 231:                         return;
 232:                     }
 233:                 }
 234:                 else        /* v is right child */
 235:                     follow(p);
 236:                 return;
 237:         case FINAL: if (setvec[line] != 1) {
 238:                     setvec[line] = 1;
 239:                     setcnt++;
 240:                 }
 241:                 return;
 242:     }
 243: }
 244: 
 245: member(c, s)    /* is c in s? */
 246: register char c, *s;
 247: {
 248:     while (*s)
 249:         if (c == *s++)
 250:             return(1);
 251:     return(0);
 252: }
 253: 
 254: notin(array, n, prev)       /* is setvec in array[0] thru array[n]? */
 255: int **array;
 256: int *prev; {
 257:     register i, j;
 258:     int *ptr;
 259:     for (i=0; i<=n; i++) {
 260:         ptr = array[i];
 261:         if (*ptr == setcnt) {
 262:             for (j=0; j < setcnt; j++)
 263:                 if (setvec[*(++ptr)] != 1) goto nxt;
 264:             *prev = i;
 265:             return(0);
 266:         }
 267:         nxt: ;
 268:     }
 269:     return(1);
 270: }
 271: 
 272: int *add(n) {       /* remember setvec */
 273:     int *ptr, *p;
 274:     register i;
 275:     if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
 276:         overflo();
 277:     *ptr = n;
 278:     dprintf("add(%d)\n", n, NULL, NULL);
 279:     for (i=1; i <= line; i++)
 280:         if (setvec[i] == 1) {
 281:             *(++ptr) = i;
 282:             dprintf("  ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
 283:         }
 284:     dprintf("\n", NULL, NULL, NULL);
 285:     return(p);
 286: }
 287: 
 288: struct fa *cgotofn()
 289: {
 290:     register i, k;
 291:     register int *ptr;
 292:     char c;
 293:     char *p;
 294:     node *cp;
 295:     int j, n, s, ind, numtrans;
 296:     int finflg;
 297:     int curpos, num, prev;
 298:     struct fa *where[NSTATES];
 299: 
 300:     int fatab[257];
 301:     struct fa *pfa;
 302: 
 303:     char index[MAXLIN];
 304:     char iposns[MAXLIN];
 305:     int sposns[MAXLIN];
 306:     int spmax, spinit;
 307: 
 308:     char symbol[NCHARS];
 309:     char isyms[NCHARS];
 310:     char ssyms[NCHARS];
 311:     int ssmax, ssinit;
 312: 
 313:     for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
 314:     for (i=0; i<NCHARS; i++)  isyms[i] = symbol[i] = 0;
 315:     setcnt = 0;
 316:     /* compute initial positions and symbols of state 0 */
 317:     ssmax = 0;
 318:     ptr = state[0] = foll[0];
 319:     spinit = *ptr;
 320:     for (i=0; i<spinit; i++) {
 321:         curpos = *(++ptr);
 322:         sposns[i] = curpos;
 323:         iposns[curpos] = 1;
 324:         cp = point[curpos];
 325:         dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
 326:         switch (type(cp)) {
 327:             case CHAR:
 328:                 k = (int) right(cp);
 329:                 if (isyms[k] != 1) {
 330:                     isyms[k] = 1;
 331:                     ssyms[ssmax++] = k;
 332:                 }
 333:                 break;
 334:             case DOT:
 335:                 for (k=1; k<NCHARS; k++) {
 336:                     if (k != HAT) {
 337:                         if (isyms[k] != 1) {
 338:                             isyms[k] = 1;
 339:                             ssyms[ssmax++] = k;
 340:                         }
 341:                     }
 342:                 }
 343:                 break;
 344:             case CCL:
 345:                 for (p = (char *) right(cp); *p; p++) {
 346:                     if (*p != HAT) {
 347:                         if (isyms[*p] != 1) {
 348:                             isyms[*p] = 1;
 349:                             ssyms[ssmax++] = *p;
 350:                         }
 351:                     }
 352:                 }
 353:                 break;
 354:             case NCCL:
 355:                 for (k=1; k<NCHARS; k++) {
 356:                     if (k != HAT && !member(k, (char *) right(cp))) {
 357:                         if (isyms[k] != 1) {
 358:                             isyms[k] = 1;
 359:                             ssyms[ssmax++] = k;
 360:                         }
 361:                     }
 362:                 }
 363:         }
 364:     }
 365:     ssinit = ssmax;
 366:     n = 0;
 367:     for (s=0; s<=n; s++)  {
 368:     dprintf("s = %d\n", s, NULL, NULL);
 369:         ind = 0;
 370:         numtrans = 0;
 371:         finflg = 0;
 372:         if (*(state[s] + *state[s]) == line) {      /* s final? */
 373:             finflg = 1;
 374:             goto tenter;
 375:         }
 376:         spmax = spinit;
 377:         ssmax = ssinit;
 378:         ptr = state[s];
 379:         num = *ptr;
 380:         for (i=0; i<num; i++) {
 381:             curpos = *(++ptr);
 382:             if (iposns[curpos] != 1 && index[curpos] != 1) {
 383:                 index[curpos] = 1;
 384:                 sposns[spmax++] = curpos;
 385:             }
 386:             cp = point[curpos];
 387:             switch (type(cp)) {
 388:                 case CHAR:
 389:                     k = (int) right(cp);
 390:                     if (isyms[k] == 0 && symbol[k] == 0) {
 391:                         symbol[k] = 1;
 392:                         ssyms[ssmax++] = k;
 393:                     }
 394:                     break;
 395:                 case DOT:
 396:                     for (k=1; k<NCHARS; k++) {
 397:                         if (k != HAT) {
 398:                             if (isyms[k] == 0 && symbol[k] == 0) {
 399:                                 symbol[k] = 1;
 400:                                 ssyms[ssmax++] = k;
 401:                             }
 402:                         }
 403:                     }
 404:                     break;
 405:                 case CCL:
 406:                     for (p = (char *) right(cp); *p; p++) {
 407:                         if (*p != HAT) {
 408:                             if (isyms[*p] == 0 && symbol[*p] == 0) {
 409:                                 symbol[*p] = 1;
 410:                                 ssyms[ssmax++] = *p;
 411:                             }
 412:                         }
 413:                     }
 414:                     break;
 415:                 case NCCL:
 416:                     for (k=1; k<NCHARS; k++) {
 417:                         if (k != HAT && !member(k, (char *) right(cp))) {
 418:                             if (isyms[k] == 0 && symbol[k] == 0) {
 419:                                 symbol[k] = 1;
 420:                                 ssyms[ssmax++] = k;
 421:                             }
 422:                         }
 423:                     }
 424:             }
 425:         }
 426:         for (j=0; j<ssmax; j++) {   /* nextstate(s, ssyms[j]) */
 427:             c = ssyms[j];
 428:             symbol[c] = 0;
 429:             setcnt = 0;
 430:             for (k=0; k<=line; k++) setvec[k] = 0;
 431:             for (i=0; i<spmax; i++) {
 432:                 index[sposns[i]] = 0;
 433:                 cp = point[sposns[i]];
 434:                 if ((k = type(cp)) != FINAL)
 435:                     if (k == CHAR && c == (int) right(cp)
 436:                      || k == DOT
 437:                      || k == CCL && member(c, (char *) right(cp))
 438:                      || k == NCCL && !member(c, (char *) right(cp))) {
 439:                         ptr = foll[sposns[i]];
 440:                         num = *ptr;
 441:                         for (k=0; k<num; k++) {
 442:                             if (setvec[*(++ptr)] != 1
 443:                                 && iposns[*ptr] != 1) {
 444:                                 setvec[*ptr] = 1;
 445:                                 setcnt++;
 446:                             }
 447:                         }
 448:                     }
 449:             } /* end nextstate */
 450:             if (notin(state, n, &prev)) {
 451:                 if (n >= NSTATES) {
 452:                     dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
 453:                     overflo();
 454:                 }
 455:                 state[++n] = add(setcnt);
 456:                 dprintf("	delta(%d,%o) = %d", s,c,n);
 457:                 dprintf(", ind = %d\n", ind+1, NULL, NULL);
 458:                 fatab[++ind] = c;
 459:                 fatab[++ind] = n;
 460:                 numtrans++;
 461:             }
 462:             else {
 463:                 if (prev != 0) {
 464:                     dprintf("	delta(%d,%o) = %d", s,c,prev);
 465:                     dprintf(", ind = %d\n", ind+1, NULL, NULL);
 466:                     fatab[++ind] = c;
 467:                     fatab[++ind] = prev;
 468:                     numtrans++;
 469:                 }
 470:             }
 471:         }
 472:     tenter:
 473:         if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
 474:             overflo();
 475:         where[s] = pfa;
 476:         if (finflg)
 477:             pfa->cch = -1;      /* s is a final state */
 478:         else
 479:             pfa->cch = numtrans;
 480:         pfa->st = 0;
 481:         for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
 482:             pfa->cch = fatab[2*i-1];
 483:             pfa->st = (struct fa *) fatab[2*i];
 484:         }
 485:     }
 486:     for (i=0; i<=n; i++) {
 487:         xfree(state[i]);    /* free state[i] */
 488:         pfa = where[i];
 489:         pfa->st = where[0];
 490:         dprintf("state %d: (%o)\n", i, pfa, NULL);
 491:         dprintf("	numtrans = %d,	default = %o\n", pfa->cch, pfa->st, NULL);
 492:         for (k=1; k<=pfa->cch; k++) {
 493:             (pfa+k)->st = where[ (int) (pfa+k)->st];
 494:             dprintf("	char = %o,	nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
 495:         }
 496:     }
 497:     pfa = where[0];
 498:     if ((num = pfa->cch) < 0)
 499:         return(where[0]);
 500:     for (pfa += num; num; num--, pfa--)
 501:         if (pfa->cch == HAT) {
 502:             return(pfa->st);
 503:         }
 504:     return(where[0]);
 505: }
 506: 
 507: match(pfa, p)
 508: register struct fa *pfa;
 509: register char *p;
 510: {
 511:     register count;
 512:     char c;
 513:     if (p == 0) return(0);
 514:     if (pfa->cch == 1) {        /* fast test for first character, if possible */
 515:         c = (++pfa)->cch;
 516:         do
 517:             if (c == *p) {
 518:                 p++;
 519:                 pfa = pfa->st;
 520:                 goto adv;
 521:             }
 522:         while (*p++ != 0);
 523:         return(0);
 524:     }
 525:    adv: if ((count = pfa->cch) < 0) return(1);
 526:     do {
 527:         for (pfa += count; count; count--, pfa--)
 528:             if (pfa->cch == *p) {
 529:                 break;
 530:             }
 531:         pfa = pfa->st;
 532:         if ((count = pfa->cch) < 0) return(1);
 533:     } while(*p++ != 0);
 534:     return(0);
 535: }

Defined functions

add defined in line 272; used 3 times
cclenter defined in line 108; used 2 times
cfoll defined in line 144; used 4 times
cgotofn defined in line 288; used 2 times
  • in line 6, 55
first defined in line 176; used 8 times
follow defined in line 210; used 5 times
freetr defined in line 85; used 4 times
makedfa defined in line 41; used 2 times
match defined in line 507; used 1 times
member defined in line 245; used 5 times
notin defined in line 254; used 2 times
overflo defined in line 139; used 5 times
penter defined in line 60; used 4 times

Defined variables

chars defined in line 33; used 7 times
foll defined in line 32; used 7 times
line defined in line 38; used 11 times
point defined in line 35; used 5 times
setcnt defined in line 37; used 10 times
setvec defined in line 34; used 12 times
state defined in line 31; used 8 times

Defined struct's

fa defined in line 26; used 21 times

Defined macros

LEAF defined in line 16; used 4 times
MAXLIN defined in line 7; used 9 times
NCHARS defined in line 8; used 8 times
NSTATES defined in line 9; used 3 times
UNARY defined in line 17; used 3 times
left defined in line 12; used 20 times
parent defined in line 14; used 4 times
right defined in line 13; used 17 times
type defined in line 11; used 14 times
Last modified: 1981-07-10
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 1379
Valid CSS Valid XHTML 1.0 Strict