1: #ifndef lint
   2: static char sccsid[] = "@(#)b.c	4.2 8/11/83";
   3: #endif
   4: 
   5: #include "awk.def"
   6: #include "stdio.h"
   7: #include "awk.h"
   8: 
   9: extern node *op2();
  10: extern struct fa *cgotofn();
  11: #define MAXLIN 256
  12: #define NCHARS 128
  13: #define NSTATES 256
  14: 
  15: #define type(v) v->nobj
  16: #define left(v) v->narg[0]
  17: #define right(v)    v->narg[1]
  18: #define parent(v)   v->nnext
  19: 
  20: #define LEAF    case CCL: case NCCL: case CHAR: case DOT:
  21: #define UNARY   case FINAL: case STAR: case PLUS: case QUEST:
  22: 
  23: /* encoding in tree nodes:
  24: 	leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
  25: 	unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
  26: 	binary (CAT, OR): left and right are children
  27: 	parent contains pointer to parent
  28: */
  29: 
  30: struct fa {
  31:     int cch;
  32:     struct fa *st;
  33: };
  34: 
  35: int *state[NSTATES];
  36: int *foll[MAXLIN];
  37: char    chars[MAXLIN];
  38: int setvec[MAXLIN];
  39: node    *point[MAXLIN];
  40: 
  41: int setcnt;
  42: int line;
  43: 
  44: 
  45: struct fa *makedfa(p)   /* returns dfa for tree pointed to by p */
  46: node *p;
  47: {
  48:     node *p1;
  49:     struct fa *fap;
  50:     p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
  51:         /* put DOT STAR in front of reg. exp. */
  52:     p1 = op2(FINAL, p1, (node *) 0);        /* install FINAL node */
  53: 
  54:     line = 0;
  55:     penter(p1); /* enter parent pointers and leaf indices */
  56:     point[line] = p1;   /* FINAL node */
  57:     setvec[0] = 1;      /* for initial DOT STAR */
  58:     cfoll(p1);  /* set up follow sets */
  59:     fap = cgotofn();
  60:     freetr(p1); /* add this when alloc works */
  61:     return(fap);
  62: }
  63: 
  64: penter(p)   /* set up parent pointers and leaf indices */
  65: node *p;
  66: {
  67:     switch(type(p)) {
  68:         LEAF
  69:             left(p) = (node *) line;
  70:             point[line++] = p;
  71:             break;
  72:         UNARY
  73:             penter(left(p));
  74:             parent(left(p)) = p;
  75:             break;
  76:         case CAT:
  77:         case OR:
  78:             penter(left(p));
  79:             penter(right(p));
  80:             parent(left(p)) = p;
  81:             parent(right(p)) = p;
  82:             break;
  83:         default:
  84:             error(FATAL, "unknown type %d in penter\n", type(p));
  85:             break;
  86:     }
  87: }
  88: 
  89: freetr(p)   /* free parse tree and follow sets */
  90: node *p;
  91: {
  92:     switch(type(p)) {
  93:         LEAF
  94:             xfree(foll[(int) left(p)]);
  95:             xfree(p);
  96:             break;
  97:         UNARY
  98:             freetr(left(p));
  99:             xfree(p);
 100:             break;
 101:         case CAT:
 102:         case OR:
 103:             freetr(left(p));
 104:             freetr(right(p));
 105:             xfree(p);
 106:             break;
 107:         default:
 108:             error(FATAL, "unknown type %d in freetr", type(p));
 109:             break;
 110:     }
 111: }
 112: char *cclenter(p)
 113: register char *p;
 114: {
 115:     register i, c;
 116:     char *op;
 117: 
 118:     op = p;
 119:     i = 0;
 120:     while ((c = *p++) != 0) {
 121:         if (c == '-' && i > 0 && chars[i-1] != 0) {
 122:             if (*p != 0) {
 123:                 c = chars[i-1];
 124:                 while (c < *p) {
 125:                     if (i >= MAXLIN)
 126:                         overflo();
 127:                     chars[i++] = ++c;
 128:                 }
 129:                 p++;
 130:                 continue;
 131:             }
 132:         }
 133:         if (i >= MAXLIN)
 134:             overflo();
 135:         chars[i++] = c;
 136:     }
 137:     chars[i++] = '\0';
 138:     dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
 139:     xfree(op);
 140:     return(tostring(chars));
 141: }
 142: 
 143: overflo()
 144: {
 145:     error(FATAL, "regular expression too long\n");
 146: }
 147: 
 148: cfoll(v)        /* enter follow set of each leaf of vertex v into foll[leaf] */
 149: register node *v;
 150: {
 151:     register i;
 152:     int prev;
 153:     int *add();
 154: 
 155:     switch(type(v)) {
 156:         LEAF
 157:             setcnt = 0;
 158:             for (i=1; i<=line; i++)
 159:                 setvec[i] = 0;
 160:             follow(v);
 161:             if (notin(foll, ( (int) left(v))-1, &prev)) {
 162:                 foll[(int) left(v)] = add(setcnt);
 163:             }
 164:             else
 165:                 foll[ (int) left(v)] = foll[prev];
 166:             break;
 167:         UNARY
 168:             cfoll(left(v));
 169:             break;
 170:         case CAT:
 171:         case OR:
 172:             cfoll(left(v));
 173:             cfoll(right(v));
 174:             break;
 175:         default:
 176:             error(FATAL, "unknown type %d in cfoll", type(v));
 177:     }
 178: }
 179: 
 180: first(p)            /* collects initially active leaves of p into setvec */
 181: register node *p;       /* returns 0 or 1 depending on whether p matches empty string */
 182: {
 183:     register b;
 184: 
 185:     switch(type(p)) {
 186:         LEAF
 187:             if (setvec[(int) left(p)] != 1) {
 188:                 setvec[(int) left(p)] = 1;
 189:                 setcnt++;
 190:             }
 191:             if (type(p) == CCL && (*(char *) right(p)) == '\0')
 192:                 return(0);      /* empty CCL */
 193:             else return(1);
 194:         case FINAL:
 195:         case PLUS:
 196:             if (first(left(p)) == 0) return(0);
 197:             return(1);
 198:         case STAR:
 199:         case QUEST:
 200:             first(left(p));
 201:             return(0);
 202:         case CAT:
 203:             if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
 204:             return(1);
 205:         case OR:
 206:             b = first(right(p));
 207:             if (first(left(p)) == 0 || b == 0) return(0);
 208:             return(1);
 209:     }
 210:     error(FATAL, "unknown type %d in first\n", type(p));
 211:     return(-1);
 212: }
 213: 
 214: follow(v)
 215: node *v;        /* collects leaves that can follow v into setvec */
 216: {
 217:     node *p;
 218: 
 219:     if (type(v) == FINAL)
 220:         return;
 221:     p = parent(v);
 222:     switch (type(p)) {
 223:         case STAR:
 224:         case PLUS:  first(v);
 225:                 follow(p);
 226:                 return;
 227: 
 228:         case OR:
 229:         case QUEST: follow(p);
 230:                 return;
 231: 
 232:         case CAT:   if (v == left(p)) { /* v is left child of p */
 233:                     if (first(right(p)) == 0) {
 234:                         follow(p);
 235:                         return;
 236:                     }
 237:                 }
 238:                 else        /* v is right child */
 239:                     follow(p);
 240:                 return;
 241:         case FINAL: if (setvec[line] != 1) {
 242:                     setvec[line] = 1;
 243:                     setcnt++;
 244:                 }
 245:                 return;
 246:     }
 247: }
 248: 
 249: member(c, s)    /* is c in s? */
 250: register char c, *s;
 251: {
 252:     while (*s)
 253:         if (c == *s++)
 254:             return(1);
 255:     return(0);
 256: }
 257: 
 258: notin(array, n, prev)       /* is setvec in array[0] thru array[n]? */
 259: int **array;
 260: int *prev; {
 261:     register i, j;
 262:     int *ptr;
 263:     for (i=0; i<=n; i++) {
 264:         ptr = array[i];
 265:         if (*ptr == setcnt) {
 266:             for (j=0; j < setcnt; j++)
 267:                 if (setvec[*(++ptr)] != 1) goto nxt;
 268:             *prev = i;
 269:             return(0);
 270:         }
 271:         nxt: ;
 272:     }
 273:     return(1);
 274: }
 275: 
 276: int *add(n) {       /* remember setvec */
 277:     int *ptr, *p;
 278:     register i;
 279:     if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
 280:         overflo();
 281:     *ptr = n;
 282:     dprintf("add(%d)\n", n, NULL, NULL);
 283:     for (i=1; i <= line; i++)
 284:         if (setvec[i] == 1) {
 285:             *(++ptr) = i;
 286:             dprintf("  ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
 287:         }
 288:     dprintf("\n", NULL, NULL, NULL);
 289:     return(p);
 290: }
 291: 
 292: struct fa *cgotofn()
 293: {
 294:     register i, k;
 295:     register int *ptr;
 296:     char c;
 297:     char *p;
 298:     node *cp;
 299:     int j, n, s, ind, numtrans;
 300:     int finflg;
 301:     int curpos, num, prev;
 302:     struct fa *where[NSTATES];
 303: 
 304:     int fatab[257];
 305:     struct fa *pfa;
 306: 
 307:     char index[MAXLIN];
 308:     char iposns[MAXLIN];
 309:     int sposns[MAXLIN];
 310:     int spmax, spinit;
 311: 
 312:     char symbol[NCHARS];
 313:     char isyms[NCHARS];
 314:     char ssyms[NCHARS];
 315:     int ssmax, ssinit;
 316: 
 317:     for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
 318:     for (i=0; i<NCHARS; i++)  isyms[i] = symbol[i] = 0;
 319:     setcnt = 0;
 320:     /* compute initial positions and symbols of state 0 */
 321:     ssmax = 0;
 322:     ptr = state[0] = foll[0];
 323:     spinit = *ptr;
 324:     for (i=0; i<spinit; i++) {
 325:         curpos = *(++ptr);
 326:         sposns[i] = curpos;
 327:         iposns[curpos] = 1;
 328:         cp = point[curpos];
 329:         dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
 330:         switch (type(cp)) {
 331:             case CHAR:
 332:                 k = (int) right(cp);
 333:                 if (isyms[k] != 1) {
 334:                     isyms[k] = 1;
 335:                     ssyms[ssmax++] = k;
 336:                 }
 337:                 break;
 338:             case DOT:
 339:                 for (k=1; k<NCHARS; k++) {
 340:                     if (k != HAT) {
 341:                         if (isyms[k] != 1) {
 342:                             isyms[k] = 1;
 343:                             ssyms[ssmax++] = k;
 344:                         }
 345:                     }
 346:                 }
 347:                 break;
 348:             case CCL:
 349:                 for (p = (char *) right(cp); *p; p++) {
 350:                     if (*p != HAT) {
 351:                         if (isyms[*p] != 1) {
 352:                             isyms[*p] = 1;
 353:                             ssyms[ssmax++] = *p;
 354:                         }
 355:                     }
 356:                 }
 357:                 break;
 358:             case NCCL:
 359:                 for (k=1; k<NCHARS; k++) {
 360:                     if (k != HAT && !member(k, (char *) right(cp))) {
 361:                         if (isyms[k] != 1) {
 362:                             isyms[k] = 1;
 363:                             ssyms[ssmax++] = k;
 364:                         }
 365:                     }
 366:                 }
 367:         }
 368:     }
 369:     ssinit = ssmax;
 370:     n = 0;
 371:     for (s=0; s<=n; s++)  {
 372:     dprintf("s = %d\n", s, NULL, NULL);
 373:         ind = 0;
 374:         numtrans = 0;
 375:         finflg = 0;
 376:         if (*(state[s] + *state[s]) == line) {      /* s final? */
 377:             finflg = 1;
 378:             goto tenter;
 379:         }
 380:         spmax = spinit;
 381:         ssmax = ssinit;
 382:         ptr = state[s];
 383:         num = *ptr;
 384:         for (i=0; i<num; i++) {
 385:             curpos = *(++ptr);
 386:             if (iposns[curpos] != 1 && index[curpos] != 1) {
 387:                 index[curpos] = 1;
 388:                 sposns[spmax++] = curpos;
 389:             }
 390:             cp = point[curpos];
 391:             switch (type(cp)) {
 392:                 case CHAR:
 393:                     k = (int) right(cp);
 394:                     if (isyms[k] == 0 && symbol[k] == 0) {
 395:                         symbol[k] = 1;
 396:                         ssyms[ssmax++] = k;
 397:                     }
 398:                     break;
 399:                 case DOT:
 400:                     for (k=1; k<NCHARS; k++) {
 401:                         if (k != HAT) {
 402:                             if (isyms[k] == 0 && symbol[k] == 0) {
 403:                                 symbol[k] = 1;
 404:                                 ssyms[ssmax++] = k;
 405:                             }
 406:                         }
 407:                     }
 408:                     break;
 409:                 case CCL:
 410:                     for (p = (char *) right(cp); *p; p++) {
 411:                         if (*p != HAT) {
 412:                             if (isyms[*p] == 0 && symbol[*p] == 0) {
 413:                                 symbol[*p] = 1;
 414:                                 ssyms[ssmax++] = *p;
 415:                             }
 416:                         }
 417:                     }
 418:                     break;
 419:                 case NCCL:
 420:                     for (k=1; k<NCHARS; k++) {
 421:                         if (k != HAT && !member(k, (char *) right(cp))) {
 422:                             if (isyms[k] == 0 && symbol[k] == 0) {
 423:                                 symbol[k] = 1;
 424:                                 ssyms[ssmax++] = k;
 425:                             }
 426:                         }
 427:                     }
 428:             }
 429:         }
 430:         for (j=0; j<ssmax; j++) {   /* nextstate(s, ssyms[j]) */
 431:             c = ssyms[j];
 432:             symbol[c] = 0;
 433:             setcnt = 0;
 434:             for (k=0; k<=line; k++) setvec[k] = 0;
 435:             for (i=0; i<spmax; i++) {
 436:                 index[sposns[i]] = 0;
 437:                 cp = point[sposns[i]];
 438:                 if ((k = type(cp)) != FINAL)
 439:                     if (k == CHAR && c == (int) right(cp)
 440:                      || k == DOT
 441:                      || k == CCL && member(c, (char *) right(cp))
 442:                      || k == NCCL && !member(c, (char *) right(cp))) {
 443:                         ptr = foll[sposns[i]];
 444:                         num = *ptr;
 445:                         for (k=0; k<num; k++) {
 446:                             if (setvec[*(++ptr)] != 1
 447:                                 && iposns[*ptr] != 1) {
 448:                                 setvec[*ptr] = 1;
 449:                                 setcnt++;
 450:                             }
 451:                         }
 452:                     }
 453:             } /* end nextstate */
 454:             if (notin(state, n, &prev)) {
 455:                 if (n >= NSTATES) {
 456:                     dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
 457:                     overflo();
 458:                 }
 459:                 state[++n] = add(setcnt);
 460:                 dprintf("	delta(%d,%o) = %d", s,c,n);
 461:                 dprintf(", ind = %d\n", ind+1, NULL, NULL);
 462:                 fatab[++ind] = c;
 463:                 fatab[++ind] = n;
 464:                 numtrans++;
 465:             }
 466:             else {
 467:                 if (prev != 0) {
 468:                     dprintf("	delta(%d,%o) = %d", s,c,prev);
 469:                     dprintf(", ind = %d\n", ind+1, NULL, NULL);
 470:                     fatab[++ind] = c;
 471:                     fatab[++ind] = prev;
 472:                     numtrans++;
 473:                 }
 474:             }
 475:         }
 476:     tenter:
 477:         if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
 478:             overflo();
 479:         where[s] = pfa;
 480:         if (finflg)
 481:             pfa->cch = -1;      /* s is a final state */
 482:         else
 483:             pfa->cch = numtrans;
 484:         pfa->st = 0;
 485:         for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
 486:             pfa->cch = fatab[2*i-1];
 487:             pfa->st = (struct fa *) fatab[2*i];
 488:         }
 489:     }
 490:     for (i=0; i<=n; i++) {
 491:         xfree(state[i]);    /* free state[i] */
 492:         pfa = where[i];
 493:         pfa->st = where[0];
 494:         dprintf("state %d: (%o)\n", i, pfa, NULL);
 495:         dprintf("	numtrans = %d,	default = %o\n", pfa->cch, pfa->st, NULL);
 496:         for (k=1; k<=pfa->cch; k++) {
 497:             (pfa+k)->st = where[ (int) (pfa+k)->st];
 498:             dprintf("	char = %o,	nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
 499:         }
 500:     }
 501:     pfa = where[0];
 502:     if ((num = pfa->cch) < 0)
 503:         return(where[0]);
 504:     for (pfa += num; num; num--, pfa--)
 505:         if (pfa->cch == HAT) {
 506:             return(pfa->st);
 507:         }
 508:     return(where[0]);
 509: }
 510: 
 511: match(pfa, p)
 512: register struct fa *pfa;
 513: register char *p;
 514: {
 515:     register count;
 516:     char c;
 517:     if (p == 0) return(0);
 518:     if (pfa->cch == 1) {        /* fast test for first character, if possible */
 519:         c = (++pfa)->cch;
 520:         do
 521:             if (c == *p) {
 522:                 p++;
 523:                 pfa = pfa->st;
 524:                 goto adv;
 525:             }
 526:         while (*p++ != 0);
 527:         return(0);
 528:     }
 529:    adv: if ((count = pfa->cch) < 0) return(1);
 530:     do {
 531:         for (pfa += count; count; count--, pfa--)
 532:             if (pfa->cch == *p) {
 533:                 break;
 534:             }
 535:         pfa = pfa->st;
 536:         if ((count = pfa->cch) < 0) return(1);
 537:     } while(*p++ != 0);
 538:     return(0);
 539: }

Defined functions

add defined in line 276; used 3 times
cclenter defined in line 112; used 2 times
cfoll defined in line 148; used 4 times
cgotofn defined in line 292; used 2 times
first defined in line 180; used 8 times
follow defined in line 214; used 5 times
freetr defined in line 89; used 4 times
makedfa defined in line 45; used 2 times
match defined in line 511; used 1 times
member defined in line 249; used 5 times
notin defined in line 258; used 2 times
overflo defined in line 143; used 5 times
penter defined in line 64; used 4 times

Defined variables

chars defined in line 37; used 7 times
foll defined in line 36; used 7 times
line defined in line 42; used 11 times
point defined in line 39; used 5 times
sccsid defined in line 2; never used
setcnt defined in line 41; used 10 times
setvec defined in line 38; used 12 times
state defined in line 35; used 8 times

Defined struct's

fa defined in line 30; used 21 times

Defined macros

LEAF defined in line 20; used 4 times
MAXLIN defined in line 11; used 9 times
NCHARS defined in line 12; used 8 times
NSTATES defined in line 13; used 3 times
UNARY defined in line 21; used 3 times
left defined in line 16; used 20 times
parent defined in line 18; used 4 times
right defined in line 17; used 17 times
type defined in line 15; used 14 times
Last modified: 1983-08-12
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 2370
Valid CSS Valid XHTML 1.0 Strict