1: #ifndef lint 2: static char sccsid[] = "@(#)b.c 4.2 8/11/83"; 3: #endif 4: 5: #include "awk.def" 6: #include "stdio.h" 7: #include "awk.h" 8: 9: extern node *op2(); 10: extern struct fa *cgotofn(); 11: #define MAXLIN 256 12: #define NCHARS 128 13: #define NSTATES 256 14: 15: #define type(v) v->nobj 16: #define left(v) v->narg[0] 17: #define right(v) v->narg[1] 18: #define parent(v) v->nnext 19: 20: #define LEAF case CCL: case NCCL: case CHAR: case DOT: 21: #define UNARY case FINAL: case STAR: case PLUS: case QUEST: 22: 23: /* encoding in tree nodes: 24: leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value 25: unary (FINAL, STAR, PLUS, QUEST): left is child, right is null 26: binary (CAT, OR): left and right are children 27: parent contains pointer to parent 28: */ 29: 30: struct fa { 31: int cch; 32: struct fa *st; 33: }; 34: 35: int *state[NSTATES]; 36: int *foll[MAXLIN]; 37: char chars[MAXLIN]; 38: int setvec[MAXLIN]; 39: node *point[MAXLIN]; 40: 41: int setcnt; 42: int line; 43: 44: 45: struct fa *makedfa(p) /* returns dfa for tree pointed to by p */ 46: node *p; 47: { 48: node *p1; 49: struct fa *fap; 50: p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p); 51: /* put DOT STAR in front of reg. exp. */ 52: p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */ 53: 54: line = 0; 55: penter(p1); /* enter parent pointers and leaf indices */ 56: point[line] = p1; /* FINAL node */ 57: setvec[0] = 1; /* for initial DOT STAR */ 58: cfoll(p1); /* set up follow sets */ 59: fap = cgotofn(); 60: freetr(p1); /* add this when alloc works */ 61: return(fap); 62: } 63: 64: penter(p) /* set up parent pointers and leaf indices */ 65: node *p; 66: { 67: switch(type(p)) { 68: LEAF 69: left(p) = (node *) line; 70: point[line++] = p; 71: break; 72: UNARY 73: penter(left(p)); 74: parent(left(p)) = p; 75: break; 76: case CAT: 77: case OR: 78: penter(left(p)); 79: penter(right(p)); 80: parent(left(p)) = p; 81: parent(right(p)) = p; 82: break; 83: default: 84: error(FATAL, "unknown type %d in penter\n", type(p)); 85: break; 86: } 87: } 88: 89: freetr(p) /* free parse tree and follow sets */ 90: node *p; 91: { 92: switch(type(p)) { 93: LEAF 94: xfree(foll[(int) left(p)]); 95: xfree(p); 96: break; 97: UNARY 98: freetr(left(p)); 99: xfree(p); 100: break; 101: case CAT: 102: case OR: 103: freetr(left(p)); 104: freetr(right(p)); 105: xfree(p); 106: break; 107: default: 108: error(FATAL, "unknown type %d in freetr", type(p)); 109: break; 110: } 111: } 112: char *cclenter(p) 113: register char *p; 114: { 115: register i, c; 116: char *op; 117: 118: op = p; 119: i = 0; 120: while ((c = *p++) != 0) { 121: if (c == '-' && i > 0 && chars[i-1] != 0) { 122: if (*p != 0) { 123: c = chars[i-1]; 124: while (c < *p) { 125: if (i >= MAXLIN) 126: overflo(); 127: chars[i++] = ++c; 128: } 129: p++; 130: continue; 131: } 132: } 133: if (i >= MAXLIN) 134: overflo(); 135: chars[i++] = c; 136: } 137: chars[i++] = '\0'; 138: dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL); 139: xfree(op); 140: return(tostring(chars)); 141: } 142: 143: overflo() 144: { 145: error(FATAL, "regular expression too long\n"); 146: } 147: 148: cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ 149: register node *v; 150: { 151: register i; 152: int prev; 153: int *add(); 154: 155: switch(type(v)) { 156: LEAF 157: setcnt = 0; 158: for (i=1; i<=line; i++) 159: setvec[i] = 0; 160: follow(v); 161: if (notin(foll, ( (int) left(v))-1, &prev)) { 162: foll[(int) left(v)] = add(setcnt); 163: } 164: else 165: foll[ (int) left(v)] = foll[prev]; 166: break; 167: UNARY 168: cfoll(left(v)); 169: break; 170: case CAT: 171: case OR: 172: cfoll(left(v)); 173: cfoll(right(v)); 174: break; 175: default: 176: error(FATAL, "unknown type %d in cfoll", type(v)); 177: } 178: } 179: 180: first(p) /* collects initially active leaves of p into setvec */ 181: register node *p; /* returns 0 or 1 depending on whether p matches empty string */ 182: { 183: register b; 184: 185: switch(type(p)) { 186: LEAF 187: if (setvec[(int) left(p)] != 1) { 188: setvec[(int) left(p)] = 1; 189: setcnt++; 190: } 191: if (type(p) == CCL && (*(char *) right(p)) == '\0') 192: return(0); /* empty CCL */ 193: else return(1); 194: case FINAL: 195: case PLUS: 196: if (first(left(p)) == 0) return(0); 197: return(1); 198: case STAR: 199: case QUEST: 200: first(left(p)); 201: return(0); 202: case CAT: 203: if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 204: return(1); 205: case OR: 206: b = first(right(p)); 207: if (first(left(p)) == 0 || b == 0) return(0); 208: return(1); 209: } 210: error(FATAL, "unknown type %d in first\n", type(p)); 211: return(-1); 212: } 213: 214: follow(v) 215: node *v; /* collects leaves that can follow v into setvec */ 216: { 217: node *p; 218: 219: if (type(v) == FINAL) 220: return; 221: p = parent(v); 222: switch (type(p)) { 223: case STAR: 224: case PLUS: first(v); 225: follow(p); 226: return; 227: 228: case OR: 229: case QUEST: follow(p); 230: return; 231: 232: case CAT: if (v == left(p)) { /* v is left child of p */ 233: if (first(right(p)) == 0) { 234: follow(p); 235: return; 236: } 237: } 238: else /* v is right child */ 239: follow(p); 240: return; 241: case FINAL: if (setvec[line] != 1) { 242: setvec[line] = 1; 243: setcnt++; 244: } 245: return; 246: } 247: } 248: 249: member(c, s) /* is c in s? */ 250: register char c, *s; 251: { 252: while (*s) 253: if (c == *s++) 254: return(1); 255: return(0); 256: } 257: 258: notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ 259: int **array; 260: int *prev; { 261: register i, j; 262: int *ptr; 263: for (i=0; i<=n; i++) { 264: ptr = array[i]; 265: if (*ptr == setcnt) { 266: for (j=0; j < setcnt; j++) 267: if (setvec[*(++ptr)] != 1) goto nxt; 268: *prev = i; 269: return(0); 270: } 271: nxt: ; 272: } 273: return(1); 274: } 275: 276: int *add(n) { /* remember setvec */ 277: int *ptr, *p; 278: register i; 279: if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL) 280: overflo(); 281: *ptr = n; 282: dprintf("add(%d)\n", n, NULL, NULL); 283: for (i=1; i <= line; i++) 284: if (setvec[i] == 1) { 285: *(++ptr) = i; 286: dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); 287: } 288: dprintf("\n", NULL, NULL, NULL); 289: return(p); 290: } 291: 292: struct fa *cgotofn() 293: { 294: register i, k; 295: register int *ptr; 296: char c; 297: char *p; 298: node *cp; 299: int j, n, s, ind, numtrans; 300: int finflg; 301: int curpos, num, prev; 302: struct fa *where[NSTATES]; 303: 304: int fatab[257]; 305: struct fa *pfa; 306: 307: char index[MAXLIN]; 308: char iposns[MAXLIN]; 309: int sposns[MAXLIN]; 310: int spmax, spinit; 311: 312: char symbol[NCHARS]; 313: char isyms[NCHARS]; 314: char ssyms[NCHARS]; 315: int ssmax, ssinit; 316: 317: for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0; 318: for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0; 319: setcnt = 0; 320: /* compute initial positions and symbols of state 0 */ 321: ssmax = 0; 322: ptr = state[0] = foll[0]; 323: spinit = *ptr; 324: for (i=0; i<spinit; i++) { 325: curpos = *(++ptr); 326: sposns[i] = curpos; 327: iposns[curpos] = 1; 328: cp = point[curpos]; 329: dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos); 330: switch (type(cp)) { 331: case CHAR: 332: k = (int) right(cp); 333: if (isyms[k] != 1) { 334: isyms[k] = 1; 335: ssyms[ssmax++] = k; 336: } 337: break; 338: case DOT: 339: for (k=1; k<NCHARS; k++) { 340: if (k != HAT) { 341: if (isyms[k] != 1) { 342: isyms[k] = 1; 343: ssyms[ssmax++] = k; 344: } 345: } 346: } 347: break; 348: case CCL: 349: for (p = (char *) right(cp); *p; p++) { 350: if (*p != HAT) { 351: if (isyms[*p] != 1) { 352: isyms[*p] = 1; 353: ssyms[ssmax++] = *p; 354: } 355: } 356: } 357: break; 358: case NCCL: 359: for (k=1; k<NCHARS; k++) { 360: if (k != HAT && !member(k, (char *) right(cp))) { 361: if (isyms[k] != 1) { 362: isyms[k] = 1; 363: ssyms[ssmax++] = k; 364: } 365: } 366: } 367: } 368: } 369: ssinit = ssmax; 370: n = 0; 371: for (s=0; s<=n; s++) { 372: dprintf("s = %d\n", s, NULL, NULL); 373: ind = 0; 374: numtrans = 0; 375: finflg = 0; 376: if (*(state[s] + *state[s]) == line) { /* s final? */ 377: finflg = 1; 378: goto tenter; 379: } 380: spmax = spinit; 381: ssmax = ssinit; 382: ptr = state[s]; 383: num = *ptr; 384: for (i=0; i<num; i++) { 385: curpos = *(++ptr); 386: if (iposns[curpos] != 1 && index[curpos] != 1) { 387: index[curpos] = 1; 388: sposns[spmax++] = curpos; 389: } 390: cp = point[curpos]; 391: switch (type(cp)) { 392: case CHAR: 393: k = (int) right(cp); 394: if (isyms[k] == 0 && symbol[k] == 0) { 395: symbol[k] = 1; 396: ssyms[ssmax++] = k; 397: } 398: break; 399: case DOT: 400: for (k=1; k<NCHARS; k++) { 401: if (k != HAT) { 402: if (isyms[k] == 0 && symbol[k] == 0) { 403: symbol[k] = 1; 404: ssyms[ssmax++] = k; 405: } 406: } 407: } 408: break; 409: case CCL: 410: for (p = (char *) right(cp); *p; p++) { 411: if (*p != HAT) { 412: if (isyms[*p] == 0 && symbol[*p] == 0) { 413: symbol[*p] = 1; 414: ssyms[ssmax++] = *p; 415: } 416: } 417: } 418: break; 419: case NCCL: 420: for (k=1; k<NCHARS; k++) { 421: if (k != HAT && !member(k, (char *) right(cp))) { 422: if (isyms[k] == 0 && symbol[k] == 0) { 423: symbol[k] = 1; 424: ssyms[ssmax++] = k; 425: } 426: } 427: } 428: } 429: } 430: for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */ 431: c = ssyms[j]; 432: symbol[c] = 0; 433: setcnt = 0; 434: for (k=0; k<=line; k++) setvec[k] = 0; 435: for (i=0; i<spmax; i++) { 436: index[sposns[i]] = 0; 437: cp = point[sposns[i]]; 438: if ((k = type(cp)) != FINAL) 439: if (k == CHAR && c == (int) right(cp) 440: || k == DOT 441: || k == CCL && member(c, (char *) right(cp)) 442: || k == NCCL && !member(c, (char *) right(cp))) { 443: ptr = foll[sposns[i]]; 444: num = *ptr; 445: for (k=0; k<num; k++) { 446: if (setvec[*(++ptr)] != 1 447: && iposns[*ptr] != 1) { 448: setvec[*ptr] = 1; 449: setcnt++; 450: } 451: } 452: } 453: } /* end nextstate */ 454: if (notin(state, n, &prev)) { 455: if (n >= NSTATES) { 456: dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); 457: overflo(); 458: } 459: state[++n] = add(setcnt); 460: dprintf(" delta(%d,%o) = %d", s,c,n); 461: dprintf(", ind = %d\n", ind+1, NULL, NULL); 462: fatab[++ind] = c; 463: fatab[++ind] = n; 464: numtrans++; 465: } 466: else { 467: if (prev != 0) { 468: dprintf(" delta(%d,%o) = %d", s,c,prev); 469: dprintf(", ind = %d\n", ind+1, NULL, NULL); 470: fatab[++ind] = c; 471: fatab[++ind] = prev; 472: numtrans++; 473: } 474: } 475: } 476: tenter: 477: if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL) 478: overflo(); 479: where[s] = pfa; 480: if (finflg) 481: pfa->cch = -1; /* s is a final state */ 482: else 483: pfa->cch = numtrans; 484: pfa->st = 0; 485: for (i=1, pfa += 1; i<=numtrans; i++, pfa++) { 486: pfa->cch = fatab[2*i-1]; 487: pfa->st = (struct fa *) fatab[2*i]; 488: } 489: } 490: for (i=0; i<=n; i++) { 491: xfree(state[i]); /* free state[i] */ 492: pfa = where[i]; 493: pfa->st = where[0]; 494: dprintf("state %d: (%o)\n", i, pfa, NULL); 495: dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL); 496: for (k=1; k<=pfa->cch; k++) { 497: (pfa+k)->st = where[ (int) (pfa+k)->st]; 498: dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL); 499: } 500: } 501: pfa = where[0]; 502: if ((num = pfa->cch) < 0) 503: return(where[0]); 504: for (pfa += num; num; num--, pfa--) 505: if (pfa->cch == HAT) { 506: return(pfa->st); 507: } 508: return(where[0]); 509: } 510: 511: match(pfa, p) 512: register struct fa *pfa; 513: register char *p; 514: { 515: register count; 516: char c; 517: if (p == 0) return(0); 518: if (pfa->cch == 1) { /* fast test for first character, if possible */ 519: c = (++pfa)->cch; 520: do 521: if (c == *p) { 522: p++; 523: pfa = pfa->st; 524: goto adv; 525: } 526: while (*p++ != 0); 527: return(0); 528: } 529: adv: if ((count = pfa->cch) < 0) return(1); 530: do { 531: for (pfa += count; count; count--, pfa--) 532: if (pfa->cch == *p) { 533: break; 534: } 535: pfa = pfa->st; 536: if ((count = pfa->cch) < 0) return(1); 537: } while(*p++ != 0); 538: return(0); 539: }