1: #include "awk.def" 2: #include "stdio.h" 3: #include "awk.h" 4: 5: extern node *op2(); 6: extern struct fa *cgotofn(); 7: #define MAXLIN 256 8: #define NCHARS 128 9: #define NSTATES 256 10: 11: #define type(v) v->nobj 12: #define left(v) v->narg[0] 13: #define right(v) v->narg[1] 14: #define parent(v) v->nnext 15: 16: #define LEAF case CCL: case NCCL: case CHAR: case DOT: 17: #define UNARY case FINAL: case STAR: case PLUS: case QUEST: 18: 19: /* encoding in tree nodes: 20: leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value 21: unary (FINAL, STAR, PLUS, QUEST): left is child, right is null 22: binary (CAT, OR): left and right are children 23: parent contains pointer to parent 24: */ 25: 26: struct fa { 27: int cch; 28: struct fa *st; 29: }; 30: 31: int *state[NSTATES]; 32: int *foll[MAXLIN]; 33: char chars[MAXLIN]; 34: int setvec[MAXLIN]; 35: node *point[MAXLIN]; 36: 37: int setcnt; 38: int line; 39: 40: 41: struct fa *makedfa(p) /* returns dfa for tree pointed to by p */ 42: node *p; 43: { 44: node *p1; 45: struct fa *fap; 46: p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p); 47: /* put DOT STAR in front of reg. exp. */ 48: p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */ 49: 50: line = 0; 51: penter(p1); /* enter parent pointers and leaf indices */ 52: point[line] = p1; /* FINAL node */ 53: setvec[0] = 1; /* for initial DOT STAR */ 54: cfoll(p1); /* set up follow sets */ 55: fap = cgotofn(); 56: freetr(p1); /* add this when alloc works */ 57: return(fap); 58: } 59: 60: penter(p) /* set up parent pointers and leaf indices */ 61: node *p; 62: { 63: switch(type(p)) { 64: LEAF 65: left(p) = (node *) line; 66: point[line++] = p; 67: break; 68: UNARY 69: penter(left(p)); 70: parent(left(p)) = p; 71: break; 72: case CAT: 73: case OR: 74: penter(left(p)); 75: penter(right(p)); 76: parent(left(p)) = p; 77: parent(right(p)) = p; 78: break; 79: default: 80: error(FATAL, "unknown type %d in penter\n", type(p)); 81: break; 82: } 83: } 84: 85: freetr(p) /* free parse tree and follow sets */ 86: node *p; 87: { 88: switch(type(p)) { 89: LEAF 90: xfree(foll[(int) left(p)]); 91: xfree(p); 92: break; 93: UNARY 94: freetr(left(p)); 95: xfree(p); 96: break; 97: case CAT: 98: case OR: 99: freetr(left(p)); 100: freetr(right(p)); 101: xfree(p); 102: break; 103: default: 104: error(FATAL, "unknown type %d in freetr", type(p)); 105: break; 106: } 107: } 108: char *cclenter(p) 109: register char *p; 110: { 111: register i, c; 112: char *op; 113: 114: op = p; 115: i = 0; 116: while ((c = *p++) != 0) { 117: if (c == '-' && i > 0 && chars[i-1] != 0) { 118: if (*p != 0) { 119: c = chars[i-1]; 120: while (c < *p) { 121: if (i >= MAXLIN) 122: overflo(); 123: chars[i++] = ++c; 124: } 125: p++; 126: continue; 127: } 128: } 129: if (i >= MAXLIN) 130: overflo(); 131: chars[i++] = c; 132: } 133: chars[i++] = '\0'; 134: dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL); 135: xfree(op); 136: return(tostring(chars)); 137: } 138: 139: overflo() 140: { 141: error(FATAL, "regular expression too long\n"); 142: } 143: 144: cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ 145: register node *v; 146: { 147: register i; 148: int prev; 149: int *add(); 150: 151: switch(type(v)) { 152: LEAF 153: setcnt = 0; 154: for (i=1; i<=line; i++) 155: setvec[i] = 0; 156: follow(v); 157: if (notin(foll, ( (int) left(v))-1, &prev)) { 158: foll[(int) left(v)] = add(setcnt); 159: } 160: else 161: foll[ (int) left(v)] = foll[prev]; 162: break; 163: UNARY 164: cfoll(left(v)); 165: break; 166: case CAT: 167: case OR: 168: cfoll(left(v)); 169: cfoll(right(v)); 170: break; 171: default: 172: error(FATAL, "unknown type %d in cfoll", type(v)); 173: } 174: } 175: 176: first(p) /* collects initially active leaves of p into setvec */ 177: register node *p; /* returns 0 or 1 depending on whether p matches empty string */ 178: { 179: register b; 180: 181: switch(type(p)) { 182: LEAF 183: if (setvec[(int) left(p)] != 1) { 184: setvec[(int) left(p)] = 1; 185: setcnt++; 186: } 187: if (type(p) == CCL && (*(char *) right(p)) == '\0') 188: return(0); /* empty CCL */ 189: else return(1); 190: case FINAL: 191: case PLUS: 192: if (first(left(p)) == 0) return(0); 193: return(1); 194: case STAR: 195: case QUEST: 196: first(left(p)); 197: return(0); 198: case CAT: 199: if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 200: return(1); 201: case OR: 202: b = first(right(p)); 203: if (first(left(p)) == 0 || b == 0) return(0); 204: return(1); 205: } 206: error(FATAL, "unknown type %d in first\n", type(p)); 207: return(-1); 208: } 209: 210: follow(v) 211: node *v; /* collects leaves that can follow v into setvec */ 212: { 213: node *p; 214: 215: if (type(v) == FINAL) 216: return; 217: p = parent(v); 218: switch (type(p)) { 219: case STAR: 220: case PLUS: first(v); 221: follow(p); 222: return; 223: 224: case OR: 225: case QUEST: follow(p); 226: return; 227: 228: case CAT: if (v == left(p)) { /* v is left child of p */ 229: if (first(right(p)) == 0) { 230: follow(p); 231: return; 232: } 233: } 234: else /* v is right child */ 235: follow(p); 236: return; 237: case FINAL: if (setvec[line] != 1) { 238: setvec[line] = 1; 239: setcnt++; 240: } 241: return; 242: } 243: } 244: 245: member(c, s) /* is c in s? */ 246: register char c, *s; 247: { 248: while (*s) 249: if (c == *s++) 250: return(1); 251: return(0); 252: } 253: 254: notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ 255: int **array; 256: int *prev; { 257: register i, j; 258: int *ptr; 259: for (i=0; i<=n; i++) { 260: ptr = array[i]; 261: if (*ptr == setcnt) { 262: for (j=0; j < setcnt; j++) 263: if (setvec[*(++ptr)] != 1) goto nxt; 264: *prev = i; 265: return(0); 266: } 267: nxt: ; 268: } 269: return(1); 270: } 271: 272: int *add(n) { /* remember setvec */ 273: int *ptr, *p; 274: register i; 275: if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL) 276: overflo(); 277: *ptr = n; 278: dprintf("add(%d)\n", n, NULL, NULL); 279: for (i=1; i <= line; i++) 280: if (setvec[i] == 1) { 281: *(++ptr) = i; 282: dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); 283: } 284: dprintf("\n", NULL, NULL, NULL); 285: return(p); 286: } 287: 288: struct fa *cgotofn() 289: { 290: register i, k; 291: register int *ptr; 292: char c; 293: char *p; 294: node *cp; 295: int j, n, s, ind, numtrans; 296: int finflg; 297: int curpos, num, prev; 298: struct fa *where[NSTATES]; 299: 300: int fatab[257]; 301: struct fa *pfa; 302: 303: char index[MAXLIN]; 304: char iposns[MAXLIN]; 305: int sposns[MAXLIN]; 306: int spmax, spinit; 307: 308: char symbol[NCHARS]; 309: char isyms[NCHARS]; 310: char ssyms[NCHARS]; 311: int ssmax, ssinit; 312: 313: for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0; 314: for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0; 315: setcnt = 0; 316: /* compute initial positions and symbols of state 0 */ 317: ssmax = 0; 318: ptr = state[0] = foll[0]; 319: spinit = *ptr; 320: for (i=0; i<spinit; i++) { 321: curpos = *(++ptr); 322: sposns[i] = curpos; 323: iposns[curpos] = 1; 324: cp = point[curpos]; 325: dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos); 326: switch (type(cp)) { 327: case CHAR: 328: k = (int) right(cp); 329: if (isyms[k] != 1) { 330: isyms[k] = 1; 331: ssyms[ssmax++] = k; 332: } 333: break; 334: case DOT: 335: for (k=1; k<NCHARS; k++) { 336: if (k != HAT) { 337: if (isyms[k] != 1) { 338: isyms[k] = 1; 339: ssyms[ssmax++] = k; 340: } 341: } 342: } 343: break; 344: case CCL: 345: for (p = (char *) right(cp); *p; p++) { 346: if (*p != HAT) { 347: if (isyms[*p] != 1) { 348: isyms[*p] = 1; 349: ssyms[ssmax++] = *p; 350: } 351: } 352: } 353: break; 354: case NCCL: 355: for (k=1; k<NCHARS; k++) { 356: if (k != HAT && !member(k, (char *) right(cp))) { 357: if (isyms[k] != 1) { 358: isyms[k] = 1; 359: ssyms[ssmax++] = k; 360: } 361: } 362: } 363: } 364: } 365: ssinit = ssmax; 366: n = 0; 367: for (s=0; s<=n; s++) { 368: dprintf("s = %d\n", s, NULL, NULL); 369: ind = 0; 370: numtrans = 0; 371: finflg = 0; 372: if (*(state[s] + *state[s]) == line) { /* s final? */ 373: finflg = 1; 374: goto tenter; 375: } 376: spmax = spinit; 377: ssmax = ssinit; 378: ptr = state[s]; 379: num = *ptr; 380: for (i=0; i<num; i++) { 381: curpos = *(++ptr); 382: if (iposns[curpos] != 1 && index[curpos] != 1) { 383: index[curpos] = 1; 384: sposns[spmax++] = curpos; 385: } 386: cp = point[curpos]; 387: switch (type(cp)) { 388: case CHAR: 389: k = (int) right(cp); 390: if (isyms[k] == 0 && symbol[k] == 0) { 391: symbol[k] = 1; 392: ssyms[ssmax++] = k; 393: } 394: break; 395: case DOT: 396: for (k=1; k<NCHARS; k++) { 397: if (k != HAT) { 398: if (isyms[k] == 0 && symbol[k] == 0) { 399: symbol[k] = 1; 400: ssyms[ssmax++] = k; 401: } 402: } 403: } 404: break; 405: case CCL: 406: for (p = (char *) right(cp); *p; p++) { 407: if (*p != HAT) { 408: if (isyms[*p] == 0 && symbol[*p] == 0) { 409: symbol[*p] = 1; 410: ssyms[ssmax++] = *p; 411: } 412: } 413: } 414: break; 415: case NCCL: 416: for (k=1; k<NCHARS; k++) { 417: if (k != HAT && !member(k, (char *) right(cp))) { 418: if (isyms[k] == 0 && symbol[k] == 0) { 419: symbol[k] = 1; 420: ssyms[ssmax++] = k; 421: } 422: } 423: } 424: } 425: } 426: for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */ 427: c = ssyms[j]; 428: symbol[c] = 0; 429: setcnt = 0; 430: for (k=0; k<=line; k++) setvec[k] = 0; 431: for (i=0; i<spmax; i++) { 432: index[sposns[i]] = 0; 433: cp = point[sposns[i]]; 434: if ((k = type(cp)) != FINAL) 435: if (k == CHAR && c == (int) right(cp) 436: || k == DOT 437: || k == CCL && member(c, (char *) right(cp)) 438: || k == NCCL && !member(c, (char *) right(cp))) { 439: ptr = foll[sposns[i]]; 440: num = *ptr; 441: for (k=0; k<num; k++) { 442: if (setvec[*(++ptr)] != 1 443: && iposns[*ptr] != 1) { 444: setvec[*ptr] = 1; 445: setcnt++; 446: } 447: } 448: } 449: } /* end nextstate */ 450: if (notin(state, n, &prev)) { 451: if (n >= NSTATES) { 452: dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); 453: overflo(); 454: } 455: state[++n] = add(setcnt); 456: dprintf(" delta(%d,%o) = %d", s,c,n); 457: dprintf(", ind = %d\n", ind+1, NULL, NULL); 458: fatab[++ind] = c; 459: fatab[++ind] = n; 460: numtrans++; 461: } 462: else { 463: if (prev != 0) { 464: dprintf(" delta(%d,%o) = %d", s,c,prev); 465: dprintf(", ind = %d\n", ind+1, NULL, NULL); 466: fatab[++ind] = c; 467: fatab[++ind] = prev; 468: numtrans++; 469: } 470: } 471: } 472: tenter: 473: if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL) 474: overflo(); 475: where[s] = pfa; 476: if (finflg) 477: pfa->cch = -1; /* s is a final state */ 478: else 479: pfa->cch = numtrans; 480: pfa->st = 0; 481: for (i=1, pfa += 1; i<=numtrans; i++, pfa++) { 482: pfa->cch = fatab[2*i-1]; 483: pfa->st = (struct fa *) fatab[2*i]; 484: } 485: } 486: for (i=0; i<=n; i++) { 487: xfree(state[i]); /* free state[i] */ 488: pfa = where[i]; 489: pfa->st = where[0]; 490: dprintf("state %d: (%o)\n", i, pfa, NULL); 491: dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL); 492: for (k=1; k<=pfa->cch; k++) { 493: (pfa+k)->st = where[ (int) (pfa+k)->st]; 494: dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL); 495: } 496: } 497: pfa = where[0]; 498: if ((num = pfa->cch) < 0) 499: return(where[0]); 500: for (pfa += num; num; num--, pfa--) 501: if (pfa->cch == HAT) { 502: return(pfa->st); 503: } 504: return(where[0]); 505: } 506: 507: match(pfa, p) 508: register struct fa *pfa; 509: register char *p; 510: { 511: register count; 512: char c; 513: if (p == 0) return(0); 514: if (pfa->cch == 1) { /* fast test for first character, if possible */ 515: c = (++pfa)->cch; 516: do 517: if (c == *p) { 518: p++; 519: pfa = pfa->st; 520: goto adv; 521: } 522: while (*p++ != 0); 523: return(0); 524: } 525: adv: if ((count = pfa->cch) < 0) return(1); 526: do { 527: for (pfa += count; count; count--, pfa--) 528: if (pfa->cch == *p) { 529: break; 530: } 531: pfa = pfa->st; 532: if ((count = pfa->cch) < 0) return(1); 533: } while(*p++ != 0); 534: return(0); 535: }