1: /* 2: * egrep -- print lines containing (or not containing) a regular expression 3: * 4: * status returns: 5: * 0 - ok, and some matches 6: * 1 - ok, but no matches 7: * 2 - some error 8: */ 9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10: %left OR 11: %left CHAR DOT CCL NCCL '(' 12: %left CAT 13: %left STAR PLUS QUEST 14: 15: %{ 16: #include <stdio.h> 17: 18: #define MAXLIN 350 19: #define MAXPOS 4000 20: #define NCHARS 128 21: #define NSTATES 128 22: #define FINAL -1 23: char gotofn[NSTATES][NCHARS]; 24: int state[NSTATES]; 25: char out[NSTATES]; 26: int line 1; 27: int name[MAXLIN]; 28: int left[MAXLIN]; 29: int right[MAXLIN]; 30: int parent[MAXLIN]; 31: int foll[MAXLIN]; 32: int positions[MAXPOS]; 33: char chars[MAXLIN]; 34: int nxtpos; 35: int nxtchar 0; 36: int tmpstat[MAXLIN]; 37: int initstat[MAXLIN]; 38: int xstate; 39: int count; 40: int icount; 41: char *input; 42: 43: long lnum; 44: int bflag; 45: int cflag; 46: int fflag; 47: int lflag; 48: int nflag; 49: int hflag = 1; 50: int sflag; 51: int vflag; 52: int nfile; 53: long blkno; 54: long tln; 55: int nsucc; 56: 57: int f; 58: int fname; 59: int cantopen; 60: 61: FILE *ffile; 62: %} 63: 64: %% 65: s: t 66: ={ unary(FINAL, $1); 67: line--; 68: } 69: ; 70: t: b r 71: ={ $$ = node(CAT, $1, $2); } 72: | OR b r OR 73: ={ $$ = node(CAT, $2, $3); } 74: | OR b r 75: ={ $$ = node(CAT, $2, $3); } 76: | b r OR 77: ={ $$ = node(CAT, $1, $2); } 78: ; 79: b: 80: ={ $$ = enter(DOT); 81: $$ = unary(STAR, $$); } 82: ; 83: r: CHAR 84: ={ $$ = enter($1); } 85: | DOT 86: ={ $$ = enter(DOT); } 87: | CCL 88: ={ $$ = cclenter(CCL); } 89: | NCCL 90: ={ $$ = cclenter(NCCL); } 91: ; 92: 93: r: r OR r 94: ={ $$ = node(OR, $1, $3); } 95: | r r %prec CAT 96: ={ $$ = node(CAT, $1, $2); } 97: | r STAR 98: ={ $$ = unary(STAR, $1); } 99: | r PLUS 100: ={ $$ = unary(PLUS, $1); } 101: | r QUEST 102: ={ $$ = unary(QUEST, $1); } 103: | '(' r ')' 104: ={ $$ = $2; } 105: | error 106: ; 107: 108: %% 109: yyerror(s) { 110: fprintf(stderr, "egrep: %s\n", s); 111: exit(2); 112: } 113: 114: yylex() { 115: extern int yylval; 116: int cclcnt, x; 117: register char c, d; 118: switch(c = nextch()) { 119: case '$': 120: case '^': c = '\n'; 121: goto defchar; 122: case '|': return (OR); 123: case '*': return (STAR); 124: case '+': return (PLUS); 125: case '?': return (QUEST); 126: case '(': return (c); 127: case ')': return (c); 128: case '.': return (DOT); 129: case '\0': return (0); 130: case '\n': return (OR); 131: case '[': 132: x = CCL; 133: cclcnt = 0; 134: count = nxtchar++; 135: if ((c = nextch()) == '^') { 136: x = NCCL; 137: c = nextch(); 138: } 139: do { 140: if (c == '\0') synerror(); 141: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 142: if ((d = nextch()) != 0) { 143: c = chars[nxtchar-1]; 144: while (c < d) { 145: if (nxtchar >= MAXLIN) overflo(); 146: chars[nxtchar++] = ++c; 147: cclcnt++; 148: } 149: continue; 150: } 151: } 152: if (nxtchar >= MAXLIN) overflo(); 153: chars[nxtchar++] = c; 154: cclcnt++; 155: } while ((c = nextch()) != ']'); 156: chars[count] = cclcnt; 157: return (x); 158: case '\\': 159: if ((c = nextch()) == '\0') synerror(); 160: defchar: 161: default: yylval = c; return (CHAR); 162: } 163: } 164: nextch() { 165: register char c; 166: if (fflag) { 167: if ((c = getc(ffile)) == EOF) return(0); 168: } 169: else c = *input++; 170: return(c); 171: } 172: 173: synerror() { 174: fprintf(stderr, "egrep: syntax error\n"); 175: exit(2); 176: } 177: 178: enter(x) int x; { 179: if(line >= MAXLIN) overflo(); 180: name[line] = x; 181: left[line] = 0; 182: right[line] = 0; 183: return(line++); 184: } 185: 186: cclenter(x) int x; { 187: register linno; 188: linno = enter(x); 189: right[linno] = count; 190: return (linno); 191: } 192: 193: node(x, l, r) { 194: if(line >= MAXLIN) overflo(); 195: name[line] = x; 196: left[line] = l; 197: right[line] = r; 198: parent[l] = line; 199: parent[r] = line; 200: return(line++); 201: } 202: 203: unary(x, d) { 204: if(line >= MAXLIN) overflo(); 205: name[line] = x; 206: left[line] = d; 207: right[line] = 0; 208: parent[d] = line; 209: return(line++); 210: } 211: overflo() { 212: fprintf(stderr, "egrep: regular expression too long\n"); 213: exit(2); 214: } 215: 216: cfoll(v) { 217: register i; 218: if (left[v] == 0) { 219: count = 0; 220: for (i=1; i<=line; i++) tmpstat[i] = 0; 221: follow(v); 222: add(foll, v); 223: } 224: else if (right[v] == 0) cfoll(left[v]); 225: else { 226: cfoll(left[v]); 227: cfoll(right[v]); 228: } 229: } 230: cgotofn() { 231: register c, i, k; 232: int n, s; 233: char symbol[NCHARS]; 234: int j, nc, pc, pos; 235: int curpos, num; 236: int number, newpos; 237: count = 0; 238: for (n=3; n<=line; n++) tmpstat[n] = 0; 239: if (cstate(line-1)==0) { 240: tmpstat[line] = 1; 241: count++; 242: out[0] = 1; 243: } 244: for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 245: count--; /*leave out position 1 */ 246: icount = count; 247: tmpstat[1] = 0; 248: add(state, 0); 249: n = 0; 250: for (s=0; s<=n; s++) { 251: if (out[s] == 1) continue; 252: for (i=0; i<NCHARS; i++) symbol[i] = 0; 253: num = positions[state[s]]; 254: count = icount; 255: for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 256: pos = state[s] + 1; 257: for (i=0; i<num; i++) { 258: curpos = positions[pos]; 259: if ((c = name[curpos]) >= 0) { 260: if (c < NCHARS) symbol[c] = 1; 261: else if (c == DOT) { 262: for (k=0; k<NCHARS; k++) 263: if (k!='\n') symbol[k] = 1; 264: } 265: else if (c == CCL) { 266: nc = chars[right[curpos]]; 267: pc = right[curpos] + 1; 268: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 269: } 270: else if (c == NCCL) { 271: nc = chars[right[curpos]]; 272: for (j = 0; j < NCHARS; j++) { 273: pc = right[curpos] + 1; 274: for (k = 0; k < nc; k++) 275: if (j==chars[pc++]) goto cont; 276: if (j!='\n') symbol[j] = 1; 277: cont:; 278: } 279: } 280: else printf("something's funny\n"); 281: } 282: pos++; 283: } 284: for (c=0; c<NCHARS; c++) { 285: if (symbol[c] == 1) { /* nextstate(s,c) */ 286: count = icount; 287: for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 288: pos = state[s] + 1; 289: for (i=0; i<num; i++) { 290: curpos = positions[pos]; 291: if ((k = name[curpos]) >= 0) 292: if ( 293: (k == c) 294: | (k == DOT) 295: | (k == CCL && member(c, right[curpos], 1)) 296: | (k == NCCL && member(c, right[curpos], 0)) 297: ) { 298: number = positions[foll[curpos]]; 299: newpos = foll[curpos] + 1; 300: for (k=0; k<number; k++) { 301: if (tmpstat[positions[newpos]] != 1) { 302: tmpstat[positions[newpos]] = 1; 303: count++; 304: } 305: newpos++; 306: } 307: } 308: pos++; 309: } /* end nextstate */ 310: if (notin(n)) { 311: if (n >= NSTATES) overflo(); 312: add(state, ++n); 313: if (tmpstat[line] == 1) out[n] = 1; 314: gotofn[s][c] = n; 315: } 316: else { 317: gotofn[s][c] = xstate; 318: } 319: } 320: } 321: } 322: } 323: 324: cstate(v) { 325: register b; 326: if (left[v] == 0) { 327: if (tmpstat[v] != 1) { 328: tmpstat[v] = 1; 329: count++; 330: } 331: return(1); 332: } 333: else if (right[v] == 0) { 334: if (cstate(left[v]) == 0) return (0); 335: else if (name[v] == PLUS) return (1); 336: else return (0); 337: } 338: else if (name[v] == CAT) { 339: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 340: else return (1); 341: } 342: else { /* name[v] == OR */ 343: b = cstate(right[v]); 344: if (cstate(left[v]) == 0 || b == 0) return (0); 345: else return (1); 346: } 347: } 348: 349: 350: member(symb, set, torf) { 351: register i, num, pos; 352: num = chars[set]; 353: pos = set + 1; 354: for (i=0; i<num; i++) 355: if (symb == chars[pos++]) return (torf); 356: return (!torf); 357: } 358: 359: notin(n) { 360: register i, j, pos; 361: for (i=0; i<=n; i++) { 362: if (positions[state[i]] == count) { 363: pos = state[i] + 1; 364: for (j=0; j < count; j++) 365: if (tmpstat[positions[pos++]] != 1) goto nxt; 366: xstate = i; 367: return (0); 368: } 369: nxt: ; 370: } 371: return (1); 372: } 373: 374: add(array, n) int *array; { 375: register i; 376: if (nxtpos + count > MAXPOS) overflo(); 377: array[n] = nxtpos; 378: positions[nxtpos++] = count; 379: for (i=3; i <= line; i++) { 380: if (tmpstat[i] == 1) { 381: positions[nxtpos++] = i; 382: } 383: } 384: } 385: 386: follow(v) int v; { 387: int p; 388: if (v == line) return; 389: p = parent[v]; 390: switch(name[p]) { 391: case STAR: 392: case PLUS: cstate(v); 393: follow(p); 394: return; 395: 396: case OR: 397: case QUEST: follow(p); 398: return; 399: 400: case CAT: if (v == left[p]) { 401: if (cstate(right[p]) == 0) { 402: follow(p); 403: return; 404: } 405: } 406: else follow(p); 407: return; 408: case FINAL: if (tmpstat[line] != 1) { 409: tmpstat[line] = 1; 410: count++; 411: } 412: return; 413: } 414: } 415: 416: 417: main(argc, argv) 418: char **argv; 419: { 420: while (--argc > 0 && (++argv)[0][0]=='-') 421: switch (argv[0][1]) { 422: 423: case 's': 424: sflag++; 425: continue; 426: 427: case 'h': 428: hflag = 0; 429: continue; 430: 431: case 'b': 432: bflag++; 433: continue; 434: 435: case 'c': 436: cflag++; 437: continue; 438: 439: case 'e': 440: argc--; 441: argv++; 442: goto out; 443: 444: case 'f': 445: fflag++; 446: continue; 447: 448: case 'l': 449: lflag++; 450: continue; 451: 452: case 'n': 453: nflag++; 454: continue; 455: 456: case 'v': 457: vflag++; 458: continue; 459: 460: default: 461: fprintf(stderr, "egrep: unknown flag\n"); 462: continue; 463: } 464: out: 465: if (argc<=0) 466: exit(2); 467: if (fflag) { 468: if ((ffile = fopen(fname = *argv, "r")) == NULL) { 469: perror(fname); 470: exit(2); 471: } 472: } 473: else input = *argv; 474: argc--; 475: argv++; 476: 477: yyparse(); 478: 479: cfoll(line-1); 480: cgotofn(); 481: nfile = argc; 482: if (argc<=0) { 483: if (lflag) exit(1); 484: execute(0); 485: } 486: else while (--argc >= 0) { 487: execute(*argv); 488: argv++; 489: } 490: if (cantopen) 491: exit(2); 492: else 493: exit(nsucc == 0); 494: } 495: 496: execute(file) 497: char *file; 498: { 499: register char *p; 500: register cstat; 501: register ccount; 502: char buf[1024]; 503: char *nlp; 504: int istat; 505: if (file) { 506: if ((f = open(file, 0)) < 0) { 507: perror(file); 508: cantopen++; 509: } 510: } 511: else f = 0; 512: ccount = 0; 513: lnum = 1; 514: tln = 0; 515: p = buf; 516: nlp = p; 517: if ((ccount = read(f,p,512))<=0) goto done; 518: blkno = ccount; 519: istat = cstat = gotofn[0]['\n']; 520: if (out[cstat]) goto found; 521: for (;;) { 522: cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 523: if (out[cstat]) { 524: found: for(;;) { 525: if (*p++ == '\n') { 526: if (vflag == 0) { 527: succeed: nsucc = 1; 528: if (cflag) tln++; 529: else if (sflag) 530: ; /* ugh */ 531: else if (lflag) { 532: printf("%s\n", file); 533: close(f); 534: return; 535: } 536: else { 537: if (nfile > 1 && hflag) printf("%s:", file); 538: if (bflag) printf("%ld:", (blkno-ccount-1)/512); 539: if (nflag) printf("%5ld:", lnum); 540: /*! added 5 before ld for sorting and other purposes PLWard 7/18/80 USGS*/ 541: if (p <= nlp) { 542: while (nlp < &buf[1024]) putchar(*nlp++); 543: nlp = buf; 544: } 545: while (nlp < p) putchar(*nlp++); 546: } 547: } 548: lnum++; 549: nlp = p; 550: if ((out[(cstat=istat)]) == 0) goto brk2; 551: } 552: cfound: 553: if (--ccount <= 0) { 554: if (p <= &buf[512]) { 555: if ((ccount = read(f, p, 512)) <= 0) goto done; 556: } 557: else if (p == &buf[1024]) { 558: p = buf; 559: if ((ccount = read(f, p, 512)) <= 0) goto done; 560: } 561: else { 562: if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; 563: } 564: if(nlp>p && nlp<=p+ccount) 565: nlp = p+ccount; 566: blkno += ccount; 567: } 568: } 569: } 570: if (*p++ == '\n') { 571: if (vflag) goto succeed; 572: else { 573: lnum++; 574: nlp = p; 575: if (out[(cstat=istat)]) goto cfound; 576: } 577: } 578: brk2: 579: if (--ccount <= 0) { 580: if (p <= &buf[512]) { 581: if ((ccount = read(f, p, 512)) <= 0) break; 582: } 583: else if (p == &buf[1024]) { 584: p = buf; 585: if ((ccount = read(f, p, 512)) <= 0) break; 586: } 587: else { 588: if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 589: } 590: if(nlp>p && nlp<=p+ccount) 591: nlp = p+ccount; 592: blkno += ccount; 593: } 594: } 595: done: close(f); 596: if (cflag) { 597: if (nfile > 1) 598: printf("%s:", file); 599: printf("%ld\n", tln); 600: } 601: }