1: /* 2: * egrep -- print lines containing (or not containing) a regular expression 3: * 4: * status returns: 5: * 0 - ok, and some matches 6: * 1 - ok, but no matches 7: * 2 - some error 8: */ 9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10: %left OR 11: %left CHAR DOT CCL NCCL '(' 12: %left CAT 13: %left STAR PLUS QUEST 14: 15: %{ 16: static char *sccsid = "@(#)egrep.y 4.4 (Berkeley) 5/29/85"; 17: #include <stdio.h> 18: #include <sys/types.h> 19: #include <sys/stat.h> 20: 21: #define BLKSIZE 8192 22: #define MAXLIN 350 23: #define MAXPOS 4000 24: #define NCHARS 128 25: #define NSTATES 128 26: #define FINAL -1 27: char gotofn[NSTATES][NCHARS]; 28: int state[NSTATES]; 29: char out[NSTATES]; 30: int line = 1; 31: int name[MAXLIN]; 32: int left[MAXLIN]; 33: int right[MAXLIN]; 34: int parent[MAXLIN]; 35: int foll[MAXLIN]; 36: int positions[MAXPOS]; 37: char chars[MAXLIN]; 38: int nxtpos; 39: int nxtchar = 0; 40: int tmpstat[MAXLIN]; 41: int initstat[MAXLIN]; 42: int xstate; 43: int count; 44: int icount; 45: char *input; 46: FILE *exprfile; 47: 48: long lnum; 49: int bflag; 50: int cflag; 51: int fflag; 52: int lflag; 53: int nflag; 54: int hflag = 1; 55: int sflag; 56: int vflag; 57: int retcode = 0; 58: int nfile; 59: int blkno; 60: long tln; 61: int nsucc; 62: 63: int f; 64: char *fname; 65: %} 66: 67: %% 68: s: t 69: ={ unary(FINAL, $1); 70: line--; 71: } 72: ; 73: t: b r 74: ={ $$ = node(CAT, $1, $2); } 75: | OR b r OR 76: ={ $$ = node(CAT, $2, $3); } 77: | OR b r 78: ={ $$ = node(CAT, $2, $3); } 79: | b r OR 80: ={ $$ = node(CAT, $1, $2); } 81: ; 82: b: 83: ={ $$ = enter(DOT); 84: $$ = unary(STAR, $$); } 85: ; 86: r: CHAR 87: ={ $$ = enter($1); } 88: | DOT 89: ={ $$ = enter(DOT); } 90: | CCL 91: ={ $$ = cclenter(CCL); } 92: | NCCL 93: ={ $$ = cclenter(NCCL); } 94: ; 95: 96: r: r OR r 97: ={ $$ = node(OR, $1, $3); } 98: | r r %prec CAT 99: ={ $$ = node(CAT, $1, $2); } 100: | r STAR 101: ={ $$ = unary(STAR, $1); } 102: | r PLUS 103: ={ $$ = unary(PLUS, $1); } 104: | r QUEST 105: ={ $$ = unary(QUEST, $1); } 106: | '(' r ')' 107: ={ $$ = $2; } 108: | error 109: ; 110: 111: %% 112: yyerror(s) { 113: fprintf(stderr, "egrep: %s\n", s); 114: exit(2); 115: } 116: 117: yylex() { 118: extern int yylval; 119: int cclcnt, x; 120: register char c, d; 121: switch(c = nextch()) { 122: case '$': 123: case '^': c = '\n'; 124: goto defchar; 125: case '|': return (OR); 126: case '*': return (STAR); 127: case '+': return (PLUS); 128: case '?': return (QUEST); 129: case '(': return (c); 130: case ')': return (c); 131: case '.': return (DOT); 132: case '\0': return (0); 133: case '\n': return (OR); 134: case '[': 135: x = CCL; 136: cclcnt = 0; 137: count = nxtchar++; 138: if ((c = nextch()) == '^') { 139: x = NCCL; 140: c = nextch(); 141: } 142: do { 143: if (c == '\0') synerror(); 144: if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 145: if ((d = nextch()) != 0) { 146: c = chars[nxtchar-1]; 147: while (c < d) { 148: if (nxtchar >= MAXLIN) overflo(); 149: chars[nxtchar++] = ++c; 150: cclcnt++; 151: } 152: continue; 153: } 154: } 155: if (nxtchar >= MAXLIN) overflo(); 156: chars[nxtchar++] = c; 157: cclcnt++; 158: } while ((c = nextch()) != ']'); 159: chars[count] = cclcnt; 160: return (x); 161: case '\\': 162: if ((c = nextch()) == '\0') synerror(); 163: defchar: 164: default: yylval = c; return (CHAR); 165: } 166: } 167: nextch() { 168: register char c; 169: if (fflag) { 170: if ((c = getc(exprfile)) == EOF) { 171: fclose(exprfile); 172: return(0); 173: } 174: } 175: else c = *input++; 176: return(c); 177: } 178: 179: synerror() { 180: fprintf(stderr, "egrep: syntax error\n"); 181: exit(2); 182: } 183: 184: enter(x) int x; { 185: if(line >= MAXLIN) overflo(); 186: name[line] = x; 187: left[line] = 0; 188: right[line] = 0; 189: return(line++); 190: } 191: 192: cclenter(x) int x; { 193: register linno; 194: linno = enter(x); 195: right[linno] = count; 196: return (linno); 197: } 198: 199: node(x, l, r) { 200: if(line >= MAXLIN) overflo(); 201: name[line] = x; 202: left[line] = l; 203: right[line] = r; 204: parent[l] = line; 205: parent[r] = line; 206: return(line++); 207: } 208: 209: unary(x, d) { 210: if(line >= MAXLIN) overflo(); 211: name[line] = x; 212: left[line] = d; 213: right[line] = 0; 214: parent[d] = line; 215: return(line++); 216: } 217: overflo() { 218: fprintf(stderr, "egrep: regular expression too long\n"); 219: exit(2); 220: } 221: 222: cfoll(v) { 223: register i; 224: if (left[v] == 0) { 225: count = 0; 226: for (i=1; i<=line; i++) tmpstat[i] = 0; 227: follow(v); 228: add(foll, v); 229: } 230: else if (right[v] == 0) cfoll(left[v]); 231: else { 232: cfoll(left[v]); 233: cfoll(right[v]); 234: } 235: } 236: cgotofn() { 237: register c, i, k; 238: int n, s; 239: char symbol[NCHARS]; 240: int j, nc, pc, pos; 241: int curpos, num; 242: int number, newpos; 243: count = 0; 244: for (n=3; n<=line; n++) tmpstat[n] = 0; 245: if (cstate(line-1)==0) { 246: tmpstat[line] = 1; 247: count++; 248: out[0] = 1; 249: } 250: for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 251: count--; /*leave out position 1 */ 252: icount = count; 253: tmpstat[1] = 0; 254: add(state, 0); 255: n = 0; 256: for (s=0; s<=n; s++) { 257: if (out[s] == 1) continue; 258: for (i=0; i<NCHARS; i++) symbol[i] = 0; 259: num = positions[state[s]]; 260: count = icount; 261: for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 262: pos = state[s] + 1; 263: for (i=0; i<num; i++) { 264: curpos = positions[pos]; 265: if ((c = name[curpos]) >= 0) { 266: if (c < NCHARS) symbol[c] = 1; 267: else if (c == DOT) { 268: for (k=0; k<NCHARS; k++) 269: if (k!='\n') symbol[k] = 1; 270: } 271: else if (c == CCL) { 272: nc = chars[right[curpos]]; 273: pc = right[curpos] + 1; 274: for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 275: } 276: else if (c == NCCL) { 277: nc = chars[right[curpos]]; 278: for (j = 0; j < NCHARS; j++) { 279: pc = right[curpos] + 1; 280: for (k = 0; k < nc; k++) 281: if (j==chars[pc++]) goto cont; 282: if (j!='\n') symbol[j] = 1; 283: cont:; 284: } 285: } 286: else printf("something's funny\n"); 287: } 288: pos++; 289: } 290: for (c=0; c<NCHARS; c++) { 291: if (symbol[c] == 1) { /* nextstate(s,c) */ 292: count = icount; 293: for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 294: pos = state[s] + 1; 295: for (i=0; i<num; i++) { 296: curpos = positions[pos]; 297: if ((k = name[curpos]) >= 0) 298: if ( 299: (k == c) 300: | (k == DOT) 301: | (k == CCL && member(c, right[curpos], 1)) 302: | (k == NCCL && member(c, right[curpos], 0)) 303: ) { 304: number = positions[foll[curpos]]; 305: newpos = foll[curpos] + 1; 306: for (k=0; k<number; k++) { 307: if (tmpstat[positions[newpos]] != 1) { 308: tmpstat[positions[newpos]] = 1; 309: count++; 310: } 311: newpos++; 312: } 313: } 314: pos++; 315: } /* end nextstate */ 316: if (notin(n)) { 317: if (n >= NSTATES) overflo(); 318: add(state, ++n); 319: if (tmpstat[line] == 1) out[n] = 1; 320: gotofn[s][c] = n; 321: } 322: else { 323: gotofn[s][c] = xstate; 324: } 325: } 326: } 327: } 328: } 329: 330: cstate(v) { 331: register b; 332: if (left[v] == 0) { 333: if (tmpstat[v] != 1) { 334: tmpstat[v] = 1; 335: count++; 336: } 337: return(1); 338: } 339: else if (right[v] == 0) { 340: if (cstate(left[v]) == 0) return (0); 341: else if (name[v] == PLUS) return (1); 342: else return (0); 343: } 344: else if (name[v] == CAT) { 345: if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 346: else return (1); 347: } 348: else { /* name[v] == OR */ 349: b = cstate(right[v]); 350: if (cstate(left[v]) == 0 || b == 0) return (0); 351: else return (1); 352: } 353: } 354: 355: 356: member(symb, set, torf) { 357: register i, num, pos; 358: num = chars[set]; 359: pos = set + 1; 360: for (i=0; i<num; i++) 361: if (symb == chars[pos++]) return (torf); 362: return (!torf); 363: } 364: 365: notin(n) { 366: register i, j, pos; 367: for (i=0; i<=n; i++) { 368: if (positions[state[i]] == count) { 369: pos = state[i] + 1; 370: for (j=0; j < count; j++) 371: if (tmpstat[positions[pos++]] != 1) goto nxt; 372: xstate = i; 373: return (0); 374: } 375: nxt: ; 376: } 377: return (1); 378: } 379: 380: add(array, n) int *array; { 381: register i; 382: if (nxtpos + count > MAXPOS) overflo(); 383: array[n] = nxtpos; 384: positions[nxtpos++] = count; 385: for (i=3; i <= line; i++) { 386: if (tmpstat[i] == 1) { 387: positions[nxtpos++] = i; 388: } 389: } 390: } 391: 392: follow(v) int v; { 393: int p; 394: if (v == line) return; 395: p = parent[v]; 396: switch(name[p]) { 397: case STAR: 398: case PLUS: cstate(v); 399: follow(p); 400: return; 401: 402: case OR: 403: case QUEST: follow(p); 404: return; 405: 406: case CAT: if (v == left[p]) { 407: if (cstate(right[p]) == 0) { 408: follow(p); 409: return; 410: } 411: } 412: else follow(p); 413: return; 414: case FINAL: if (tmpstat[line] != 1) { 415: tmpstat[line] = 1; 416: count++; 417: } 418: return; 419: } 420: } 421: 422: 423: main(argc, argv) 424: char **argv; 425: { 426: while (--argc > 0 && (++argv)[0][0]=='-') 427: switch (argv[0][1]) { 428: 429: case 's': 430: sflag++; 431: continue; 432: 433: case 'h': 434: hflag = 0; 435: continue; 436: 437: case 'b': 438: bflag++; 439: continue; 440: 441: case 'c': 442: cflag++; 443: continue; 444: 445: case 'e': 446: argc--; 447: argv++; 448: goto out; 449: 450: case 'f': 451: fflag++; 452: continue; 453: 454: case 'l': 455: lflag++; 456: continue; 457: 458: case 'n': 459: nflag++; 460: continue; 461: 462: case 'v': 463: vflag++; 464: continue; 465: 466: default: 467: fprintf(stderr, "egrep: unknown flag\n"); 468: continue; 469: } 470: out: 471: if (argc<=0) 472: exit(2); 473: if (fflag) { 474: fname = *argv; 475: exprfile = fopen(fname, "r"); 476: if (exprfile == (FILE *)NULL) { 477: fprintf(stderr, "egrep: can't open %s\n", fname); 478: exit(2); 479: } 480: } 481: else input = *argv; 482: argc--; 483: argv++; 484: 485: yyparse(); 486: 487: cfoll(line-1); 488: cgotofn(); 489: nfile = argc; 490: if (argc<=0) { 491: if (lflag) exit(1); 492: execute(0); 493: } 494: else while (--argc >= 0) { 495: execute(*argv); 496: argv++; 497: } 498: exit(retcode != 0 ? retcode : nsucc == 0); 499: } 500: 501: execute(file) 502: char *file; 503: { 504: register char *p; 505: register cstat; 506: register ccount; 507: static char *buf; 508: static int blksize; 509: struct stat stb; 510: char *nlp; 511: int istat; 512: if (file) { 513: if ((f = open(file, 0)) < 0) { 514: fprintf(stderr, "egrep: can't open %s\n", file); 515: retcode = 2; 516: return; 517: } 518: } 519: else f = 0; 520: if (buf == NULL) { 521: if (fstat(f, &stb) > 0 && stb.st_blksize > 0) 522: blksize = stb.st_blksize; 523: else 524: blksize = BLKSIZE; 525: buf = (char *)malloc(2*blksize); 526: if (buf == NULL) { 527: fprintf(stderr, "egrep: no memory for %s\n", file); 528: retcode = 2; 529: return; 530: } 531: } 532: ccount = 0; 533: lnum = 1; 534: tln = 0; 535: blkno = 0; 536: p = buf; 537: nlp = p; 538: if ((ccount = read(f,p,blksize))<=0) goto done; 539: istat = cstat = gotofn[0]['\n']; 540: if (out[cstat]) goto found; 541: for (;;) { 542: cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 543: if (out[cstat]) { 544: found: for(;;) { 545: if (*p++ == '\n') { 546: if (vflag == 0) { 547: succeed: nsucc = 1; 548: if (cflag) tln++; 549: else if (sflag) 550: ; /* ugh */ 551: else if (lflag) { 552: printf("%s\n", file); 553: close(f); 554: return; 555: } 556: else { 557: if (nfile > 1 && hflag) printf("%s:", file); 558: if (bflag) printf("%d:", blkno); 559: if (nflag) printf("%ld:", lnum); 560: if (p <= nlp) { 561: while (nlp < &buf[2*blksize]) putchar(*nlp++); 562: nlp = buf; 563: } 564: while (nlp < p) putchar(*nlp++); 565: } 566: } 567: lnum++; 568: nlp = p; 569: if ((out[(cstat=istat)]) == 0) goto brk2; 570: } 571: cfound: 572: if (--ccount <= 0) { 573: if (p <= &buf[blksize]) { 574: if ((ccount = read(f, p, blksize)) <= 0) goto done; 575: } 576: else if (p == &buf[2*blksize]) { 577: p = buf; 578: if ((ccount = read(f, p, blksize)) <= 0) goto done; 579: } 580: else { 581: if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done; 582: } 583: blkno += ccount / 512; 584: } 585: } 586: } 587: if (*p++ == '\n') { 588: if (vflag) goto succeed; 589: else { 590: lnum++; 591: nlp = p; 592: if (out[(cstat=istat)]) goto cfound; 593: } 594: } 595: brk2: 596: if (--ccount <= 0) { 597: if (p <= &buf[blksize]) { 598: if ((ccount = read(f, p, blksize)) <= 0) break; 599: } 600: else if (p == &buf[2*blksize]) { 601: p = buf; 602: if ((ccount = read(f, p, blksize)) <= 0) break; 603: } 604: else { 605: if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break; 606: } 607: blkno += ccount / 512; 608: } 609: } 610: done: close(f); 611: if (cflag) { 612: if (nfile > 1) 613: printf("%s:", file); 614: printf("%ld\n", tln); 615: } 616: }