/* * egrep -- print lines containing (or not containing) a regular expression * * status returns: * 0 - ok, and some matches * 1 - ok, but no matches * 2 - some error */ %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST %left OR %left CHAR DOT CCL NCCL '(' %left CAT %left STAR PLUS QUEST %{ #include #define MAXLIN 350 #define MAXPOS 4000 #define NCHARS 128 #define NSTATES 128 #define FINAL -1 char gotofn[NSTATES][NCHARS]; int state[NSTATES]; char out[NSTATES]; int line 1; int name[MAXLIN]; int left[MAXLIN]; int right[MAXLIN]; int parent[MAXLIN]; int foll[MAXLIN]; int positions[MAXPOS]; char chars[MAXLIN]; int nxtpos; int nxtchar 0; int tmpstat[MAXLIN]; int initstat[MAXLIN]; int xstate; int count; int icount; char *input; long lnum; int bflag; int cflag; int fflag; int lflag; int nflag; int hflag = 1; int sflag; int vflag; int nfile; long blkno; long tln; int nsucc; int f; int fname; int cantopen; FILE *ffile; %} %% s: t ={ unary(FINAL, $1); line--; } ; t: b r ={ $$ = node(CAT, $1, $2); } | OR b r OR ={ $$ = node(CAT, $2, $3); } | OR b r ={ $$ = node(CAT, $2, $3); } | b r OR ={ $$ = node(CAT, $1, $2); } ; b: ={ $$ = enter(DOT); $$ = unary(STAR, $$); } ; r: CHAR ={ $$ = enter($1); } | DOT ={ $$ = enter(DOT); } | CCL ={ $$ = cclenter(CCL); } | NCCL ={ $$ = cclenter(NCCL); } ; r: r OR r ={ $$ = node(OR, $1, $3); } | r r %prec CAT ={ $$ = node(CAT, $1, $2); } | r STAR ={ $$ = unary(STAR, $1); } | r PLUS ={ $$ = unary(PLUS, $1); } | r QUEST ={ $$ = unary(QUEST, $1); } | '(' r ')' ={ $$ = $2; } | error ; %% yyerror(s) { fprintf(stderr, "egrep: %s\n", s); exit(2); } yylex() { extern int yylval; int cclcnt, x; register char c, d; switch(c = nextch()) { case '$': case '^': c = '\n'; goto defchar; case '|': return (OR); case '*': return (STAR); case '+': return (PLUS); case '?': return (QUEST); case '(': return (c); case ')': return (c); case '.': return (DOT); case '\0': return (0); case '\n': return (OR); case '[': x = CCL; cclcnt = 0; count = nxtchar++; if ((c = nextch()) == '^') { x = NCCL; c = nextch(); } do { if (c == '\0') synerror(); if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { if ((d = nextch()) != 0) { c = chars[nxtchar-1]; while (c < d) { if (nxtchar >= MAXLIN) overflo(); chars[nxtchar++] = ++c; cclcnt++; } continue; } } if (nxtchar >= MAXLIN) overflo(); chars[nxtchar++] = c; cclcnt++; } while ((c = nextch()) != ']'); chars[count] = cclcnt; return (x); case '\\': if ((c = nextch()) == '\0') synerror(); defchar: default: yylval = c; return (CHAR); } } nextch() { register char c; if (fflag) { if ((c = getc(ffile)) == EOF) return(0); } else c = *input++; return(c); } synerror() { fprintf(stderr, "egrep: syntax error\n"); exit(2); } enter(x) int x; { if(line >= MAXLIN) overflo(); name[line] = x; left[line] = 0; right[line] = 0; return(line++); } cclenter(x) int x; { register linno; linno = enter(x); right[linno] = count; return (linno); } node(x, l, r) { if(line >= MAXLIN) overflo(); name[line] = x; left[line] = l; right[line] = r; parent[l] = line; parent[r] = line; return(line++); } unary(x, d) { if(line >= MAXLIN) overflo(); name[line] = x; left[line] = d; right[line] = 0; parent[d] = line; return(line++); } overflo() { fprintf(stderr, "egrep: regular expression too long\n"); exit(2); } cfoll(v) { register i; if (left[v] == 0) { count = 0; for (i=1; i<=line; i++) tmpstat[i] = 0; follow(v); add(foll, v); } else if (right[v] == 0) cfoll(left[v]); else { cfoll(left[v]); cfoll(right[v]); } } cgotofn() { register c, i, k; int n, s; char symbol[NCHARS]; int j, nc, pc, pos; int curpos, num; int number, newpos; count = 0; for (n=3; n<=line; n++) tmpstat[n] = 0; if (cstate(line-1)==0) { tmpstat[line] = 1; count++; out[0] = 1; } for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; count--; /*leave out position 1 */ icount = count; tmpstat[1] = 0; add(state, 0); n = 0; for (s=0; s<=n; s++) { if (out[s] == 1) continue; for (i=0; i= 0) { if (c < NCHARS) symbol[c] = 1; else if (c == DOT) { for (k=0; k= 0) if ( (k == c) | (k == DOT) | (k == CCL && member(c, right[curpos], 1)) | (k == NCCL && member(c, right[curpos], 0)) ) { number = positions[foll[curpos]]; newpos = foll[curpos] + 1; for (k=0; k= NSTATES) overflo(); add(state, ++n); if (tmpstat[line] == 1) out[n] = 1; gotofn[s][c] = n; } else { gotofn[s][c] = xstate; } } } } } cstate(v) { register b; if (left[v] == 0) { if (tmpstat[v] != 1) { tmpstat[v] = 1; count++; } return(1); } else if (right[v] == 0) { if (cstate(left[v]) == 0) return (0); else if (name[v] == PLUS) return (1); else return (0); } else if (name[v] == CAT) { if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); else return (1); } else { /* name[v] == OR */ b = cstate(right[v]); if (cstate(left[v]) == 0 || b == 0) return (0); else return (1); } } member(symb, set, torf) { register i, num, pos; num = chars[set]; pos = set + 1; for (i=0; i MAXPOS) overflo(); array[n] = nxtpos; positions[nxtpos++] = count; for (i=3; i <= line; i++) { if (tmpstat[i] == 1) { positions[nxtpos++] = i; } } } follow(v) int v; { int p; if (v == line) return; p = parent[v]; switch(name[p]) { case STAR: case PLUS: cstate(v); follow(p); return; case OR: case QUEST: follow(p); return; case CAT: if (v == left[p]) { if (cstate(right[p]) == 0) { follow(p); return; } } else follow(p); return; case FINAL: if (tmpstat[line] != 1) { tmpstat[line] = 1; count++; } return; } } main(argc, argv) char **argv; { while (--argc > 0 && (++argv)[0][0]=='-') switch (argv[0][1]) { case 's': sflag++; continue; case 'h': hflag = 0; continue; case 'b': bflag++; continue; case 'c': cflag++; continue; case 'e': argc--; argv++; goto out; case 'f': fflag++; continue; case 'l': lflag++; continue; case 'n': nflag++; continue; case 'v': vflag++; continue; default: fprintf(stderr, "egrep: unknown flag\n"); continue; } out: if (argc<=0) exit(2); if (fflag) { if ((ffile = fopen(fname = *argv, "r")) == NULL) { perror(fname); exit(2); } } else input = *argv; argc--; argv++; yyparse(); cfoll(line-1); cgotofn(); nfile = argc; if (argc<=0) { if (lflag) exit(1); execute(0); } else while (--argc >= 0) { execute(*argv); argv++; } if (cantopen) exit(2); else exit(nsucc == 0); } execute(file) char *file; { register char *p; register cstat; register ccount; char buf[1024]; char *nlp; int istat; if (file) { if ((f = open(file, 0)) < 0) { perror(file); cantopen++; } } else f = 0; ccount = 0; lnum = 1; tln = 0; p = buf; nlp = p; if ((ccount = read(f,p,512))<=0) goto done; blkno = ccount; istat = cstat = gotofn[0]['\n']; if (out[cstat]) goto found; for (;;) { cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ if (out[cstat]) { found: for(;;) { if (*p++ == '\n') { if (vflag == 0) { succeed: nsucc = 1; if (cflag) tln++; else if (sflag) ; /* ugh */ else if (lflag) { printf("%s\n", file); close(f); return; } else { if (nfile > 1 && hflag) printf("%s:", file); if (bflag) printf("%ld:", (blkno-ccount-1)/512); if (nflag) printf("%5ld:", lnum); /*! added 5 before ld for sorting and other purposes PLWard 7/18/80 USGS*/ if (p <= nlp) { while (nlp < &buf[1024]) putchar(*nlp++); nlp = buf; } while (nlp < p) putchar(*nlp++); } } lnum++; nlp = p; if ((out[(cstat=istat)]) == 0) goto brk2; } cfound: if (--ccount <= 0) { if (p <= &buf[512]) { if ((ccount = read(f, p, 512)) <= 0) goto done; } else if (p == &buf[1024]) { p = buf; if ((ccount = read(f, p, 512)) <= 0) goto done; } else { if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; } if(nlp>p && nlp<=p+ccount) nlp = p+ccount; blkno += ccount; } } } if (*p++ == '\n') { if (vflag) goto succeed; else { lnum++; nlp = p; if (out[(cstat=istat)]) goto cfound; } } brk2: if (--ccount <= 0) { if (p <= &buf[512]) { if ((ccount = read(f, p, 512)) <= 0) break; } else if (p == &buf[1024]) { p = buf; if ((ccount = read(f, p, 512)) <= 0) break; } else { if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; } if(nlp>p && nlp<=p+ccount) nlp = p+ccount; blkno += ccount; } } done: close(f); if (cflag) { if (nfile > 1) printf("%s:", file); printf("%ld\n", tln); } }