1: /*
   2:  * egrep -- print lines containing (or not containing) a regular expression
   3:  *
   4:  *	status returns:
   5:  *		0 - ok, and some matches
   6:  *		1 - ok, but no matches
   7:  *		2 - some error
   8:  */
   9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
  10: %left OR
  11: %left CHAR DOT CCL NCCL '('
  12: %left CAT
  13: %left STAR PLUS QUEST
  14: 
  15: %{
  16: #include <stdio.h>
  17: 
  18: #define MAXLIN 350
  19: #define MAXPOS 4000
  20: #define NCHARS 128
  21: #define NSTATES 128
  22: #define FINAL -1
  23: char gotofn[NSTATES][NCHARS];
  24: int state[NSTATES];
  25: char out[NSTATES];
  26: int line 1;
  27: int name[MAXLIN];
  28: int left[MAXLIN];
  29: int right[MAXLIN];
  30: int parent[MAXLIN];
  31: int foll[MAXLIN];
  32: int positions[MAXPOS];
  33: char chars[MAXLIN];
  34: int nxtpos;
  35: int nxtchar 0;
  36: int tmpstat[MAXLIN];
  37: int initstat[MAXLIN];
  38: int xstate;
  39: int count;
  40: int icount;
  41: char *input;
  42: 
  43: long    lnum;
  44: int bflag;
  45: int cflag;
  46: int fflag;
  47: int lflag;
  48: int nflag;
  49: int hflag   = 1;
  50: int sflag;
  51: int vflag;
  52: int nfile;
  53: long    blkno;
  54: long    tln;
  55: int nsucc;
  56: 
  57: int f;
  58: int fname;
  59: int cantopen;
  60: 
  61: FILE    *ffile;
  62: %}
  63: 
  64: %%
  65: s:  t
  66:         ={ unary(FINAL, $1);
  67:           line--;
  68:         }
  69:     ;
  70: t:  b r
  71:         ={ $$ = node(CAT, $1, $2); }
  72:     | OR b r OR
  73:         ={ $$ = node(CAT, $2, $3); }
  74:     | OR b r
  75:         ={ $$ = node(CAT, $2, $3); }
  76:     | b r OR
  77:         ={ $$ = node(CAT, $1, $2); }
  78:     ;
  79: b:
  80:         ={ $$ = enter(DOT);
  81:            $$ = unary(STAR, $$); }
  82:     ;
  83: r:  CHAR
  84:         ={ $$ = enter($1); }
  85:     | DOT
  86:         ={ $$ = enter(DOT); }
  87:     | CCL
  88:         ={ $$ = cclenter(CCL); }
  89:     | NCCL
  90:         ={ $$ = cclenter(NCCL); }
  91:     ;
  92: 
  93: r:  r OR r
  94:         ={ $$ = node(OR, $1, $3); }
  95:     | r r %prec CAT
  96:         ={ $$ = node(CAT, $1, $2); }
  97:     | r STAR
  98:         ={ $$ = unary(STAR, $1); }
  99:     | r PLUS
 100:         ={ $$ = unary(PLUS, $1); }
 101:     | r QUEST
 102:         ={ $$ = unary(QUEST, $1); }
 103:     | '(' r ')'
 104:         ={ $$ = $2; }
 105:     | error
 106:     ;
 107: 
 108: %%
 109: yyerror(s) {
 110:     fprintf(stderr, "egrep: %s\n", s);
 111:     exit(2);
 112: }
 113: 
 114: yylex() {
 115:     extern int yylval;
 116:     int cclcnt, x;
 117:     register char c, d;
 118:     switch(c = nextch()) {
 119:         case '$':
 120:         case '^': c = '\n';
 121:             goto defchar;
 122:         case '|': return (OR);
 123:         case '*': return (STAR);
 124:         case '+': return (PLUS);
 125:         case '?': return (QUEST);
 126:         case '(': return (c);
 127:         case ')': return (c);
 128:         case '.': return (DOT);
 129:         case '\0': return (0);
 130:         case '\n': return (OR);
 131:         case '[':
 132:             x = CCL;
 133:             cclcnt = 0;
 134:             count = nxtchar++;
 135:             if ((c = nextch()) == '^') {
 136:                 x = NCCL;
 137:                 c = nextch();
 138:             }
 139:             do {
 140:                 if (c == '\0') synerror();
 141:                 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
 142:                     if ((d = nextch()) != 0) {
 143:                         c = chars[nxtchar-1];
 144:                         while (c < d) {
 145:                             if (nxtchar >= MAXLIN) overflo();
 146:                             chars[nxtchar++] = ++c;
 147:                             cclcnt++;
 148:                         }
 149:                         continue;
 150:                     }
 151:                 }
 152:                 if (nxtchar >= MAXLIN) overflo();
 153:                 chars[nxtchar++] = c;
 154:                 cclcnt++;
 155:             } while ((c = nextch()) != ']');
 156:             chars[count] = cclcnt;
 157:             return (x);
 158:         case '\\':
 159:             if ((c = nextch()) == '\0') synerror();
 160:         defchar:
 161:         default: yylval = c; return (CHAR);
 162:     }
 163: }
 164: nextch() {
 165:     register char c;
 166:     if (fflag) {
 167:         if ((c = getc(ffile)) == EOF) return(0);
 168:     }
 169:     else c = *input++;
 170:     return(c);
 171: }
 172: 
 173: synerror() {
 174:     fprintf(stderr, "egrep: syntax error\n");
 175:     exit(2);
 176: }
 177: 
 178: enter(x) int x; {
 179:     if(line >= MAXLIN) overflo();
 180:     name[line] = x;
 181:     left[line] = 0;
 182:     right[line] = 0;
 183:     return(line++);
 184: }
 185: 
 186: cclenter(x) int x; {
 187:     register linno;
 188:     linno = enter(x);
 189:     right[linno] = count;
 190:     return (linno);
 191: }
 192: 
 193: node(x, l, r) {
 194:     if(line >= MAXLIN) overflo();
 195:     name[line] = x;
 196:     left[line] = l;
 197:     right[line] = r;
 198:     parent[l] = line;
 199:     parent[r] = line;
 200:     return(line++);
 201: }
 202: 
 203: unary(x, d) {
 204:     if(line >= MAXLIN) overflo();
 205:     name[line] = x;
 206:     left[line] = d;
 207:     right[line] = 0;
 208:     parent[d] = line;
 209:     return(line++);
 210: }
 211: overflo() {
 212:     fprintf(stderr, "egrep: regular expression too long\n");
 213:     exit(2);
 214: }
 215: 
 216: cfoll(v) {
 217:     register i;
 218:     if (left[v] == 0) {
 219:         count = 0;
 220:         for (i=1; i<=line; i++) tmpstat[i] = 0;
 221:         follow(v);
 222:         add(foll, v);
 223:     }
 224:     else if (right[v] == 0) cfoll(left[v]);
 225:     else {
 226:         cfoll(left[v]);
 227:         cfoll(right[v]);
 228:     }
 229: }
 230: cgotofn() {
 231:     register c, i, k;
 232:     int n, s;
 233:     char symbol[NCHARS];
 234:     int j, nc, pc, pos;
 235:     int curpos, num;
 236:     int number, newpos;
 237:     count = 0;
 238:     for (n=3; n<=line; n++) tmpstat[n] = 0;
 239:     if (cstate(line-1)==0) {
 240:         tmpstat[line] = 1;
 241:         count++;
 242:         out[0] = 1;
 243:     }
 244:     for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
 245:     count--;        /*leave out position 1 */
 246:     icount = count;
 247:     tmpstat[1] = 0;
 248:     add(state, 0);
 249:     n = 0;
 250:     for (s=0; s<=n; s++)  {
 251:         if (out[s] == 1) continue;
 252:         for (i=0; i<NCHARS; i++) symbol[i] = 0;
 253:         num = positions[state[s]];
 254:         count = icount;
 255:         for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
 256:         pos = state[s] + 1;
 257:         for (i=0; i<num; i++) {
 258:             curpos = positions[pos];
 259:             if ((c = name[curpos]) >= 0) {
 260:                 if (c < NCHARS) symbol[c] = 1;
 261:                 else if (c == DOT) {
 262:                     for (k=0; k<NCHARS; k++)
 263:                         if (k!='\n') symbol[k] = 1;
 264:                 }
 265:                 else if (c == CCL) {
 266:                     nc = chars[right[curpos]];
 267:                     pc = right[curpos] + 1;
 268:                     for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
 269:                 }
 270:                 else if (c == NCCL) {
 271:                     nc = chars[right[curpos]];
 272:                     for (j = 0; j < NCHARS; j++) {
 273:                         pc = right[curpos] + 1;
 274:                         for (k = 0; k < nc; k++)
 275:                             if (j==chars[pc++]) goto cont;
 276:                         if (j!='\n') symbol[j] = 1;
 277:                         cont:;
 278:                     }
 279:                 }
 280:                 else printf("something's funny\n");
 281:             }
 282:             pos++;
 283:         }
 284:         for (c=0; c<NCHARS; c++) {
 285:             if (symbol[c] == 1) { /* nextstate(s,c) */
 286:                 count = icount;
 287:                 for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
 288:                 pos = state[s] + 1;
 289:                 for (i=0; i<num; i++) {
 290:                     curpos = positions[pos];
 291:                     if ((k = name[curpos]) >= 0)
 292:                         if (
 293:                             (k == c)
 294:                             | (k == DOT)
 295:                             | (k == CCL && member(c, right[curpos], 1))
 296:                             | (k == NCCL && member(c, right[curpos], 0))
 297:                         ) {
 298:                             number = positions[foll[curpos]];
 299:                             newpos = foll[curpos] + 1;
 300:                             for (k=0; k<number; k++) {
 301:                                 if (tmpstat[positions[newpos]] != 1) {
 302:                                     tmpstat[positions[newpos]] = 1;
 303:                                     count++;
 304:                                 }
 305:                                 newpos++;
 306:                             }
 307:                         }
 308:                     pos++;
 309:                 } /* end nextstate */
 310:                 if (notin(n)) {
 311:                     if (n >= NSTATES) overflo();
 312:                     add(state, ++n);
 313:                     if (tmpstat[line] == 1) out[n] = 1;
 314:                     gotofn[s][c] = n;
 315:                 }
 316:                 else {
 317:                     gotofn[s][c] = xstate;
 318:                 }
 319:             }
 320:         }
 321:     }
 322: }
 323: 
 324: cstate(v) {
 325:     register b;
 326:     if (left[v] == 0) {
 327:         if (tmpstat[v] != 1) {
 328:             tmpstat[v] = 1;
 329:             count++;
 330:         }
 331:         return(1);
 332:     }
 333:     else if (right[v] == 0) {
 334:         if (cstate(left[v]) == 0) return (0);
 335:         else if (name[v] == PLUS) return (1);
 336:         else return (0);
 337:     }
 338:     else if (name[v] == CAT) {
 339:         if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
 340:         else return (1);
 341:     }
 342:     else { /* name[v] == OR */
 343:         b = cstate(right[v]);
 344:         if (cstate(left[v]) == 0 || b == 0) return (0);
 345:         else return (1);
 346:     }
 347: }
 348: 
 349: 
 350: member(symb, set, torf) {
 351:     register i, num, pos;
 352:     num = chars[set];
 353:     pos = set + 1;
 354:     for (i=0; i<num; i++)
 355:         if (symb == chars[pos++]) return (torf);
 356:     return (!torf);
 357: }
 358: 
 359: notin(n) {
 360:     register i, j, pos;
 361:     for (i=0; i<=n; i++) {
 362:         if (positions[state[i]] == count) {
 363:             pos = state[i] + 1;
 364:             for (j=0; j < count; j++)
 365:                 if (tmpstat[positions[pos++]] != 1) goto nxt;
 366:             xstate = i;
 367:             return (0);
 368:         }
 369:         nxt: ;
 370:     }
 371:     return (1);
 372: }
 373: 
 374: add(array, n) int *array; {
 375:     register i;
 376:     if (nxtpos + count > MAXPOS) overflo();
 377:     array[n] = nxtpos;
 378:     positions[nxtpos++] = count;
 379:     for (i=3; i <= line; i++) {
 380:         if (tmpstat[i] == 1) {
 381:             positions[nxtpos++] = i;
 382:         }
 383:     }
 384: }
 385: 
 386: follow(v) int v; {
 387:     int p;
 388:     if (v == line) return;
 389:     p = parent[v];
 390:     switch(name[p]) {
 391:         case STAR:
 392:         case PLUS:  cstate(v);
 393:                 follow(p);
 394:                 return;
 395: 
 396:         case OR:
 397:         case QUEST: follow(p);
 398:                 return;
 399: 
 400:         case CAT:   if (v == left[p]) {
 401:                     if (cstate(right[p]) == 0) {
 402:                         follow(p);
 403:                         return;
 404:                     }
 405:                 }
 406:                 else follow(p);
 407:                 return;
 408:         case FINAL: if (tmpstat[line] != 1) {
 409:                     tmpstat[line] = 1;
 410:                     count++;
 411:                 }
 412:                 return;
 413:     }
 414: }
 415: 
 416: 
 417: main(argc, argv)
 418: char **argv;
 419: {
 420:     while (--argc > 0 && (++argv)[0][0]=='-')
 421:         switch (argv[0][1]) {
 422: 
 423:         case 's':
 424:             sflag++;
 425:             continue;
 426: 
 427:         case 'h':
 428:             hflag = 0;
 429:             continue;
 430: 
 431:         case 'b':
 432:             bflag++;
 433:             continue;
 434: 
 435:         case 'c':
 436:             cflag++;
 437:             continue;
 438: 
 439:         case 'e':
 440:             argc--;
 441:             argv++;
 442:             goto out;
 443: 
 444:         case 'f':
 445:             fflag++;
 446:             continue;
 447: 
 448:         case 'l':
 449:             lflag++;
 450:             continue;
 451: 
 452:         case 'n':
 453:             nflag++;
 454:             continue;
 455: 
 456:         case 'v':
 457:             vflag++;
 458:             continue;
 459: 
 460:         default:
 461:             fprintf(stderr, "egrep: unknown flag\n");
 462:             continue;
 463:         }
 464: out:
 465:     if (argc<=0)
 466:         exit(2);
 467:     if (fflag) {
 468:         if ((ffile = fopen(fname = *argv, "r")) == NULL) {
 469:             perror(fname);
 470:             exit(2);
 471:         }
 472:     }
 473:     else input = *argv;
 474:     argc--;
 475:     argv++;
 476: 
 477:     yyparse();
 478: 
 479:     cfoll(line-1);
 480:     cgotofn();
 481:     nfile = argc;
 482:     if (argc<=0) {
 483:         if (lflag) exit(1);
 484:         execute(0);
 485:     }
 486:     else while (--argc >= 0) {
 487:         execute(*argv);
 488:         argv++;
 489:     }
 490:     if (cantopen)
 491:         exit(2);
 492:     else
 493:         exit(nsucc == 0);
 494: }
 495: 
 496: execute(file)
 497: char *file;
 498: {
 499:     register char *p;
 500:     register cstat;
 501:     register ccount;
 502:     char buf[1024];
 503:     char *nlp;
 504:     int istat;
 505:     if (file) {
 506:         if ((f = open(file, 0)) < 0) {
 507:             perror(file);
 508:             cantopen++;
 509:         }
 510:     }
 511:     else f = 0;
 512:     ccount = 0;
 513:     lnum = 1;
 514:     tln = 0;
 515:     p = buf;
 516:     nlp = p;
 517:     if ((ccount = read(f,p,512))<=0) goto done;
 518:     blkno = ccount;
 519:     istat = cstat = gotofn[0]['\n'];
 520:     if (out[cstat]) goto found;
 521:     for (;;) {
 522:         cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
 523:         if (out[cstat]) {
 524:         found:  for(;;) {
 525:                 if (*p++ == '\n') {
 526:                     if (vflag == 0) {
 527:                 succeed:    nsucc = 1;
 528:                         if (cflag) tln++;
 529:                         else if (sflag)
 530:                             ;   /* ugh */
 531:                         else if (lflag) {
 532:                             printf("%s\n", file);
 533:                             close(f);
 534:                             return;
 535:                         }
 536:                         else {
 537:                             if (nfile > 1 && hflag) printf("%s:", file);
 538:                             if (bflag) printf("%ld:", (blkno-ccount-1)/512);
 539:                             if (nflag) printf("%5ld:", lnum);
 540: /*! added 5 before ld for sorting and other purposes PLWard 7/18/80  USGS*/
 541:                             if (p <= nlp) {
 542:                                 while (nlp < &buf[1024]) putchar(*nlp++);
 543:                                 nlp = buf;
 544:                             }
 545:                             while (nlp < p) putchar(*nlp++);
 546:                         }
 547:                     }
 548:                     lnum++;
 549:                     nlp = p;
 550:                     if ((out[(cstat=istat)]) == 0) goto brk2;
 551:                 }
 552:                 cfound:
 553:                 if (--ccount <= 0) {
 554:                     if (p <= &buf[512]) {
 555:                         if ((ccount = read(f, p, 512)) <= 0) goto done;
 556:                     }
 557:                     else if (p == &buf[1024]) {
 558:                         p = buf;
 559:                         if ((ccount = read(f, p, 512)) <= 0) goto done;
 560:                     }
 561:                     else {
 562:                         if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done;
 563:                     }
 564:                     if(nlp>p && nlp<=p+ccount)
 565:                         nlp = p+ccount;
 566:                     blkno += ccount;
 567:                 }
 568:             }
 569:         }
 570:         if (*p++ == '\n') {
 571:             if (vflag) goto succeed;
 572:             else {
 573:                 lnum++;
 574:                 nlp = p;
 575:                 if (out[(cstat=istat)]) goto cfound;
 576:             }
 577:         }
 578:         brk2:
 579:         if (--ccount <= 0) {
 580:             if (p <= &buf[512]) {
 581:                 if ((ccount = read(f, p, 512)) <= 0) break;
 582:             }
 583:             else if (p == &buf[1024]) {
 584:                 p = buf;
 585:                 if ((ccount = read(f, p, 512)) <= 0) break;
 586:             }
 587:             else {
 588:                 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
 589:             }
 590:             if(nlp>p && nlp<=p+ccount)
 591:                 nlp = p+ccount;
 592:             blkno += ccount;
 593:         }
 594:     }
 595: done:   close(f);
 596:     if (cflag) {
 597:         if (nfile > 1)
 598:             printf("%s:", file);
 599:         printf("%ld\n", tln);
 600:     }
 601: }

Defined functions

_add defined in line 374; used 3 times
_cclenter defined in line 186; used 2 times
_cfoll defined in line 216; used 4 times
_cgotofn defined in line 230; used 1 times
_cstate defined in line 324; used 8 times
_enter defined in line 178; used 4 times
_execute defined in line 496; used 2 times
_follow defined in line 386; used 5 times
_main defined in line 417; never used
_member defined in line 350; used 2 times
_nextch defined in line 164; used 6 times
_node defined in line 193; used 6 times
_notin defined in line 359; used 1 times
_overflo defined in line 211; used 7 times
_synerror defined in line 173; used 2 times
_unary defined in line 203; used 5 times
_yyerror defined in line 108; never used
_yylex defined in line 114; never used

Defined macros

FINAL defined in line 22; used 1 times
  • in line 66
MAXLIN defined in line 18; used 13 times
MAXPOS defined in line 19; used 2 times
NCHARS defined in line 20; used 7 times
NSTATES defined in line 21; used 4 times
Last modified: 1983-07-24
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 1561
Valid CSS Valid XHTML 1.0 Strict