1: /*
   2:  * egrep -- print lines containing (or not containing) a regular expression
   3:  *
   4:  *	status returns:
   5:  *		0 - ok, and some matches
   6:  *		1 - ok, but no matches
   7:  *		2 - some error
   8:  */
   9: %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
  10: %left OR
  11: %left CHAR DOT CCL NCCL '('
  12: %left CAT
  13: %left STAR PLUS QUEST
  14: 
  15: %{
  16: static char *sccsid = "@(#)egrep.y	4.4 (Berkeley) 5/29/85";
  17: #include <stdio.h>
  18: #include <sys/types.h>
  19: #include <sys/stat.h>
  20: 
  21: #define BLKSIZE 8192
  22: #define MAXLIN 350
  23: #define MAXPOS 4000
  24: #define NCHARS 128
  25: #define NSTATES 128
  26: #define FINAL -1
  27: char gotofn[NSTATES][NCHARS];
  28: int state[NSTATES];
  29: char out[NSTATES];
  30: int line = 1;
  31: int name[MAXLIN];
  32: int left[MAXLIN];
  33: int right[MAXLIN];
  34: int parent[MAXLIN];
  35: int foll[MAXLIN];
  36: int positions[MAXPOS];
  37: char chars[MAXLIN];
  38: int nxtpos;
  39: int nxtchar = 0;
  40: int tmpstat[MAXLIN];
  41: int initstat[MAXLIN];
  42: int xstate;
  43: int count;
  44: int icount;
  45: char *input;
  46: FILE *exprfile;
  47: 
  48: long    lnum;
  49: int bflag;
  50: int cflag;
  51: int fflag;
  52: int lflag;
  53: int nflag;
  54: int hflag   = 1;
  55: int sflag;
  56: int vflag;
  57: int retcode = 0;
  58: int nfile;
  59: int blkno;
  60: long    tln;
  61: int nsucc;
  62: 
  63: int f;
  64: char    *fname;
  65: %}
  66: 
  67: %%
  68: s:  t
  69:         ={ unary(FINAL, $1);
  70:           line--;
  71:         }
  72:     ;
  73: t:  b r
  74:         ={ $$ = node(CAT, $1, $2); }
  75:     | OR b r OR
  76:         ={ $$ = node(CAT, $2, $3); }
  77:     | OR b r
  78:         ={ $$ = node(CAT, $2, $3); }
  79:     | b r OR
  80:         ={ $$ = node(CAT, $1, $2); }
  81:     ;
  82: b:
  83:         ={ $$ = enter(DOT);
  84:            $$ = unary(STAR, $$); }
  85:     ;
  86: r:  CHAR
  87:         ={ $$ = enter($1); }
  88:     | DOT
  89:         ={ $$ = enter(DOT); }
  90:     | CCL
  91:         ={ $$ = cclenter(CCL); }
  92:     | NCCL
  93:         ={ $$ = cclenter(NCCL); }
  94:     ;
  95: 
  96: r:  r OR r
  97:         ={ $$ = node(OR, $1, $3); }
  98:     | r r %prec CAT
  99:         ={ $$ = node(CAT, $1, $2); }
 100:     | r STAR
 101:         ={ $$ = unary(STAR, $1); }
 102:     | r PLUS
 103:         ={ $$ = unary(PLUS, $1); }
 104:     | r QUEST
 105:         ={ $$ = unary(QUEST, $1); }
 106:     | '(' r ')'
 107:         ={ $$ = $2; }
 108:     | error
 109:     ;
 110: 
 111: %%
 112: yyerror(s) {
 113:     fprintf(stderr, "egrep: %s\n", s);
 114:     exit(2);
 115: }
 116: 
 117: yylex() {
 118:     extern int yylval;
 119:     int cclcnt, x;
 120:     register char c, d;
 121:     switch(c = nextch()) {
 122:         case '$':
 123:         case '^': c = '\n';
 124:             goto defchar;
 125:         case '|': return (OR);
 126:         case '*': return (STAR);
 127:         case '+': return (PLUS);
 128:         case '?': return (QUEST);
 129:         case '(': return (c);
 130:         case ')': return (c);
 131:         case '.': return (DOT);
 132:         case '\0': return (0);
 133:         case '\n': return (OR);
 134:         case '[':
 135:             x = CCL;
 136:             cclcnt = 0;
 137:             count = nxtchar++;
 138:             if ((c = nextch()) == '^') {
 139:                 x = NCCL;
 140:                 c = nextch();
 141:             }
 142:             do {
 143:                 if (c == '\0') synerror();
 144:                 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
 145:                     if ((d = nextch()) != 0) {
 146:                         c = chars[nxtchar-1];
 147:                         while (c < d) {
 148:                             if (nxtchar >= MAXLIN) overflo();
 149:                             chars[nxtchar++] = ++c;
 150:                             cclcnt++;
 151:                         }
 152:                         continue;
 153:                     }
 154:                 }
 155:                 if (nxtchar >= MAXLIN) overflo();
 156:                 chars[nxtchar++] = c;
 157:                 cclcnt++;
 158:             } while ((c = nextch()) != ']');
 159:             chars[count] = cclcnt;
 160:             return (x);
 161:         case '\\':
 162:             if ((c = nextch()) == '\0') synerror();
 163:         defchar:
 164:         default: yylval = c; return (CHAR);
 165:     }
 166: }
 167: nextch() {
 168:     register char c;
 169:     if (fflag) {
 170:         if ((c = getc(exprfile)) == EOF) {
 171:             fclose(exprfile);
 172:             return(0);
 173:         }
 174:     }
 175:     else c = *input++;
 176:     return(c);
 177: }
 178: 
 179: synerror() {
 180:     fprintf(stderr, "egrep: syntax error\n");
 181:     exit(2);
 182: }
 183: 
 184: enter(x) int x; {
 185:     if(line >= MAXLIN) overflo();
 186:     name[line] = x;
 187:     left[line] = 0;
 188:     right[line] = 0;
 189:     return(line++);
 190: }
 191: 
 192: cclenter(x) int x; {
 193:     register linno;
 194:     linno = enter(x);
 195:     right[linno] = count;
 196:     return (linno);
 197: }
 198: 
 199: node(x, l, r) {
 200:     if(line >= MAXLIN) overflo();
 201:     name[line] = x;
 202:     left[line] = l;
 203:     right[line] = r;
 204:     parent[l] = line;
 205:     parent[r] = line;
 206:     return(line++);
 207: }
 208: 
 209: unary(x, d) {
 210:     if(line >= MAXLIN) overflo();
 211:     name[line] = x;
 212:     left[line] = d;
 213:     right[line] = 0;
 214:     parent[d] = line;
 215:     return(line++);
 216: }
 217: overflo() {
 218:     fprintf(stderr, "egrep: regular expression too long\n");
 219:     exit(2);
 220: }
 221: 
 222: cfoll(v) {
 223:     register i;
 224:     if (left[v] == 0) {
 225:         count = 0;
 226:         for (i=1; i<=line; i++) tmpstat[i] = 0;
 227:         follow(v);
 228:         add(foll, v);
 229:     }
 230:     else if (right[v] == 0) cfoll(left[v]);
 231:     else {
 232:         cfoll(left[v]);
 233:         cfoll(right[v]);
 234:     }
 235: }
 236: cgotofn() {
 237:     register c, i, k;
 238:     int n, s;
 239:     char symbol[NCHARS];
 240:     int j, nc, pc, pos;
 241:     int curpos, num;
 242:     int number, newpos;
 243:     count = 0;
 244:     for (n=3; n<=line; n++) tmpstat[n] = 0;
 245:     if (cstate(line-1)==0) {
 246:         tmpstat[line] = 1;
 247:         count++;
 248:         out[0] = 1;
 249:     }
 250:     for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
 251:     count--;        /*leave out position 1 */
 252:     icount = count;
 253:     tmpstat[1] = 0;
 254:     add(state, 0);
 255:     n = 0;
 256:     for (s=0; s<=n; s++)  {
 257:         if (out[s] == 1) continue;
 258:         for (i=0; i<NCHARS; i++) symbol[i] = 0;
 259:         num = positions[state[s]];
 260:         count = icount;
 261:         for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
 262:         pos = state[s] + 1;
 263:         for (i=0; i<num; i++) {
 264:             curpos = positions[pos];
 265:             if ((c = name[curpos]) >= 0) {
 266:                 if (c < NCHARS) symbol[c] = 1;
 267:                 else if (c == DOT) {
 268:                     for (k=0; k<NCHARS; k++)
 269:                         if (k!='\n') symbol[k] = 1;
 270:                 }
 271:                 else if (c == CCL) {
 272:                     nc = chars[right[curpos]];
 273:                     pc = right[curpos] + 1;
 274:                     for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
 275:                 }
 276:                 else if (c == NCCL) {
 277:                     nc = chars[right[curpos]];
 278:                     for (j = 0; j < NCHARS; j++) {
 279:                         pc = right[curpos] + 1;
 280:                         for (k = 0; k < nc; k++)
 281:                             if (j==chars[pc++]) goto cont;
 282:                         if (j!='\n') symbol[j] = 1;
 283:                         cont:;
 284:                     }
 285:                 }
 286:                 else printf("something's funny\n");
 287:             }
 288:             pos++;
 289:         }
 290:         for (c=0; c<NCHARS; c++) {
 291:             if (symbol[c] == 1) { /* nextstate(s,c) */
 292:                 count = icount;
 293:                 for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
 294:                 pos = state[s] + 1;
 295:                 for (i=0; i<num; i++) {
 296:                     curpos = positions[pos];
 297:                     if ((k = name[curpos]) >= 0)
 298:                         if (
 299:                             (k == c)
 300:                             | (k == DOT)
 301:                             | (k == CCL && member(c, right[curpos], 1))
 302:                             | (k == NCCL && member(c, right[curpos], 0))
 303:                         ) {
 304:                             number = positions[foll[curpos]];
 305:                             newpos = foll[curpos] + 1;
 306:                             for (k=0; k<number; k++) {
 307:                                 if (tmpstat[positions[newpos]] != 1) {
 308:                                     tmpstat[positions[newpos]] = 1;
 309:                                     count++;
 310:                                 }
 311:                                 newpos++;
 312:                             }
 313:                         }
 314:                     pos++;
 315:                 } /* end nextstate */
 316:                 if (notin(n)) {
 317:                     if (n >= NSTATES) overflo();
 318:                     add(state, ++n);
 319:                     if (tmpstat[line] == 1) out[n] = 1;
 320:                     gotofn[s][c] = n;
 321:                 }
 322:                 else {
 323:                     gotofn[s][c] = xstate;
 324:                 }
 325:             }
 326:         }
 327:     }
 328: }
 329: 
 330: cstate(v) {
 331:     register b;
 332:     if (left[v] == 0) {
 333:         if (tmpstat[v] != 1) {
 334:             tmpstat[v] = 1;
 335:             count++;
 336:         }
 337:         return(1);
 338:     }
 339:     else if (right[v] == 0) {
 340:         if (cstate(left[v]) == 0) return (0);
 341:         else if (name[v] == PLUS) return (1);
 342:         else return (0);
 343:     }
 344:     else if (name[v] == CAT) {
 345:         if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
 346:         else return (1);
 347:     }
 348:     else { /* name[v] == OR */
 349:         b = cstate(right[v]);
 350:         if (cstate(left[v]) == 0 || b == 0) return (0);
 351:         else return (1);
 352:     }
 353: }
 354: 
 355: 
 356: member(symb, set, torf) {
 357:     register i, num, pos;
 358:     num = chars[set];
 359:     pos = set + 1;
 360:     for (i=0; i<num; i++)
 361:         if (symb == chars[pos++]) return (torf);
 362:     return (!torf);
 363: }
 364: 
 365: notin(n) {
 366:     register i, j, pos;
 367:     for (i=0; i<=n; i++) {
 368:         if (positions[state[i]] == count) {
 369:             pos = state[i] + 1;
 370:             for (j=0; j < count; j++)
 371:                 if (tmpstat[positions[pos++]] != 1) goto nxt;
 372:             xstate = i;
 373:             return (0);
 374:         }
 375:         nxt: ;
 376:     }
 377:     return (1);
 378: }
 379: 
 380: add(array, n) int *array; {
 381:     register i;
 382:     if (nxtpos + count > MAXPOS) overflo();
 383:     array[n] = nxtpos;
 384:     positions[nxtpos++] = count;
 385:     for (i=3; i <= line; i++) {
 386:         if (tmpstat[i] == 1) {
 387:             positions[nxtpos++] = i;
 388:         }
 389:     }
 390: }
 391: 
 392: follow(v) int v; {
 393:     int p;
 394:     if (v == line) return;
 395:     p = parent[v];
 396:     switch(name[p]) {
 397:         case STAR:
 398:         case PLUS:  cstate(v);
 399:                 follow(p);
 400:                 return;
 401: 
 402:         case OR:
 403:         case QUEST: follow(p);
 404:                 return;
 405: 
 406:         case CAT:   if (v == left[p]) {
 407:                     if (cstate(right[p]) == 0) {
 408:                         follow(p);
 409:                         return;
 410:                     }
 411:                 }
 412:                 else follow(p);
 413:                 return;
 414:         case FINAL: if (tmpstat[line] != 1) {
 415:                     tmpstat[line] = 1;
 416:                     count++;
 417:                 }
 418:                 return;
 419:     }
 420: }
 421: 
 422: 
 423: main(argc, argv)
 424: char **argv;
 425: {
 426:     while (--argc > 0 && (++argv)[0][0]=='-')
 427:         switch (argv[0][1]) {
 428: 
 429:         case 's':
 430:             sflag++;
 431:             continue;
 432: 
 433:         case 'h':
 434:             hflag = 0;
 435:             continue;
 436: 
 437:         case 'b':
 438:             bflag++;
 439:             continue;
 440: 
 441:         case 'c':
 442:             cflag++;
 443:             continue;
 444: 
 445:         case 'e':
 446:             argc--;
 447:             argv++;
 448:             goto out;
 449: 
 450:         case 'f':
 451:             fflag++;
 452:             continue;
 453: 
 454:         case 'l':
 455:             lflag++;
 456:             continue;
 457: 
 458:         case 'n':
 459:             nflag++;
 460:             continue;
 461: 
 462:         case 'v':
 463:             vflag++;
 464:             continue;
 465: 
 466:         default:
 467:             fprintf(stderr, "egrep: unknown flag\n");
 468:             continue;
 469:         }
 470: out:
 471:     if (argc<=0)
 472:         exit(2);
 473:     if (fflag) {
 474:         fname = *argv;
 475:         exprfile = fopen(fname, "r");
 476:         if (exprfile == (FILE *)NULL) {
 477:             fprintf(stderr, "egrep: can't open %s\n", fname);
 478:             exit(2);
 479:         }
 480:     }
 481:     else input = *argv;
 482:     argc--;
 483:     argv++;
 484: 
 485:     yyparse();
 486: 
 487:     cfoll(line-1);
 488:     cgotofn();
 489:     nfile = argc;
 490:     if (argc<=0) {
 491:         if (lflag) exit(1);
 492:         execute(0);
 493:     }
 494:     else while (--argc >= 0) {
 495:         execute(*argv);
 496:         argv++;
 497:     }
 498:     exit(retcode != 0 ? retcode : nsucc == 0);
 499: }
 500: 
 501: execute(file)
 502: char *file;
 503: {
 504:     register char *p;
 505:     register cstat;
 506:     register ccount;
 507:     static char *buf;
 508:     static int blksize;
 509:     struct stat stb;
 510:     char *nlp;
 511:     int istat;
 512:     if (file) {
 513:         if ((f = open(file, 0)) < 0) {
 514:             fprintf(stderr, "egrep: can't open %s\n", file);
 515:             retcode = 2;
 516:             return;
 517:         }
 518:     }
 519:     else f = 0;
 520:     if (buf == NULL) {
 521:         if (fstat(f, &stb) > 0 && stb.st_blksize > 0)
 522:             blksize = stb.st_blksize;
 523:         else
 524:             blksize = BLKSIZE;
 525:         buf = (char *)malloc(2*blksize);
 526:         if (buf == NULL) {
 527:             fprintf(stderr, "egrep: no memory for %s\n", file);
 528:             retcode = 2;
 529:             return;
 530:         }
 531:     }
 532:     ccount = 0;
 533:     lnum = 1;
 534:     tln = 0;
 535:     blkno = 0;
 536:     p = buf;
 537:     nlp = p;
 538:     if ((ccount = read(f,p,blksize))<=0) goto done;
 539:     istat = cstat = gotofn[0]['\n'];
 540:     if (out[cstat]) goto found;
 541:     for (;;) {
 542:         cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
 543:         if (out[cstat]) {
 544:         found:  for(;;) {
 545:                 if (*p++ == '\n') {
 546:                     if (vflag == 0) {
 547:                 succeed:    nsucc = 1;
 548:                         if (cflag) tln++;
 549:                         else if (sflag)
 550:                             ;   /* ugh */
 551:                         else if (lflag) {
 552:                             printf("%s\n", file);
 553:                             close(f);
 554:                             return;
 555:                         }
 556:                         else {
 557:                             if (nfile > 1 && hflag) printf("%s:", file);
 558:                             if (bflag) printf("%d:", blkno);
 559:                             if (nflag) printf("%ld:", lnum);
 560:                             if (p <= nlp) {
 561:                                 while (nlp < &buf[2*blksize]) putchar(*nlp++);
 562:                                 nlp = buf;
 563:                             }
 564:                             while (nlp < p) putchar(*nlp++);
 565:                         }
 566:                     }
 567:                     lnum++;
 568:                     nlp = p;
 569:                     if ((out[(cstat=istat)]) == 0) goto brk2;
 570:                 }
 571:                 cfound:
 572:                 if (--ccount <= 0) {
 573:                     if (p <= &buf[blksize]) {
 574:                         if ((ccount = read(f, p, blksize)) <= 0) goto done;
 575:                     }
 576:                     else if (p == &buf[2*blksize]) {
 577:                         p = buf;
 578:                         if ((ccount = read(f, p, blksize)) <= 0) goto done;
 579:                     }
 580:                     else {
 581:                         if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done;
 582:                     }
 583:                     blkno += ccount / 512;
 584:                 }
 585:             }
 586:         }
 587:         if (*p++ == '\n') {
 588:             if (vflag) goto succeed;
 589:             else {
 590:                 lnum++;
 591:                 nlp = p;
 592:                 if (out[(cstat=istat)]) goto cfound;
 593:             }
 594:         }
 595:         brk2:
 596:         if (--ccount <= 0) {
 597:             if (p <= &buf[blksize]) {
 598:                 if ((ccount = read(f, p, blksize)) <= 0) break;
 599:             }
 600:             else if (p == &buf[2*blksize]) {
 601:                 p = buf;
 602:                 if ((ccount = read(f, p, blksize)) <= 0) break;
 603:             }
 604:             else {
 605:                 if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break;
 606:             }
 607:             blkno += ccount / 512;
 608:         }
 609:     }
 610: done:   close(f);
 611:     if (cflag) {
 612:         if (nfile > 1)
 613:             printf("%s:", file);
 614:         printf("%ld\n", tln);
 615:     }
 616: }

Defined functions

_add defined in line 380; used 3 times
_cclenter defined in line 192; used 2 times
_cfoll defined in line 222; used 4 times
_cgotofn defined in line 236; used 1 times
_cstate defined in line 330; used 8 times
_enter defined in line 184; used 4 times
_execute defined in line 501; used 2 times
_follow defined in line 392; used 5 times
_main defined in line 423; never used
_member defined in line 356; used 2 times
_nextch defined in line 167; used 6 times
_node defined in line 199; used 6 times
_notin defined in line 365; used 1 times
_overflo defined in line 217; used 7 times
_synerror defined in line 179; used 2 times
_unary defined in line 209; used 5 times
_yyerror defined in line 111; never used
_yylex defined in line 117; never used

Defined macros

BLKSIZE defined in line 21; used 1 times
FINAL defined in line 26; used 1 times
  • in line 69
MAXLIN defined in line 22; used 13 times
MAXPOS defined in line 23; used 2 times
NCHARS defined in line 24; used 7 times
NSTATES defined in line 25; used 4 times
Last modified: 1986-03-04
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 2206
Valid CSS Valid XHTML 1.0 Strict