1: #ifndef lint 2: static char sccsid[] = "@(#)y1.c 4.1 (Berkeley) 2/11/83"; 3: #endif not lint 4: 5: # include "dextern" 6: 7: /* variables used locally */ 8: 9: /* lookahead computations */ 10: 11: int tbitset; /* size of lookahead sets */ 12: struct looksets lkst [ LSETSIZE ]; 13: int nlset = 0; /* next lookahead set index */ 14: int nolook = 0; /* flag to suppress lookahead computations */ 15: struct looksets clset; /* temporary storage for lookahead computations */ 16: 17: /* working set computations */ 18: 19: struct wset wsets[ WSETSIZE ]; 20: struct wset *cwp; 21: 22: /* state information */ 23: 24: int nstate = 0; /* number of states */ 25: struct item *pstate[NSTATES+2]; /* pointers to the descriptions of the states */ 26: int tystate[NSTATES]; /* contains type information about the states */ 27: int indgo[NSTATES]; /* index to the stored goto table */ 28: int tstates[ NTERMS ]; /* states generated by terminal gotos */ 29: int ntstates[ NNONTERM ]; /* states generated by nonterminal gotos */ 30: int mstates[ NSTATES ]; /* chain of overflows of term/nonterm generation lists */ 31: 32: /* storage for the actions in the parser */ 33: 34: int amem[ACTSIZE]; /* action table storage */ 35: int *memp = amem; /* next free action table position */ 36: 37: /* other storage areas */ 38: 39: int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */ 40: int lineno= 1; /* current input line number */ 41: int fatfl = 1; /* if on, error is fatal */ 42: int nerrors = 0; /* number of errors */ 43: 44: /* storage for information about the nonterminals */ 45: 46: int **pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */ 47: struct looksets *pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */ 48: int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */ 49: 50: main(argc,argv) int argc; char *argv[]; { 51: 52: setup(argc,argv); /* initialize and read productions */ 53: tbitset = NWORDS(ntokens); 54: cpres(); /* make table of which productions yield a given nonterminal */ 55: cempty(); /* make a table of which nonterminals can match the empty string */ 56: cpfir(); /* make a table of firsts of nonterminals */ 57: stagen(); /* generate the states */ 58: output(); /* write the states and the tables */ 59: go2out(); 60: hideprod(); 61: summary(); 62: callopt(); 63: others(); 64: exit(0); 65: } 66: 67: others(){ /* put out other arrays, copy the parsers */ 68: register c, i, j; 69: 70: finput = fopen( PARSER, "r" ); 71: if( finput == NULL ) error( "cannot find parser %s", PARSER ); 72: 73: warray( "yyr1", levprd, nprod ); 74: 75: aryfil( temp1, nprod, 0 ); 76: PLOOP(1,i)temp1[i] = prdptr[i+1]-prdptr[i]-2; 77: warray( "yyr2", temp1, nprod ); 78: 79: aryfil( temp1, nstate, -1000 ); 80: TLOOP(i){ 81: for( j=tstates[i]; j!=0; j=mstates[j] ){ 82: temp1[j] = tokset[i].value; 83: } 84: } 85: NTLOOP(i){ 86: for( j=ntstates[i]; j!=0; j=mstates[j] ){ 87: temp1[j] = -i; 88: } 89: } 90: warray( "yychk", temp1, nstate ); 91: 92: warray( "yydef", defact, nstate ); 93: 94: /* copy parser text */ 95: 96: while( (c=getc(finput) ) != EOF ){ 97: if( c == '$' ){ 98: if( (c=getc(finput)) != 'A' ) putc( '$', ftable ); 99: else { /* copy actions */ 100: faction = fopen( ACTNAME, "r" ); 101: if( faction == NULL ) error( "cannot reopen action tempfile" ); 102: while( (c=getc(faction) ) != EOF ) putc( c, ftable ); 103: fclose(faction); 104: ZAPFILE(ACTNAME); 105: c = getc(finput); 106: } 107: } 108: putc( c, ftable ); 109: } 110: 111: fclose( ftable ); 112: } 113: 114: char *chcopy( p, q ) char *p, *q; { 115: /* copies string q into p, returning next free char ptr */ 116: while( *p = *q++ ) ++p; 117: return( p ); 118: } 119: 120: # define ISIZE 400 121: char *writem(pp) int *pp; { /* creates output string for item pointed to by pp */ 122: int i,*p; 123: static char sarr[ISIZE]; 124: char *q; 125: 126: for( p=pp; *p>0 ; ++p ) ; 127: p = prdptr[-*p]; 128: q = chcopy( sarr, nontrst[*p-NTBASE].name ); 129: q = chcopy( q, " : " ); 130: 131: for(;;){ 132: *q++ = ++p==pp ? '_' : ' '; 133: *q = '\0'; 134: if((i = *p) <= 0) break; 135: q = chcopy( q, symnam(i) ); 136: if( q> &sarr[ISIZE-30] ) error( "item too big" ); 137: } 138: 139: if( (i = *pp) < 0 ){ /* an item calling for a reduction */ 140: q = chcopy( q, " (" ); 141: sprintf( q, "%d)", -i ); 142: } 143: 144: return( sarr ); 145: } 146: 147: char *symnam(i){ /* return a pointer to the name of symbol i */ 148: char *cp; 149: 150: cp = (i>=NTBASE) ? nontrst[i-NTBASE].name : tokset[i].name ; 151: if( *cp == ' ' ) ++cp; 152: return( cp ); 153: } 154: 155: struct wset *zzcwp = wsets; 156: int zzgoent = 0; 157: int zzgobest = 0; 158: int zzacent = 0; 159: int zzexcp = 0; 160: int zzclose = 0; 161: int zzsrconf = 0; 162: int * zzmemsz = mem0; 163: int zzrrconf = 0; 164: 165: summary(){ /* output the summary on the tty */ 166: 167: if( foutput!=NULL ){ 168: fprintf( foutput, "\n%d/%d terminals, %d/%d nonterminals\n", ntokens, NTERMS, 169: nnonter, NNONTERM ); 170: fprintf( foutput, "%d/%d grammar rules, %d/%d states\n", nprod, NPROD, nstate, NSTATES ); 171: fprintf( foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n", zzsrconf, zzrrconf ); 172: fprintf( foutput, "%d/%d working sets used\n", zzcwp-wsets, WSETSIZE ); 173: fprintf( foutput, "memory: states,etc. %d/%d, parser %d/%d\n", zzmemsz-mem0, MEMSIZE, 174: memp-amem, ACTSIZE ); 175: fprintf( foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE ); 176: fprintf( foutput, "%d extra closures\n", zzclose - 2*nstate ); 177: fprintf( foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp ); 178: fprintf( foutput, "%d goto entries\n", zzgoent ); 179: fprintf( foutput, "%d entries saved by goto default\n", zzgobest ); 180: } 181: if( zzsrconf!=0 || zzrrconf!=0 ){ 182: fprintf( stdout,"\nconflicts: "); 183: if( zzsrconf )fprintf( stdout, "%d shift/reduce" , zzsrconf ); 184: if( zzsrconf && zzrrconf )fprintf( stdout, ", " ); 185: if( zzrrconf )fprintf( stdout, "%d reduce/reduce" , zzrrconf ); 186: fprintf( stdout, "\n" ); 187: } 188: 189: fclose( ftemp ); 190: if( fdefine != NULL ) fclose( fdefine ); 191: } 192: 193: /* VARARGS1 */ 194: error(s,a1) char *s; { /* write out error comment */ 195: 196: ++nerrors; 197: fprintf( stderr, "\n fatal error: "); 198: fprintf( stderr, s,a1); 199: fprintf( stderr, ", line %d\n", lineno ); 200: if( !fatfl ) return; 201: summary(); 202: exit(1); 203: } 204: 205: aryfil( v, n, c ) int *v,n,c; { /* set elements 0 through n-1 to c */ 206: int i; 207: for( i=0; i<n; ++i ) v[i] = c; 208: } 209: 210: setunion( a, b ) register *a, *b; { 211: /* set a to the union of a and b */ 212: /* return 1 if b is not a subset of a, 0 otherwise */ 213: register i, x, sub; 214: 215: sub = 0; 216: SETLOOP(i){ 217: *a = (x = *a)|*b++; 218: if( *a++ != x ) sub = 1; 219: } 220: return( sub ); 221: } 222: 223: prlook( p ) struct looksets *p;{ 224: register j, *pp; 225: pp = p->lset; 226: if( pp == 0 ) fprintf( foutput, "\tNULL"); 227: else { 228: fprintf( foutput, " { " ); 229: TLOOP(j) { 230: if( BIT(pp,j) ) fprintf( foutput, "%s ", symnam(j) ); 231: } 232: fprintf( foutput, "}" ); 233: } 234: } 235: 236: cpres(){ /* compute an array with the beginnings of productions yielding given nonterminals 237: The array pres points to these lists */ 238: /* the array pyield has the lists: the total size is only NPROD+1 */ 239: register **pmem; 240: register c, j, i; 241: static int * pyield[NPROD]; 242: 243: pmem = pyield; 244: 245: NTLOOP(i){ 246: c = i+NTBASE; 247: pres[i] = pmem; 248: fatfl = 0; /* make undefined symbols nonfatal */ 249: PLOOP(0,j){ 250: if (*prdptr[j] == c) *pmem++ = prdptr[j]+1; 251: } 252: if(pres[i] == pmem){ 253: error("nonterminal %s not defined!", nontrst[i].name); 254: } 255: } 256: pres[i] = pmem; 257: fatfl = 1; 258: if( nerrors ){ 259: summary(); 260: exit(1); 261: } 262: if( pmem != &pyield[nprod] ) error( "internal Yacc error: pyield %d", pmem-&pyield[nprod] ); 263: } 264: 265: int indebug = 0; 266: cpfir() { 267: /* compute an array with the first of nonterminals */ 268: register *p, **s, i, **t, ch, changes; 269: 270: zzcwp = &wsets[nnonter]; 271: NTLOOP(i){ 272: aryfil( wsets[i].ws.lset, tbitset, 0 ); 273: t = pres[i+1]; 274: for( s=pres[i]; s<t; ++s ){ /* initially fill the sets */ 275: for( p = *s; (ch = *p) > 0 ; ++p ) { 276: if( ch < NTBASE ) { 277: SETBIT( wsets[i].ws.lset, ch ); 278: break; 279: } 280: else if( !pempty[ch-NTBASE] ) break; 281: } 282: } 283: } 284: 285: /* now, reflect transitivity */ 286: 287: changes = 1; 288: while( changes ){ 289: changes = 0; 290: NTLOOP(i){ 291: t = pres[i+1]; 292: for( s=pres[i]; s<t; ++s ){ 293: for( p = *s; ( ch = (*p-NTBASE) ) >= 0; ++p ) { 294: changes |= setunion( wsets[i].ws.lset, wsets[ch].ws.lset ); 295: if( !pempty[ch] ) break; 296: } 297: } 298: } 299: } 300: 301: NTLOOP(i) pfirst[i] = flset( &wsets[i].ws ); 302: if( !indebug ) return; 303: if( (foutput!=NULL) ){ 304: NTLOOP(i) { 305: fprintf( foutput, "\n%s: ", nontrst[i].name ); 306: prlook( pfirst[i] ); 307: fprintf( foutput, " %d\n", pempty[i] ); 308: } 309: } 310: } 311: 312: state(c){ /* sorts last state,and sees if it equals earlier ones. returns state number */ 313: int size1,size2; 314: register i; 315: struct item *p1, *p2, *k, *l, *q1, *q2; 316: p1 = pstate[nstate]; 317: p2 = pstate[nstate+1]; 318: if(p1==p2) return(0); /* null state */ 319: /* sort the items */ 320: for(k=p2-1;k>p1;k--) { /* make k the biggest */ 321: for(l=k-1;l>=p1;--l)if( l->pitem > k->pitem ){ 322: int *s; 323: struct looksets *ss; 324: s = k->pitem; 325: k->pitem = l->pitem; 326: l->pitem = s; 327: ss = k->look; 328: k->look = l->look; 329: l->look = ss; 330: } 331: } 332: size1 = p2 - p1; /* size of state */ 333: 334: for( i= (c>=NTBASE)?ntstates[c-NTBASE]:tstates[c]; i != 0; i = mstates[i] ) { 335: /* get ith state */ 336: q1 = pstate[i]; 337: q2 = pstate[i+1]; 338: size2 = q2 - q1; 339: if (size1 != size2) continue; 340: k=p1; 341: for(l=q1;l<q2;l++) { 342: if( l->pitem != k->pitem ) break; 343: ++k; 344: } 345: if (l != q2) continue; 346: /* found it */ 347: pstate[nstate+1] = pstate[nstate]; /* delete last state */ 348: /* fix up lookaheads */ 349: if( nolook ) return(i); 350: for( l=q1,k=p1; l<q2; ++l,++k ){ 351: int s; 352: SETLOOP(s) clset.lset[s] = l->look->lset[s]; 353: if( setunion( clset.lset, k->look->lset ) ) { 354: tystate[i] = MUSTDO; 355: /* register the new set */ 356: l->look = flset( &clset ); 357: } 358: } 359: return (i); 360: } 361: /* state is new */ 362: if( nolook ) error( "yacc state/nolook error" ); 363: pstate[nstate+2] = p2; 364: if(nstate+1 >= NSTATES) error("too many states" ); 365: if( c >= NTBASE ){ 366: mstates[ nstate ] = ntstates[ c-NTBASE ]; 367: ntstates[ c-NTBASE ] = nstate; 368: } 369: else { 370: mstates[ nstate ] = tstates[ c ]; 371: tstates[ c ] = nstate; 372: } 373: tystate[nstate]=MUSTDO; 374: return(nstate++); 375: } 376: 377: int pidebug = 0; /* debugging flag for putitem */ 378: putitem( ptr, lptr ) int *ptr; struct looksets *lptr; { 379: register struct item *j; 380: 381: if( pidebug && (foutput!=NULL) ) { 382: fprintf( foutput, "putitem(%s), state %d\n", writem(ptr), nstate ); 383: } 384: j = pstate[nstate+1]; 385: j->pitem = ptr; 386: if( !nolook ) j->look = flset( lptr ); 387: pstate[nstate+1] = ++j; 388: if( (int *)j > zzmemsz ){ 389: zzmemsz = (int *)j; 390: if( zzmemsz >= &mem0[MEMSIZE] ) error( "out of state space" ); 391: } 392: } 393: 394: cempty(){ /* mark nonterminals which derive the empty string */ 395: /* also, look for nonterminals which don't derive any token strings */ 396: 397: # define EMPTY 1 398: # define WHOKNOWS 0 399: # define OK 1 400: 401: register i, *p; 402: 403: /* first, use the array pempty to detect productions that can never be reduced */ 404: /* set pempty to WHONOWS */ 405: aryfil( pempty, nnonter+1, WHOKNOWS ); 406: 407: /* now, look at productions, marking nonterminals which derive something */ 408: 409: more: 410: PLOOP(0,i){ 411: if( pempty[ *prdptr[i] - NTBASE ] ) continue; 412: for( p=prdptr[i]+1; *p>=0; ++p ){ 413: if( *p>=NTBASE && pempty[ *p-NTBASE ] == WHOKNOWS ) break; 414: } 415: if( *p < 0 ){ /* production can be derived */ 416: pempty[ *prdptr[i]-NTBASE ] = OK; 417: goto more; 418: } 419: } 420: 421: /* now, look at the nonterminals, to see if they are all OK */ 422: 423: NTLOOP(i){ 424: /* the added production rises or falls as the start symbol ... */ 425: if( i == 0 ) continue; 426: if( pempty[ i ] != OK ) { 427: fatfl = 0; 428: error( "nonterminal %s never derives any token string", nontrst[i].name ); 429: } 430: } 431: 432: if( nerrors ){ 433: summary(); 434: exit(1); 435: } 436: 437: /* now, compute the pempty array, to see which nonterminals derive the empty string */ 438: 439: /* set pempty to WHOKNOWS */ 440: 441: aryfil( pempty, nnonter+1, WHOKNOWS ); 442: 443: /* loop as long as we keep finding empty nonterminals */ 444: 445: again: 446: PLOOP(1,i){ 447: if( pempty[ *prdptr[i]-NTBASE ]==WHOKNOWS ){ /* not known to be empty */ 448: for( p=prdptr[i]+1; *p>=NTBASE && pempty[*p-NTBASE]==EMPTY ; ++p ) ; 449: if( *p < 0 ){ /* we have a nontrivially empty nonterminal */ 450: pempty[*prdptr[i]-NTBASE] = EMPTY; 451: goto again; /* got one ... try for another */ 452: } 453: } 454: } 455: 456: } 457: 458: int gsdebug = 0; 459: stagen(){ /* generate the states */ 460: 461: int i, j; 462: register c; 463: register struct wset *p, *q; 464: 465: /* initialize */ 466: 467: nstate = 0; 468: /* THIS IS FUNNY from the standpoint of portability */ 469: /* it represents the magic moment when the mem0 array, which has 470: /* been holding the productions, starts to hold item pointers, of a 471: /* different type... */ 472: /* someday, alloc should be used to allocate all this stuff... for now, we 473: /* accept that if pointers don't fit in integers, there is a problem... */ 474: 475: pstate[0] = pstate[1] = (struct item *)mem; 476: aryfil( clset.lset, tbitset, 0 ); 477: putitem( prdptr[0]+1, &clset ); 478: tystate[0] = MUSTDO; 479: nstate = 1; 480: pstate[2] = pstate[1]; 481: 482: aryfil( amem, ACTSIZE, 0 ); 483: 484: /* now, the main state generation loop */ 485: 486: more: 487: SLOOP(i){ 488: if( tystate[i] != MUSTDO ) continue; 489: tystate[i] = DONE; 490: aryfil( temp1, nnonter+1, 0 ); 491: /* take state i, close it, and do gotos */ 492: closure(i); 493: WSLOOP(wsets,p){ /* generate goto's */ 494: if( p->flag ) continue; 495: p->flag = 1; 496: c = *(p->pitem); 497: if( c <= 1 ) { 498: if( pstate[i+1]-pstate[i] <= p-wsets ) tystate[i] = MUSTLOOKAHEAD; 499: continue; 500: } 501: /* do a goto on c */ 502: WSLOOP(p,q){ 503: if( c == *(q->pitem) ){ /* this item contributes to the goto */ 504: putitem( q->pitem + 1, &q->ws ); 505: q->flag = 1; 506: } 507: } 508: if( c < NTBASE ) { 509: state(c); /* register new state */ 510: } 511: else { 512: temp1[c-NTBASE] = state(c); 513: } 514: } 515: if( gsdebug && (foutput!=NULL) ){ 516: fprintf( foutput, "%d: ", i ); 517: NTLOOP(j) { 518: if( temp1[j] ) fprintf( foutput, "%s %d, ", nontrst[j].name, temp1[j] ); 519: } 520: fprintf( foutput, "\n"); 521: } 522: indgo[i] = apack( &temp1[1], nnonter-1 ) - 1; 523: goto more; /* we have done one goto; do some more */ 524: } 525: /* no more to do... stop */ 526: } 527: 528: int cldebug = 0; /* debugging flag for closure */ 529: closure(i){ /* generate the closure of state i */ 530: 531: int c, ch, work, k; 532: register struct wset *u, *v; 533: int *pi; 534: int **s, **t; 535: struct item *q; 536: register struct item *p; 537: 538: ++zzclose; 539: 540: /* first, copy kernel of state i to wsets */ 541: 542: cwp = wsets; 543: ITMLOOP(i,p,q){ 544: cwp->pitem = p->pitem; 545: cwp->flag = 1; /* this item must get closed */ 546: SETLOOP(k) cwp->ws.lset[k] = p->look->lset[k]; 547: WSBUMP(cwp); 548: } 549: 550: /* now, go through the loop, closing each item */ 551: 552: work = 1; 553: while( work ){ 554: work = 0; 555: WSLOOP(wsets,u){ 556: 557: if( u->flag == 0 ) continue; 558: c = *(u->pitem); /* dot is before c */ 559: 560: if( c < NTBASE ){ 561: u->flag = 0; 562: continue; /* only interesting case is where . is before nonterminal */ 563: } 564: 565: /* compute the lookahead */ 566: aryfil( clset.lset, tbitset, 0 ); 567: 568: /* find items involving c */ 569: 570: WSLOOP(u,v){ 571: if( v->flag == 1 && *(pi=v->pitem) == c ){ 572: v->flag = 0; 573: if( nolook ) continue; 574: while( (ch= *++pi)>0 ){ 575: if( ch < NTBASE ){ /* terminal symbol */ 576: SETBIT( clset.lset, ch ); 577: break; 578: } 579: /* nonterminal symbol */ 580: setunion( clset.lset, pfirst[ch-NTBASE]->lset ); 581: if( !pempty[ch-NTBASE] ) break; 582: } 583: if( ch<=0 ) setunion( clset.lset, v->ws.lset ); 584: } 585: } 586: 587: /* now loop over productions derived from c */ 588: 589: c -= NTBASE; /* c is now nonterminal number */ 590: 591: t = pres[c+1]; 592: for( s=pres[c]; s<t; ++s ){ 593: /* put these items into the closure */ 594: WSLOOP(wsets,v){ /* is the item there */ 595: if( v->pitem == *s ){ /* yes, it is there */ 596: if( nolook ) goto nexts; 597: if( setunion( v->ws.lset, clset.lset ) ) v->flag = work = 1; 598: goto nexts; 599: } 600: } 601: 602: /* not there; make a new entry */ 603: if( cwp-wsets+1 >= WSETSIZE ) error( "working set overflow" ); 604: cwp->pitem = *s; 605: cwp->flag = 1; 606: if( !nolook ){ 607: work = 1; 608: SETLOOP(k) cwp->ws.lset[k] = clset.lset[k]; 609: } 610: WSBUMP(cwp); 611: 612: nexts: ; 613: } 614: 615: } 616: } 617: 618: /* have computed closure; flags are reset; return */ 619: 620: if( cwp > zzcwp ) zzcwp = cwp; 621: if( cldebug && (foutput!=NULL) ){ 622: fprintf( foutput, "\nState %d, nolook = %d\n", i, nolook ); 623: WSLOOP(wsets,u){ 624: if( u->flag ) fprintf( foutput, "flag set!\n"); 625: u->flag = 0; 626: fprintf( foutput, "\t%s", writem(u->pitem)); 627: prlook( &u->ws ); 628: fprintf( foutput, "\n" ); 629: } 630: } 631: } 632: 633: struct looksets *flset( p ) struct looksets *p; { 634: /* decide if the lookahead set pointed to by p is known */ 635: /* return pointer to a perminent location for the set */ 636: 637: register struct looksets *q; 638: int j, *w; 639: register *u, *v; 640: 641: for( q = &lkst[nlset]; q-- > lkst; ){ 642: u = p->lset; 643: v = q->lset; 644: w = & v[tbitset]; 645: while( v<w) if( *u++ != *v++ ) goto more; 646: /* we have matched */ 647: return( q ); 648: more: ; 649: } 650: /* add a new one */ 651: q = &lkst[nlset++]; 652: if( nlset >= LSETSIZE )error("too many lookahead sets" ); 653: SETLOOP(j){ 654: q->lset[j] = p->lset[j]; 655: } 656: return( q ); 657: }