1: # include "dextern" 2: 3: /* variables used locally */ 4: 5: /* lookahead computations */ 6: 7: int tbitset; /* size of lookahead sets */ 8: struct looksets lkst [ LSETSIZE ]; 9: int nlset = 0; /* next lookahead set index */ 10: int nolook = 0; /* flag to suppress lookahead computations */ 11: struct looksets clset; /* temporary storage for lookahead computations */ 12: 13: /* working set computations */ 14: 15: struct wset wsets[ WSETSIZE ]; 16: struct wset *cwp; 17: 18: /* state information */ 19: 20: int nstate = 0; /* number of states */ 21: struct item *pstate[NSTATES+2]; /* pointers to the descriptions of the states */ 22: int tystate[NSTATES]; /* contains type information about the states */ 23: int indgo[NSTATES]; /* index to the stored goto table */ 24: int tstates[ NTERMS ]; /* states generated by terminal gotos */ 25: int ntstates[ NNONTERM ]; /* states generated by nonterminal gotos */ 26: int mstates[ NSTATES ]; /* chain of overflows of term/nonterm generation lists */ 27: 28: /* storage for the actions in the parser */ 29: 30: int amem[ACTSIZE]; /* action table storage */ 31: int *memp = amem; /* next free action table position */ 32: 33: /* other storage areas */ 34: 35: int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */ 36: int lineno= 1; /* current input line number */ 37: int fatfl = 1; /* if on, error is fatal */ 38: int nerrors = 0; /* number of errors */ 39: 40: /* storage for information about the nonterminals */ 41: 42: int **pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */ 43: struct looksets *pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */ 44: int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */ 45: 46: main(argc,argv) int argc; char *argv[]; { 47: 48: setup(argc,argv); /* initialize and read productions */ 49: tbitset = NWORDS(ntokens); 50: cpres(); /* make table of which productions yield a given nonterminal */ 51: cempty(); /* make a table of which nonterminals can match the empty string */ 52: cpfir(); /* make a table of firsts of nonterminals */ 53: stagen(); /* generate the states */ 54: output(); /* write the states and the tables */ 55: go2out(); 56: hideprod(); 57: summary(); 58: callopt(); 59: others(); 60: exit(0); 61: } 62: 63: others(){ /* put out other arrays, copy the parsers */ 64: register c, i, j; 65: 66: finput = fopen( PARSER, "r" ); 67: if( finput == NULL ) error( "cannot find parser %s", PARSER ); 68: 69: warray( "yyr1", levprd, nprod ); 70: 71: aryfil( temp1, nprod, 0 ); 72: PLOOP(1,i)temp1[i] = prdptr[i+1]-prdptr[i]-2; 73: warray( "yyr2", temp1, nprod ); 74: 75: aryfil( temp1, nstate, -1000 ); 76: TLOOP(i){ 77: for( j=tstates[i]; j!=0; j=mstates[j] ){ 78: temp1[j] = tokset[i].value; 79: } 80: } 81: NTLOOP(i){ 82: for( j=ntstates[i]; j!=0; j=mstates[j] ){ 83: temp1[j] = -i; 84: } 85: } 86: warray( "yychk", temp1, nstate ); 87: 88: warray( "yydef", defact, nstate ); 89: 90: /* copy parser text */ 91: 92: while( (c=getc(finput) ) != EOF ){ 93: if( c == '$' ){ 94: if( (c=getc(finput)) != 'A' ) putc( '$', ftable ); 95: else { /* copy actions */ 96: faction = fopen( ACTNAME, "r" ); 97: if( faction == NULL ) error( "cannot reopen action tempfile" ); 98: while( (c=getc(faction) ) != EOF ) putc( c, ftable ); 99: fclose(faction); 100: ZAPFILE(ACTNAME); 101: c = getc(finput); 102: } 103: } 104: putc( c, ftable ); 105: } 106: 107: fclose( ftable ); 108: } 109: 110: char *chcopy( p, q ) char *p, *q; { 111: /* copies string q into p, returning next free char ptr */ 112: while( *p = *q++ ) ++p; 113: return( p ); 114: } 115: 116: # define ISIZE 400 117: char *writem(pp) int *pp; { /* creates output string for item pointed to by pp */ 118: int i,*p; 119: static char sarr[ISIZE]; 120: char *q; 121: 122: for( p=pp; *p>0 ; ++p ) ; 123: p = prdptr[-*p]; 124: q = chcopy( sarr, nontrst[*p-NTBASE].name ); 125: q = chcopy( q, " : " ); 126: 127: for(;;){ 128: *q++ = ++p==pp ? '_' : ' '; 129: *q = '\0'; 130: if((i = *p) <= 0) break; 131: q = chcopy( q, symnam(i) ); 132: if( q> &sarr[ISIZE-30] ) error( "item too big" ); 133: } 134: 135: if( (i = *pp) < 0 ){ /* an item calling for a reduction */ 136: q = chcopy( q, " (" ); 137: sprintf( q, "%d)", -i ); 138: } 139: 140: return( sarr ); 141: } 142: 143: char *symnam(i){ /* return a pointer to the name of symbol i */ 144: char *cp; 145: 146: cp = (i>=NTBASE) ? nontrst[i-NTBASE].name : tokset[i].name ; 147: if( *cp == ' ' ) ++cp; 148: return( cp ); 149: } 150: 151: struct wset *zzcwp = wsets; 152: int zzgoent = 0; 153: int zzgobest = 0; 154: int zzacent = 0; 155: int zzexcp = 0; 156: int zzclose = 0; 157: int zzsrconf = 0; 158: int * zzmemsz = mem0; 159: int zzrrconf = 0; 160: 161: summary(){ /* output the summary on the tty */ 162: 163: if( foutput!=NULL ){ 164: fprintf( foutput, "\n%d/%d terminals, %d/%d nonterminals\n", ntokens, NTERMS, 165: nnonter, NNONTERM ); 166: fprintf( foutput, "%d/%d grammar rules, %d/%d states\n", nprod, NPROD, nstate, NSTATES ); 167: fprintf( foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n", zzsrconf, zzrrconf ); 168: fprintf( foutput, "%d/%d working sets used\n", zzcwp-wsets, WSETSIZE ); 169: fprintf( foutput, "memory: states,etc. %d/%d, parser %d/%d\n", zzmemsz-mem0, MEMSIZE, 170: memp-amem, ACTSIZE ); 171: fprintf( foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE ); 172: fprintf( foutput, "%d extra closures\n", zzclose - 2*nstate ); 173: fprintf( foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp ); 174: fprintf( foutput, "%d goto entries\n", zzgoent ); 175: fprintf( foutput, "%d entries saved by goto default\n", zzgobest ); 176: } 177: if( zzsrconf!=0 || zzrrconf!=0 ){ 178: fprintf( stdout,"\nconflicts: "); 179: if( zzsrconf )fprintf( stdout, "%d shift/reduce" , zzsrconf ); 180: if( zzsrconf && zzrrconf )fprintf( stdout, ", " ); 181: if( zzrrconf )fprintf( stdout, "%d reduce/reduce" , zzrrconf ); 182: fprintf( stdout, "\n" ); 183: } 184: 185: fclose( ftemp ); 186: if( fdefine != NULL ) fclose( fdefine ); 187: } 188: 189: /* VARARGS1 */ 190: error(s,a1) char *s; { /* write out error comment */ 191: 192: ++nerrors; 193: fprintf( stderr, "\n fatal error: "); 194: fprintf( stderr, s,a1); 195: fprintf( stderr, ", line %d\n", lineno ); 196: if( !fatfl ) return; 197: summary(); 198: exit(1); 199: } 200: 201: aryfil( v, n, c ) int *v,n,c; { /* set elements 0 through n-1 to c */ 202: int i; 203: for( i=0; i<n; ++i ) v[i] = c; 204: } 205: 206: setunion( a, b ) register *a, *b; { 207: /* set a to the union of a and b */ 208: /* return 1 if b is not a subset of a, 0 otherwise */ 209: register i, x, sub; 210: 211: sub = 0; 212: SETLOOP(i){ 213: *a = (x = *a)|*b++; 214: if( *a++ != x ) sub = 1; 215: } 216: return( sub ); 217: } 218: 219: prlook( p ) struct looksets *p;{ 220: register j, *pp; 221: pp = p->lset; 222: if( pp == 0 ) fprintf( foutput, "\tNULL"); 223: else { 224: fprintf( foutput, " { " ); 225: TLOOP(j) { 226: if( BIT(pp,j) ) fprintf( foutput, "%s ", symnam(j) ); 227: } 228: fprintf( foutput, "}" ); 229: } 230: } 231: 232: cpres(){ /* compute an array with the beginnings of productions yielding given nonterminals 233: The array pres points to these lists */ 234: /* the array pyield has the lists: the total size is only NPROD+1 */ 235: register **pmem; 236: register c, j, i; 237: static int * pyield[NPROD]; 238: 239: pmem = pyield; 240: 241: NTLOOP(i){ 242: c = i+NTBASE; 243: pres[i] = pmem; 244: fatfl = 0; /* make undefined symbols nonfatal */ 245: PLOOP(0,j){ 246: if (*prdptr[j] == c) *pmem++ = prdptr[j]+1; 247: } 248: if(pres[i] == pmem){ 249: error("nonterminal %s not defined!", nontrst[i].name); 250: } 251: } 252: pres[i] = pmem; 253: fatfl = 1; 254: if( nerrors ){ 255: summary(); 256: exit(1); 257: } 258: if( pmem != &pyield[nprod] ) error( "internal Yacc error: pyield %d", pmem-&pyield[nprod] ); 259: } 260: 261: int indebug = 0; 262: cpfir() { 263: /* compute an array with the first of nonterminals */ 264: register *p, **s, i, **t, ch, changes; 265: 266: zzcwp = &wsets[nnonter]; 267: NTLOOP(i){ 268: aryfil( wsets[i].ws.lset, tbitset, 0 ); 269: t = pres[i+1]; 270: for( s=pres[i]; s<t; ++s ){ /* initially fill the sets */ 271: for( p = *s; (ch = *p) > 0 ; ++p ) { 272: if( ch < NTBASE ) { 273: SETBIT( wsets[i].ws.lset, ch ); 274: break; 275: } 276: else if( !pempty[ch-NTBASE] ) break; 277: } 278: } 279: } 280: 281: /* now, reflect transitivity */ 282: 283: changes = 1; 284: while( changes ){ 285: changes = 0; 286: NTLOOP(i){ 287: t = pres[i+1]; 288: for( s=pres[i]; s<t; ++s ){ 289: for( p = *s; ( ch = (*p-NTBASE) ) >= 0; ++p ) { 290: changes |= setunion( wsets[i].ws.lset, wsets[ch].ws.lset ); 291: if( !pempty[ch] ) break; 292: } 293: } 294: } 295: } 296: 297: NTLOOP(i) pfirst[i] = flset( &wsets[i].ws ); 298: if( !indebug ) return; 299: if( (foutput!=NULL) ){ 300: NTLOOP(i) { 301: fprintf( foutput, "\n%s: ", nontrst[i].name ); 302: prlook( pfirst[i] ); 303: fprintf( foutput, " %d\n", pempty[i] ); 304: } 305: } 306: } 307: 308: state(c){ /* sorts last state,and sees if it equals earlier ones. returns state number */ 309: int size1,size2; 310: register i; 311: struct item *p1, *p2, *k, *l, *q1, *q2; 312: p1 = pstate[nstate]; 313: p2 = pstate[nstate+1]; 314: if(p1==p2) return(0); /* null state */ 315: /* sort the items */ 316: for(k=p2-1;k>p1;k--) { /* make k the biggest */ 317: for(l=k-1;l>=p1;--l)if( l->pitem > k->pitem ){ 318: int *s; 319: struct looksets *ss; 320: s = k->pitem; 321: k->pitem = l->pitem; 322: l->pitem = s; 323: ss = k->look; 324: k->look = l->look; 325: l->look = ss; 326: } 327: } 328: size1 = p2 - p1; /* size of state */ 329: 330: for( i= (c>=NTBASE)?ntstates[c-NTBASE]:tstates[c]; i != 0; i = mstates[i] ) { 331: /* get ith state */ 332: q1 = pstate[i]; 333: q2 = pstate[i+1]; 334: size2 = q2 - q1; 335: if (size1 != size2) continue; 336: k=p1; 337: for(l=q1;l<q2;l++) { 338: if( l->pitem != k->pitem ) break; 339: ++k; 340: } 341: if (l != q2) continue; 342: /* found it */ 343: pstate[nstate+1] = pstate[nstate]; /* delete last state */ 344: /* fix up lookaheads */ 345: if( nolook ) return(i); 346: for( l=q1,k=p1; l<q2; ++l,++k ){ 347: int s; 348: SETLOOP(s) clset.lset[s] = l->look->lset[s]; 349: if( setunion( clset.lset, k->look->lset ) ) { 350: tystate[i] = MUSTDO; 351: /* register the new set */ 352: l->look = flset( &clset ); 353: } 354: } 355: return (i); 356: } 357: /* state is new */ 358: if( nolook ) error( "yacc state/nolook error" ); 359: pstate[nstate+2] = p2; 360: if(nstate+1 >= NSTATES) error("too many states" ); 361: if( c >= NTBASE ){ 362: mstates[ nstate ] = ntstates[ c-NTBASE ]; 363: ntstates[ c-NTBASE ] = nstate; 364: } 365: else { 366: mstates[ nstate ] = tstates[ c ]; 367: tstates[ c ] = nstate; 368: } 369: tystate[nstate]=MUSTDO; 370: return(nstate++); 371: } 372: 373: int pidebug = 0; /* debugging flag for putitem */ 374: putitem( ptr, lptr ) int *ptr; struct looksets *lptr; { 375: register struct item *j; 376: 377: if( pidebug && (foutput!=NULL) ) { 378: fprintf( foutput, "putitem(%s), state %d\n", writem(ptr), nstate ); 379: } 380: j = pstate[nstate+1]; 381: j->pitem = ptr; 382: if( !nolook ) j->look = flset( lptr ); 383: pstate[nstate+1] = ++j; 384: if( (int *)j > zzmemsz ){ 385: zzmemsz = (int *)j; 386: if( zzmemsz >= &mem0[MEMSIZE] ) error( "out of state space" ); 387: } 388: } 389: 390: cempty(){ /* mark nonterminals which derive the empty string */ 391: /* also, look for nonterminals which don't derive any token strings */ 392: 393: # define EMPTY 1 394: # define WHOKNOWS 0 395: # define OK 1 396: 397: register i, *p; 398: 399: /* first, use the array pempty to detect productions that can never be reduced */ 400: /* set pempty to WHONOWS */ 401: aryfil( pempty, nnonter+1, WHOKNOWS ); 402: 403: /* now, look at productions, marking nonterminals which derive something */ 404: 405: more: 406: PLOOP(0,i){ 407: if( pempty[ *prdptr[i] - NTBASE ] ) continue; 408: for( p=prdptr[i]+1; *p>=0; ++p ){ 409: if( *p>=NTBASE && pempty[ *p-NTBASE ] == WHOKNOWS ) break; 410: } 411: if( *p < 0 ){ /* production can be derived */ 412: pempty[ *prdptr[i]-NTBASE ] = OK; 413: goto more; 414: } 415: } 416: 417: /* now, look at the nonterminals, to see if they are all OK */ 418: 419: NTLOOP(i){ 420: /* the added production rises or falls as the start symbol ... */ 421: if( i == 0 ) continue; 422: if( pempty[ i ] != OK ) { 423: fatfl = 0; 424: error( "nonterminal %s never derives any token string", nontrst[i].name ); 425: } 426: } 427: 428: if( nerrors ){ 429: summary(); 430: exit(1); 431: } 432: 433: /* now, compute the pempty array, to see which nonterminals derive the empty string */ 434: 435: /* set pempty to WHOKNOWS */ 436: 437: aryfil( pempty, nnonter+1, WHOKNOWS ); 438: 439: /* loop as long as we keep finding empty nonterminals */ 440: 441: again: 442: PLOOP(1,i){ 443: if( pempty[ *prdptr[i]-NTBASE ]==WHOKNOWS ){ /* not known to be empty */ 444: for( p=prdptr[i]+1; *p>=NTBASE && pempty[*p-NTBASE]==EMPTY ; ++p ) ; 445: if( *p < 0 ){ /* we have a nontrivially empty nonterminal */ 446: pempty[*prdptr[i]-NTBASE] = EMPTY; 447: goto again; /* got one ... try for another */ 448: } 449: } 450: } 451: 452: } 453: 454: int gsdebug = 0; 455: stagen(){ /* generate the states */ 456: 457: int i, j; 458: register c; 459: register struct wset *p, *q; 460: 461: /* initialize */ 462: 463: nstate = 0; 464: /* THIS IS FUNNY from the standpoint of portability */ 465: /* it represents the magic moment when the mem0 array, which has 466: /* been holding the productions, starts to hold item pointers, of a 467: /* different type... */ 468: /* someday, alloc should be used to allocate all this stuff... for now, we 469: /* accept that if pointers don't fit in integers, there is a problem... */ 470: 471: pstate[0] = pstate[1] = (struct item *)mem; 472: aryfil( clset.lset, tbitset, 0 ); 473: putitem( prdptr[0]+1, &clset ); 474: tystate[0] = MUSTDO; 475: nstate = 1; 476: pstate[2] = pstate[1]; 477: 478: aryfil( amem, ACTSIZE, 0 ); 479: 480: /* now, the main state generation loop */ 481: 482: more: 483: SLOOP(i){ 484: if( tystate[i] != MUSTDO ) continue; 485: tystate[i] = DONE; 486: aryfil( temp1, nnonter+1, 0 ); 487: /* take state i, close it, and do gotos */ 488: closure(i); 489: WSLOOP(wsets,p){ /* generate goto's */ 490: if( p->flag ) continue; 491: p->flag = 1; 492: c = *(p->pitem); 493: if( c <= 1 ) { 494: if( pstate[i+1]-pstate[i] <= p-wsets ) tystate[i] = MUSTLOOKAHEAD; 495: continue; 496: } 497: /* do a goto on c */ 498: WSLOOP(p,q){ 499: if( c == *(q->pitem) ){ /* this item contributes to the goto */ 500: putitem( q->pitem + 1, &q->ws ); 501: q->flag = 1; 502: } 503: } 504: if( c < NTBASE ) { 505: state(c); /* register new state */ 506: } 507: else { 508: temp1[c-NTBASE] = state(c); 509: } 510: } 511: if( gsdebug && (foutput!=NULL) ){ 512: fprintf( foutput, "%d: ", i ); 513: NTLOOP(j) { 514: if( temp1[j] ) fprintf( foutput, "%s %d, ", nontrst[j].name, temp1[j] ); 515: } 516: fprintf( foutput, "\n"); 517: } 518: indgo[i] = apack( &temp1[1], nnonter-1 ) - 1; 519: goto more; /* we have done one goto; do some more */ 520: } 521: /* no more to do... stop */ 522: } 523: 524: int cldebug = 0; /* debugging flag for closure */ 525: closure(i){ /* generate the closure of state i */ 526: 527: int c, ch, work, k; 528: register struct wset *u, *v; 529: int *pi; 530: int **s, **t; 531: struct item *q; 532: register struct item *p; 533: 534: ++zzclose; 535: 536: /* first, copy kernel of state i to wsets */ 537: 538: cwp = wsets; 539: ITMLOOP(i,p,q){ 540: cwp->pitem = p->pitem; 541: cwp->flag = 1; /* this item must get closed */ 542: SETLOOP(k) cwp->ws.lset[k] = p->look->lset[k]; 543: WSBUMP(cwp); 544: } 545: 546: /* now, go through the loop, closing each item */ 547: 548: work = 1; 549: while( work ){ 550: work = 0; 551: WSLOOP(wsets,u){ 552: 553: if( u->flag == 0 ) continue; 554: c = *(u->pitem); /* dot is before c */ 555: 556: if( c < NTBASE ){ 557: u->flag = 0; 558: continue; /* only interesting case is where . is before nonterminal */ 559: } 560: 561: /* compute the lookahead */ 562: aryfil( clset.lset, tbitset, 0 ); 563: 564: /* find items involving c */ 565: 566: WSLOOP(u,v){ 567: if( v->flag == 1 && *(pi=v->pitem) == c ){ 568: v->flag = 0; 569: if( nolook ) continue; 570: while( (ch= *++pi)>0 ){ 571: if( ch < NTBASE ){ /* terminal symbol */ 572: SETBIT( clset.lset, ch ); 573: break; 574: } 575: /* nonterminal symbol */ 576: setunion( clset.lset, pfirst[ch-NTBASE]->lset ); 577: if( !pempty[ch-NTBASE] ) break; 578: } 579: if( ch<=0 ) setunion( clset.lset, v->ws.lset ); 580: } 581: } 582: 583: /* now loop over productions derived from c */ 584: 585: c -= NTBASE; /* c is now nonterminal number */ 586: 587: t = pres[c+1]; 588: for( s=pres[c]; s<t; ++s ){ 589: /* put these items into the closure */ 590: WSLOOP(wsets,v){ /* is the item there */ 591: if( v->pitem == *s ){ /* yes, it is there */ 592: if( nolook ) goto nexts; 593: if( setunion( v->ws.lset, clset.lset ) ) v->flag = work = 1; 594: goto nexts; 595: } 596: } 597: 598: /* not there; make a new entry */ 599: if( cwp-wsets+1 >= WSETSIZE ) error( "working set overflow" ); 600: cwp->pitem = *s; 601: cwp->flag = 1; 602: if( !nolook ){ 603: work = 1; 604: SETLOOP(k) cwp->ws.lset[k] = clset.lset[k]; 605: } 606: WSBUMP(cwp); 607: 608: nexts: ; 609: } 610: 611: } 612: } 613: 614: /* have computed closure; flags are reset; return */ 615: 616: if( cwp > zzcwp ) zzcwp = cwp; 617: if( cldebug && (foutput!=NULL) ){ 618: fprintf( foutput, "\nState %d, nolook = %d\n", i, nolook ); 619: WSLOOP(wsets,u){ 620: if( u->flag ) fprintf( foutput, "flag set!\n"); 621: u->flag = 0; 622: fprintf( foutput, "\t%s", writem(u->pitem)); 623: prlook( &u->ws ); 624: fprintf( foutput, "\n" ); 625: } 626: } 627: } 628: 629: struct looksets *flset( p ) struct looksets *p; { 630: /* decide if the lookahead set pointed to by p is known */ 631: /* return pointer to a perminent location for the set */ 632: 633: register struct looksets *q; 634: int j, *w; 635: register *u, *v; 636: 637: for( q = &lkst[nlset]; q-- > lkst; ){ 638: u = p->lset; 639: v = q->lset; 640: w = & v[tbitset]; 641: while( v<w) if( *u++ != *v++ ) goto more; 642: /* we have matched */ 643: return( q ); 644: more: ; 645: } 646: /* add a new one */ 647: q = &lkst[nlset++]; 648: if( nlset >= LSETSIZE )error("too many lookahead sets" ); 649: SETLOOP(j){ 650: q->lset[j] = p->lset[j]; 651: } 652: return( q ); 653: }