1: /* (c) 1979 Regents of the University of California */ 2: # include "ey.h" 3: # define IDENTIFIER 257 4: # define MARK 258 5: # define TERM 259 6: # define LEFT 260 7: # define BINARY 261 8: # define RIGHT 262 9: # define PREC 263 10: # define LCURLY 264 11: # define C_IDENTIFIER 265 /* name followed by colon */ 12: # define NUMBER 266 13: 14: setup(argc,argv) int argc; char *argv[]; 15: { int i,j,lev,t; 16: int c; 17: 18: foutput = stdout; 19: i = 1; 20: while( argc >= 2 && argv[1][0] == '-' ) { 21: while( *++(argv[1]) ){ 22: switch( *argv[1] ){ 23: case 'v': 24: case 'V': 25: foutput = copen("y.output", 'w' ); 26: if( foutput < 0 ) error( "cannot open y.output"); 27: continue; 28: case 'o': 29: case 'O': 30: oflag = 1; 31: continue; 32: case 'r': 33: case 'R': 34: oflag = 1; 35: rflag = 1; 36: continue; 37: default: error( "illegal option: %c", *argv[1]); 38: } 39: } 40: argv++; 41: argc--; 42: } 43: 44: ftable = copen( oflag ? "yacc.tmp" : "y.tab.c" , 'w' ); 45: if( ftable<0 ) error( "cannot open table file" ); 46: if( argc > 1 ) cin = copen( argv[1], 'r' ); 47: if( cin < 0 ) error( "cannot open input" ); 48: settab(); 49: fprintf( cout , "#\n"); 50: ctokn = "$end"; 51: defin(0); /* eof */ 52: extval = 0400; /* beginning of assigned values */ 53: ctokn = "error"; 54: defin(0); 55: ctokn = "$accept"; 56: defin(1); 57: mem=mem0; 58: cnamp = cnames; 59: lev=0; 60: i=0; 61: 62: while( ( t = gettok() ) != EOF ) { 63: switch( t ){ 64: case IDENTIFIER: j = chfind(0); 65: trmlev[j] = lev; 66: continue; 67: case ',': 68: case ';': continue; 69: case TERM: lev=0; continue; 70: case LEFT: lev=(++i<<3)|01; continue; 71: case BINARY: lev=(++i<<3)|02; continue; 72: case RIGHT: lev=(++i<<3)|03; continue; 73: case MARK: 74: defout(); 75: if( rflag ){ /* RATFOR */ 76: fprintf( cout , "define yyerrok yyerrf = 0\n" ); 77: fprintf( cout , "define yyclearin yychar = -1\n" ); 78: fprintf( cout , "subroutine yyactr(yyprdn)\n"); 79: fprintf( cout , "common/yycomn/yylval,yyval,yypv,yyvalv(150)\n" ); 80: fprintf( cout , "common/yylcom/yychar,yyerrf,yydebu\n" ); 81: fprintf( cout , "integer yychar, yyerrf, yydebu\n" ); 82: fprintf( cout , "integer yyprdn,yyval,yylval,yypv,yyvalv\n" ); 83: } 84: else { 85: fprintf( cout , "#define yyclearin yychar = -1\n" ); 86: fprintf( cout , "#define yyerrok yyerrflag = 0\n" ); 87: fprintf( cout , "extern int yychar, yyerrflag;\n" ); 88: fprintf( cout , "\nint yyval 0;\nint *yypv;\nint yylval 0;"); 89: fprintf( cout , "\nyyactr(__np__){\n"); 90: } 91: break; 92: case LCURLY: defout(); 93: cpycode(); 94: continue; 95: case NUMBER: 96: trmset[j].value = numbval; 97: if( j < ndefout && j>2 ) 98: error("please define type # of %s earlier", trmset[j].name ); 99: continue; 100: default: error("bad precedence syntax, input %d", t ); 101: } 102: break; 103: } 104: prdptr[0]=mem; 105: /* added production */ 106: *mem++ = NTBASE; 107: *mem++ = NTBASE+1; 108: *mem++ = 1; 109: *mem++ = 0; 110: prdptr[1]=mem; 111: i=0; 112: 113: /* i is 0 when a rule can begin, 1 otherwise */ 114: 115: for(;;) switch( t=gettok() ) { 116: case C_IDENTIFIER: if( mem == prdptr[1] ) { /* first time */ 117: if( rflag ){ 118: fprintf( cout , "goto 1000\n" ); 119: } 120: else fprintf( cout , "\nswitch(__np__){\n"); 121: } 122: if( i != 0 ) error( "previous rule not terminated" ); 123: *mem = chfind(1); 124: if( *mem < NTBASE )error( "token illegal on lhs of grammar rule" ); 125: i=1; 126: ++mem; 127: continue; 128: case IDENTIFIER: 129: *mem=chfind(1); 130: if(*mem < NTBASE)levprd[nprod]=trmlev[*mem]; 131: mem++; 132: if(i==0) error("missing :"); 133: continue; 134: case '=': levprd[nprod] |= 04; 135: if( i==0 ) error("semicolon preceeds action"); 136: fprintf( cout , rflag?"\n%d ":"\ncase %d:", nprod ); 137: cpyact(); 138: fprintf( cout , rflag ? " return" : " break;" ); 139: case '|': 140: case ';': if(i){ 141: *mem++ = -nprod; 142: prdptr[++nprod] = mem; 143: levprd[nprod]=0; 144: i=0;} 145: if (t=='|'){i=1;*mem++ = *prdptr[nprod-1];} 146: continue; 147: case 0: /* End Of File */ 148: case EOF: 149: case MARK: if( i != 0 ) error( "rule not terminated before %%%% or EOF" ); 150: settab(); 151: finact(); 152: /* copy the programs which follow the rules */ 153: if( t == MARK ){ 154: while (( c=fgetc( cin)) != EOF ) fputc(c,cout); 155: } 156: return; 157: case PREC: 158: if( i==0 ) error( "%%prec must appear inside rule" ); 159: if( gettok()!=IDENTIFIER)error("illegal %%prec syntax" ); 160: j=chfind(2); 161: if(j>=NTBASE)error("nonterminal %s illegal after %%prec", nontrst[j-NTBASE].name); 162: levprd[nprod]=trmlev[j]; 163: continue; 164: case LCURLY: 165: if( i!=0 ) error( "%%{ appears within a rule" ); 166: cpycode(); 167: continue; 168: default: error( "syntax error, input %d", t ); 169: } 170: } 171: 172: finact(){ 173: /* finish action routine */ 174: register i; 175: 176: if( rflag ){ 177: 178: fprintf( cout , "\n1000 goto(" ); 179: for( i=1; i<nprod; ++i ){ 180: fprintf( cout , "%d,", (levprd[i]&04)==0?999:i ); 181: } 182: fprintf( cout , "999),yyprdn\n" ); 183: fprintf( cout , "999 return\nend\n" ); 184: fprintf( cout , "define YYERRCODE %d\n", trmset[2].value ); 185: } 186: else { 187: fprintf( cout , "\n}\n}\n" ); 188: fprintf( cout , "int yyerrval %d;\n", trmset[2].value ); 189: } 190: } 191: defin(t) { 192: /* define ctokn to be a terminal if t=0 193: or a nonterminal if t=1 */ 194: char *cp,*p; 195: int c; 196: 197: 198: if (t) { 199: if( ++nnonter >= ntlim ) error("too many nonterminals, limit %d",ntlim); 200: nontrst[nnonter].name = ctokn; 201: return( NTBASE + nnonter ); 202: } 203: else { 204: if( ++nterms >= tlim ) error("too many terminals, limit %d",tlim ); 205: trmset[nterms].name = ctokn; 206: if( ctokn[0]==' ' && ctokn[2]=='\0' ) /* single character literal */ 207: trmset[nterms].value = ctokn[1]; 208: else if ( ctokn[0]==' ' && ctokn[1]=='\\' ) { /* escape sequence */ 209: if( ctokn[3] == '\0' ){ /* single character escape sequence */ 210: switch ( ctokn[2] ){ 211: /* character which is escaped */ 212: case 'n': trmset[nterms].value = '\n'; break; 213: case 'r': trmset[nterms].value = '\r'; break; 214: case 'b': trmset[nterms].value = '\b'; break; 215: case 't': trmset[nterms].value = '\t'; break; 216: case '\'': trmset[nterms].value = '\''; break; 217: case '"': trmset[nterms].value = '"'; break; 218: case '\\': trmset[nterms].value = '\\'; break; 219: default: error( "invalid escape" ); 220: } 221: } 222: else if( ctokn[2] <= '7' && ctokn[2]>='0' ){ /* \nnn sequence */ 223: if( ctokn[3]<'0' || ctokn[3] > '7' || ctokn[4]<'0' || 224: ctokn[4]>'7' || ctokn[5] != '\0' ) error("illegal \\nnn construction" ); 225: trmset[nterms].value = 64*(ctokn[2]-'0')+8*(ctokn[3]-'0')+ctokn[4]-'0'; 226: if( trmset[nterms].value == 0 ) error( "'\\000' is illegal" ); 227: } 228: } 229: else { 230: trmset[nterms].value = extval++; 231: 232: } 233: trmlev[nterms] = 0; 234: return( nterms ); 235: } 236: } 237: 238: defout(){ /* write out the defines (at the end of the declaration section) */ 239: 240: _REGISTER int i, c; 241: _REGISTER char *cp; 242: 243: for( i=ndefout; i<=nterms; ++i ){ 244: 245: cp = trmset[i].name; 246: if( *cp == ' ' ) ++cp; /* literals */ 247: 248: for( ; (c= *cp)!='\0'; ++cp ){ 249: 250: if( c>='a' && c<='z' || 251: c>='A' && c<='Z' || 252: c>='0' && c<='9' || 253: c=='_' ) ; /* VOID */ 254: else goto nodef; 255: } 256: 257: /* define it */ 258: 259: fprintf( cout , "%c define %s %d\n", rflag?' ':'#', trmset[i].name, trmset[i].value ); 260: 261: nodef: ; 262: } 263: 264: ndefout = nterms+1; 265: 266: } 267: 268: chstash( c ){ 269: /* put character away into cnames */ 270: if( cnamp >= &cnames[cnamsz] ) error("too many characters in id's and literals" ); 271: else *cnamp++ = c; 272: } 273: 274: int gettok() { 275: int j, base; 276: static int peekline; /* number of '\n' seen in lookahead */ 277: auto int c, match, reserve; 278: 279: begin: 280: reserve = 0; 281: if( peekc>=0 ) { 282: c = peekc; 283: lineno += peekline; 284: peekc = -1; 285: peekline = 0; 286: } 287: else c = fgetc( cin); 288: while( c==' ' || c=='\n' || c=='\t' || c == '\014'){ 289: if( c == '\n' ) ++lineno; 290: c=fgetc( cin); 291: } 292: if (c=='/') 293: {if (fgetc( cin)!='*')error("illegal /"); 294: c=fgetc( cin); 295: while(c != EOF) { 296: if( c == '\n' ) ++lineno; 297: if (c=='*') 298: {if((c=fgetc( cin))=='/')break;} 299: else c=fgetc( cin);} 300: if (!c) return(0); 301: goto begin;} 302: j=0; 303: switch(c){ 304: case '"': 305: case '\'': match = c; 306: ctokn = cnamp; 307: chstash( ' ' ); 308: while(1){ 309: c = fgetc( cin); 310: if( c == '\n' || c == '\0' ) 311: error("illegal or missing ' or \""); 312: if( c == '\\' ){ 313: c = fgetc( cin); 314: chstash( '\\' ); 315: } 316: else if( c == match ) break; 317: chstash( c ); 318: } 319: break; 320: case '%': 321: case '\\': switch(c=fgetc( cin)) 322: {case '0': return(TERM); 323: case '<': return(LEFT); 324: case '2': return(BINARY); 325: case '>': return(RIGHT); 326: case '%': 327: case '\\': return(MARK); 328: case '=': return(PREC); 329: case '{': return(LCURLY); 330: default: reserve = 1; 331: } 332: default: if( c >= '0' && c <= '9' ){ /* number */ 333: numbval = c-'0' ; 334: base = (c=='0') ? 8 : 10 ; 335: for( c=fgetc( cin); c>='0' && c<='9'; c=fgetc( cin) ){ 336: numbval = numbval*base + c - '0'; 337: } 338: peekc = c; 339: return(NUMBER); 340: } 341: else if( (c>='a'&&c<='z')||(c>='A'&&c<='Z')||c=='_'||c=='.'||c=='$'){ 342: ctokn = cnamp; 343: while( (c>='a'&&c<='z') || 344: (c>='A'&&c<='Z') || 345: (c>='0'&&c<='9') || 346: c=='_' || c=='.' || c=='$' ) { 347: chstash( c ); 348: if( peekc>=0 ) { c = peekc; peekc = -1; } 349: else c = fgetc( cin); 350: } 351: } 352: else return(c); 353: 354: peekc=c; 355: } 356: chstash( '\0' ); 357: 358: if( reserve ){ /* find a reserved word */ 359: if( compare("term")) return( TERM ); 360: if( compare("TERM")) return( TERM ); 361: if( compare("token")) return( TERM ); 362: if( compare("TOKEN")) return( TERM ); 363: if( compare("left")) return( LEFT ); 364: if( compare("LEFT")) return( LEFT ); 365: if( compare("nonassoc")) return( BINARY ); 366: if( compare("NONASSOC")) return( BINARY ); 367: if( compare("binary")) return( BINARY ); 368: if( compare("BINARY")) return( BINARY ); 369: if( compare("right")) return( RIGHT ); 370: if( compare("RIGHT")) return( RIGHT ); 371: if( compare("prec")) return( PREC ); 372: if( compare("PREC")) return( PREC ); 373: error("invalid escape, or illegal reserved word: %s", ctokn ); 374: } 375: 376: /* look ahead to distinguish IDENTIFIER from C_IDENTIFIER */ 377: 378: look: 379: while( peekc==' ' || peekc=='\t' || peekc == '\n' || peekc == '\014' ) 380: { 381: if( peekc == '\n' ) ++peekline; 382: peekc = fgetc( cin); 383: } 384: 385: if( peekc != ':' ) return( IDENTIFIER ); 386: peekc = -1; 387: lineno += peekline; 388: peekline = 0; 389: return( C_IDENTIFIER ); 390: } 391: chfind(t) 392: 393: { int i,j; 394: 395: if (ctokn[0]==' ')t=0; 396: for(i=1;i<=nterms;i++) 397: if(compare(trmset[i].name)){ 398: cnamp = ctokn; 399: return( i ); 400: } 401: for(i=1;i<=nnonter;i++) 402: if(compare(nontrst[i].name)) { 403: cnamp = ctokn; 404: return( i+NTBASE ); 405: } 406: /* cannot find name */ 407: if( t>1 && ctokn[0] != ' ' ) 408: error( "%s should have been defined earlier", ctokn ); 409: return( defin( t ) ); 410: } 411: 412: cpycode(){ /* copies code between \{ and \} */ 413: 414: int c; 415: c = fgetc( cin); 416: if( c == '\n' ) { 417: c = fgetc( cin); 418: lineno++; 419: } 420: while( c != EOF ){ 421: if( c=='\\' ) 422: if( (c=fgetc( cin)) == '}' ) return; 423: else fputc('\\',cout); 424: if( c=='%' ) 425: if( (c=fgetc( cin)) == '}' ) return; 426: else fputc('%',cout); 427: fputc( c, cout ); 428: if( c == '\n' ) ++lineno; 429: c = fgetc( cin); 430: } 431: error("eof before %%}"); 432: } 433: 434: cpyact(){ /* copy C action to the next ; or closing } */ 435: int brac, c, match, *i, j, s; 436: 437: brac = 0; 438: 439: loop: 440: c = fgetc( cin); 441: swt: 442: switch( c ){ 443: 444: case ';': 445: if( brac == 0 ){ 446: fputc( c, cout ); 447: return; 448: } 449: goto lcopy; 450: 451: case '{': 452: brac++; 453: goto lcopy; 454: 455: case '$': 456: s = 1; 457: c = fgetc( cin); 458: if( c == '$' ){ 459: fprintf( cout , "yyval"); 460: goto loop; 461: } 462: if( c == '-' ){ 463: s = -s; 464: c = fgetc( cin); 465: } 466: if( c>='0' && c <= '9' ){ 467: j=0; 468: while( c>='0' && c<= '9' ){ 469: j= j*10+c-'0'; 470: c = fgetc( cin); 471: } 472: if( rflag ) fprintf( cout , "yyvalv(yypv%c%d)", s==1?'+':'-', j ); 473: else fprintf( cout , "yypv[%d]", s*j ); 474: goto swt; 475: } 476: fputc( '$' , cout); 477: if( s<0 ) fputc('-', cout); 478: goto swt; 479: 480: case '}': 481: brac--; 482: if( brac == 0 ){ 483: fputc( c , cout); 484: return; 485: } 486: goto lcopy; 487: 488: case '/': /* look for comments */ 489: fputc( c ,cout); 490: c = fgetc( cin); 491: if( c != '*' ) goto swt; 492: 493: /* it really is a comment */ 494: 495: fputc( c , cout); 496: while( (c=fgetc( cin)) != EOF ){ 497: if( c=='*' ){ 498: fputc( c , cout); 499: if( (c=fgetc( cin)) == '/' ) goto lcopy; 500: } 501: fputc( c , cout); 502: } 503: error( "EOF inside comment" ); 504: 505: case '\'': /* character constant */ 506: match = '\''; 507: goto string; 508: 509: case '"': /* character string */ 510: match = '"'; 511: 512: string: 513: 514: fputc( c , cout); 515: while( (c=fgetc( cin)) != EOF ){ 516: 517: if( c=='\\' ){ 518: fputc( c , cout); 519: c=fgetc( cin); 520: } 521: else if( c==match ) goto lcopy; 522: fputc( c , cout); 523: } 524: error( "EOF in string or character constant" ); 525: 526: case '\0': 527: error("action does not terminate"); 528: case '\n': ++lineno; 529: goto lcopy; 530: 531: } 532: 533: lcopy: 534: fputc( c , cout); 535: goto loop; 536: }