1: /* (c) 1979 Regents of the University of California */ 2: # include "ey.h" 3: # define IDENTIFIER 257 4: # define MARK 258 5: # define TERM 259 6: # define LEFT 260 7: # define BINARY 261 8: # define RIGHT 262 9: # define PREC 263 10: # define LCURLY 264 11: # define C_IDENTIFIER 265 /* name followed by colon */ 12: # define NUMBER 266 13: 14: setup(argc,argv) int argc; char *argv[]; 15: { int i,j,lev,t; 16: int c; 17: 18: foutput = stdout; 19: i = 1; 20: while( argc >= 2 && argv[1][0] == '-' ) { 21: while( *++(argv[1]) ){ 22: switch( *argv[1] ){ 23: case 'v': 24: case 'V': 25: foutput = copen("y.output", 'w' ); 26: if( foutput < 0 ) error( "cannot open y.output"); 27: continue; 28: case 'o': 29: case 'O': 30: oflag = 1; 31: continue; 32: case 'r': 33: case 'R': 34: oflag = 1; 35: rflag = 1; 36: continue; 37: default: error( "illegal option: %c", *argv[1]); 38: } 39: } 40: argv++; 41: argc--; 42: } 43: 44: ftable = copen( oflag ? "yacc.tmp" : "y.tab.c" , 'w' ); 45: if( ftable<0 ) error( "cannot open table file" ); 46: if( argc > 1 ) cin = copen( argv[1], 'r' ); 47: if( cin < 0 ) error( "cannot open input" ); 48: settab(); 49: fprintf( cout , "#\n"); 50: ctokn = "$end"; 51: defin(0); /* eof */ 52: extval = 0400; /* beginning of assigned values */ 53: ctokn = "error"; 54: defin(0); 55: ctokn = "$accept"; 56: defin(1); 57: mem=mem0; 58: cnamp = cnames; 59: lev=0; 60: i=0; 61: 62: while( ( t = gettok() ) != EOF ) { 63: switch( t ){ 64: case IDENTIFIER: j = chfind(0); 65: trmlev[j] = lev; 66: continue; 67: case ',': 68: case ';': continue; 69: case TERM: lev=0; continue; 70: case LEFT: lev=(++i<<3)|01; continue; 71: case BINARY: lev=(++i<<3)|02; continue; 72: case RIGHT: lev=(++i<<3)|03; continue; 73: case MARK: 74: defout(); 75: if( rflag ){ /* RATFOR */ 76: fprintf( cout , "define yyerrok yyerrf = 0\n" ); 77: fprintf( cout , "define yyclearin yychar = -1\n" ); 78: fprintf( cout , "subroutine yyactr(yyprdn)\n"); 79: fprintf( cout , "common/yycomn/yylval,yyval,yypv,yyvalv(150)\n" ); 80: fprintf( cout , "common/yylcom/yychar,yyerrf,yydebu\n" ); 81: fprintf( cout , "integer yychar, yyerrf, yydebu\n" ); 82: fprintf( cout , "integer yyprdn,yyval,yylval,yypv,yyvalv\n" ); 83: } 84: else { 85: fprintf( cout , "#define yyclearin yychar = -1\n" ); 86: fprintf( cout , "#define yyerrok yyerrflag = 0\n" ); 87: fprintf( cout , "extern int yychar, yyerrflag;\n" ); 88: fprintf( cout , "\nint yyval = 0;\nint *yypv;\nint yylval = 0;"); 89: fprintf( cout , "\nyyactr(__np__){\n"); 90: } 91: break; 92: case LCURLY: defout(); 93: cpycode(); 94: continue; 95: case NUMBER: 96: trmset[j].value = numbval; 97: if( j < ndefout && j>2 ) 98: error("please define type # of %s earlier", trmset[j].name ); 99: continue; 100: default: error("bad precedence syntax, input %d", t ); 101: } 102: break; 103: } 104: prdptr[0]=mem; 105: /* added production */ 106: *mem++ = NTBASE; 107: *mem++ = NTBASE+1; 108: *mem++ = 1; 109: *mem++ = 0; 110: prdptr[1]=mem; 111: i=0; 112: 113: /* i is 0 when a rule can begin, 1 otherwise */ 114: 115: for(;;) switch( t=gettok() ) { 116: case C_IDENTIFIER: if( mem == prdptr[1] ) { /* first time */ 117: if( rflag ){ 118: fprintf( cout , "goto 1000\n" ); 119: } 120: else fprintf( cout 121: , "\nswitch(__np__){\n"); 122: } 123: if( i != 0 ) error( "previous rule not terminated" ); 124: *mem = chfind(1); 125: if( *mem < NTBASE )error( "token illegal on lhs of grammar rule" ); 126: i=1; 127: ++mem; 128: continue; 129: case IDENTIFIER: 130: *mem=chfind(1); 131: if(*mem < NTBASE)levprd[nprod]=trmlev[*mem]; 132: mem++; 133: if(i==0) error("missing :"); 134: continue; 135: case '=': levprd[nprod] |= 04; 136: if( i==0 ) error("semicolon preceeds action"); 137: fprintf( cout , rflag?"\n%d ":"\ncase %d:", nprod ); 138: cpyact(); 139: fprintf( cout , rflag ? " return" : " break;" ); 140: case '|': 141: case ';': if(i){ 142: *mem++ = -nprod; 143: prdptr[++nprod] = mem; 144: levprd[nprod]=0; 145: i=0;} 146: if (t=='|'){i=1;*mem++ = *prdptr[nprod-1];} 147: continue; 148: case 0: /* End Of File */ 149: case EOF: 150: case MARK: if( i != 0 ) error( "rule not terminated before %%%% or EOF" ); 151: settab(); 152: finact(); 153: /* copy the programs which follow the rules */ 154: if( t == MARK ){ 155: while (( c=fgetc( cin)) != EOF ) fputc(c,cout); 156: } 157: return; 158: case PREC: 159: if( i==0 ) error( "%%prec must appear inside rule" ); 160: if( gettok()!=IDENTIFIER)error("illegal %%prec syntax" ); 161: j=chfind(2); 162: if(j>=NTBASE)error("nonterminal %s illegal after %%prec", nontrst[j-NTBASE].name); 163: levprd[nprod]=trmlev[j]; 164: continue; 165: case LCURLY: 166: if( i!=0 ) error( "%%{ appears within a rule" ); 167: cpycode(); 168: continue; 169: default: error( "syntax error, input %d", t ); 170: } 171: } 172: 173: finact(){ 174: /* finish action routine */ 175: register i; 176: 177: if( rflag ){ 178: 179: fprintf( cout , "\n1000 goto(" ); 180: for( i=1; i<nprod; ++i ){ 181: fprintf( cout , "%d,", (levprd[i]&04)==0?999:i ); 182: } 183: fprintf( cout , "999),yyprdn\n" ); 184: fprintf( cout , "999 return\nend\n" ); 185: fprintf( cout , "define YYERRCODE %d\n", trmset[2].value ); 186: } 187: else { 188: fprintf( cout , "\n}\n}\n" ); 189: fprintf( cout , "int yyerrval = %d;\n", trmset[2].value ); 190: } 191: } 192: defin(t) { 193: /* define ctokn to be a terminal if t=0 194: or a nonterminal if t=1 */ 195: char *cp,*p; 196: int c; 197: 198: 199: if (t) { 200: if( ++nnonter >= ntlim ) error("too many nonterminals, limit %d",ntlim); 201: nontrst[nnonter].name = ctokn; 202: return( NTBASE + nnonter ); 203: } 204: else { 205: if( ++nterms >= tlim ) error("too many terminals, limit %d",tlim ); 206: trmset[nterms].name = ctokn; 207: if( ctokn[0]==' ' && ctokn[2]=='\0' ) /* single character literal */ 208: trmset[nterms].value = ctokn[1]; 209: else if ( ctokn[0]==' ' && ctokn[1]=='\\' ) { /* escape sequence */ 210: if( ctokn[3] == '\0' ){ /* single character escape sequence */ 211: switch ( ctokn[2] ){ 212: /* character which is escaped */ 213: case 'n': trmset[nterms].value = '\n'; break; 214: case 'r': trmset[nterms].value = '\r'; break; 215: case 'b': trmset[nterms].value = '\b'; break; 216: case 't': trmset[nterms].value = '\t'; break; 217: case '\'': trmset[nterms].value = '\''; break; 218: case '"': trmset[nterms].value = '"'; break; 219: case '\\': trmset[nterms].value = '\\'; break; 220: default: error( "invalid escape" ); 221: } 222: } 223: else if( ctokn[2] <= '7' && ctokn[2]>='0' ){ /* \nnn sequence */ 224: if( ctokn[3]<'0' || ctokn[3] > '7' || ctokn[4]<'0' || 225: ctokn[4]>'7' || ctokn[5] != '\0' ) error("illegal \\nnn construction" ); 226: trmset[nterms].value = 64*(ctokn[2]-'0')+8*(ctokn[3]-'0')+ctokn[4]-'0'; 227: if( trmset[nterms].value == 0 ) error( "'\\000' is illegal" ); 228: } 229: } 230: else { 231: trmset[nterms].value = extval++; 232: 233: } 234: trmlev[nterms] = 0; 235: return( nterms ); 236: } 237: } 238: 239: defout(){ /* write out the defines (at the end of the declaration section) */ 240: 241: _REGISTER int i, c; 242: _REGISTER char *cp; 243: 244: for( i=ndefout; i<=nterms; ++i ){ 245: 246: cp = trmset[i].name; 247: if( *cp == ' ' ) ++cp; /* literals */ 248: 249: for( ; (c= *cp)!='\0'; ++cp ){ 250: 251: if( c>='a' && c<='z' || 252: c>='A' && c<='Z' || 253: c>='0' && c<='9' || 254: c=='_' ) ; /* VOID */ 255: else goto nodef; 256: } 257: 258: /* define it */ 259: 260: fprintf( cout , "%c define %s %d\n", rflag?' ':'#', trmset[i].name, trmset[i].value ); 261: 262: nodef: ; 263: } 264: 265: ndefout = nterms+1; 266: 267: } 268: 269: chstash( c ){ 270: /* put character away into cnames */ 271: if( cnamp >= &cnames[cnamsz] ) error("too many characters in id's and literals" ); 272: else *cnamp++ = c; 273: } 274: 275: int gettok() { 276: int j, base; 277: static int peekline; /* number of '\n' seen in lookahead */ 278: auto int c, match, reserve; 279: 280: begin: 281: reserve = 0; 282: if( peekc>=0 ) { 283: c = peekc; 284: lineno += peekline; 285: peekc = -1; 286: peekline = 0; 287: } 288: else c = fgetc( cin); 289: while( c==' ' || c=='\n' || c=='\t' || c == '\014'){ 290: if( c == '\n' ) ++lineno; 291: c=fgetc( cin); 292: } 293: if (c=='/') 294: {if (fgetc( cin)!='*')error("illegal /"); 295: c=fgetc( cin); 296: while(c != EOF) { 297: if( c == '\n' ) ++lineno; 298: if (c=='*') 299: {if((c=fgetc( cin))=='/')break;} 300: else c=fgetc( cin);} 301: if (!c) return(0); 302: goto begin;} 303: j=0; 304: switch(c){ 305: case '"': 306: case '\'': match = c; 307: ctokn = cnamp; 308: chstash( ' ' ); 309: while(1){ 310: c = fgetc( cin); 311: if( c == '\n' || c == '\0' ) 312: error("illegal or missing ' or \""); 313: if( c == '\\' ){ 314: c = fgetc( cin); 315: chstash( '\\' ); 316: } 317: else if( c == match ) break; 318: chstash( c ); 319: } 320: break; 321: case '%': 322: case '\\': switch(c=fgetc( cin)) 323: {case '0': return(TERM); 324: case '<': return(LEFT); 325: case '2': return(BINARY); 326: case '>': return(RIGHT); 327: case '%': 328: case '\\': return(MARK); 329: case '=': return(PREC); 330: case '{': return(LCURLY); 331: default: reserve = 1; 332: } 333: default: if( c >= '0' && c <= '9' ){ /* number */ 334: numbval = c-'0' ; 335: base = (c=='0') ? 8 : 10 ; 336: for( c=fgetc( cin); c>='0' && c<='9'; c=fgetc( cin) ){ 337: numbval = numbval*base + c - '0'; 338: } 339: peekc = c; 340: return(NUMBER); 341: } 342: else if( (c>='a'&&c<='z')||(c>='A'&&c<='Z')||c=='_'||c=='.'||c=='$'){ 343: ctokn = cnamp; 344: while( (c>='a'&&c<='z') || 345: (c>='A'&&c<='Z') || 346: (c>='0'&&c<='9') || 347: c=='_' || c=='.' || c=='$' ) { 348: chstash( c ); 349: if( peekc>=0 ) { c = peekc; peekc = -1; } 350: else c = fgetc( cin); 351: } 352: } 353: else return(c); 354: 355: peekc=c; 356: } 357: chstash( '\0' ); 358: 359: if( reserve ){ /* find a reserved word */ 360: if( compare("term")) return( TERM ); 361: if( compare("TERM")) return( TERM ); 362: if( compare("token")) return( TERM ); 363: if( compare("TOKEN")) return( TERM ); 364: if( compare("left")) return( LEFT ); 365: if( compare("LEFT")) return( LEFT ); 366: if( compare("nonassoc")) return( BINARY ); 367: if( compare("NONASSOC")) return( BINARY ); 368: if( compare("binary")) return( BINARY ); 369: if( compare("BINARY")) return( BINARY ); 370: if( compare("right")) return( RIGHT ); 371: if( compare("RIGHT")) return( RIGHT ); 372: if( compare("prec")) return( PREC ); 373: if( compare("PREC")) return( PREC ); 374: error("invalid escape, or illegal reserved word: %s", ctokn ); 375: } 376: 377: /* look ahead to distinguish IDENTIFIER from C_IDENTIFIER */ 378: 379: look: 380: while( peekc==' ' || peekc=='\t' || peekc == '\n' || peekc == '\014' ) 381: { 382: if( peekc == '\n' ) ++peekline; 383: peekc = fgetc( cin); 384: } 385: 386: if( peekc != ':' ) return( IDENTIFIER ); 387: peekc = -1; 388: lineno += peekline; 389: peekline = 0; 390: return( C_IDENTIFIER ); 391: } 392: chfind(t) 393: 394: { int i,j; 395: 396: if (ctokn[0]==' ')t=0; 397: for(i=1;i<=nterms;i++) 398: if(compare(trmset[i].name)){ 399: cnamp = ctokn; 400: return( i ); 401: } 402: for(i=1;i<=nnonter;i++) 403: if(compare(nontrst[i].name)) { 404: cnamp = ctokn; 405: return( i+NTBASE ); 406: } 407: /* cannot find name */ 408: if( t>1 && ctokn[0] != ' ' ) 409: error( "%s should have been defined earlier", ctokn ); 410: return( defin( t ) ); 411: } 412: 413: cpycode(){ /* copies code between \{ and \} */ 414: 415: int c; 416: c = fgetc( cin); 417: if( c == '\n' ) { 418: c = fgetc( cin); 419: lineno++; 420: } 421: while( c != EOF ){ 422: if( c=='\\' ) 423: if( (c=fgetc( cin)) == '}' ) return; 424: else fputc('\\',cout); 425: if( c=='%' ) 426: if( (c=fgetc( cin)) == '}' ) return; 427: else fputc('%',cout); 428: fputc( c, cout ); 429: if( c == '\n' ) ++lineno; 430: c = fgetc( cin); 431: } 432: error("eof before %%}"); 433: } 434: 435: cpyact(){ /* copy C action to the next ; or closing } */ 436: int brac, c, match, *i, j, s; 437: 438: brac = 0; 439: 440: loop: 441: c = fgetc( cin); 442: swt: 443: switch( c ){ 444: 445: case ';': 446: if( brac == 0 ){ 447: fputc( c, cout ); 448: return; 449: } 450: goto lcopy; 451: 452: case '{': 453: brac++; 454: goto lcopy; 455: 456: case '$': 457: s = 1; 458: c = fgetc( cin); 459: if( c == '$' ){ 460: fprintf( cout , "yyval"); 461: goto loop; 462: } 463: if( c == '-' ){ 464: s = -s; 465: c = fgetc( cin); 466: } 467: if( c>='0' && c <= '9' ){ 468: j=0; 469: while( c>='0' && c<= '9' ){ 470: j= j*10+c-'0'; 471: c = fgetc( cin); 472: } 473: if( rflag ) fprintf( cout , "yyvalv(yypv%c%d)", s==1?'+':'-', j ); 474: else fprintf( cout , "yypv[%d]", s*j ); 475: goto swt; 476: } 477: fputc( '$' , cout); 478: if( s<0 ) fputc('-', cout); 479: goto swt; 480: 481: case '}': 482: brac--; 483: if( brac == 0 ){ 484: fputc( c , cout); 485: return; 486: } 487: goto lcopy; 488: 489: case '/': /* look for comments */ 490: fputc( c ,cout); 491: c = fgetc( cin); 492: if( c != '*' ) goto swt; 493: 494: /* it really is a comment */ 495: 496: fputc( c , cout); 497: while( (c=fgetc( cin)) != EOF ){ 498: if( c=='*' ){ 499: fputc( c , cout); 500: if( (c=fgetc( cin)) == '/' ) goto lcopy; 501: } 502: fputc( c , cout); 503: } 504: error( "EOF inside comment" ); 505: 506: case '\'': /* character constant */ 507: match = '\''; 508: goto string; 509: 510: case '"': /* character string */ 511: match = '"'; 512: 513: string: 514: 515: fputc( c , cout); 516: while( (c=fgetc( cin)) != EOF ){ 517: 518: if( c=='\\' ){ 519: fputc( c , cout); 520: c=fgetc( cin); 521: } 522: else if( c==match ) goto lcopy; 523: fputc( c , cout); 524: } 525: error( "EOF in string or character constant" ); 526: 527: case '\0': 528: error("action does not terminate"); 529: case '\n': ++lineno; 530: goto lcopy; 531: 532: } 533: 534: lcopy: 535: fputc( c , cout); 536: goto loop; 537: }