1: /* 2: * Copyright (c) 1979 Regents of the University of California. 3: * All rights reserved. The Berkeley software License Agreement 4: * specifies the terms and conditions for redistribution. 5: */ 6: 7: #ifndef lint 8: static char sccsid[] = "@(#)ey2.c 5.1 (Berkeley) 4/29/85"; 9: #endif not lint 10: 11: # include "ey.h" 12: # define IDENTIFIER 257 13: # define MARK 258 14: # define TERM 259 15: # define LEFT 260 16: # define BINARY 261 17: # define RIGHT 262 18: # define PREC 263 19: # define LCURLY 264 20: # define C_IDENTIFIER 265 /* name followed by colon */ 21: # define NUMBER 266 22: 23: FILE *copen(); 24: 25: setup(argc,argv) int argc; char *argv[]; 26: { int i,j,lev,t; 27: int c; 28: 29: foutput = stdout; 30: i = 1; 31: while( argc >= 2 && argv[1][0] == '-' ) { 32: while( *++(argv[1]) ){ 33: switch( *argv[1] ){ 34: case 'v': 35: case 'V': 36: foutput = copen("y.output", 'w' ); 37: if( foutput == 0 ) error( "cannot open y.output"); 38: continue; 39: case 'o': 40: case 'O': 41: oflag = 1; 42: continue; 43: case 'r': 44: case 'R': 45: oflag = 1; 46: rflag = 1; 47: continue; 48: default: error( "illegal option: %c", *argv[1]); 49: } 50: } 51: argv++; 52: argc--; 53: } 54: 55: ftable = copen( oflag ? "yacc.tmp" : "y.tab.c" , 'w' ); 56: if( ftable==0 ) error( "cannot open table file" ); 57: if( argc > 1 ) { cin = copen( argv[1], 'r' ); 58: if( cin == 0 ) error( "cannot open input" ); 59: } 60: settab(); 61: fprintf( cout , "#\n"); 62: ctokn = "$end"; 63: defin(0); /* eof */ 64: extval = 0400; /* beginning of assigned values */ 65: ctokn = "error"; 66: defin(0); 67: ctokn = "$accept"; 68: defin(1); 69: mem=mem0; 70: cnamp = cnames; 71: lev=0; 72: i=0; 73: 74: while( ( t = gettok() ) != EOF ) { 75: switch( t ){ 76: case IDENTIFIER: j = chfind(0); 77: trmlev[j] = lev; 78: continue; 79: case ',': 80: case ';': continue; 81: case TERM: lev=0; continue; 82: case LEFT: lev=(++i<<3)|01; continue; 83: case BINARY: lev=(++i<<3)|02; continue; 84: case RIGHT: lev=(++i<<3)|03; continue; 85: case MARK: 86: defout(); 87: if( rflag ){ /* RATFOR */ 88: fprintf( cout , "define yyerrok yyerrf = 0\n" ); 89: fprintf( cout , "define yyclearin yychar = -1\n" ); 90: fprintf( cout , "subroutine yyactr(yyprdn)\n"); 91: fprintf( cout , "common/yycomn/yylval,yyval,yypv,yyvalv(150)\n" ); 92: fprintf( cout , "common/yylcom/yychar,yyerrf,yydebu\n" ); 93: fprintf( cout , "integer yychar, yyerrf, yydebu\n" ); 94: fprintf( cout , "integer yyprdn,yyval,yylval,yypv,yyvalv\n" ); 95: } 96: else { 97: fprintf( cout , "#define yyclearin yychar = -1\n" ); 98: fprintf( cout , "#define yyerrok yyerrflag = 0\n" ); 99: fprintf( cout , "extern int yychar, yyerrflag;\n" ); 100: fprintf( cout , "\nint yyval 0;\nint *yypv;\nint yylval 0;"); 101: fprintf( cout , "\nyyactr(__np__){\n"); 102: } 103: break; 104: case LCURLY: defout(); 105: cpycode(); 106: continue; 107: case NUMBER: 108: trmset[j].value = numbval; 109: if( j < ndefout && j>2 ) 110: error("please define type # of %s earlier", trmset[j].name ); 111: continue; 112: default: error("bad precedence syntax, input %d", t ); 113: } 114: break; 115: } 116: prdptr[0]=mem; 117: /* added production */ 118: *mem++ = NTBASE; 119: *mem++ = NTBASE+1; 120: *mem++ = 1; 121: *mem++ = 0; 122: prdptr[1]=mem; 123: i=0; 124: 125: /* i is 0 when a rule can begin, 1 otherwise */ 126: 127: for(;;) switch( t=gettok() ) { 128: case C_IDENTIFIER: if( mem == prdptr[1] ) { /* first time */ 129: if( rflag ){ 130: fprintf( cout , "goto 1000\n" ); 131: } 132: else fprintf( cout , "\nswitch(__np__){\n"); 133: } 134: if( i != 0 ) error( "previous rule not terminated" ); 135: *mem = chfind(1); 136: if( *mem < NTBASE )error( "token illegal on lhs of grammar rule" ); 137: i=1; 138: ++mem; 139: continue; 140: case IDENTIFIER: 141: *mem=chfind(1); 142: if(*mem < NTBASE)levprd[nprod]=trmlev[*mem]; 143: mem++; 144: if(i==0) error("missing :"); 145: continue; 146: case '=': levprd[nprod] |= 04; 147: if( i==0 ) error("semicolon preceeds action"); 148: fprintf( cout , rflag?"\n%d ":"\ncase %d:", nprod ); 149: cpyact(); 150: fprintf( cout , rflag ? " return" : " break;" ); 151: case '|': 152: case ';': if(i){ 153: *mem++ = -nprod; 154: prdptr[++nprod] = mem; 155: levprd[nprod]=0; 156: i=0;} 157: if (t=='|'){i=1;*mem++ = *prdptr[nprod-1];} 158: continue; 159: case 0: /* End Of File */ 160: case EOF: 161: case MARK: if( i != 0 ) error( "rule not terminated before %%%% or EOF" ); 162: settab(); 163: finact(); 164: /* copy the programs which follow the rules */ 165: if( t == MARK ){ 166: while (( c=fgetc( cin)) != EOF ) fputc(c,cout); 167: } 168: return; 169: case PREC: 170: if( i==0 ) error( "%%prec must appear inside rule" ); 171: if( gettok()!=IDENTIFIER)error("illegal %%prec syntax" ); 172: j=chfind(2); 173: if(j>=NTBASE)error("nonterminal %s illegal after %%prec", nontrst[j-NTBASE].name); 174: levprd[nprod]=trmlev[j]; 175: continue; 176: case LCURLY: 177: if( i!=0 ) error( "%%{ appears within a rule" ); 178: cpycode(); 179: continue; 180: default: error( "syntax error, input %d", t ); 181: } 182: } 183: 184: finact(){ 185: /* finish action routine */ 186: register i; 187: 188: if( rflag ){ 189: 190: fprintf( cout , "\n1000 goto(" ); 191: for( i=1; i<nprod; ++i ){ 192: fprintf( cout , "%d,", (levprd[i]&04)==0?999:i ); 193: } 194: fprintf( cout , "999),yyprdn\n" ); 195: fprintf( cout , "999 return\nend\n" ); 196: fprintf( cout , "define YYERRCODE %d\n", trmset[2].value ); 197: } 198: else { 199: fprintf( cout , "\n}\n}\n" ); 200: fprintf( cout , "int yyerrval %d;\n", trmset[2].value ); 201: } 202: } 203: defin(t) { 204: /* define ctokn to be a terminal if t=0 205: or a nonterminal if t=1 */ 206: char *cp,*p; 207: int c; 208: 209: 210: if (t) { 211: if( ++nnonter >= ntlim ) error("too many nonterminals, limit %d",ntlim); 212: nontrst[nnonter].name = ctokn; 213: return( NTBASE + nnonter ); 214: } 215: else { 216: if( ++nterms >= tlim ) error("too many terminals, limit %d",tlim ); 217: trmset[nterms].name = ctokn; 218: if( ctokn[0]==' ' && ctokn[2]=='\0' ) /* single character literal */ 219: trmset[nterms].value = ctokn[1]; 220: else if ( ctokn[0]==' ' && ctokn[1]=='\\' ) { /* escape sequence */ 221: if( ctokn[3] == '\0' ){ /* single character escape sequence */ 222: switch ( ctokn[2] ){ 223: /* character which is escaped */ 224: case 'n': trmset[nterms].value = '\n'; break; 225: case 'r': trmset[nterms].value = '\r'; break; 226: case 'b': trmset[nterms].value = '\b'; break; 227: case 't': trmset[nterms].value = '\t'; break; 228: case '\'': trmset[nterms].value = '\''; break; 229: case '"': trmset[nterms].value = '"'; break; 230: case '\\': trmset[nterms].value = '\\'; break; 231: default: error( "invalid escape" ); 232: } 233: } 234: else if( ctokn[2] <= '7' && ctokn[2]>='0' ){ /* \nnn sequence */ 235: if( ctokn[3]<'0' || ctokn[3] > '7' || ctokn[4]<'0' || 236: ctokn[4]>'7' || ctokn[5] != '\0' ) error("illegal \\nnn construction" ); 237: trmset[nterms].value = 64*(ctokn[2]-'0')+8*(ctokn[3]-'0')+ctokn[4]-'0'; 238: if( trmset[nterms].value == 0 ) error( "'\\000' is illegal" ); 239: } 240: } 241: else { 242: trmset[nterms].value = extval++; 243: 244: } 245: trmlev[nterms] = 0; 246: return( nterms ); 247: } 248: } 249: 250: defout(){ /* write out the defines (at the end of the declaration section) */ 251: 252: _REGISTER int i, c; 253: _REGISTER char *cp; 254: 255: for( i=ndefout; i<=nterms; ++i ){ 256: 257: cp = trmset[i].name; 258: if( *cp == ' ' ) ++cp; /* literals */ 259: 260: for( ; (c= *cp)!='\0'; ++cp ){ 261: 262: if( c>='a' && c<='z' || 263: c>='A' && c<='Z' || 264: c>='0' && c<='9' || 265: c=='_' ) ; /* VOID */ 266: else goto nodef; 267: } 268: 269: /* define it */ 270: 271: fprintf( cout , "%c define %s %d\n", rflag?' ':'#', trmset[i].name, trmset[i].value ); 272: 273: nodef: ; 274: } 275: 276: ndefout = nterms+1; 277: 278: } 279: 280: chstash( c ){ 281: /* put character away into cnames */ 282: if( cnamp >= &cnames[cnamsz] ) error("too many characters in id's and literals" ); 283: else *cnamp++ = c; 284: } 285: 286: int gettok() { 287: int j, base; 288: static int peekline; /* number of '\n' seen in lookahead */ 289: auto int c, match, reserve; 290: 291: begin: 292: reserve = 0; 293: if( peekc>=0 ) { 294: c = peekc; 295: lineno += peekline; 296: peekc = -1; 297: peekline = 0; 298: } 299: else c = fgetc( cin); 300: while( c==' ' || c=='\n' || c=='\t' || c == '\014'){ 301: if( c == '\n' ) ++lineno; 302: c=fgetc( cin); 303: } 304: if (c=='/') 305: {if (fgetc( cin)!='*')error("illegal /"); 306: c=fgetc( cin); 307: while(c != EOF) { 308: if( c == '\n' ) ++lineno; 309: if (c=='*') 310: {if((c=fgetc( cin))=='/')break;} 311: else c=fgetc( cin);} 312: if (!c) return(0); 313: goto begin;} 314: j=0; 315: switch(c){ 316: case '"': 317: case '\'': match = c; 318: ctokn = cnamp; 319: chstash( ' ' ); 320: while(1){ 321: c = fgetc( cin); 322: if( c == '\n' || c == '\0' ) 323: error("illegal or missing ' or \""); 324: if( c == '\\' ){ 325: c = fgetc( cin); 326: chstash( '\\' ); 327: } 328: else if( c == match ) break; 329: chstash( c ); 330: } 331: break; 332: case '%': 333: case '\\': switch(c=fgetc( cin)) 334: {case '0': return(TERM); 335: case '<': return(LEFT); 336: case '2': return(BINARY); 337: case '>': return(RIGHT); 338: case '%': 339: case '\\': return(MARK); 340: case '=': return(PREC); 341: case '{': return(LCURLY); 342: default: reserve = 1; 343: } 344: default: if( c >= '0' && c <= '9' ){ /* number */ 345: numbval = c-'0' ; 346: base = (c=='0') ? 8 : 10 ; 347: for( c=fgetc( cin); c>='0' && c<='9'; c=fgetc( cin) ){ 348: numbval = numbval*base + c - '0'; 349: } 350: peekc = c; 351: return(NUMBER); 352: } 353: else if( (c>='a'&&c<='z')||(c>='A'&&c<='Z')||c=='_'||c=='.'||c=='$'){ 354: ctokn = cnamp; 355: while( (c>='a'&&c<='z') || 356: (c>='A'&&c<='Z') || 357: (c>='0'&&c<='9') || 358: c=='_' || c=='.' || c=='$' ) { 359: chstash( c ); 360: if( peekc>=0 ) { c = peekc; peekc = -1; } 361: else c = fgetc( cin); 362: } 363: } 364: else return(c); 365: 366: peekc=c; 367: } 368: chstash( '\0' ); 369: 370: if( reserve ){ /* find a reserved word */ 371: if( compare("term")) return( TERM ); 372: if( compare("TERM")) return( TERM ); 373: if( compare("token")) return( TERM ); 374: if( compare("TOKEN")) return( TERM ); 375: if( compare("left")) return( LEFT ); 376: if( compare("LEFT")) return( LEFT ); 377: if( compare("nonassoc")) return( BINARY ); 378: if( compare("NONASSOC")) return( BINARY ); 379: if( compare("binary")) return( BINARY ); 380: if( compare("BINARY")) return( BINARY ); 381: if( compare("right")) return( RIGHT ); 382: if( compare("RIGHT")) return( RIGHT ); 383: if( compare("prec")) return( PREC ); 384: if( compare("PREC")) return( PREC ); 385: error("invalid escape, or illegal reserved word: %s", ctokn ); 386: } 387: 388: /* look ahead to distinguish IDENTIFIER from C_IDENTIFIER */ 389: 390: look: 391: while( peekc==' ' || peekc=='\t' || peekc == '\n' || peekc == '\014' ) 392: { 393: if( peekc == '\n' ) ++peekline; 394: peekc = fgetc( cin); 395: } 396: 397: if( peekc != ':' ) return( IDENTIFIER ); 398: peekc = -1; 399: lineno += peekline; 400: peekline = 0; 401: return( C_IDENTIFIER ); 402: } 403: chfind(t) 404: 405: { int i,j; 406: 407: if (ctokn[0]==' ')t=0; 408: for(i=1;i<=nterms;i++) 409: if(compare(trmset[i].name)){ 410: cnamp = ctokn; 411: return( i ); 412: } 413: for(i=1;i<=nnonter;i++) 414: if(compare(nontrst[i].name)) { 415: cnamp = ctokn; 416: return( i+NTBASE ); 417: } 418: /* cannot find name */ 419: if( t>1 && ctokn[0] != ' ' ) 420: error( "%s should have been defined earlier", ctokn ); 421: return( defin( t ) ); 422: } 423: 424: cpycode(){ /* copies code between \{ and \} */ 425: 426: int c; 427: c = fgetc( cin); 428: if( c == '\n' ) { 429: c = fgetc( cin); 430: lineno++; 431: } 432: while( c != EOF ){ 433: if( c=='\\' ) 434: if( (c=fgetc( cin)) == '}' ) return; 435: else fputc('\\',cout); 436: if( c=='%' ) 437: if( (c=fgetc( cin)) == '}' ) return; 438: else fputc('%',cout); 439: fputc( c, cout ); 440: if( c == '\n' ) ++lineno; 441: c = fgetc( cin); 442: } 443: error("eof before %%}"); 444: } 445: 446: cpyact(){ /* copy C action to the next ; or closing } */ 447: int brac, c, match, *i, j, s; 448: 449: brac = 0; 450: 451: loop: 452: c = fgetc( cin); 453: swt: 454: switch( c ){ 455: 456: case ';': 457: if( brac == 0 ){ 458: fputc( c, cout ); 459: return; 460: } 461: goto lcopy; 462: 463: case '{': 464: brac++; 465: goto lcopy; 466: 467: case '$': 468: s = 1; 469: c = fgetc( cin); 470: if( c == '$' ){ 471: fprintf( cout , "yyval"); 472: goto loop; 473: } 474: if( c == '-' ){ 475: s = -s; 476: c = fgetc( cin); 477: } 478: if( c>='0' && c <= '9' ){ 479: j=0; 480: while( c>='0' && c<= '9' ){ 481: j= j*10+c-'0'; 482: c = fgetc( cin); 483: } 484: if( rflag ) fprintf( cout , "yyvalv(yypv%c%d)", s==1?'+':'-', j ); 485: else fprintf( cout , "yypv[%d]", s*j ); 486: goto swt; 487: } 488: fputc( '$' , cout); 489: if( s<0 ) fputc('-', cout); 490: goto swt; 491: 492: case '}': 493: brac--; 494: if( brac == 0 ){ 495: fputc( c , cout); 496: return; 497: } 498: goto lcopy; 499: 500: case '/': /* look for comments */ 501: fputc( c ,cout); 502: c = fgetc( cin); 503: if( c != '*' ) goto swt; 504: 505: /* it really is a comment */ 506: 507: fputc( c , cout); 508: while( (c=fgetc( cin)) != EOF ){ 509: if( c=='*' ){ 510: fputc( c , cout); 511: if( (c=fgetc( cin)) == '/' ) goto lcopy; 512: } 513: fputc( c , cout); 514: } 515: error( "EOF inside comment" ); 516: 517: case '\'': /* character constant */ 518: match = '\''; 519: goto string; 520: 521: case '"': /* character string */ 522: match = '"'; 523: 524: string: 525: 526: fputc( c , cout); 527: while( (c=fgetc( cin)) != EOF ){ 528: 529: if( c=='\\' ){ 530: fputc( c , cout); 531: c=fgetc( cin); 532: } 533: else if( c==match ) goto lcopy; 534: fputc( c , cout); 535: } 536: error( "EOF in string or character constant" ); 537: 538: case '\0': 539: error("action does not terminate"); 540: case '\n': ++lineno; 541: goto lcopy; 542: 543: } 544: 545: lcopy: 546: fputc( c , cout); 547: goto loop; 548: }