1: /* C compiler 2: * 3: * 2.1 (2.11BSD) 1996/01/04 4: * 5: * Called from cc: 6: * c0 source temp1 temp2 [ profileflag ] 7: * temp1 gets most of the intermediate code; 8: * strings are put on temp2, which c1 reads after temp1. 9: */ 10: 11: #include "c0.h" 12: 13: int isn = 1; 14: int peeksym = -1; 15: int line = 1; 16: struct tnode funcblk = { NAME }; 17: 18: struct kwtab { 19: char *kwname; 20: int kwval; 21: } kwtab[] = { 22: "int", INT, 23: "char", CHAR, 24: "float", FLOAT, 25: "double", DOUBLE, 26: "struct", STRUCT, 27: "long", LONG, 28: "unsigned", UNSIGN, 29: "union", UNION, 30: "short", INT, 31: "void", VOID, 32: "auto", AUTO, 33: "extern", EXTERN, 34: "static", STATIC, 35: "register", REG, 36: "goto", GOTO, 37: "return", RETURN, 38: "if", IF, 39: "while", WHILE, 40: "else", ELSE, 41: "switch", SWITCH, 42: "case", CASE, 43: "break", BREAK, 44: "continue", CONTIN, 45: "do", DO, 46: "default", DEFAULT, 47: "for", FOR, 48: "sizeof", SIZEOF, 49: "typedef", TYPEDEF, 50: "enum", ENUM, 51: "asm", ASM, 52: 0, 0, 53: }; 54: 55: union tree *cmst[CMSIZ]; 56: union tree **cp = cmst; 57: int Wflag; /* print warning messages */ 58: 59: main(argc, argv) 60: int argc; 61: char *argv[]; 62: { 63: register unsigned i; 64: register struct kwtab *ip; 65: char buf1[BUFSIZ], 66: buf2[BUFSIZ]; 67: 68: if (argc>1 && strcmp(argv[1], "-u")==0) { 69: argc--; 70: argv++; 71: unscflg++; 72: } 73: if(argc<4) { 74: error("Arg count"); 75: exit(1); 76: } 77: if (freopen(argv[1], "r", stdin)==NULL) { 78: error("Can't find %s", argv[1]); 79: exit(1); 80: } 81: setbuf(stdin,buf1); /* stdio sbrk problems */ 82: if (freopen(argv[2], "w", stdout)==NULL || (sbufp=fopen(argv[3],"w"))==NULL) { 83: error("Can't create temp"); 84: exit(1); 85: } 86: setbuf(stdout,buf2); /* stdio sbrk problems */ 87: setbuf(sbufp, sbuf); 88: /* 89: * Overlays: allow an extra word on the stack for 90: * each stack from to store the overlay number. 91: */ 92: STAUTO = -8; 93: while (argc>4) { 94: switch (argv[4][1]) { 95: case 'P': 96: proflg++; 97: break; 98: case 'V': /* overlays; default, now */ 99: break; 100: case 'w': 101: case 'W': /* don't print warning messages */ 102: Wflag++; 103: break; 104: } 105: argc--; argv++; 106: } 107: /* 108: * The hash table locations of the keywords 109: * are marked; if an identifier hashes to one of 110: * these locations, it is looked up in in the keyword 111: * table first. 112: */ 113: for (ip=kwtab; ip->kwname; ip++) { 114: i = hash(ip->kwname); 115: kwhash[i/LNBPW] |= 1 << (i%LNBPW); 116: } 117: coremax = locbase = sbrk(0); 118: while(!eof) 119: extdef(); 120: outcode("B", EOFC); 121: strflg++; 122: outcode("B", EOFC); 123: blkend(); 124: exit(nerror!=0); 125: } 126: 127: /* 128: * Look up the identifier in symbuf in the symbol table. 129: * If it hashes to the same spot as a keyword, try the keyword table 130: * first. 131: * Return is a ptr to the symbol table entry. 132: */ 133: lookup() 134: { 135: unsigned ihash; 136: register struct nmlist *rp; 137: 138: ihash = hash(symbuf); 139: if (kwhash[ihash/LNBPW] & (1 << (ihash%LNBPW))) 140: if (findkw()) 141: return(KEYW); 142: rp = hshtab[ihash]; 143: while (rp) { 144: if (strcmp(symbuf, rp->name) != 0) 145: goto no; 146: if (mossym != (rp->hflag&FKIND)) 147: goto no; 148: csym = rp; 149: return(NAME); 150: no: 151: rp = rp->nextnm; 152: } 153: rp = (struct nmlist *)Dblock(sizeof(struct nmlist)); 154: rp->nextnm = hshtab[ihash]; 155: hshtab[ihash] = rp; 156: rp->hclass = 0; 157: rp->htype = 0; 158: rp->hoffset = 0; 159: rp->hsubsp = NULL; 160: rp->hstrp = NULL; 161: rp->sparent = NULL; 162: rp->hblklev = blklev; 163: rp->hflag = mossym; 164: rp->name = Dblock((strlen(symbuf) + 1 + LNCPW - 1) & ~(LNCPW - 1)); 165: strcpy(rp->name, symbuf); 166: csym = rp; 167: return(NAME); 168: } 169: 170: /* 171: * Search the keyword table. 172: */ 173: findkw() 174: { 175: register struct kwtab *kp; 176: 177: for (kp=kwtab; kp->kwname; kp++) { 178: if (strcmp(symbuf, kp->kwname) == 0) { 179: cval = kp->kwval; 180: return(1); 181: } 182: } 183: return(0); 184: } 185: 186: 187: /* 188: * Return the next symbol from the input. 189: * peeksym is a pushed-back symbol, peekc is a pushed-back 190: * character (after peeksym). 191: * mosflg means that the next symbol, if an identifier, 192: * is a member of structure or a structure tag or an enum tag 193: */ 194: symbol() 195: { 196: register c; 197: register char *sp; 198: register tline; 199: 200: if (peeksym>=0) { 201: c = peeksym; 202: peeksym = -1; 203: if (c==NAME) 204: mosflg = 0; 205: return(c); 206: } 207: if (peekc) { 208: c = peekc; 209: peekc = 0; 210: } else 211: if (eof) 212: return(EOFC); 213: else 214: c = getchar(); 215: loop: 216: if (c==EOF) { 217: eof++; 218: return(EOFC); 219: } 220: switch(ctab[c]) { 221: 222: case SHARP: 223: if ((c=symbol())!=CON) { 224: error("Illegal #"); 225: return(c); 226: } 227: tline = cval; 228: while (ctab[peekc]==SPACE) 229: peekc = getchar(); 230: if (peekc=='"') { 231: sp = filename; 232: while ((c = mapch('"')) >= 0) 233: *sp++ = c; 234: *sp++ = 0; 235: peekc = getchar(); 236: } 237: if (peekc != '\n') { 238: error("Illegal #"); 239: while (getchar()!='\n' && eof==0) 240: ; 241: } 242: peekc = 0; 243: line = tline; 244: return(symbol()); 245: 246: case NEWLN: 247: line++; 248: 249: case SPACE: 250: c = getchar(); 251: goto loop; 252: 253: case PLUS: 254: return(subseq(c,PLUS,INCBEF)); 255: 256: case MINUS: 257: if (subseq(c, 0, 1)) 258: return(DECBEF); 259: return(subseq('>', MINUS, ARROW)); 260: 261: case ASSIGN: 262: return(subseq(c, ASSIGN, EQUAL)); 263: 264: case LESS: 265: if (subseq(c,0,1)) 266: return(LSHIFT); 267: return(subseq('=',LESS,LESSEQ)); 268: 269: case GREAT: 270: if (subseq(c,0,1)) 271: return(RSHIFT); 272: return(subseq('=',GREAT,GREATEQ)); 273: 274: case EXCLA: 275: return(subseq('=',EXCLA,NEQUAL)); 276: 277: case BSLASH: 278: if (subseq('/', 0, 1)) 279: return(MAX); 280: goto unkn; 281: 282: case DIVIDE: 283: if (subseq('\\', 0, 1)) 284: return(MIN); 285: if (subseq('*',1,0)) 286: return(DIVIDE); 287: while ((c = spnextchar()) != EOFC) { 288: peekc = 0; 289: if (c=='*') { 290: if (spnextchar() == '/') { 291: peekc = 0; 292: c = getchar(); 293: goto loop; 294: } 295: } 296: } 297: eof++; 298: error("Nonterminated comment"); 299: return(0); 300: 301: case PERIOD: 302: case DIGIT: 303: peekc = c; 304: return(getnum()); 305: 306: case DQUOTE: 307: cval = isn++; 308: return(STRING); 309: 310: case SQUOTE: 311: return(getcc()); 312: 313: case LETTER: 314: sp = symbuf; 315: while (ctab[c]==LETTER || ctab[c]==DIGIT) { 316: if (sp < symbuf + MAXCPS) 317: *sp++ = c; 318: c = getchar(); 319: } 320: *sp++ = '\0'; 321: mossym = mosflg; 322: mosflg = 0; 323: peekc = c; 324: if ((c=lookup())==KEYW && cval==SIZEOF) 325: c = SIZEOF; 326: return(c); 327: 328: case AND: 329: return(subseq('&', AND, LOGAND)); 330: 331: case OR: 332: return(subseq('|', OR, LOGOR)); 333: 334: case UNKN: 335: unkn: 336: error("Unknown character"); 337: c = getchar(); 338: goto loop; 339: 340: } 341: return(ctab[c]); 342: } 343: 344: /* 345: * Read a number. Return kind. 346: */ 347: getnum() 348: { 349: register char *np; 350: register c, base; 351: int expseen, sym, ndigit; 352: char *nsyn; 353: int maxdigit; 354: 355: nsyn = "Number syntax"; 356: lcval = 0; 357: base = 10; 358: maxdigit = 0; 359: np = numbuf; 360: ndigit = 0; 361: sym = CON; 362: expseen = 0; 363: if ((c=spnextchar()) == '0') 364: base = 8; 365: for (;; c = getchar()) { 366: *np++ = c; 367: if (ctab[c]==DIGIT || (base==16) && ('a'<=c&&c<='f'||'A'<=c&&c<='F')) { 368: if (base==8) 369: lcval <<= 3; 370: else if (base==10) 371: lcval = ((lcval<<2) + lcval)<<1; 372: else 373: lcval <<= 4; 374: if (ctab[c]==DIGIT) 375: c -= '0'; 376: else if (c>='a') 377: c -= 'a'-10; 378: else 379: c -= 'A'-10; 380: lcval += c; 381: ndigit++; 382: if (c>maxdigit) 383: maxdigit = c; 384: continue; 385: } 386: if (c=='.') { 387: if (base==16 || sym==FCON) 388: error(nsyn); 389: sym = FCON; 390: base = 10; 391: continue; 392: } 393: if (ndigit==0) { 394: sym = DOT; 395: break; 396: } 397: if ((c=='e'||c=='E') && expseen==0) { 398: expseen++; 399: sym = FCON; 400: if (base==16 || maxdigit>=10) 401: error(nsyn); 402: base = 10; 403: *np++ = c = getchar(); 404: if (c!='+' && c!='-' && ctab[c]!=DIGIT) 405: break; 406: } else if (c=='x' || c=='X') { 407: if (base!=8 || lcval!=0 || sym!=CON) 408: error(nsyn); 409: base = 16; 410: } else if ((c=='l' || c=='L') && sym==CON) { 411: c = getchar(); 412: sym = LCON; 413: break; 414: } else 415: break; 416: } 417: peekc = c; 418: if (maxdigit >= base) 419: error(nsyn); 420: if (sym==FCON) { 421: np[-1] = 0; 422: cval = np-numbuf; 423: return(FCON); 424: } 425: if (sym==CON && (lcval<0 || lcval>MAXINT&&base==10 || (lcval>>1)>MAXINT)) { 426: sym = LCON; 427: } 428: cval = lcval; 429: return(sym); 430: } 431: 432: /* 433: * If the next input character is c, return b and advance. 434: * Otherwise push back the character and return a. 435: */ 436: subseq(c,a,b) 437: { 438: if (spnextchar() != c) 439: return(a); 440: peekc = 0; 441: return(b); 442: } 443: 444: /* 445: * Write out a string, either in-line 446: * or in the string temp file labelled by 447: * lab. 448: */ 449: putstr(lab, max) 450: register max; 451: { 452: register int c; 453: 454: nchstr = 0; 455: if (lab) { 456: strflg++; 457: outcode("BNB", LABEL, lab, BDATA); 458: max = 10000; 459: } else 460: outcode("B", BDATA); 461: while ((c = mapch('"')) >= 0) { 462: if (nchstr < max) { 463: nchstr++; 464: if (nchstr%15 == 0) 465: outcode("0B", BDATA); 466: outcode("1N", c & 0377); 467: } 468: } 469: if (nchstr < max) { 470: nchstr++; 471: outcode("10"); 472: } 473: outcode("0"); 474: strflg = 0; 475: } 476: 477: cntstr() 478: { 479: register int c; 480: 481: nchstr = 1; 482: while ((c = mapch('"')) >= 0) { 483: nchstr++; 484: } 485: } 486: 487: /* 488: * read a single-quoted character constant. 489: * The routine is sensitive to the layout of 490: * characters in a word. 491: */ 492: getcc() 493: { 494: register int c, cc; 495: register char *ccp; 496: char realc; 497: 498: cval = 0; 499: ccp = (char *)&cval; 500: cc = 0; 501: while((c=mapch('\'')) >= 0) 502: if(cc++ < LNCPW) 503: *ccp++ = c; 504: if (cc>LNCPW) 505: error("Long character constant"); 506: if (cc==1) { 507: realc = cval; 508: cval = realc; 509: } 510: return(CON); 511: } 512: 513: /* 514: * Read a character in a string or character constant, 515: * detecting the end of the string. 516: * It implements the escape sequences. 517: */ 518: mapch(ac) 519: { 520: register int a, c, n; 521: static mpeek; 522: 523: c = ac; 524: if (a = mpeek) 525: mpeek = 0; 526: else 527: a = getchar(); 528: loop: 529: if (a==c) 530: return(-1); 531: switch(a) { 532: 533: case '\n': 534: case '\0': 535: error("Nonterminated string"); 536: peekc = a; 537: return(-1); 538: 539: case '\\': 540: switch (a=getchar()) { 541: 542: case 't': 543: return('\t'); 544: 545: case 'n': 546: return('\n'); 547: 548: case 'b': 549: return('\b'); 550: 551: case 'f': 552: return('\014'); 553: 554: case 'v': 555: return('\013'); 556: 557: case '0': case '1': case '2': case '3': 558: case '4': case '5': case '6': case '7': 559: n = 0; 560: c = 0; 561: while (++c<=3 && '0'<=a && a<='7') { 562: n <<= 3; 563: n += a-'0'; 564: a = getchar(); 565: } 566: mpeek = a; 567: return(n); 568: 569: case 'r': 570: return('\r'); 571: 572: case '\n': 573: line++; 574: a = getchar(); 575: goto loop; 576: } 577: } 578: return(a); 579: } 580: 581: /* 582: * Read an expression and return a pointer to its tree. 583: * It's the classical bottom-up, priority-driven scheme. 584: * The initflg prevents the parse from going past 585: * "," or ":" because those delimiters are special 586: * in initializer (and some other) expressions. 587: */ 588: union tree * 589: tree(eflag) 590: { 591: int *op, opst[SSIZE], *pp, prst[SSIZE]; 592: register int andflg, o; 593: register struct nmlist *cs; 594: int p, ps, os, xo = 0, *xop; 595: char *svtree; 596: static struct cnode garbage = { CON, INT, (int *)NULL, (union str *)NULL, 0 }; 597: 598: svtree = starttree(); 599: op = opst; 600: pp = prst; 601: *op = SEOF; 602: *pp = 06; 603: andflg = 0; 604: 605: advanc: 606: switch (o=symbol()) { 607: 608: case NAME: 609: cs = csym; 610: if (cs->hclass==TYPEDEF) 611: goto atype; 612: if (cs->hclass==ENUMCON) { 613: *cp++ = cblock(cs->hoffset); 614: goto tand; 615: } 616: if (cs->hclass==0 && cs->htype==0) 617: if(nextchar()=='(') { 618: /* set function */ 619: cs->hclass = EXTERN; 620: cs->htype = FUNC; 621: } else { 622: cs->hclass = STATIC; 623: error("%s undefined; func. %s", cs->name, 624: funcsym ? funcsym->name : "(none)"); 625: } 626: *cp++ = nblock(cs); 627: goto tand; 628: 629: case FCON: 630: *cp++ = fblock(DOUBLE, copnum(cval)); 631: goto tand; 632: 633: case LCON: 634: *cp = (union tree *)Tblock(sizeof(struct lnode)); 635: (*cp)->l.op = LCON; 636: (*cp)->l.type = LONG; 637: (*cp)->l.lvalue = lcval; 638: cp++; 639: goto tand; 640: 641: case CON: 642: *cp++ = cblock(cval); 643: goto tand; 644: 645: /* fake a static char array */ 646: case STRING: 647: /* 648: * This hack is to compensate for a bit of simplemindedness I'm not sure how 649: * else to fix. 650: * 651: * i = sizeof ("foobar"); 652: * 653: * or 654: * i = sizeof "foobar"; 655: * 656: * would generate ".byte 'f,'o','o,'b,'a,'r,0" into the data segment! 657: * 658: * What I did here was to scan to "operator" stack looking for left parens 659: * "(" preceeded by a "sizeof". If both are seen and in that order or only 660: * a SIZEOF is sedn then the string is inside a 'sizeof' and should not 661: * generate any data to the object file. 662: */ 663: xop = op; 664: while (xop > opst) 665: { 666: xo = *xop--; 667: if (xo != LPARN) 668: break; 669: } 670: if (xo == SIZEOF) 671: cntstr(); 672: else 673: putstr(cval, 0); 674: cs = (struct nmlist *)Tblock(sizeof(struct nmlist)); 675: cs->hclass = STATIC; 676: cs->hoffset = cval; 677: *cp++ = block(NAME, unscflg? ARRAY+UNCHAR:ARRAY+CHAR, &nchstr, 678: (union str *)NULL, (union tree *)cs, TNULL); 679: 680: tand: 681: if(cp>=cmst+CMSIZ) { 682: error("Expression overflow"); 683: exit(1); 684: } 685: if (andflg) 686: goto syntax; 687: andflg = 1; 688: goto advanc; 689: 690: case KEYW: 691: atype: 692: if (*op != LPARN || andflg) 693: goto syntax; 694: peeksym = o; 695: *cp++ = xprtype(); 696: if ((o=symbol()) != RPARN) 697: goto syntax; 698: o = CAST; 699: --op; 700: --pp; 701: if (*op == SIZEOF) { 702: andflg = 1; 703: *pp = 100; 704: goto advanc; 705: } 706: goto oponst; 707: 708: case INCBEF: 709: case DECBEF: 710: if (andflg) 711: o += 2; 712: goto oponst; 713: 714: case COMPL: 715: case EXCLA: 716: case SIZEOF: 717: if (andflg) 718: goto syntax; 719: goto oponst; 720: 721: case MINUS: 722: if (!andflg) 723: o = NEG; 724: andflg = 0; 725: goto oponst; 726: 727: case AND: 728: case TIMES: 729: if (andflg) 730: andflg = 0; 731: else if (o==AND) 732: o = AMPER; 733: else 734: o = STAR; 735: goto oponst; 736: 737: case LPARN: 738: if (andflg) { 739: o = symbol(); 740: if (o==RPARN) 741: o = MCALL; 742: else { 743: peeksym = o; 744: o = CALL; 745: andflg = 0; 746: } 747: } 748: goto oponst; 749: 750: case RBRACK: 751: case RPARN: 752: if (!andflg) 753: goto syntax; 754: goto oponst; 755: 756: case DOT: 757: case ARROW: 758: mosflg = FMOS; 759: break; 760: 761: case ASSIGN: 762: if (andflg==0 && PLUS<=*op && *op<=EXOR) { 763: o = *op-- + ASPLUS - PLUS; 764: pp--; 765: goto oponst; 766: } 767: break; 768: 769: } 770: /* binaries */ 771: if (andflg==0) 772: goto syntax; 773: andflg = 0; 774: 775: oponst: 776: p = (opdope[o]>>9) & 037; 777: opon1: 778: if (o==COLON && op[0]==COLON && op[-1]==QUEST) { 779: build(*op--); 780: build(*op--); 781: pp -= 2; 782: } 783: ps = *pp; 784: if (p>ps || p==ps && (opdope[o]&RASSOC)!=0) { 785: switch (o) { 786: 787: case INCAFT: 788: case DECAFT: 789: p = 37; 790: break; 791: case LPARN: 792: case LBRACK: 793: case CALL: 794: p = 04; 795: } 796: if (initflg) { 797: if ((o==COMMA && *op!=LPARN && *op!=CALL) 798: || (o==COLON && *op!=QUEST)) { 799: p = 00; 800: goto opon1; 801: } 802: } 803: if (op >= &opst[SSIZE-1]) { 804: error("expression overflow"); 805: exit(1); 806: } 807: *++op = o; 808: *++pp = p; 809: goto advanc; 810: } 811: --pp; 812: os = *op--; 813: if (andflg==0 && p>5 && ((opdope[o]&BINARY)==0 || o>=INCBEF&&o<=DECAFT) && opdope[os]&BINARY) 814: goto syntax; 815: switch (os) { 816: 817: case SEOF: 818: peeksym = o; 819: build(0); /* flush conversions */ 820: if (eflag) 821: endtree(svtree); 822: return(*--cp); 823: 824: case COMMA: 825: if (*op != CALL) 826: os = SEQNC; 827: break; 828: 829: case CALL: 830: if (o!=RPARN) 831: goto syntax; 832: build(os); 833: goto advanc; 834: 835: case MCALL: 836: *cp++ = block(NULLOP, INT, (int *)NULL, 837: (union str *)NULL, TNULL, TNULL); 838: os = CALL; 839: break; 840: 841: case INCBEF: 842: case INCAFT: 843: case DECBEF: 844: case DECAFT: 845: *cp++ = cblock(1); 846: break; 847: 848: case LPARN: 849: if (o!=RPARN) 850: goto syntax; 851: goto advanc; 852: 853: case LBRACK: 854: if (o!=RBRACK) 855: goto syntax; 856: build(LBRACK); 857: goto advanc; 858: } 859: build(os); 860: goto opon1; 861: 862: syntax: 863: error("Expression syntax"); 864: errflush(o); 865: if (eflag) 866: endtree(svtree); 867: return((union tree *) &garbage); 868: } 869: 870: union tree * 871: xprtype() 872: { 873: struct nmlist typer, absname; 874: int sc; 875: register union tree **scp; 876: 877: scp = cp; 878: sc = DEFXTRN; /* will cause error if class mentioned */ 879: getkeywords(&sc, &typer); 880: absname.hclass = 0; 881: absname.hblklev = blklev; 882: absname.hsubsp = NULL; 883: absname.hstrp = NULL; 884: absname.htype = 0; 885: decl1(sc, &typer, 0, &absname); 886: cp = scp; 887: return(block(ETYPE, absname.htype, absname.hsubsp, 888: absname.hstrp, TNULL, TNULL)); 889: } 890: 891: char * 892: copnum(len) 893: { 894: register char *s1; 895: 896: s1 = Tblock((len+LNCPW-1) & ~(LNCPW-1)); 897: strcpy(s1, numbuf); 898: return(s1); 899: }