1: # 2: /* C compiler 3: * 4: * 5: * 6: * Called from cc: 7: * c0 source temp1 temp2 [ profileflag ] 8: * temp1 gets most of the intermediate code; 9: * strings are put on temp2, which c1 reads after temp1. 10: */ 11: 12: #include "c0.h" 13: 14: int isn 1; 15: int peeksym -1; 16: int line 1; 17: struct tnode funcblk { NAME, 0, NULL, NULL, NULL, NULL }; 18: 19: struct kwtab { 20: char *kwname; 21: int kwval; 22: } kwtab[] 23: { 24: "int", INT, 25: "char", CHAR, 26: "float", FLOAT, 27: "double", DOUBLE, 28: "struct", STRUCT, 29: "long", LONG, 30: "unsigned", UNSIGN, 31: "union", UNION, 32: "short", INT, 33: "auto", AUTO, 34: "extern", EXTERN, 35: "static", STATIC, 36: "register", REG, 37: "goto", GOTO, 38: "return", RETURN, 39: "if", IF, 40: "while", WHILE, 41: "else", ELSE, 42: "switch", SWITCH, 43: "case", CASE, 44: "break", BREAK, 45: "continue", CONTIN, 46: "do", DO, 47: "default", DEFAULT, 48: "for", FOR, 49: "sizeof", SIZEOF, 50: "typedef", TYPEDEF, 51: "enum", ENUM, 52: 0, 0, 53: }; 54: 55: main(argc, argv) 56: char *argv[]; 57: { 58: register char *sp; 59: register i; 60: register struct kwtab *ip; 61: 62: if(argc<4) { 63: error("Arg count"); 64: exit(1); 65: } 66: if (freopen(argv[1], "r", stdin)==NULL) { 67: error("Can't find %s", argv[1]); 68: exit(1); 69: } 70: if (freopen(argv[2], "w", stdout)==NULL || (sbufp=fopen(argv[3],"w"))==NULL) { 71: error("Can't create temp"); 72: exit(1); 73: } 74: setbuf(sbufp, sbuf); 75: if (argc>4) 76: proflg++; 77: /* 78: * The hash table locations of the keywords 79: * are marked; if an identifier hashes to one of 80: * these locations, it is looked up in in the keyword 81: * table first. 82: */ 83: for (ip=kwtab; (sp = ip->kwname); ip++) { 84: i = 0; 85: while (*sp) 86: i =+ *sp++; 87: hshtab[i%HSHSIZ].hflag = FKEYW; 88: } 89: coremax = funcbase = curbase = sbrk(0); 90: while(!eof) 91: extdef(); 92: outcode("B", EOFC); 93: strflg++; 94: outcode("B", EOFC); 95: exit(nerror!=0); 96: } 97: 98: /* 99: * Look up the identifier in symbuf in the symbol table. 100: * If it hashes to the same spot as a keyword, try the keyword table 101: * first. An initial "." is ignored in the hash. 102: * Return is a ptr to the symbol table entry. 103: */ 104: lookup() 105: { 106: int ihash; 107: register struct hshtab *rp; 108: register char *sp, *np; 109: 110: ihash = 0; 111: sp = symbuf; 112: while (sp<symbuf+NCPS) 113: ihash =+ *sp++&0177; 114: rp = &hshtab[ihash%HSHSIZ]; 115: if (rp->hflag&FKEYW) 116: if (findkw()) 117: return(KEYW); 118: while (*(np = rp->name)) { 119: for (sp=symbuf; sp<symbuf+NCPS;) 120: if (*np++ != *sp++) 121: goto no; 122: if (mossym != (rp->hflag&FMOS)) 123: goto no; 124: csym = rp; 125: return(NAME); 126: no: 127: if (++rp >= &hshtab[HSHSIZ]) 128: rp = hshtab; 129: } 130: if(++hshused >= HSHSIZ) { 131: error("Symbol table overflow"); 132: exit(1); 133: } 134: rp->hclass = 0; 135: rp->htype = 0; 136: rp->hoffset = 0; 137: rp->subsp = NULL; 138: rp->strp = NULL; 139: rp->hpdown = NULL; 140: rp->hblklev = blklev; 141: rp->hflag =| mossym; 142: sp = symbuf; 143: for (np=rp->name; sp<symbuf+NCPS;) 144: *np++ = *sp++; 145: csym = rp; 146: return(NAME); 147: } 148: 149: /* 150: * Search the keyword table. 151: * Ignore initial "." to avoid member-of-structure 152: * problems. 153: */ 154: findkw() 155: { 156: register struct kwtab *kp; 157: register char *p1, *p2; 158: char *wp; 159: int firstc; 160: 161: wp = symbuf; 162: firstc = *wp; 163: for (kp=kwtab; (p2 = kp->kwname); kp++) { 164: p1 = wp; 165: while (*p1 == *p2++) 166: if (*p1++ == '\0') { 167: cval = kp->kwval; 168: return(1); 169: } 170: } 171: *wp = firstc; 172: return(0); 173: } 174: 175: 176: /* 177: * Return the next symbol from the input. 178: * peeksym is a pushed-back symbol, peekc is a pushed-back 179: * character (after peeksym). 180: * mosflg means that the next symbol, if an identifier, 181: * is a member of structure or a structure tag, and it 182: * gets a "." prepended to it to distinguish 183: * it from other identifiers. 184: */ 185: symbol() { 186: register c; 187: register char *sp; 188: register tline; 189: 190: if (peeksym>=0) { 191: c = peeksym; 192: peeksym = -1; 193: if (c==NAME) 194: mosflg = 0; 195: return(c); 196: } 197: if (peekc) { 198: c = peekc; 199: peekc = 0; 200: } else 201: if (eof) 202: return(EOFC); 203: else 204: c = getchar(); 205: loop: 206: if (c==EOF) { 207: eof++; 208: return(EOFC); 209: } 210: switch(ctab[c]) { 211: 212: case SHARP: 213: if ((c=symbol())!=CON) { 214: error("Illegal #"); 215: return(c); 216: } 217: tline = cval; 218: while (ctab[peekc]==SPACE) 219: peekc = getchar(); 220: if (peekc=='"') { 221: sp = filename; 222: while ((c = mapch('"')) >= 0) 223: *sp++ = c; 224: *sp++ = 0; 225: peekc = getchar(); 226: } 227: if (peekc != '\n') { 228: error("Illegal #"); 229: while (getchar()!='\n' && eof==0) 230: ; 231: } 232: peekc = 0; 233: line = tline; 234: return(symbol()); 235: 236: case INSERT: /* ignore newlines */ 237: inhdr = 1; 238: c = getchar(); 239: goto loop; 240: 241: case NEWLN: 242: if (!inhdr) 243: line++; 244: inhdr = 0; 245: 246: case SPACE: 247: c = getchar(); 248: goto loop; 249: 250: case PLUS: 251: return(subseq(c,PLUS,INCBEF)); 252: 253: case MINUS: 254: return(subseq(c,subseq('>',MINUS,ARROW),DECBEF)); 255: 256: case ASSIGN: 257: c = spnextchar(); 258: peekc = 0; 259: if (c=='=') 260: return(EQUAL); 261: if (c==' ') 262: return(ASSIGN); 263: if (c=='<' || c=='>') { 264: if (spnextchar() != c) { 265: peeksym = ctab[c]; 266: return(ASSIGN); 267: } 268: peekc = 0; 269: return(c=='<'? ASLSH: ASRSH); 270: } 271: if (ctab[c]>=PLUS && ctab[c]<=EXOR) { 272: if (spnextchar() != ' ' 273: && (c=='-' || c=='&' || c=='*')) { 274: error("Warning: %c= operator assumed", c); 275: nerror--; 276: } 277: c = ctab[c]; 278: return(c+ASPLUS-PLUS); 279: } 280: peekc = c; 281: return(ASSIGN); 282: 283: case LESS: 284: if (subseq(c,0,1)) return(LSHIFT); 285: return(subseq('=',LESS,LESSEQ)); 286: 287: case GREAT: 288: if (subseq(c,0,1)) return(RSHIFT); 289: return(subseq('=',GREAT,GREATEQ)); 290: 291: case EXCLA: 292: return(subseq('=',EXCLA,NEQUAL)); 293: 294: case BSLASH: 295: if (subseq('/', 0, 1)) 296: return(MAX); 297: goto unkn; 298: 299: case DIVIDE: 300: if (subseq('\\', 0, 1)) 301: return(MIN); 302: if (subseq('*',1,0)) 303: return(DIVIDE); 304: while ((c = spnextchar()) != EOFC) { 305: peekc = 0; 306: if (c=='*') { 307: if (spnextchar() == '/') { 308: peekc = 0; 309: c = getchar(); 310: goto loop; 311: } 312: } 313: } 314: eof++; 315: error("Nonterminated comment"); 316: return(0); 317: 318: case PERIOD: 319: case DIGIT: 320: peekc = c; 321: return(getnum()); 322: 323: case DQUOTE: 324: cval = isn++; 325: return(STRING); 326: 327: case SQUOTE: 328: return(getcc()); 329: 330: case LETTER: 331: sp = symbuf; 332: while(ctab[c]==LETTER || ctab[c]==DIGIT) { 333: if (sp<symbuf+NCPS) 334: *sp++ = c; 335: c = getchar(); 336: } 337: while(sp<symbuf+NCPS) 338: *sp++ = '\0'; 339: mossym = 0; 340: if (mosflg) { 341: mossym = FMOS; 342: mosflg = 0; 343: } 344: peekc = c; 345: if ((c=lookup())==KEYW && cval==SIZEOF) 346: c = SIZEOF; 347: return(c); 348: 349: case AND: 350: return(subseq('&', AND, LOGAND)); 351: 352: case OR: 353: return(subseq('|', OR, LOGOR)); 354: 355: case UNKN: 356: unkn: 357: error("Unknown character"); 358: c = getchar(); 359: goto loop; 360: 361: } 362: return(ctab[c]); 363: } 364: 365: /* 366: * Read a number. Return kind. 367: */ 368: getnum() 369: { 370: register char *np; 371: register c, base; 372: int expseen, sym, ndigit; 373: char *nsyn; 374: int maxdigit; 375: 376: nsyn = "Number syntax"; 377: lcval = 0; 378: base = 10; 379: maxdigit = 0; 380: np = numbuf; 381: ndigit = 0; 382: sym = CON; 383: expseen = 0; 384: if ((c=spnextchar()) == '0') 385: base = 8; 386: for (;; c = getchar()) { 387: *np++ = c; 388: if (ctab[c]==DIGIT || (base==16) && ('a'<=c&&c<='f'||'A'<=c&&c<='F')) { 389: if (base==8) 390: lcval =<< 3; 391: else if (base==10) 392: lcval = ((lcval<<2) + lcval)<<1; 393: else 394: lcval =<< 4; 395: if (ctab[c]==DIGIT) 396: c =- '0'; 397: else if (c>='a') 398: c =- 'a'-10; 399: else 400: c =- 'A'-10; 401: lcval =+ c; 402: ndigit++; 403: if (c>maxdigit) 404: maxdigit = c; 405: continue; 406: } 407: if (c=='.') { 408: if (base==16 || sym==FCON) 409: error(nsyn); 410: sym = FCON; 411: base = 10; 412: continue; 413: } 414: if (ndigit==0) { 415: sym = DOT; 416: break; 417: } 418: if ((c=='e'||c=='E') && expseen==0) { 419: expseen++; 420: sym = FCON; 421: if (base==16 || maxdigit>=10) 422: error(nsyn); 423: base = 10; 424: *np++ = c = getchar(); 425: if (c!='+' && c!='-' && ctab[c]!=DIGIT) 426: break; 427: } else if (c=='x' || c=='X') { 428: if (base!=8 || lcval!=0 || sym!=CON) 429: error(nsyn); 430: base = 16; 431: } else if ((c=='l' || c=='L') && sym==CON) { 432: c = getchar(); 433: sym = LCON; 434: break; 435: } else 436: break; 437: } 438: peekc = c; 439: if (maxdigit >= base) 440: error(nsyn); 441: if (sym==FCON) { 442: np[-1] = 0; 443: cval = np-numbuf; 444: return(FCON); 445: } 446: if (sym==CON && (lcval<0 || lcval>MAXINT&&base==10 || (lcval>>1)>MAXINT)) { 447: sym = LCON; 448: } 449: cval = lcval; 450: return(sym); 451: } 452: 453: /* 454: * If the next input character is c, return b and advance. 455: * Otherwise push back the character and return a. 456: */ 457: subseq(c,a,b) 458: { 459: if (spnextchar() != c) 460: return(a); 461: peekc = 0; 462: return(b); 463: } 464: 465: /* 466: * Write out a string, either in-line 467: * or in the string temp file labelled by 468: * lab. 469: */ 470: putstr(lab, amax) 471: { 472: register int c, max; 473: 474: nchstr = 0; 475: max = amax; 476: if (lab) { 477: strflg++; 478: outcode("BNB", LABEL, lab, BDATA); 479: max = 10000; 480: } else 481: outcode("B", BDATA); 482: while ((c = mapch('"')) >= 0) { 483: if (nchstr < max) { 484: nchstr++; 485: if (nchstr%15 == 0) 486: outcode("0B", BDATA); 487: outcode("1N", c & 0377); 488: } 489: } 490: if (nchstr < max) { 491: nchstr++; 492: outcode("10"); 493: } 494: outcode("0"); 495: strflg = 0; 496: } 497: 498: /* 499: * read a single-quoted character constant. 500: * The routine is sensitive to the layout of 501: * characters in a word. 502: */ 503: getcc() 504: { 505: register int c, cc; 506: register char *ccp; 507: char realc; 508: 509: cval = 0; 510: ccp = &cval; 511: cc = 0; 512: while((c=mapch('\'')) >= 0) 513: if(cc++ < LNCPW) 514: *ccp++ = c; 515: if (cc>LNCPW) 516: error("Long character constant"); 517: if (cc==1) { 518: realc = cval; 519: cval = realc; 520: } 521: return(CON); 522: } 523: 524: /* 525: * Read a character in a string or character constant, 526: * detecting the end of the string. 527: * It implements the escape sequences. 528: */ 529: mapch(ac) 530: { 531: register int a, c, n; 532: static mpeek; 533: 534: c = ac; 535: if (a = mpeek) 536: mpeek = 0; 537: else 538: a = getchar(); 539: loop: 540: if (a==c) 541: return(-1); 542: switch(a) { 543: 544: case '\n': 545: case '\0': 546: error("Nonterminated string"); 547: peekc = a; 548: return(-1); 549: 550: case '\\': 551: switch (a=getchar()) { 552: 553: case 't': 554: return('\t'); 555: 556: case 'n': 557: return('\n'); 558: 559: case 'b': 560: return('\b'); 561: 562: case 'f': 563: return('\014'); 564: 565: case 'v': 566: return('\013'); 567: 568: case '0': case '1': case '2': case '3': 569: case '4': case '5': case '6': case '7': 570: n = 0; 571: c = 0; 572: while (++c<=3 && '0'<=a && a<='7') { 573: n =<< 3; 574: n =+ a-'0'; 575: a = getchar(); 576: } 577: mpeek = a; 578: return(n); 579: 580: case 'r': 581: return('\r'); 582: 583: case '\n': 584: if (!inhdr) 585: line++; 586: inhdr = 0; 587: a = getchar(); 588: goto loop; 589: } 590: } 591: return(a); 592: } 593: 594: /* 595: * Read an expression and return a pointer to its tree. 596: * It's the classical bottom-up, priority-driven scheme. 597: * The initflg prevents the parse from going past 598: * "," or ":" because those delimiters are special 599: * in initializer (and some other) expressions. 600: */ 601: struct tnode * 602: tree() 603: { 604: int *op, opst[SSIZE], *pp, prst[SSIZE]; 605: register int andflg, o; 606: register struct hshtab *cs; 607: int p, ps, os; 608: struct tnode *cmst[CMSIZ]; 609: struct lnode *lcp; 610: 611: curbase = funcbase; 612: op = opst; 613: pp = prst; 614: cp = cmst; 615: *op = SEOF; 616: *pp = 06; 617: andflg = 0; 618: 619: advanc: 620: switch (o=symbol()) { 621: 622: case NAME: 623: cs = csym; 624: if (cs->hclass==TYPEDEF) 625: goto atype; 626: if (cs->hclass==ENUMCON) { 627: *cp++ = cblock(cs->hoffset); 628: goto tand; 629: } 630: if (cs->hclass==0 && cs->htype==0) 631: if(nextchar()=='(') { 632: /* set function */ 633: cs->hclass = EXTERN; 634: cs->htype = FUNC; 635: } else { 636: cs->hclass = STATIC; 637: error("%.8s undefined; func. %.8s", cs->name, funcsym->name); 638: if (initflg) { 639: cs->hclass = EXTERN; 640: error("(Warning only)"); 641: nerror =- 2; 642: } 643: } 644: *cp++ = nblock(cs); 645: goto tand; 646: 647: case FCON: 648: *cp++ = fblock(DOUBLE, copnum(cval)); 649: goto tand; 650: 651: case LCON: 652: cs = gblock(sizeof(*lcp)); 653: cs->op = LCON; 654: cs->type = LONG; 655: cs->lvalue = lcval; 656: *cp++ = cs; 657: goto tand; 658: 659: case CON: 660: *cp++ = cblock(cval); 661: goto tand; 662: 663: /* fake a static char array */ 664: case STRING: 665: putstr(cval, 0); 666: cs = gblock(sizeof(*cs)); 667: cs->hclass = STATIC; 668: cs->hoffset = cval; 669: *cp++ = block(NAME, ARRAY+CHAR, &nchstr, NULL, cs); 670: 671: tand: 672: if(cp>=cmst+CMSIZ) { 673: error("Expression overflow"); 674: exit(1); 675: } 676: if (andflg) 677: goto syntax; 678: andflg = 1; 679: goto advanc; 680: 681: case KEYW: 682: atype: 683: if (*op != LPARN || andflg) 684: goto syntax; 685: peeksym = o; 686: *cp++ = xprtype(gblock(sizeof(*xprtype()))); 687: if ((o=symbol()) != RPARN) 688: goto syntax; 689: o = CAST; 690: --op; 691: --pp; 692: if (*op == SIZEOF) { 693: andflg = 1; 694: *pp = 100; 695: goto advanc; 696: } 697: goto oponst; 698: 699: case INCBEF: 700: case DECBEF: 701: if (andflg) 702: o =+ 2; 703: goto oponst; 704: 705: case COMPL: 706: case EXCLA: 707: case SIZEOF: 708: if (andflg) 709: goto syntax; 710: goto oponst; 711: 712: case MINUS: 713: if (!andflg) 714: o = NEG; 715: andflg = 0; 716: goto oponst; 717: 718: case AND: 719: case TIMES: 720: if (andflg) 721: andflg = 0; 722: else if (o==AND) 723: o = AMPER; 724: else 725: o = STAR; 726: goto oponst; 727: 728: case LPARN: 729: if (andflg) { 730: o = symbol(); 731: if (o==RPARN) 732: o = MCALL; 733: else { 734: peeksym = o; 735: o = CALL; 736: andflg = 0; 737: } 738: } 739: goto oponst; 740: 741: case RBRACK: 742: case RPARN: 743: if (!andflg) 744: goto syntax; 745: goto oponst; 746: 747: case DOT: 748: case ARROW: 749: mosflg++; 750: break; 751: 752: case ASSIGN: 753: if (andflg==0 && PLUS<=*op && *op<=EXOR) { 754: o = *op-- + ASPLUS - PLUS; 755: pp--; 756: goto oponst; 757: } 758: break; 759: 760: } 761: /* binaries */ 762: if (andflg==0) 763: goto syntax; 764: andflg = 0; 765: 766: oponst: 767: p = (opdope[o]>>9) & 077; 768: opon1: 769: ps = *pp; 770: if (p>ps || p==ps && (opdope[o]&RASSOC)!=0) { 771: switch (o) { 772: 773: case INCAFT: 774: case DECAFT: 775: p = 37; 776: break; 777: case LPARN: 778: case LBRACK: 779: case CALL: 780: p = 04; 781: } 782: if (initflg) { 783: if ((o==COMMA && *op!=LPARN && *op!=CALL) 784: || (o==COLON && *op!=QUEST)) { 785: p = 00; 786: goto opon1; 787: } 788: } 789: if (op >= &opst[SSIZE-1]) { 790: error("expression overflow"); 791: exit(1); 792: } 793: *++op = o; 794: *++pp = p; 795: goto advanc; 796: } 797: --pp; 798: switch (os = *op--) { 799: 800: case SEOF: 801: peeksym = o; 802: build(0); /* flush conversions */ 803: return(*--cp); 804: 805: case COMMA: 806: if (*op != CALL) 807: os = SEQNC; 808: break; 809: 810: case CALL: 811: if (o!=RPARN) 812: goto syntax; 813: build(os); 814: goto advanc; 815: 816: case MCALL: 817: *cp++ = NULL; /* empty arglist */ 818: os = CALL; 819: break; 820: 821: case INCBEF: 822: case INCAFT: 823: case DECBEF: 824: case DECAFT: 825: *cp++ = cblock(1); 826: break; 827: 828: case LPARN: 829: if (o!=RPARN) 830: goto syntax; 831: goto advanc; 832: 833: case LBRACK: 834: if (o!=RBRACK) 835: goto syntax; 836: build(LBRACK); 837: goto advanc; 838: } 839: build(os); 840: goto opon1; 841: 842: syntax: 843: error("Expression syntax"); 844: errflush(o); 845: return(0); 846: } 847: 848: struct hshtab * 849: xprtype(atyb) 850: struct hshtab *atyb; 851: { 852: register struct hshtab *tyb; 853: struct hshtab typer; 854: int sc; 855: register char *md, *fb; 856: struct tnode *scp; 857: 858: tyb = atyb; 859: fb = funcbase; 860: md = maxdecl; 861: scp = cp; 862: funcbase = curbase; 863: sc = DEFXTRN; /* will cause error if class mentioned */ 864: getkeywords(&sc, &typer); 865: tyb->hclass = 0; 866: tyb->hblklev = 0; 867: decl1(&sc, &typer, 0, tyb); 868: funcbase = fb; 869: maxdecl = md; 870: cp = scp; 871: tyb->op = ETYPE; 872: return(tyb); 873: } 874: 875: char * 876: copnum(len) 877: { 878: register char *s1, *s2, *s3; 879: 880: s1 = s2 = gblock((len+LNCPW-1) & ~(LNCPW-1)); 881: s3 = numbuf; 882: while (*s2++ = *s3++); 883: return(s1); 884: }