1: static char sccsid[] = "@(#)c00.c 2.3"; /* SCCS id keyword */ 2: # 3: /* C compiler 4: * 5: * 6: * 7: * Called from cc: 8: * c0 source temp1 temp2 [ profileflag ] [ ovlyflag ] 9: * temp1 gets most of the intermediate code; 10: * strings are put on temp2, which c1 reads after temp1. 11: */ 12: 13: #include "c0.h" 14: 15: int isn 1; 16: int peeksym -1; 17: int line 1; 18: struct tnode funcblk { NAME, 0, NULL, NULL, NULL, NULL }; 19: 20: struct kwtab { 21: char *kwname; 22: int kwval; 23: } kwtab[] 24: { 25: "int", INT, 26: "char", CHAR, 27: "float", FLOAT, 28: "double", DOUBLE, 29: "struct", STRUCT, 30: "long", LONG, 31: "unsigned", UNSIGN, 32: "union", UNION, 33: "short", INT, 34: "auto", AUTO, 35: "extern", EXTERN, 36: "static", STATIC, 37: "register", REG, 38: "goto", GOTO, 39: "return", RETURN, 40: "if", IF, 41: "while", WHILE, 42: "else", ELSE, 43: "switch", SWITCH, 44: "case", CASE, 45: "break", BREAK, 46: "continue", CONTIN, 47: "do", DO, 48: "default", DEFAULT, 49: "for", FOR, 50: "sizeof", SIZEOF, 51: "typedef", TYPEDEF, 52: "enum", ENUM, 53: 0, 0, 54: }; 55: 56: main(argc, argv) 57: char *argv[]; 58: { 59: register char *sp; 60: register i; 61: register struct kwtab *ip; 62: 63: if(argc<4) { 64: error("Arg count"); 65: exit(1); 66: } 67: if (freopen(argv[1], "r", stdin)==NULL) { 68: error("Can't find %s", argv[1]); 69: exit(1); 70: } 71: if (freopen(argv[2], "w", stdout)==NULL || (sbufp=fopen(argv[3],"w"))==NULL) { 72: error("Can't create temp"); 73: exit(1); 74: } 75: setbuf(sbufp, sbuf); 76: STAUTO = -6; /* default */ 77: while (argc>4) { 78: #ifdef MENLO_OVLY 79: switch (argv[4][1]) { 80: case 'P': 81: #endif MENLO_OVLY 82: proflg++; 83: #ifdef MENLO_OVLY 84: break; 85: case 'V': 86: /* 87: * Overlays: allow an extra word on the stack for 88: * each stack frame to store the overlay number. 89: */ 90: STAUTO = -8; 91: break; 92: } 93: #endif MENLO_OVLY 94: argc--; argv++; 95: } 96: /* 97: * The hash table locations of the keywords 98: * are marked; if an identifier hashes to one of 99: * these locations, it is looked up in in the keyword 100: * table first. 101: */ 102: for (ip=kwtab; (sp = ip->kwname); ip++) { 103: i = 0; 104: while (*sp) 105: i =+ *sp++; 106: hshtab[i%HSHSIZ].hflag = FKEYW; 107: } 108: coremax = funcbase = curbase = sbrk(0); 109: while(!eof) 110: extdef(); 111: outcode("B", EOFC); 112: strflg++; 113: outcode("B", EOFC); 114: exit(nerror!=0); 115: } 116: 117: /* 118: * Look up the identifier in symbuf in the symbol table. 119: * If it hashes to the same spot as a keyword, try the keyword table 120: * first. An initial "." is ignored in the hash. 121: * Return is a ptr to the symbol table entry. 122: */ 123: lookup() 124: { 125: int ihash; 126: register struct hshtab *rp; 127: register char *sp, *np; 128: 129: ihash = 0; 130: sp = symbuf; 131: while (sp<symbuf+NCPS) 132: ihash =+ *sp++&0177; 133: rp = &hshtab[ihash%HSHSIZ]; 134: if (rp->hflag&FKEYW) 135: if (findkw()) 136: return(KEYW); 137: while (*(np = rp->name)) { 138: for (sp=symbuf; sp<symbuf+NCPS;) 139: if (*np++ != *sp++) 140: goto no; 141: if (mossym != (rp->hflag&FMOS)) 142: goto no; 143: csym = rp; 144: return(NAME); 145: no: 146: if (++rp >= &hshtab[HSHSIZ]) 147: rp = hshtab; 148: } 149: if(++hshused >= HSHSIZ) { 150: error("Symbol table overflow"); 151: exit(1); 152: } 153: rp->hclass = 0; 154: rp->htype = 0; 155: rp->hoffset = 0; 156: rp->subsp = NULL; 157: rp->strp = NULL; 158: rp->hpdown = NULL; 159: rp->hblklev = blklev; 160: rp->hflag =| mossym; 161: sp = symbuf; 162: for (np=rp->name; sp<symbuf+NCPS;) 163: *np++ = *sp++; 164: csym = rp; 165: return(NAME); 166: } 167: 168: /* 169: * Search the keyword table. 170: * Ignore initial "." to avoid member-of-structure 171: * problems. 172: */ 173: findkw() 174: { 175: register struct kwtab *kp; 176: register char *p1, *p2; 177: char *wp; 178: int firstc; 179: 180: wp = symbuf; 181: firstc = *wp; 182: for (kp=kwtab; (p2 = kp->kwname); kp++) { 183: p1 = wp; 184: while (*p1 == *p2++) 185: if (*p1++ == '\0') { 186: cval = kp->kwval; 187: return(1); 188: } 189: } 190: *wp = firstc; 191: return(0); 192: } 193: 194: 195: /* 196: * Return the next symbol from the input. 197: * peeksym is a pushed-back symbol, peekc is a pushed-back 198: * character (after peeksym). 199: * mosflg means that the next symbol, if an identifier, 200: * is a member of structure or a structure tag, and it 201: * gets a "." prepended to it to distinguish 202: * it from other identifiers. 203: */ 204: symbol() { 205: register c; 206: register char *sp; 207: register tline; 208: 209: if (peeksym>=0) { 210: c = peeksym; 211: peeksym = -1; 212: if (c==NAME) 213: mosflg = 0; 214: return(c); 215: } 216: if (peekc) { 217: c = peekc; 218: peekc = 0; 219: } else 220: if (eof) 221: return(EOFC); 222: else 223: c = getchar(); 224: loop: 225: if (c==EOF) { 226: eof++; 227: return(EOFC); 228: } 229: switch(ctab[c]) { 230: 231: case SHARP: 232: if ((c=symbol())!=CON) { 233: error("Illegal #"); 234: return(c); 235: } 236: tline = cval; 237: while (ctab[peekc]==SPACE) 238: peekc = getchar(); 239: if (peekc=='"') { 240: sp = filename; 241: while ((c = mapch('"')) >= 0) 242: *sp++ = c; 243: *sp++ = 0; 244: peekc = getchar(); 245: } 246: if (peekc != '\n') { 247: error("Illegal #"); 248: while (getchar()!='\n' && eof==0) 249: ; 250: } 251: peekc = 0; 252: line = tline; 253: return(symbol()); 254: 255: case INSERT: /* ignore newlines */ 256: inhdr = 1; 257: c = getchar(); 258: goto loop; 259: 260: case NEWLN: 261: if (!inhdr) 262: line++; 263: inhdr = 0; 264: 265: case SPACE: 266: c = getchar(); 267: goto loop; 268: 269: case PLUS: 270: return(subseq(c,PLUS,INCBEF)); 271: 272: case MINUS: 273: return(subseq(c,subseq('>',MINUS,ARROW),DECBEF)); 274: 275: case ASSIGN: 276: c = spnextchar(); 277: peekc = 0; 278: if (c=='=') 279: return(EQUAL); 280: if (c==' ') 281: return(ASSIGN); 282: if (c=='<' || c=='>') { 283: if (spnextchar() != c) { 284: peeksym = ctab[c]; 285: return(ASSIGN); 286: } 287: peekc = 0; 288: return(c=='<'? ASLSH: ASRSH); 289: } 290: if (ctab[c]>=PLUS && ctab[c]<=EXOR) { 291: if (spnextchar() != ' ' 292: && (c=='-' || c=='&' || c=='*')) { 293: error("Warning: %c= operator assumed", c); 294: nerror--; 295: } 296: c = ctab[c]; 297: return(c+ASPLUS-PLUS); 298: } 299: peekc = c; 300: return(ASSIGN); 301: 302: case LESS: 303: if (subseq(c,0,1)) return(LSHIFT); 304: return(subseq('=',LESS,LESSEQ)); 305: 306: case GREAT: 307: if (subseq(c,0,1)) return(RSHIFT); 308: return(subseq('=',GREAT,GREATEQ)); 309: 310: case EXCLA: 311: return(subseq('=',EXCLA,NEQUAL)); 312: 313: case BSLASH: 314: if (subseq('/', 0, 1)) 315: return(MAX); 316: goto unkn; 317: 318: case DIVIDE: 319: if (subseq('\\', 0, 1)) 320: return(MIN); 321: if (subseq('*',1,0)) 322: return(DIVIDE); 323: while ((c = spnextchar()) != EOFC) { 324: peekc = 0; 325: if (c=='*') { 326: if (spnextchar() == '/') { 327: peekc = 0; 328: c = getchar(); 329: goto loop; 330: } 331: } 332: } 333: eof++; 334: error("Nonterminated comment"); 335: return(0); 336: 337: case PERIOD: 338: case DIGIT: 339: peekc = c; 340: return(getnum()); 341: 342: case DQUOTE: 343: cval = isn++; 344: return(STRING); 345: 346: case SQUOTE: 347: return(getcc()); 348: 349: case LETTER: 350: sp = symbuf; 351: while(ctab[c]==LETTER || ctab[c]==DIGIT) { 352: if (sp<symbuf+NCPS) 353: *sp++ = c; 354: c = getchar(); 355: } 356: while(sp<symbuf+NCPS) 357: *sp++ = '\0'; 358: mossym = 0; 359: if (mosflg) { 360: mossym = FMOS; 361: mosflg = 0; 362: } 363: peekc = c; 364: if ((c=lookup())==KEYW && cval==SIZEOF) 365: c = SIZEOF; 366: return(c); 367: 368: case AND: 369: return(subseq('&', AND, LOGAND)); 370: 371: case OR: 372: return(subseq('|', OR, LOGOR)); 373: 374: case UNKN: 375: unkn: 376: error("Unknown character"); 377: c = getchar(); 378: goto loop; 379: 380: } 381: return(ctab[c]); 382: } 383: 384: /* 385: * Read a number. Return kind. 386: */ 387: getnum() 388: { 389: register char *np; 390: register c, base; 391: int expseen, sym, ndigit; 392: char *nsyn; 393: int maxdigit; 394: 395: nsyn = "Number syntax"; 396: lcval = 0; 397: base = 10; 398: maxdigit = 0; 399: np = numbuf; 400: ndigit = 0; 401: sym = CON; 402: expseen = 0; 403: if ((c=spnextchar()) == '0') 404: base = 8; 405: for (;; c = getchar()) { 406: *np++ = c; 407: if (ctab[c]==DIGIT || (base==16) && ('a'<=c&&c<='f'||'A'<=c&&c<='F')) { 408: if (base==8) 409: lcval =<< 3; 410: else if (base==10) 411: lcval = ((lcval<<2) + lcval)<<1; 412: else 413: lcval =<< 4; 414: if (ctab[c]==DIGIT) 415: c =- '0'; 416: else if (c>='a') 417: c =- 'a'-10; 418: else 419: c =- 'A'-10; 420: lcval =+ c; 421: ndigit++; 422: if (c>maxdigit) 423: maxdigit = c; 424: continue; 425: } 426: if (c=='.') { 427: if (base==16 || sym==FCON) 428: error(nsyn); 429: sym = FCON; 430: base = 10; 431: continue; 432: } 433: if (ndigit==0) { 434: sym = DOT; 435: break; 436: } 437: if ((c=='e'||c=='E') && expseen==0) { 438: expseen++; 439: sym = FCON; 440: if (base==16 || maxdigit>=10) 441: error(nsyn); 442: base = 10; 443: *np++ = c = getchar(); 444: if (c!='+' && c!='-' && ctab[c]!=DIGIT) 445: break; 446: } else if (c=='x' || c=='X') { 447: if (base!=8 || lcval!=0 || sym!=CON) 448: error(nsyn); 449: base = 16; 450: } else if ((c=='l' || c=='L') && sym==CON) { 451: c = getchar(); 452: sym = LCON; 453: break; 454: } else 455: break; 456: } 457: peekc = c; 458: if (maxdigit >= base) 459: error(nsyn); 460: if (sym==FCON) { 461: np[-1] = 0; 462: cval = np-numbuf; 463: return(FCON); 464: } 465: if (sym==CON && (lcval<0 || lcval>MAXINT&&base==10 || (lcval>>1)>MAXINT)) { 466: sym = LCON; 467: } 468: cval = lcval; 469: return(sym); 470: } 471: 472: /* 473: * If the next input character is c, return b and advance. 474: * Otherwise push back the character and return a. 475: */ 476: subseq(c,a,b) 477: { 478: if (spnextchar() != c) 479: return(a); 480: peekc = 0; 481: return(b); 482: } 483: 484: /* 485: * Write out a string, either in-line 486: * or in the string temp file labelled by 487: * lab. 488: */ 489: putstr(lab, amax) 490: { 491: register int c, max; 492: 493: nchstr = 0; 494: max = amax; 495: if (lab) { 496: strflg++; 497: outcode("BNB", LABEL, lab, BDATA); 498: max = 10000; 499: } else 500: outcode("B", BDATA); 501: while ((c = mapch('"')) >= 0) { 502: if (nchstr < max) { 503: nchstr++; 504: if (nchstr%15 == 0) 505: outcode("0B", BDATA); 506: outcode("1N", c & 0377); 507: } 508: } 509: if (nchstr < max) { 510: nchstr++; 511: outcode("10"); 512: } 513: outcode("0"); 514: strflg = 0; 515: } 516: 517: /* 518: * read a single-quoted character constant. 519: * The routine is sensitive to the layout of 520: * characters in a word. 521: */ 522: getcc() 523: { 524: register int c, cc; 525: register char *ccp; 526: char realc; 527: 528: cval = 0; 529: ccp = &cval; 530: cc = 0; 531: while((c=mapch('\'')) >= 0) 532: if(cc++ < LNCPW) 533: *ccp++ = c; 534: if (cc>LNCPW) 535: error("Long character constant"); 536: if (cc==1) { 537: realc = cval; 538: cval = realc; 539: } 540: return(CON); 541: } 542: 543: /* 544: * Read a character in a string or character constant, 545: * detecting the end of the string. 546: * It implements the escape sequences. 547: */ 548: mapch(ac) 549: { 550: register int a, c, n; 551: static mpeek; 552: 553: c = ac; 554: if (a = mpeek) 555: mpeek = 0; 556: else 557: a = getchar(); 558: loop: 559: if (a==c) 560: return(-1); 561: switch(a) { 562: 563: case '\n': 564: case '\0': 565: error("Nonterminated string"); 566: peekc = a; 567: return(-1); 568: 569: case '\\': 570: switch (a=getchar()) { 571: 572: case 't': 573: return('\t'); 574: 575: case 'n': 576: return('\n'); 577: 578: case 'b': 579: return('\b'); 580: 581: case 'f': 582: return('\014'); 583: 584: case 'v': 585: return('\013'); 586: 587: case '0': case '1': case '2': case '3': 588: case '4': case '5': case '6': case '7': 589: n = 0; 590: c = 0; 591: while (++c<=3 && '0'<=a && a<='7') { 592: n =<< 3; 593: n =+ a-'0'; 594: a = getchar(); 595: } 596: mpeek = a; 597: return(n); 598: 599: case 'r': 600: return('\r'); 601: 602: case '\n': 603: if (!inhdr) 604: line++; 605: inhdr = 0; 606: a = getchar(); 607: goto loop; 608: } 609: } 610: return(a); 611: } 612: 613: /* 614: * Read an expression and return a pointer to its tree. 615: * It's the classical bottom-up, priority-driven scheme. 616: * The initflg prevents the parse from going past 617: * "," or ":" because those delimiters are special 618: * in initializer (and some other) expressions. 619: */ 620: struct tnode * 621: tree() 622: { 623: int *op, opst[SSIZE], *pp, prst[SSIZE]; 624: register int andflg, o; 625: register struct hshtab *cs; 626: int p, ps, os; 627: struct tnode *cmst[CMSIZ]; 628: struct lnode *lcp; 629: 630: curbase = funcbase; 631: op = opst; 632: pp = prst; 633: cp = cmst; 634: *op = SEOF; 635: *pp = 06; 636: andflg = 0; 637: 638: advanc: 639: switch (o=symbol()) { 640: 641: case NAME: 642: cs = csym; 643: if (cs->hclass==TYPEDEF) 644: goto atype; 645: if (cs->hclass==ENUMCON) { 646: *cp++ = cblock(cs->hoffset); 647: goto tand; 648: } 649: if (cs->hclass==0 && cs->htype==0) 650: if(nextchar()=='(') { 651: /* set function */ 652: cs->hclass = EXTERN; 653: cs->htype = FUNC; 654: } else { 655: cs->hclass = STATIC; 656: error("%.8s undefined; func. %.8s", cs->name, funcsym->name); 657: if (initflg) { 658: cs->hclass = EXTERN; 659: error("(Warning only)"); 660: nerror =- 2; 661: } 662: } 663: *cp++ = nblock(cs); 664: goto tand; 665: 666: case FCON: 667: *cp++ = fblock(DOUBLE, copnum(cval)); 668: goto tand; 669: 670: case LCON: 671: cs = gblock(sizeof(*lcp)); 672: cs->op = LCON; 673: cs->type = LONG; 674: cs->lvalue = lcval; 675: *cp++ = cs; 676: goto tand; 677: 678: case CON: 679: *cp++ = cblock(cval); 680: goto tand; 681: 682: /* fake a static char array */ 683: case STRING: 684: putstr(cval, 0); 685: cs = gblock(sizeof(*cs)); 686: cs->hclass = STATIC; 687: cs->hoffset = cval; 688: *cp++ = block(NAME, ARRAY+CHAR, &nchstr, NULL, cs); 689: 690: tand: 691: if(cp>=cmst+CMSIZ) { 692: error("Expression overflow"); 693: exit(1); 694: } 695: if (andflg) 696: goto syntax; 697: andflg = 1; 698: goto advanc; 699: 700: case KEYW: 701: atype: 702: if (*op != LPARN || andflg) 703: goto syntax; 704: peeksym = o; 705: *cp++ = xprtype(gblock(sizeof(*xprtype()))); 706: if ((o=symbol()) != RPARN) 707: goto syntax; 708: o = CAST; 709: --op; 710: --pp; 711: if (*op == SIZEOF) { 712: andflg = 1; 713: *pp = 100; 714: goto advanc; 715: } 716: goto oponst; 717: 718: case INCBEF: 719: case DECBEF: 720: if (andflg) 721: o =+ 2; 722: goto oponst; 723: 724: case COMPL: 725: case EXCLA: 726: case SIZEOF: 727: if (andflg) 728: goto syntax; 729: goto oponst; 730: 731: case MINUS: 732: if (!andflg) 733: o = NEG; 734: andflg = 0; 735: goto oponst; 736: 737: case AND: 738: case TIMES: 739: if (andflg) 740: andflg = 0; 741: else if (o==AND) 742: o = AMPER; 743: else 744: o = STAR; 745: goto oponst; 746: 747: case LPARN: 748: if (andflg) { 749: o = symbol(); 750: if (o==RPARN) 751: o = MCALL; 752: else { 753: peeksym = o; 754: o = CALL; 755: andflg = 0; 756: } 757: } 758: goto oponst; 759: 760: case RBRACK: 761: case RPARN: 762: if (!andflg) 763: goto syntax; 764: goto oponst; 765: 766: case DOT: 767: case ARROW: 768: mosflg++; 769: break; 770: 771: case ASSIGN: 772: if (andflg==0 && PLUS<=*op && *op<=EXOR) { 773: o = *op-- + ASPLUS - PLUS; 774: pp--; 775: goto oponst; 776: } 777: break; 778: 779: } 780: /* binaries */ 781: if (andflg==0) 782: goto syntax; 783: andflg = 0; 784: 785: oponst: 786: p = (opdope[o]>>9) & 077; 787: opon1: 788: ps = *pp; 789: if (p>ps || p==ps && (opdope[o]&RASSOC)!=0) { 790: switch (o) { 791: 792: case INCAFT: 793: case DECAFT: 794: p = 37; 795: break; 796: case LPARN: 797: case LBRACK: 798: case CALL: 799: p = 04; 800: } 801: if (initflg) { 802: if ((o==COMMA && *op!=LPARN && *op!=CALL) 803: || (o==COLON && *op!=QUEST)) { 804: p = 00; 805: goto opon1; 806: } 807: } 808: if (op >= &opst[SSIZE-1]) { 809: error("expression overflow"); 810: exit(1); 811: } 812: *++op = o; 813: *++pp = p; 814: goto advanc; 815: } 816: --pp; 817: switch (os = *op--) { 818: 819: case SEOF: 820: peeksym = o; 821: build(0); /* flush conversions */ 822: return(*--cp); 823: 824: case COMMA: 825: if (*op != CALL) 826: os = SEQNC; 827: break; 828: 829: case CALL: 830: if (o!=RPARN) 831: goto syntax; 832: build(os); 833: goto advanc; 834: 835: case MCALL: 836: *cp++ = NULL; /* empty arglist */ 837: os = CALL; 838: break; 839: 840: case INCBEF: 841: case INCAFT: 842: case DECBEF: 843: case DECAFT: 844: *cp++ = cblock(1); 845: break; 846: 847: case LPARN: 848: if (o!=RPARN) 849: goto syntax; 850: goto advanc; 851: 852: case LBRACK: 853: if (o!=RBRACK) 854: goto syntax; 855: build(LBRACK); 856: goto advanc; 857: } 858: build(os); 859: goto opon1; 860: 861: syntax: 862: error("Expression syntax"); 863: errflush(o); 864: return(0); 865: } 866: 867: struct hshtab * 868: xprtype(atyb) 869: struct hshtab *atyb; 870: { 871: register struct hshtab *tyb; 872: struct hshtab typer; 873: int sc; 874: register char *md, *fb; 875: struct tnode *scp; 876: 877: tyb = atyb; 878: fb = funcbase; 879: md = maxdecl; 880: scp = cp; 881: funcbase = curbase; 882: sc = DEFXTRN; /* will cause error if class mentioned */ 883: getkeywords(&sc, &typer); 884: tyb->hclass = 0; 885: tyb->hblklev = 0; 886: decl1(&sc, &typer, 0, tyb); 887: funcbase = fb; 888: maxdecl = md; 889: cp = scp; 890: tyb->op = ETYPE; 891: return(tyb); 892: } 893: 894: char * 895: copnum(len) 896: { 897: register char *s1, *s2, *s3; 898: 899: s1 = s2 = gblock((len+LNCPW-1) & ~(LNCPW-1)); 900: s3 = numbuf; 901: while (*s2++ = *s3++); 902: return(s1); 903: }