1: /* 2: * Copyright (c) 1983 Regents of the University of California. 3: * All rights reserved. The Berkeley software License Agreement 4: * specifies the terms and conditions for redistribution. 5: */ 6: 7: #ifndef lint 8: static char sccsid[] = "@(#)scanner.c 5.1 (Berkeley) 5/31/85"; 9: #endif not lint 10: 11: static char rcsid[] = "$Header: scanner.c,v 1.5 84/12/26 10:42:05 linton Exp $"; 12: 13: /* 14: * Debugger scanner. 15: */ 16: 17: #include "defs.h" 18: #include "scanner.h" 19: #include "main.h" 20: #include "keywords.h" 21: #include "tree.h" 22: #include "symbols.h" 23: #include "names.h" 24: #include "y.tab.h" 25: 26: #ifndef public 27: typedef int Token; 28: 29: #define MAXLINESIZE 10240 30: 31: #endif 32: 33: public String initfile = ".dbxinit"; 34: 35: typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 36: 37: private Charclass class[256 + 1]; 38: private Charclass *lexclass = class + 1; 39: 40: #define isdigit(c) (lexclass[c] == NUM) 41: #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 42: #define ishexdigit(c) ( \ 43: isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 44: ) 45: 46: public boolean chkalias; 47: public char scanner_linebuf[MAXLINESIZE]; 48: 49: private File in; 50: private char *curchar, *prevchar; 51: 52: #define MAXINCLDEPTH 10 53: 54: private struct { 55: File savefile; 56: Filename savefn; 57: int savelineno; 58: } inclinfo[MAXINCLDEPTH]; 59: 60: private unsigned int curinclindex; 61: 62: private Token getident(); 63: private Token getnum(); 64: private Token getstring(); 65: private Boolean eofinput(); 66: private char charcon(); 67: 68: private enterlexclass(class, s) 69: Charclass class; 70: String s; 71: { 72: register char *p; 73: 74: for (p = s; *p != '\0'; p++) { 75: lexclass[*p] = class; 76: } 77: } 78: 79: public scanner_init() 80: { 81: register Integer i; 82: 83: for (i = 0; i < 257; i++) { 84: class[i] = OTHER; 85: } 86: enterlexclass(WHITE, " \t"); 87: enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 88: enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 89: enterlexclass(NUM, "0123456789"); 90: in = stdin; 91: errfilename = nil; 92: errlineno = 0; 93: curchar = scanner_linebuf; 94: scanner_linebuf[0] = '\0'; 95: chkalias = true; 96: } 97: 98: /* 99: * Read a single token. 100: * 101: * The input is line buffered. Tokens cannot cross line boundaries. 102: * 103: * There are two "modes" of operation: one as in a compiler, 104: * and one for reading shell-like syntax. In the first mode 105: * there is the additional choice of doing alias processing. 106: */ 107: 108: private Boolean shellmode; 109: 110: public Token yylex() 111: { 112: register int c; 113: register char *p; 114: register Token t; 115: String line; 116: integer n; 117: 118: p = curchar; 119: if (*p == '\0') { 120: do { 121: if (isterm(in)) { 122: printf("(%s) ", cmdname); 123: } 124: fflush(stdout); 125: line = fgets(scanner_linebuf, MAXLINESIZE, in); 126: } while (line == nil and not eofinput()); 127: if (line == nil) { 128: c = EOF; 129: } else { 130: p = scanner_linebuf; 131: while (lexclass[*p] == WHITE) { 132: p++; 133: } 134: shellmode = false; 135: } 136: chkalias = true; 137: } else { 138: while (lexclass[*p] == WHITE) { 139: p++; 140: } 141: } 142: curchar = p; 143: prevchar = curchar; 144: c = *p; 145: if (lexclass[c] == ALPHA) { 146: t = getident(chkalias); 147: } else if (lexclass[c] == NUM) { 148: if (shellmode) { 149: t = getident(chkalias); 150: } else { 151: t = getnum(); 152: } 153: } else { 154: ++curchar; 155: switch (c) { 156: case '\n': 157: t = '\n'; 158: if (errlineno != 0) { 159: errlineno++; 160: } 161: break; 162: 163: case '"': 164: case '\'': 165: t = getstring(c); 166: break; 167: 168: case '.': 169: if (shellmode) { 170: --curchar; 171: t = getident(chkalias); 172: } else if (isdigit(*curchar)) { 173: --curchar; 174: t = getnum(); 175: } else { 176: t = '.'; 177: } 178: break; 179: 180: case '-': 181: if (shellmode) { 182: --curchar; 183: t = getident(chkalias); 184: } else if (*curchar == '>') { 185: ++curchar; 186: t = ARROW; 187: } else { 188: t = '-'; 189: } 190: break; 191: 192: case '#': 193: if (not isterm(in)) { 194: *p = '\0'; 195: curchar = p; 196: t = '\n'; 197: ++errlineno; 198: } else { 199: t = '#'; 200: } 201: break; 202: 203: case '\\': 204: if (*(p+1) == '\n') { 205: n = MAXLINESIZE - (p - &scanner_linebuf[0]); 206: if (n > 1) { 207: if (fgets(p, n, in) == nil) { 208: t = 0; 209: } else { 210: curchar = p; 211: t = yylex(); 212: } 213: } else { 214: t = '\\'; 215: } 216: } else { 217: t = '\\'; 218: } 219: break; 220: 221: case EOF: 222: t = 0; 223: break; 224: 225: default: 226: if (shellmode and index("!&*<>()[]", c) == nil) { 227: --curchar; 228: t = getident(chkalias); 229: } else { 230: t = c; 231: } 232: break; 233: } 234: } 235: chkalias = false; 236: # ifdef LEXDEBUG 237: if (lexdebug) { 238: fprintf(stderr, "yylex returns "); 239: print_token(stderr, t); 240: fprintf(stderr, "\n"); 241: } 242: # endif 243: return t; 244: } 245: 246: /* 247: * Put the given string before the current character 248: * in the current line, thus inserting it into the input stream. 249: */ 250: 251: public insertinput (s) 252: String s; 253: { 254: register char *p, *q; 255: int need, avail, shift; 256: 257: q = s; 258: need = strlen(q); 259: avail = curchar - &scanner_linebuf[0]; 260: if (need <= avail) { 261: curchar = &scanner_linebuf[avail - need]; 262: p = curchar; 263: while (*q != '\0') { 264: *p++ = *q++; 265: } 266: } else { 267: p = curchar; 268: while (*p != '\0') { 269: ++p; 270: } 271: shift = need - avail; 272: if (p + shift >= &scanner_linebuf[MAXLINESIZE]) { 273: error("alias expansion too large"); 274: } 275: for (;;) { 276: *(p + shift) = *p; 277: if (p == curchar) { 278: break; 279: } 280: --p; 281: } 282: p = &scanner_linebuf[0]; 283: while (*q != '\0') { 284: *p++ = *q++; 285: } 286: curchar = &scanner_linebuf[0]; 287: } 288: } 289: 290: /* 291: * Get the actuals for a macro call. 292: */ 293: 294: private String movetochar (str, c) 295: String str; 296: char c; 297: { 298: register char *p; 299: 300: while (*p != c) { 301: if (*p == '\0') { 302: error("missing ')' in macro call"); 303: } else if (*p == ')') { 304: error("not enough parameters in macro call"); 305: } else if (*p == ',') { 306: error("too many parameters in macro call"); 307: } 308: ++p; 309: } 310: return p; 311: } 312: 313: private String *getactuals (n) 314: integer n; 315: { 316: String *a; 317: register char *p; 318: int i; 319: 320: a = newarr(String, n); 321: p = curchar; 322: while (*p != '(') { 323: if (lexclass[*p] != WHITE) { 324: error("missing actuals for macro"); 325: } 326: ++p; 327: } 328: ++p; 329: for (i = 0; i < n - 1; i++) { 330: a[i] = p; 331: p = movetochar(p, ','); 332: *p = '\0'; 333: ++p; 334: } 335: a[n-1] = p; 336: p = movetochar(p, ')'); 337: *p = '\0'; 338: curchar = p + 1; 339: return a; 340: } 341: 342: /* 343: * Do command macro expansion, assuming curchar points to the beginning 344: * of the actuals, and we are not in shell mode. 345: */ 346: 347: private expand (pl, str) 348: List pl; 349: String str; 350: { 351: char buf[4096], namebuf[100]; 352: register char *p, *q, *r; 353: String *actual; 354: Name n; 355: integer i; 356: boolean match; 357: 358: if (pl == nil) { 359: insertinput(str); 360: } else { 361: actual = getactuals(list_size(pl)); 362: p = buf; 363: q = str; 364: while (*q != '\0') { 365: if (p >= &buf[4096]) { 366: error("alias expansion too large"); 367: } 368: if (lexclass[*q] == ALPHA) { 369: r = namebuf; 370: do { 371: *r++ = *q++; 372: } while (isalnum(*q)); 373: *r = '\0'; 374: i = 0; 375: match = false; 376: foreach(Name, n, pl) 377: if (streq(ident(n), namebuf)) { 378: match = true; 379: break; 380: } 381: ++i; 382: endfor 383: if (match) { 384: r = actual[i]; 385: } else { 386: r = namebuf; 387: } 388: while (*r != '\0') { 389: *p++ = *r++; 390: } 391: } else { 392: *p++ = *q++; 393: } 394: } 395: *p = '\0'; 396: insertinput(buf); 397: } 398: } 399: 400: /* 401: * Parser error handling. 402: */ 403: 404: public yyerror(s) 405: String s; 406: { 407: register char *p; 408: register integer start; 409: 410: if (streq(s, "syntax error")) { 411: beginerrmsg(); 412: p = prevchar; 413: start = p - &scanner_linebuf[0]; 414: if (p > &scanner_linebuf[0]) { 415: while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) { 416: --p; 417: } 418: } 419: fprintf(stderr, "%s", scanner_linebuf); 420: if (start != 0) { 421: fprintf(stderr, "%*c", start, ' '); 422: } 423: if (p == &scanner_linebuf[0]) { 424: fprintf(stderr, "^ unrecognized command"); 425: } else { 426: fprintf(stderr, "^ syntax error"); 427: } 428: enderrmsg(); 429: } else { 430: error(s); 431: } 432: } 433: 434: /* 435: * Eat the current line. 436: */ 437: 438: public gobble () 439: { 440: curchar = scanner_linebuf; 441: scanner_linebuf[0] = '\0'; 442: } 443: 444: /* 445: * Scan an identifier. 446: * 447: * If chkalias is true, check first to see if it's an alias. 448: * Otherwise, check to see if it's a keyword. 449: */ 450: 451: private Token getident (chkalias) 452: boolean chkalias; 453: { 454: char buf[1024]; 455: register char *p, *q; 456: register Token t; 457: List pl; 458: String str; 459: 460: p = curchar; 461: q = buf; 462: if (shellmode) { 463: do { 464: *q++ = *p++; 465: } while (index(" \t\n!&<>*[]()'\"", *p) == nil); 466: } else { 467: do { 468: *q++ = *p++; 469: } while (isalnum(*p)); 470: } 471: curchar = p; 472: *q = '\0'; 473: yylval.y_name = identname(buf, false); 474: if (chkalias) { 475: if (findalias(yylval.y_name, &pl, &str)) { 476: expand(pl, str); 477: while (lexclass[*curchar] == WHITE) { 478: ++curchar; 479: } 480: if (pl == nil) { 481: t = getident(false); 482: } else { 483: t = getident(true); 484: } 485: } else if (shellmode) { 486: t = NAME; 487: } else { 488: t = findkeyword(yylval.y_name, NAME); 489: } 490: } else if (shellmode) { 491: t = NAME; 492: } else { 493: t = findkeyword(yylval.y_name, NAME); 494: } 495: return t; 496: } 497: 498: /* 499: * Scan a number. 500: */ 501: 502: private Token getnum() 503: { 504: char buf[1024]; 505: register Char *p, *q; 506: register Token t; 507: Integer base; 508: 509: p = curchar; 510: q = buf; 511: if (*p == '0') { 512: if (*(p+1) == 'x') { 513: p += 2; 514: base = 16; 515: } else if (*(p+1) == 't') { 516: base = 10; 517: } else if (varIsSet("$hexin")) { 518: base = 16; 519: } else { 520: base = 8; 521: } 522: } else if (varIsSet("$hexin")) { 523: base = 16; 524: } else if (varIsSet("$octin")) { 525: base = 8; 526: } else { 527: base = 10; 528: } 529: if (base == 16) { 530: do { 531: *q++ = *p++; 532: } while (ishexdigit(*p)); 533: } else { 534: do { 535: *q++ = *p++; 536: } while (isdigit(*p)); 537: } 538: if (*p == '.') { 539: do { 540: *q++ = *p++; 541: } while (isdigit(*p)); 542: if (*p == 'e' or *p == 'E') { 543: p++; 544: if (*p == '+' or *p == '-' or isdigit(*p)) { 545: *q++ = 'e'; 546: do { 547: *q++ = *p++; 548: } while (isdigit(*p)); 549: } 550: } 551: *q = '\0'; 552: yylval.y_real = atof(buf); 553: t = REAL; 554: } else { 555: *q = '\0'; 556: switch (base) { 557: case 10: 558: yylval.y_int = atol(buf); 559: break; 560: 561: case 8: 562: yylval.y_int = octal(buf); 563: break; 564: 565: case 16: 566: yylval.y_int = hex(buf); 567: break; 568: 569: default: 570: badcaseval(base); 571: } 572: t = INT; 573: } 574: curchar = p; 575: return t; 576: } 577: 578: /* 579: * Convert a string of octal digits to an integer. 580: */ 581: 582: private int octal(s) 583: String s; 584: { 585: register Char *p; 586: register Integer n; 587: 588: n = 0; 589: for (p = s; *p != '\0'; p++) { 590: n = 8*n + (*p - '0'); 591: } 592: return n; 593: } 594: 595: /* 596: * Convert a string of hexadecimal digits to an integer. 597: */ 598: 599: private int hex(s) 600: String s; 601: { 602: register Char *p; 603: register Integer n; 604: 605: n = 0; 606: for (p = s; *p != '\0'; p++) { 607: n *= 16; 608: if (*p >= 'a' and *p <= 'f') { 609: n += (*p - 'a' + 10); 610: } else if (*p >= 'A' and *p <= 'F') { 611: n += (*p - 'A' + 10); 612: } else { 613: n += (*p - '0'); 614: } 615: } 616: return n; 617: } 618: 619: /* 620: * Scan a string. 621: */ 622: 623: private Token getstring (quote) 624: char quote; 625: { 626: register char *p, *q; 627: char buf[MAXLINESIZE]; 628: boolean endofstring; 629: Token t; 630: 631: p = curchar; 632: q = buf; 633: endofstring = false; 634: while (not endofstring) { 635: if (*p == '\\' and *(p+1) == '\n') { 636: if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) { 637: error("non-terminated string"); 638: } 639: p = &scanner_linebuf[0] - 1; 640: } else if (*p == '\n' or *p == '\0') { 641: error("non-terminated string"); 642: endofstring = true; 643: } else if (*p == quote) { 644: endofstring = true; 645: } else { 646: curchar = p; 647: *q++ = charcon(p); 648: p = curchar; 649: } 650: p++; 651: } 652: curchar = p; 653: *q = '\0'; 654: if (quote == '\'' and buf[1] == '\0') { 655: yylval.y_char = buf[0]; 656: t = CHAR; 657: } else { 658: yylval.y_string = strdup(buf); 659: t = STRING; 660: } 661: return t; 662: } 663: 664: /* 665: * Process a character constant. 666: * Watch out for backslashes. 667: */ 668: 669: private char charcon (s) 670: String s; 671: { 672: register char *p, *q; 673: char c, buf[10]; 674: 675: p = s; 676: if (*p == '\\') { 677: ++p; 678: switch (*p) { 679: case '\\': 680: c = '\\'; 681: break; 682: 683: case 'n': 684: c = '\n'; 685: break; 686: 687: case 'r': 688: c = '\r'; 689: break; 690: 691: case 't': 692: c = '\t'; 693: break; 694: 695: case '\'': 696: case '"': 697: c = *p; 698: break; 699: 700: default: 701: if (isdigit(*p)) { 702: q = buf; 703: do { 704: *q++ = *p++; 705: } while (isdigit(*p)); 706: *q = '\0'; 707: c = (char) octal(buf); 708: } 709: --p; 710: break; 711: } 712: curchar = p; 713: } else { 714: c = *p; 715: } 716: return c; 717: } 718: 719: /* 720: * Input file management routines. 721: */ 722: 723: public setinput(filename) 724: Filename filename; 725: { 726: File f; 727: 728: f = fopen(filename, "r"); 729: if (f == nil) { 730: error("can't open %s", filename); 731: } else { 732: if (curinclindex >= MAXINCLDEPTH) { 733: error("unreasonable input nesting on \"%s\"", filename); 734: } 735: inclinfo[curinclindex].savefile = in; 736: inclinfo[curinclindex].savefn = errfilename; 737: inclinfo[curinclindex].savelineno = errlineno; 738: curinclindex++; 739: in = f; 740: errfilename = filename; 741: errlineno = 1; 742: } 743: } 744: 745: private Boolean eofinput() 746: { 747: register Boolean b; 748: 749: if (curinclindex == 0) { 750: if (isterm(in)) { 751: putchar('\n'); 752: clearerr(in); 753: b = false; 754: } else { 755: b = true; 756: } 757: } else { 758: fclose(in); 759: --curinclindex; 760: in = inclinfo[curinclindex].savefile; 761: errfilename = inclinfo[curinclindex].savefn; 762: errlineno = inclinfo[curinclindex].savelineno; 763: b = false; 764: } 765: return b; 766: } 767: 768: /* 769: * Pop the current input. Return whether successful. 770: */ 771: 772: public Boolean popinput() 773: { 774: Boolean b; 775: 776: if (curinclindex == 0) { 777: b = false; 778: } else { 779: b = (Boolean) (not eofinput()); 780: } 781: return b; 782: } 783: 784: /* 785: * Return whether we are currently reading from standard input. 786: */ 787: 788: public Boolean isstdin() 789: { 790: return (Boolean) (in == stdin); 791: } 792: 793: /* 794: * Send the current line to the shell. 795: */ 796: 797: public shellline() 798: { 799: register char *p; 800: 801: p = curchar; 802: while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 803: ++p; 804: } 805: shell(p); 806: if (*p == '\0' and isterm(in)) { 807: putchar('\n'); 808: } 809: erecover(); 810: } 811: 812: /* 813: * Read the rest of the current line in "shell mode". 814: */ 815: 816: public beginshellmode() 817: { 818: shellmode = true; 819: } 820: 821: /* 822: * Print out a token for debugging. 823: */ 824: 825: public print_token(f, t) 826: File f; 827: Token t; 828: { 829: if (t == '\n') { 830: fprintf(f, "char '\\n'"); 831: } else if (t == EOF) { 832: fprintf(f, "EOF"); 833: } else if (t < 256) { 834: fprintf(f, "char '%c'", t); 835: } else { 836: fprintf(f, "\"%s\"", keywdstring(t)); 837: } 838: }