1: /* 2: * Copyright (c) 1982 Regents of the University of California. 3: * All rights reserved. The Berkeley software License Agreement 4: * specifies the terms and conditions for redistribution. 5: */ 6: 7: #ifndef lint 8: static char sccsid[] = "@(#)asscan2.c 5.1 (Berkeley) 4/30/85"; 9: #endif not lint 10: 11: #include "asscanl.h" 12: 13: static inttoktype oval = NL; 14: #define ASINBUFSIZ 4096 15: char inbufunget[8]; 16: char inbuffer[ASINBUFSIZ]; 17: char *Ginbufptr = inbuffer; 18: int Ginbufcnt = 0; 19: int scannerhadeof; 20: 21: fillinbuffer() 22: { 23: int nread; 24: int goal; 25: int got; 26: 27: nread = 0; 28: if (scannerhadeof == 0){ 29: goal = sizeof(inbuffer); 30: do { 31: got = read(stdin->_file, inbuffer + nread, goal); 32: if (got == 0) 33: scannerhadeof = 1; 34: if (got <= 0) 35: break; 36: nread += got; 37: goal -= got; 38: } while (goal); 39: } else { 40: scannerhadeof = 0; 41: } 42: /* 43: * getchar assumes that Ginbufcnt and Ginbufptr 44: * are adjusted as if one character has been removed 45: * from the input. 46: */ 47: if (nread == 0){ 48: inbuffer[0] = EOFCHAR; 49: nread = 1; 50: } 51: Ginbufcnt = nread - 1; 52: Ginbufptr = inbuffer + 1; 53: } 54: 55: scan_dot_s(bufferbox) 56: struct tokbufdesc *bufferbox; 57: { 58: reg char *inbufptr; 59: reg int inbufcnt; 60: reg int ryylval; /* local copy of lexical value */ 61: extern int yylval; /* global copy of lexical value */ 62: reg int val; /* the value returned */ 63: int i; /* simple counter */ 64: reg char *rcp; 65: int ch; /* treated as a character */ 66: int ch1; /* shadow value */ 67: struct symtab *op; 68: ptrall lgbackpatch; /* where to stuff a string length */ 69: reg ptrall bufptr; /* where to stuff tokens */ 70: ptrall bufub; /* where not to stuff tokens */ 71: long intval; /* value of int */ 72: int linescrossed; /* when doing strings and comments */ 73: struct Opcode opstruct; 74: reg int strlg; /* the length of a string */ 75: 76: (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); 77: (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); 78: 79: MEMTOREGBUF; 80: if (newfflag){ 81: newfflag = 0; 82: ryylval = (int)savestr(newfname, strlen(newfname)+1, STR_BOTH); 83: 84: ptoken(bufptr, IFILE); 85: ptoken(bufptr, STRING); 86: pptr(bufptr, ryylval); 87: 88: ptoken(bufptr, ILINENO); 89: ptoken(bufptr, INT); 90: pint(bufptr, 1); 91: } 92: 93: while (bufptr < bufub){ 94: loop: 95: switch(ryylval = (type+1)[ch = getchar()]) { 96: case SCANEOF: 97: endoffile: ; 98: inbufptr = 0; 99: ptoken(bufptr, PARSEEOF); 100: goto done; 101: 102: case DIV: /*process C style comments*/ 103: if ( (ch = getchar()) == '*') { /*comment prelude*/ 104: int incomment; 105: linescrossed = 0; 106: incomment = 1; 107: ch = getchar(); /*skip over the * */ 108: while(incomment){ 109: switch(ch){ 110: case '*': 111: ch = getchar(); 112: incomment = (ch != '/'); 113: break; 114: case '\n': 115: scanlineno++; 116: linescrossed++; 117: ch = getchar(); 118: break; 119: case EOFCHAR: 120: goto endoffile; 121: default: 122: ch = getchar(); 123: break; 124: } 125: } 126: val = ILINESKIP; 127: ryylval = linescrossed; 128: goto ret; 129: } else { /*just an ordinary DIV*/ 130: ungetc(ch); 131: val = ryylval = DIV; 132: goto ret; 133: } 134: case SH: 135: if (oval == NL){ 136: /* 137: * Attempt to recognize a C preprocessor 138: * style comment '^#[ \t]*[0-9]*[ \t]*".*" 139: */ 140: ch = getchar(); /*bump the #*/ 141: while (INCHARSET(ch, SPACE)) 142: ch = getchar();/*bump white */ 143: if (INCHARSET(ch, DIGIT)){ 144: intval = 0; 145: while(INCHARSET(ch, DIGIT)){ 146: intval = intval*10 + ch - '0'; 147: ch = getchar(); 148: } 149: while (INCHARSET(ch, SPACE)) 150: ch = getchar(); 151: if (ch == '"'){ 152: ptoken(bufptr, ILINENO); 153: ptoken(bufptr, INT); 154: pint(bufptr, intval - 1); 155: ptoken(bufptr, IFILE); 156: /* 157: * The '"' has already been 158: * munched 159: * 160: * eatstr will not eat 161: * the trailing \n, so 162: * it is given to the parser 163: * and counted. 164: */ 165: goto eatstr; 166: } 167: } 168: } 169: /* 170: * Well, its just an ordinary decadent comment 171: */ 172: while ((ch != '\n') && (ch != EOFCHAR)) 173: ch = getchar(); 174: if (ch == EOFCHAR) 175: goto endoffile; 176: val = ryylval = oval = NL; 177: scanlineno++; 178: goto ret; 179: 180: case NL: 181: scanlineno++; 182: val = ryylval; 183: goto ret; 184: 185: case SP: 186: oval = SP; /*invalidate ^# meta comments*/ 187: goto loop; 188: 189: case REGOP: /* % , could be used as modulo, or register*/ 190: ch = getchar(); 191: if (INCHARSET(ch, DIGIT)){ 192: ryylval = ch-'0'; 193: if (ch=='1') { 194: if (INCHARSET( (ch = getchar()), REGDIGIT)) 195: ryylval = 10+ch-'0'; 196: else 197: ungetc(ch); 198: } 199: /* 200: * God only knows what the original author 201: * wanted this undocumented feature to 202: * do. 203: * %5++ is really r7 204: */ 205: while(INCHARSET( (ch = getchar()), SIGN)) { 206: if (ch=='+') 207: ryylval++; 208: else 209: ryylval--; 210: } 211: ungetc(ch); 212: val = REG; 213: } else { 214: ungetc(ch); 215: val = REGOP; 216: } 217: goto ret; 218: 219: case ALPH: 220: ch1 = ch; 221: if (INCHARSET(ch, SZSPECBEGIN)){ 222: if( (ch = getchar()) == '`' || ch == '^'){ 223: ch1 |= 0100; /*convert to lower*/ 224: switch(ch1){ 225: case 'b': ryylval = 1; break; 226: case 'w': ryylval = 2; break; 227: case 'l': ryylval = 4; break; 228: default: ryylval = d124; break; 229: } 230: val = SIZESPEC; 231: goto ret; 232: } else { 233: ungetc(ch); 234: ch = ch1; /*restore first character*/ 235: } 236: } 237: rcp = yytext; 238: do { 239: if (rcp < &yytext[NCPName]) 240: *rcp++ = ch; 241: } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); 242: *rcp = '\0'; 243: while (INCHARSET(ch, SPACE)) 244: ch = getchar(); 245: ungetc(ch); 246: 247: switch((op = *lookup(1))->s_tag){ 248: case 0: 249: case LABELID: 250: /* 251: * Its a name... (Labels are subsets of name) 252: */ 253: ryylval = (int)op; 254: val = NAME; 255: break; 256: case INST0: 257: case INSTn: 258: case IJXXX: 259: opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; 260: opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; 261: val = op->s_tag; 262: break; 263: default: 264: ryylval = ( (struct instab *)op)->i_popcode; 265: val = op->s_tag; 266: break; 267: } 268: goto ret; 269: 270: case DIG: 271: /* 272: * restore local inbufptr and inbufcnt 273: */ 274: REGTOMEMBUF; 275: val = number(ch); 276: MEMTOREGBUF; 277: /* 278: * yylval or yybignum has been stuffed as a side 279: * effect to number(); get the global yylval 280: * into our fast local copy in case it was an INT. 281: */ 282: ryylval = yylval; 283: goto ret; 284: 285: case LSH: 286: case RSH: 287: /* 288: * We allow the C style operators 289: * << and >>, as well as < and > 290: */ 291: if ( (ch1 = getchar()) != ch) 292: ungetc(ch1); 293: val = ryylval; 294: goto ret; 295: 296: case MINUS: 297: if ( (ch = getchar()) =='(') 298: ryylval=val=MP; 299: else { 300: ungetc(ch); 301: val=MINUS; 302: } 303: goto ret; 304: 305: case SQ: 306: if ((ryylval = getchar()) == '\n') 307: scanlineno++; /*not entirely correct*/ 308: val = INT; 309: goto ret; 310: 311: case DQ: 312: eatstr: 313: linescrossed = 0; 314: for (strlg = 0; /*VOID*/; strlg++){ 315: switch(ch = getchar()){ 316: case '"': 317: goto tailDQ; 318: default: 319: stuff: 320: putc(ch, strfile); 321: break; 322: case '\n': 323: yywarning("New line in a string constant"); 324: scanlineno++; 325: linescrossed++; 326: ch = getchar(); 327: switch(ch){ 328: case EOFCHAR: 329: putc('\n', strfile); 330: ungetc(EOFCHAR); 331: goto tailDQ; 332: default: 333: ungetc(ch); 334: ch = '\n'; 335: goto stuff; 336: } 337: break; 338: 339: case '\\': 340: ch = getchar(); /*skip the '\\'*/ 341: if ( INCHARSET(ch, BSESCAPE)){ 342: switch (ch){ 343: case 'b': ch = '\b'; goto stuff; 344: case 'f': ch = '\f'; goto stuff; 345: case 'n': ch = '\n'; goto stuff; 346: case 'r': ch = '\r'; goto stuff; 347: case 't': ch = '\t'; goto stuff; 348: } 349: } 350: if ( !(INCHARSET(ch, OCTDIGIT)) ) 351: goto stuff; 352: i = 0; 353: intval = 0; 354: while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ 355: i++; 356: intval <<= 3; 357: intval += ch - '0'; 358: ch = getchar(); 359: } 360: ungetc(ch); 361: ch = (char)intval; 362: goto stuff; 363: } 364: } 365: tailDQ: ; 366: /* 367: * account for any lines that were crossed 368: */ 369: if (linescrossed){ 370: ptoken(bufptr, ILINESKIP); 371: pint(bufptr, linescrossed); 372: } 373: /* 374: * Cheat: append a trailing null to the string 375: * and then adjust the string length to ignore 376: * the trailing null. If any STRING client requires 377: * the trailing null, the client can just change STRLEN 378: */ 379: putc(0, strfile); 380: ryylval = (int)savestr((char *)0, strlg + 1, STR_FILE); 381: val = STRING; 382: ((struct strdesc *)ryylval)->sd_strlen -= 1; 383: goto ret; 384: 385: case BADCHAR: 386: linescrossed = lineno; 387: lineno = scanlineno; 388: yyerror("Illegal character mapped: %d, char read:(octal) %o", 389: ryylval, ch); 390: lineno = linescrossed; 391: val = BADCHAR; 392: goto ret; 393: 394: default: 395: val = ryylval; 396: goto ret; 397: } /*end of the switch*/ 398: /* 399: * here with one token, so stuff it 400: */ 401: ret: 402: oval = val; 403: ptoken(bufptr, val); 404: switch(val){ 405: case ILINESKIP: 406: pint(bufptr, ryylval); 407: break; 408: case SIZESPEC: 409: pchar(bufptr, ryylval); 410: break; 411: case BFINT: plong(bufptr, ryylval); 412: break; 413: case INT: plong(bufptr, ryylval); 414: break; 415: case BIGNUM: pnumber(bufptr, yybignum); 416: break; 417: case STRING: pptr(bufptr, (int)(char *)ryylval); 418: break; 419: case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); 420: break; 421: case REG: pchar(bufptr, ryylval); 422: break; 423: case INST0: 424: case INSTn: 425: popcode(bufptr, opstruct); 426: break; 427: case IJXXX: 428: popcode(bufptr, opstruct); 429: pptr(bufptr, (int)(struct symtab *)symalloc()); 430: break; 431: case ISTAB: 432: case ISTABSTR: 433: case ISTABNONE: 434: case ISTABDOT: 435: case IALIGN: 436: pptr(bufptr, (int)(struct symtab *)symalloc()); 437: break; 438: /* 439: * default: 440: */ 441: } 442: builtval: ; 443: } /*end of the while to stuff the buffer*/ 444: done: 445: bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); 446: /* 447: * This is a real kludge: 448: * 449: * We put the last token in the buffer to be a MINUS 450: * symbol. This last token will never be picked up 451: * in the normal way, but can be looked at during 452: * a peekahead look that the short circuit expression 453: * evaluator uses to see if an expression is complicated. 454: * 455: * Consider the following situation: 456: * 457: * .word 45 + 47 458: * buffer 1 | buffer 0 459: * the peekahead would want to look across the buffer, 460: * but will look in the buffer end zone, see the minus, and 461: * fail. 462: */ 463: ptoken(bufptr, MINUS); 464: REGTOMEMBUF; 465: }