1: /* 2: * RCS file input 3: */ 4: #ifndef lint 5: static char rcsid[]= "$Id: rcslex.c,v 4.4 87/12/18 11:44:47 narten Exp $ Purdue CS"; 6: #endif 7: /********************************************************************************* 8: * Lexical Analysis. 9: * Character mapping table, 10: * hashtable, Lexinit, nextlex, getlex, getkey, 11: * getid, getnum, readstring, printstring, savestring, 12: * checkid, serror, fatserror, error, faterror, warn, diagnose 13: * fflsbuf, puts, fprintf 14: * Testprogram: define LEXDB 15: ********************************************************************************* 16: * 17: * Copyright (C) 1982 by Walter F. Tichy 18: * Purdue University 19: * Computer Science Department 20: * West Lafayette, IN 47907 21: * 22: * All rights reserved. No part of this software may be sold or distributed 23: * in any form or by any means without the prior written permission of the 24: * author. 25: * Report problems and direct all inquiries to Tichy@purdue (ARPA net). 26: */ 27: 28: /* $Log: rcslex.c,v $ 29: * Revision 4.4 87/12/18 11:44:47 narten 30: * fixed to use "varargs" in "fprintf"; this is required if it is to 31: * work on a SPARC machine such as a Sun-4 32: * 33: * Revision 4.3 87/10/18 10:37:18 narten 34: * Updating version numbers. Changes relative to 1.1 actually relative 35: * to version 4.1 36: * 37: * Revision 1.3 87/09/24 14:00:17 narten 38: * Sources now pass through lint (if you ignore printf/sprintf/fprintf 39: * warnings) 40: * 41: * Revision 1.2 87/03/27 14:22:33 jenkins 42: * Port to suns 43: * 44: * Revision 1.1 84/01/23 14:50:33 kcs 45: * Initial revision 46: * 47: * Revision 4.1 83/03/25 18:12:51 wft 48: * Only changed $Header to $Id. 49: * 50: * Revision 3.3 82/12/10 16:22:37 wft 51: * Improved error messages, changed exit status on error to 1. 52: * 53: * Revision 3.2 82/11/28 21:27:10 wft 54: * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h. 55: * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations 56: * properly in case there is an IO-error (e.g., file system full). 57: * 58: * Revision 3.1 82/10/11 19:43:56 wft 59: * removed unused label out:; 60: * made sure all calls to getc() return into an integer, not a char. 61: */ 62: 63: 64: /* 65: #define LEXDB 66: /* version LEXDB is for testing the lexical analyzer. The testprogram 67: * reads a stream of lexemes, enters the revision numbers into the 68: * hashtable, and prints the recognized tokens. Keywords are recognized 69: * as identifiers. 70: */ 71: 72: 73: 74: #include "rcsbase.h" 75: #include <varargs.h> 76: 77: 78: 79: /* character mapping table */ 80: enum tokens map[] = { 81: EOFILE, /* this will end up at ctab[-1] */ 82: UNKN, INSERT, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, 83: UNKN, SPACE, NEWLN, UNKN, SPACE, UNKN, UNKN, UNKN, 84: UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, 85: UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, 86: SPACE, EXCLA, DQUOTE, HASH, DOLLAR, PERCNT, AMPER, SQUOTE, 87: LPARN, RPARN, TIMES, PLUS, COMMA, MINUS, PERIOD, DIVIDE, 88: DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, 89: DIGIT, DIGIT, COLON, SEMI, LESS, EQUAL, GREAT, QUEST, 90: AT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 91: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 92: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 93: LETTER, LETTER, LETTER, LBRACK, BACKSL, RBRACK, UPARR, UNDER, 94: ACCENT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 95: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 96: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, 97: LETTER, LETTER, LETTER, LBRACE, BAR, RBRACE, TILDE, UNKN 98: }; 99: 100: 101: 102: 103: struct hshentry * nexthsh; /*pointer to next hashtable-entry, set by lookup*/ 104: 105: enum tokens nexttok; /*next token, set by nextlex */ 106: 107: int hshenter /*if true, next suitable lexeme will be entered */ 108: = true; /*into the symbol table. Handle with care. */ 109: int nextc; /*next input character, initialized by Lexinit */ 110: 111: int eof /*end-of-file indicator, set to >0 on end of file*/ 112: = 0; 113: int line /*current line-number of input */ 114: = 1; 115: int nerror /*counter for errors */ 116: = 0; 117: int nwarn /*counter for warnings */ 118: = 0; 119: char * cmdid /*command identification for error messages */ 120: = nil; 121: int quietflag /*indicates quiet mode */ 122: = false; 123: FILE * finptr; /*input file descriptor */ 124: 125: FILE * frewrite; /*file descriptor for echoing input */ 126: 127: int rewriteflag;/*indicates whether to echo to frewrite */ 128: 129: char StringTab[strtsize]; /* string table and heap */ 130: 131: char * NextString /*pointer to next identifier in StringTab*/ 132: = nil; 133: char * Topchar /*pointer to next free byte in StringTab*/ 134: = &StringTab[0]; /*set by nextlex, lookup */ 135: struct hshentry hshtab[hshsize]; /*hashtable */ 136: 137: 138: 139: 140: 141: lookup() { 142: 143: /* Function: Looks up the character string pointed to by NextString in the 144: * hashtable. If the string is not present, a new entry for it is created. 145: * If the string is present, TopChar is moved back to save the space for 146: * the string, and NextString is set to point to the original string. 147: * In any case, the address of the corresponding hashtable entry is placed 148: * into nexthsh. 149: * Algorithm: Quadratic hash, covering all entries. 150: * Assumptions: NextString points at the first character of the string. 151: * Topchar points at the first empty byte after the string. 152: */ 153: 154: register int ihash; /* index into hashtable */ 155: register char * sp, * np; 156: int c, delta, final, FirstScan; /*loop control*/ 157: 158: /* calculate hash code */ 159: sp = NextString; 160: ihash = 0; 161: while (*sp) ihash += *sp++; 162: 163: /* set up first search loop (c=0,step=1,until (hshsiz-1)/2 */ 164: c=0;delta=1;final=(hshsize-1)/2; 165: FirstScan=true; /*first loop */ 166: 167: for (;;) { 168: ihash = (ihash+c)%hshsize; /*next index*/ 169: 170: if (hshtab[ihash].num == nil) { 171: /*empty slot found*/ 172: hshtab[ihash].num = NextString; 173: nexthsh= &hshtab[ihash];/*save hashtable address*/ 174: # ifdef LEXDB 175: VOID printf("\nEntered: %s at %d ",nexthsh->num, ihash); 176: # endif 177: return; 178: } 179: /* compare strings */ 180: sp=NextString;np=hshtab[ihash].num; 181: while (*sp == *np++) { 182: if (*sp == 0) { 183: /* match found */ 184: nexthsh= &hshtab[ihash]; 185: Topchar = NextString; 186: NextString = nexthsh->num; 187: return; 188: } else sp++; 189: } 190: 191: /* neither empty slot nor string found */ 192: /* calculate next index and repeat */ 193: if (c != final) 194: c += delta; 195: else { 196: if (FirstScan) { 197: /*set up second sweep*/ 198: delta = -1; final = 1; FirstScan= false; 199: } else { 200: fatserror("Hashtable overflow"); 201: } 202: } 203: } 204: }; 205: 206: 207: 208: 209: 210: 211: Lexinit() 212: /* Function: Initialization of lexical analyzer: 213: * initializes the hastable, 214: * initializes nextc, nexttok if finptr != NULL 215: */ 216: { register int i; 217: 218: for (i=hshsize-1; i>=0; i--) { 219: hshtab[i].num = nil; 220: } 221: 222: hshenter=true; eof=0; line=1; nerror=0; nwarn=0; 223: NextString=nil; Topchar = &StringTab[0]; 224: if (finptr) { 225: nextc = GETC(finptr,frewrite,rewriteflag); /*initial character*/ 226: nextlex(); /*initial token*/ 227: } else { 228: nextc = '\0'; 229: nexttok=EOFILE; 230: } 231: } 232: 233: 234: 235: 236: 237: 238: 239: nextlex() 240: 241: /* Function: Reads the next token and sets nexttok to the next token code. 242: * Only if the hshenter==true, a revision number is entered into the 243: * hashtable and a pointer to it is placed into nexthsh. 244: * This is useful for avoiding that dates are placed into the hashtable. 245: * For ID's and NUM's, NextString is set to the character string in the 246: * string table. Assumption: nextc contains the next character. 247: */ 248: { register c; 249: register char * sp; 250: register enum tokens d; 251: 252: if (eof) { 253: nexttok=EOFILE; 254: return; 255: } 256: loop: 257: switch(nexttok=ctab[nextc]) { 258: 259: case UNKN: 260: case IDCHAR: 261: case PERIOD: 262: serror("unknown Character: %c",nextc); 263: nextc=GETC(finptr,frewrite,rewriteflag); 264: goto loop; 265: 266: case NEWLN: 267: line++; 268: # ifdef LEXDB 269: VOID putchar('\n'); 270: # endif 271: /* Note: falls into next case */ 272: 273: case SPACE: 274: nextc=GETC(finptr,frewrite,rewriteflag); 275: goto loop; 276: 277: case EOFILE: 278: eof++; 279: nexttok=EOFILE; 280: return; 281: 282: case DIGIT: 283: NextString = sp = Topchar; 284: *sp++ = nextc; 285: while ((d=ctab[c=GETC(finptr,frewrite,rewriteflag)])==DIGIT || 286: d==PERIOD) { 287: *sp++ = c; /* 1.2. and 1.2 are different */ 288: } 289: *sp++ = '\0'; 290: if (sp >= StringTab+strtsize) { 291: /*may have written outside stringtable already*/ 292: fatserror("Stringtable overflow"); 293: } 294: Topchar = sp; 295: nextc = c; 296: if (hshenter == true) 297: lookup(); /* lookup updates NextString, Topchar*/ 298: nexttok = NUM; 299: return; 300: 301: 302: case LETTER: 303: NextString = sp = Topchar; 304: *sp++ = nextc; 305: while ((d=ctab[c=GETC(finptr,frewrite,rewriteflag)])==LETTER || 306: d==DIGIT || d==IDCHAR) { 307: *sp++ = c; 308: } 309: *sp++ = '\0'; 310: if (sp >= StringTab+strtsize) { 311: /*may have written outside stringtable already*/ 312: fatserror("Stringtable overflow"); 313: } 314: Topchar = sp; 315: nextc = c; 316: nexttok = ID; /* may be ID or keyword */ 317: return; 318: 319: case SBEGIN: /* long string */ 320: nexttok = STRING; 321: /* note: only the initial SBEGIN has been read*/ 322: /* read the string, and reset nextc afterwards*/ 323: return; 324: 325: default: 326: nextc=GETC(finptr,frewrite,rewriteflag); 327: return; 328: } 329: } 330: 331: 332: int getlex(token) 333: enum tokens token; 334: /* Function: Checks if nexttok is the same as token. If so, 335: * advances the input by calling nextlex and returns true. 336: * otherwise returns false. 337: * Doesn't work for strings and keywords; loses the character string for ids. 338: */ 339: { 340: if (nexttok==token) { 341: nextlex(); 342: return(true); 343: } else return(false); 344: } 345: 346: int getkey (key) 347: char * key; 348: /* Function: If the current token is a keyword identical to key, 349: * getkey advances the input by calling nextlex and returns true; 350: * otherwise returns false. 351: */ 352: { 353: register char *s1,*s2; 354: 355: if (nexttok==ID) { 356: s1=key; s2=NextString; 357: while(*s1 == *s2++) 358: if (*s1++ == '\0') { 359: /* match found */ 360: Topchar = NextString; /*reset Topchar */ 361: nextlex(); 362: return(true); 363: } 364: } 365: return(false); 366: } 367: 368: 369: 370: char * getid() 371: /* Function: Checks if nexttok is an identifier. If so, 372: * advances the input by calling nextlex and returns a pointer 373: * to the identifier; otherwise returns nil. 374: * Treats keywords as identifiers. 375: */ 376: { 377: register char * name; 378: if (nexttok==ID) { 379: name = NextString; 380: nextlex(); 381: return name; 382: } else return nil; 383: } 384: 385: 386: struct hshentry * getnum() 387: /* Function: Checks if nexttok is a number. If so, 388: * advances the input by calling nextlex and returns a pointer 389: * to the hashtable entry. Otherwise returns nil. 390: * Doesn't work if hshenter is false. 391: */ 392: { 393: register struct hshentry * num; 394: if (nexttok==NUM) { 395: num=nexthsh; 396: nextlex(); 397: return num; 398: } else return nil; 399: } 400: 401: 402: readstring() 403: /* skip over characters until terminating single SDELIM */ 404: /* if rewriteflag==true, copy every character read to frewrite.*/ 405: /* Does not advance nextlex at the end. */ 406: { register c; 407: if (rewriteflag) { 408: /* copy string verbatim to frewrite */ 409: while ((c=putc(getc(finptr),frewrite)) != EOF) { 410: if (c==SDELIM) { 411: if ((c=putc(getc(finptr),frewrite)) != SDELIM) { 412: /* end of string */ 413: nextc=c; 414: return; 415: } 416: } 417: } 418: } else { 419: /* skip string */ 420: while ((c=getc(finptr)) != EOF) { 421: if (c==SDELIM) { 422: if ((c=getc(finptr)) != SDELIM) { 423: /* end of string */ 424: nextc=c; 425: return; 426: } 427: } 428: } 429: } 430: nextc = c; 431: error("Unterminated string"); 432: } 433: 434: 435: printstring() 436: /* Function: copy a string to stdout, until terminated with a single SDELIM. 437: * Does not advance nextlex at the end. 438: */ 439: { 440: register c; 441: while ((c=getc(finptr)) != EOF) { 442: if (c==SDELIM) { 443: if ((c=getc(finptr)) != SDELIM) { 444: /* end of string */ 445: nextc=c; 446: return; 447: } 448: } 449: VOID putchar(c); 450: } 451: nextc = c; 452: error("Unterminated string"); 453: } 454: 455: 456: 457: int savestring(target,length) 458: char * target; int length; 459: /* copies a string terminated with SDELIM from file finptr to buffer target, 460: * but not more than length bytes. If the string is longer than length, 461: * the extra characters are skipped. The string may be empty, in which 462: * case a '\0' is placed into target. 463: * Double SDELIM is replaced with SDELIM. 464: * If rewriteflag==true, the string is also copied unchanged to frewrite. 465: * Returns the length of the saved string. 466: * Does not advance nextlex at the end. 467: */ 468: { 469: register char * tp, * max; 470: register c; 471: 472: tp=target; max= target+length; /*max is one too large*/ 473: while ((c=GETC(finptr,frewrite,rewriteflag))!=EOF) { 474: *tp++ =c; 475: if (c== SDELIM) { 476: if ((c=GETC(finptr,frewrite,rewriteflag))!=SDELIM) { 477: /* end of string */ 478: *(tp-1)='\0'; 479: nextc=c; 480: return tp-target; 481: } 482: } 483: if (tp >= max) { 484: /* overflow */ 485: error("string buffer overflow -- truncating string"); 486: target[length-1]='\0'; 487: /* skip rest of string */ 488: while ((c=GETC(finptr,frewrite,rewriteflag))!=EOF) { 489: if ((c==SDELIM) && ((c=GETC(finptr,frewrite,rewriteflag))!=SDELIM)) { 490: /* end of string */ 491: nextc=c; 492: return length; 493: } 494: } 495: nextc = c; 496: error("Can't find %c to terminate string before end of file",SDELIM); 497: return length; 498: } 499: } 500: nextc = c; 501: error("Can't find %c to terminate string before end of file",SDELIM); 502: return length; 503: } 504: 505: 506: char *checkid(id, delim) 507: char *id, delim; 508: /* Function: check whether the string starting at id is an */ 509: /* identifier and return a pointer to the last char*/ 510: /* of the identifer. White space, delim and '\0' */ 511: /* are legal delimeters. Aborts the program if not */ 512: /* a legal identifier. Useful for checking commands*/ 513: { 514: register enum tokens d; 515: register char *temp; 516: register char c,tc; 517: 518: temp = id; 519: if ( ctab[*id] == LETTER ) { 520: while( (d=ctab[c=(*++id)]) == LETTER || d==DIGIT || d==IDCHAR) ; 521: if ( c!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) { 522: /* append \0 to end of id before error message */ 523: tc = c; 524: while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ; 525: *id = '\0'; 526: faterror("Invalid character %c in identifier %s",tc,temp); 527: return nil ; 528: } else 529: return id; 530: } else { 531: /* append \0 to end of id before error message */ 532: while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ; 533: *id = '\0'; 534: faterror("Identifier %s does not start with letter",temp); 535: return nil; 536: } 537: } 538: 539: 540: /*VARARGS1*/ 541: serror(e,e1,e2,e3,e4,e5) 542: char * e, * e1, * e2, * e3, * e4, * e5; 543: /* non-fatal syntax error */ 544: { nerror++; 545: VOID fprintf(stderr,"%s error, line %d: ", cmdid, line); 546: VOID fprintf(stderr,e, e1, e2, e3, e4, e5); 547: VOID putc('\n',stderr); 548: } 549: 550: /*VARARGS1*/ 551: error(e,e1,e2,e3,e4,e5) 552: char * e, * e1, * e2, * e3, * e4, * e5; 553: /* non-fatal error */ 554: { nerror++; 555: VOID fprintf(stderr,"%s error: ",cmdid); 556: VOID fprintf(stderr,e, e1, e2, e3, e4, e5); 557: VOID putc('\n',stderr); 558: } 559: 560: /*VARARGS1*/ 561: fatserror(e,e1,e2,e3,e4,e5) 562: char * e, * e1, * e2, * e3, * e4, * e5; 563: /* fatal syntax error */ 564: { nerror++; 565: VOID fprintf(stderr,"%s error, line %d: ", cmdid,line); 566: VOID fprintf(stderr,e, e1, e2, e3, e4, e5); 567: VOID fprintf(stderr,"\n%s aborted\n",cmdid); 568: VOID cleanup(); 569: exit(1); 570: } 571: 572: /*VARARGS1*/ 573: faterror(e,e1,e2,e3,e4,e5) 574: char * e, * e1, * e2, * e3, * e4, * e5; 575: /* fatal error, terminates program after cleanup */ 576: { nerror++; 577: VOID fprintf(stderr,"%s error: ",cmdid); 578: VOID fprintf(stderr,e, e1, e2, e3, e4, e5); 579: VOID fprintf(stderr,"\n%s aborted\n",cmdid); 580: VOID cleanup(); 581: exit(1); 582: } 583: 584: /*VARARGS1*/ 585: warn(e,e1,e2,e3,e4,e5) 586: char * e, * e1, * e2, * e3, * e4, * e5; 587: /* prints a warning message */ 588: { nwarn++; 589: VOID fprintf(stderr,"%s warning: ",cmdid); 590: VOID fprintf(stderr,e, e1, e2, e3, e4, e5); 591: VOID putc('\n',stderr); 592: } 593: 594: 595: /*VARARGS1*/ 596: diagnose(e,e1,e2,e3,e4,e5) 597: char * e, * e1, * e2, * e3, * e4, * e5; 598: /* prints a diagnostic message */ 599: { 600: if (!quietflag) { 601: VOID fprintf(stderr,e, e1, e2, e3, e4, e5); 602: VOID putc('\n',stderr); 603: } 604: } 605: 606: 607: 608: fflsbuf(c, iop) 609: unsigned c; register FILE * iop; 610: /* Function: Flush iop. 611: * Same routine as _flsbuf in stdio, but aborts program on error. 612: */ 613: { register result; 614: if ((result=_flsbuf(c,iop))==EOF) 615: faterror("write error"); 616: return result; 617: } 618: 619: 620: fputs(s, iop) 621: register char *s; 622: register FILE *iop; 623: /* Function: Put string s on file iop, abort on error. 624: * Same as puts in stdio, but with different putc macro. 625: */ 626: { 627: register r; 628: register c; 629: 630: while (c = *s++) 631: r = putc(c, iop); 632: return(r); 633: } 634: 635: 636: 637: fprintf(iop, fmt, va_alist) 638: FILE *iop; 639: char *fmt; 640: va_dcl 641: /* Function: formatted output. Same as fprintf in stdio, 642: * but aborts program on error 643: */ 644: { 645: register int value; 646: va_list ap; 647: 648: va_start(ap); 649: #ifdef VFPRINTF 650: VOID vfprintf(iop, fmt, ap); 651: #else 652: _doprnt(fmt, ap, iop); 653: #endif VFPRINTF 654: if (ferror(iop)) { 655: faterror("write error"); 656: value = EOF; 657: } else value = 0; 658: va_end(ap); 659: return value; 660: } 661: 662: 663: 664: #ifdef LEXDB 665: /* test program reading a stream of lexems and printing the tokens. 666: */ 667: 668: 669: 670: main(argc,argv) 671: int argc; char * argv[]; 672: { 673: cmdid="lextest"; 674: if (argc<2) { 675: VOID fputs("No input file\n",stderr); 676: exit(1); 677: } 678: if ((finptr=fopen(argv[1], "r")) == NULL) { 679: faterror("Can't open input file %s\n",argv[1]); 680: } 681: Lexinit(); 682: rewriteflag=false; 683: while (nexttok != EOFILE) { 684: switch (nexttok) { 685: 686: case ID: 687: VOID printf("ID: %s",NextString); 688: break; 689: 690: case NUM: 691: if (hshenter==true) 692: VOID printf("NUM: %s, index: %d",nexthsh->num, nexthsh-hshtab); 693: else 694: VOID printf("NUM, unentered: %s",NextString); 695: hshenter = !hshenter; /*alternate between dates and numbers*/ 696: break; 697: 698: case COLON: 699: VOID printf("COLON"); break; 700: 701: case SEMI: 702: VOID printf("SEMI"); break; 703: 704: case STRING: 705: readstring(); 706: VOID printf("STRING"); break; 707: 708: case UNKN: 709: VOID printf("UNKN"); break; 710: 711: default: 712: VOID printf("DEFAULT"); break; 713: } 714: VOID printf(" | "); 715: nextlex(); 716: } 717: VOID printf("\nEnd of lexical analyzer test\n"); 718: } 719: 720: cleanup() 721: /* dummy */ 722: {} 723: 724: 725: #endif