1: char *wartv = "Wart Version 2A(009) 14 Jan 92"; 2: 3: #ifdef MDEBUG 4: /* Use the real ones in this module only */ 5: #ifdef malloc 6: #undef malloc 7: #endif /* malloc */ 8: #ifdef calloc 9: #undef calloc 10: #endif /* calloc */ 11: #ifdef realloc 12: #undef realloc 13: #endif /* realloc */ 14: #ifdef free 15: #undef free 16: #endif /* free */ 17: #endif /* MDEBUG */ 18: 19: #ifdef MAC 20: #define VOID void 21: #endif /* MAC */ 22: 23: /* W A R T */ 24: 25: /* 26: A small subset of "lex". 27: 28: Authors: Jeff Damens, Frank da Cruz 29: Columbia University Center for Computing Activites. 30: First released November 1984. 31: Copyright (C) 1984, 1992, Trustees of Columbia University in the City of New 32: York. Permission is granted to any individual or institution to use this 33: software as long as it is not sold for profit. This copyright notice must be 34: retained. This software may not be included in commercial products without 35: written permission of Columbia University. 36: */ 37: 38: /* 39: * input format is: 40: * lines to be copied | %state <state names...> 41: * %% 42: * <state> | <state,state,...> CHAR { actions } 43: * ... 44: * %% 45: * more lines to be copied 46: */ 47: 48: #include "ckcdeb.h" /* Includes */ 49: 50: /* 51: The following "char" should be changed to "short", "int", or "long" if your 52: wart program will generate more than 127 states. Since wart is used mainly 53: with C-Kermit, which has about 50 states, "char" is adequate. This 54: keeps the program about 3K-4K smaller. 55: */ 56: 57: #define TBL_TYPE "char" /* C data type of state table */ 58: 59: #define C_L 014 /* Formfeed */ 60: 61: #define SEP 1 /* Token types */ 62: #define LBRACK 2 63: #define RBRACK 3 64: #define WORD 4 65: #define COMMA 5 66: 67: /* Storage sizes */ 68: 69: #define MAXSTATES 50 /* max number of states */ 70: #define MAXWORD 50 /* max # of chars/word */ 71: #define SBYTES ((MAXSTATES+6)/8) /* # of bytes for state bitmask */ 72: 73: /* Name of wart function in generated program */ 74: 75: #ifndef FNAME 76: #define FNAME "wart" 77: #endif /* FNAME */ 78: 79: /* Structure for state information */ 80: 81: struct transx { 82: CHAR states[SBYTES]; /* included states */ 83: int anyst; /* true if this good from any state */ 84: CHAR inchr; /* input character */ 85: int actno; /* associated action */ 86: struct transx *nxt; 87: }; /* next transition */ 88: typedef struct transx *trans; 89: 90: /* Function prototypes */ 91: 92: _PROTOTYP( VOID setwstate, (int, trans) ); 93: _PROTOTYP( int teststate, (int, trans) ); 94: _PROTOTYP( trans rdinput, (FILE *, FILE *) ); 95: _PROTOTYP( VOID initial, (FILE *, FILE *) ); 96: _PROTOTYP( int isin, (char *, int) ); 97: _PROTOTYP( int isword, (int) ); 98: _PROTOTYP( VOID rdword, (FILE *, char *) ); 99: _PROTOTYP( VOID rdstates, (FILE *, FILE *) ); 100: _PROTOTYP( trans newtrans, (void) ); 101: _PROTOTYP( trans rdrules, (FILE *, FILE *) ); 102: _PROTOTYP( VOID statelist, (FILE *, trans) ); 103: _PROTOTYP( VOID copyact, (FILE *, FILE *, int) ); 104: _PROTOTYP( int faction, (trans, int, int) ); 105: _PROTOTYP( VOID emptytbl, (void) ); 106: _PROTOTYP( VOID addaction, (int, int, int) ); 107: _PROTOTYP( VOID writetbl, (FILE *) ); 108: _PROTOTYP( VOID warray, (FILE *, char *, int [], int, char *) ); 109: _PROTOTYP( VOID fatal, (char *) ); 110: _PROTOTYP( VOID prolog, (FILE *) ); 111: _PROTOTYP( VOID epilogue, (FILE *) ); 112: _PROTOTYP( VOID copyrest, (FILE *, FILE *) ); 113: _PROTOTYP( int gettoken, (FILE *) ); 114: _PROTOTYP( VOID rdcmnt, (FILE *) ); 115: _PROTOTYP( VOID clrhash, (void) ); 116: _PROTOTYP( int hash, (char *) ); 117: _PROTOTYP( VOID enter, (char *, int) ); 118: _PROTOTYP( int lkup, (char *) ); 119: _PROTOTYP( static char* copy, (char *s) ); 120: 121: /* Variables and tables */ 122: 123: /* lt 1992-10-08 Begin 124: * provide definition for deblog variable 125: * ckcdeb.h declares as extern. DECC AXP is strict about ref/def model 126: * Variable is unused herein, to the best of my knowledge. 127: */ 128: #ifdef VMS 129: int deblog; 130: #endif /* VMS */ 131: /* lt 1992-10-08 End 132: */ 133: 134: static int lines, nstates, nacts; 135: 136: static char tokval[MAXWORD]; 137: 138: static int tbl[MAXSTATES*96]; 139: 140: char *tbl_type = TBL_TYPE; 141: 142: char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\nint\n"; 143: 144: char *fname = FNAME; /* Generated function name goes here */ 145: 146: /* rest of program... */ 147: 148: char *txt2 = "()\n\ 149: {\n\ 150: int c,actno;\n\ 151: extern "; 152: 153: /* Data type of state table is inserted here (short or int) */ 154: 155: char *txt2a = " tbl[];\n while (1) {\n c = input() - 32;\n\ 156: if (c < 0 || c > 95) c = 0;\n"; 157: 158: char *txt2b = " if ((actno = tbl[c + state*96]) != -1)\n\ 159: switch(actno) {\n"; 160: 161: /* this program's output goes here, followed by final text... */ 162: 163: char *txt3 = "\n }\n }\n}\n\n"; 164: 165: 166: /* 167: * turn on the bit associated with the given state 168: * 169: */ 170: VOID 171: setwstate(state,t) int state; trans t; { 172: int idx,msk; 173: idx = state/8; /* byte associated with state */ 174: msk = 0x80 >> (state % 8); /* bit mask for state */ 175: t->states[idx] |= msk; 176: } 177: 178: /* 179: * see if the state is involved in the transition 180: * 181: */ 182: int 183: teststate(state,t) int state; trans t; { 184: int idx,msk; 185: idx = state/8; 186: msk = 0x80 >> (state % 8); 187: return(t->states[idx] & msk); 188: } 189: 190: 191: /* 192: * read input from here... 193: * 194: */ 195: 196: trans 197: rdinput(infp,outfp) FILE *infp,*outfp; { 198: trans x,rdrules(); 199: lines = 1; /* line counter */ 200: nstates = 0; /* no states */ 201: nacts = 0; /* no actions yet */ 202: fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/'); 203: fprintf(outfp,"Wart preprocessor. */\n"); 204: fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/'); 205: fprintf(outfp,"source file instead, */\n"); 206: fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/'); 207: fprintf(outfp,"C source file. */\n\n"); 208: fprintf(outfp,"%c* Wart Version Info: */\n",'/'); 209: fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv); 210: 211: initial(infp,outfp); /* read state names, initial defs */ 212: prolog(outfp); /* write out our initial code */ 213: x = rdrules(infp,outfp); /* read rules */ 214: epilogue(outfp); /* write out epilogue code */ 215: return(x); 216: } 217: 218: 219: /* 220: * initial - read initial definitions and state names. Returns 221: * on EOF or %%. 222: * 223: */ 224: VOID 225: initial(infp,outfp) FILE *infp, *outfp; { 226: int c; 227: char wordbuf[MAXWORD]; 228: while ((c = getc(infp)) != EOF) { 229: if (c == '%') { 230: rdword(infp,wordbuf); 231: if (strcmp(wordbuf,"states") == 0) 232: rdstates(infp,outfp); 233: else if (strcmp(wordbuf,"%") == 0) return; 234: else fprintf(outfp,"%%%s",wordbuf); 235: } 236: else putc(c,outfp); 237: if (c == '\n') lines++; 238: } 239: } 240: 241: /* 242: * boolean function to tell if the given character can be part of 243: * a word. 244: * 245: */ 246: int 247: isin(s,c) char *s; int c; { 248: for (; *s != '\0'; s++) 249: if (*s == (char) c) return(1); 250: return(0); 251: } 252: int 253: isword(c) int c; { 254: static char special[] = ".%_-$@"; /* these are allowable */ 255: return(isalnum(c) || isin(special,c)); 256: } 257: 258: /* 259: * read the next word into the given buffer. 260: * 261: */ 262: VOID 263: rdword(fp,buf) FILE *fp; char *buf; { 264: int len = 0,c; 265: while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = (char) c; 266: *buf++ = '\0'; /* tie off word */ 267: ungetc(c,fp); /* put break char back */ 268: } 269: 270: /* 271: * read state names, up to a newline. 272: * 273: */ 274: VOID 275: rdstates(fp,ofp) FILE *fp,*ofp; { 276: int c; 277: char wordbuf[MAXWORD]; 278: while ((c = getc(fp)) != EOF && c != '\n') { 279: if (isspace(c) || c == C_L) continue; /* skip whitespace */ 280: ungetc(c,fp); /* put char back */ 281: rdword(fp,wordbuf); /* read the whole word */ 282: enter(wordbuf,++nstates); /* put into symbol tbl */ 283: fprintf(ofp,"#define %s %d\n",wordbuf,nstates); 284: } 285: lines++; 286: } 287: 288: /* 289: * allocate a new, empty transition node 290: * 291: */ 292: trans 293: newtrans() { 294: trans new; 295: int i; 296: new = (trans) malloc(sizeof (struct transx)); 297: for (i=0; i<SBYTES; i++) new->states[i] = 0; 298: new->anyst = 0; 299: new->nxt = NULL; 300: return(new); 301: } 302: 303: 304: /* 305: * read all the rules. 306: * 307: */ 308: 309: trans 310: rdrules(fp,out) FILE *fp,*out; { 311: trans head,cur,prev; 312: int curtok; 313: head = cur = prev = NULL; 314: while ((curtok = gettoken(fp)) != SEP) 315: 316: switch(curtok) { 317: case LBRACK: 318: if (cur == NULL) 319: cur = newtrans(); 320: else 321: fatal("duplicate state list"); 322: statelist(fp,cur); /* set states */ 323: continue; /* prepare to read char */ 324: 325: case WORD: 326: if ((int)strlen(tokval) != 1) 327: fatal("multiple chars in state"); 328: if (cur == NULL) { 329: cur = newtrans(); 330: cur->anyst = 1; 331: } 332: cur->actno = ++nacts; 333: cur->inchr = (char) (tokval[0] - 32); 334: if (head == NULL) 335: head = cur; 336: else 337: prev->nxt = cur; 338: prev = cur; 339: cur = NULL; 340: copyact(fp,out,nacts); 341: break; 342: default: fatal("bad input format"); 343: } 344: return(head); 345: } 346: 347: /* 348: * read a list of (comma-separated) states, set them in the 349: * given transition. 350: * 351: */ 352: VOID 353: statelist(fp,t) FILE *fp; trans t; { 354: int curtok,sval; 355: curtok = COMMA; 356: while (curtok != RBRACK) { 357: if (curtok != COMMA) fatal("missing comma"); 358: if ((curtok = gettoken(fp)) != WORD) fatal("missing state name"); 359: if ((sval = lkup(tokval)) == -1) { 360: fprintf(stderr,"state %s undefined\n",tokval); 361: fatal("undefined state"); 362: } 363: setwstate(sval,t); 364: curtok = gettoken(fp); 365: } 366: } 367: 368: /* 369: * copy an action from the input to the output file 370: * 371: */ 372: VOID 373: copyact(inp,outp,actno) FILE *inp,*outp; int actno; { 374: int c,bcnt; 375: fprintf(outp,"case %d:\n",actno); 376: while (c = getc(inp), (isspace(c) || c == C_L)) 377: if (c == '\n') lines++; 378: if (c == '{') { 379: bcnt = 1; 380: fputs(" {",outp); 381: while (bcnt > 0 && (c = getc(inp)) != EOF) { 382: if (c == '{') bcnt++; 383: else if (c == '}') bcnt--; 384: else if (c == '\n') lines++; 385: putc(c,outp); 386: } 387: if (bcnt > 0) fatal("action doesn't end"); 388: } else { 389: while (c != '\n' && c != EOF) { 390: putc(c,outp); 391: c = getc(inp); 392: } 393: lines++; 394: } 395: fprintf(outp,"\n break;\n"); 396: } 397: 398: /* 399: * find the action associated with a given character and state. 400: * returns -1 if one can't be found. 401: * 402: */ 403: int 404: faction(hd,state,chr) trans hd; int state,chr; { 405: while (hd != NULL) { 406: if (hd->anyst || teststate(state,hd)) 407: if (hd->inchr == ('.' - 32) || hd->inchr == (char) chr) 408: return(hd->actno); 409: hd = hd->nxt; 410: } 411: return(-1); 412: } 413: 414: /* 415: * empty the table... 416: * 417: */ 418: VOID 419: emptytbl() { 420: int i; 421: for (i=0; i<nstates*96; i++) tbl[i] = -1; 422: } 423: 424: /* 425: * add the specified action to the output for the given state and chr. 426: * 427: */ 428: VOID 429: addaction(act,state,chr) int act,state,chr; { 430: tbl[state*96 + chr] = act; 431: } 432: 433: VOID 434: writetbl(fp) FILE *fp; { 435: warray(fp,"tbl",tbl,96*(nstates+1),TBL_TYPE); 436: } 437: 438: 439: /* 440: * write an array to the output file, given its name and size. 441: * 442: */ 443: VOID 444: warray(fp,nam,cont,siz,typ) FILE *fp; char *nam; int cont[],siz; char *typ; { 445: int i; 446: fprintf(fp,"%s %s[] = {\n",typ,nam); 447: for (i = 0; i < siz - 1; ) { 448: fprintf(fp,"%2d, ",cont[i]); 449: if ((++i % 16) == 0) putc('\n',fp); 450: } 451: fprintf(fp,"%2d ",cont[siz-1]); 452: fprintf(fp,"};\n"); 453: } 454: 455: VOID 456: main(argc,argv) int argc; char *argv[]; { 457: trans head; 458: int state,c; 459: FILE *infile,*outfile; 460: 461: if (argc > 1) { 462: if ((infile = fopen(argv[1],"r")) == NULL) { 463: fprintf(stderr,"Can't open %s\n",argv[1]); 464: fatal("unreadable input file"); 465: } 466: } else infile = stdin; 467: 468: if (argc > 2) { 469: if ((outfile = fopen(argv[2],"w")) == NULL) { 470: fprintf(stderr,"Can't write to %s\n",argv[2]); 471: fatal("bad output file"); 472: } 473: } else outfile = stdout; 474: 475: clrhash(); /* empty hash table */ 476: head = rdinput(infile,outfile); /* read input file */ 477: emptytbl(); /* empty our tables */ 478: for (state = 0; state <= nstates; state++) 479: for (c = 1; c < 96; c++) /* find actions, */ 480: addaction(faction(head,state,c),state,c); /* add to tbl */ 481: writetbl(outfile); 482: copyrest(infile,outfile); 483: printf("%d states, %d actions\n",nstates,nacts); 484: exit(GOOD_EXIT); 485: } 486: 487: 488: /* 489: * fatal error handler 490: * 491: */ 492: 493: VOID 494: fatal(msg) char *msg; { 495: fprintf(stderr,"error in line %d: %s\n",lines,msg); 496: exit(BAD_EXIT); 497: } 498: 499: VOID 500: prolog(outfp) FILE *outfp; { 501: int c; 502: while ((c = *txt1++) != '\0') putc(c,outfp); 503: while ((c = *fname++) != '\0') putc(c,outfp); 504: while ((c = *txt2++) != '\0') putc(c,outfp); 505: while ((c = *tbl_type++) != '\0') putc(c,outfp); 506: while ((c = *txt2a++) != '\0') putc(c,outfp); 507: while ((c = *txt2b++) != '\0') putc(c,outfp); 508: } 509: 510: VOID 511: epilogue(outfp) FILE *outfp; { 512: int c; 513: while ((c = *txt3++) != '\0') putc(c,outfp); 514: } 515: 516: VOID 517: copyrest(in,out) FILE *in,*out; { 518: int c; 519: while ((c = getc(in)) != EOF) putc(c,out); 520: } 521: 522: /* 523: * gettoken - returns token type of next token, sets tokval 524: * to the string value of the token if appropriate. 525: * 526: */ 527: 528: int 529: gettoken(fp) FILE *fp; { 530: int c; 531: while (1) { /* loop if reading comments... */ 532: do { 533: c = getc(fp); 534: if (c == '\n') lines++; 535: } while ((isspace(c) || c == C_L)); /* skip whitespace */ 536: switch(c) { 537: case EOF: 538: return(SEP); 539: case '%': 540: if ((c = getc(fp)) == '%') return(SEP); 541: tokval[0] = '%'; 542: tokval[1] = (char) c; 543: rdword(fp,tokval+2); 544: return(WORD); 545: case '<': 546: return(LBRACK); 547: case '>': 548: return(RBRACK); 549: case ',': 550: return(COMMA); 551: case '/': 552: if ((c = getc(fp)) == '*') { 553: rdcmnt(fp); /* skip over the comment */ 554: continue; 555: } else { /* and keep looping */ 556: ungetc(c,fp); /* put this back into input */ 557: c = '/'; /* put character back, fall thru */ 558: } 559: 560: default: 561: if (isword(c)) { 562: ungetc(c,fp); 563: rdword(fp,tokval); 564: return(WORD); 565: } else fatal("Invalid character in input"); 566: } 567: } 568: } 569: 570: /* 571: * skip over a comment 572: * 573: */ 574: 575: VOID 576: rdcmnt(fp) FILE *fp; { 577: int c,star,prcnt; 578: prcnt = star = 0; /* no star seen yet */ 579: while (!((c = getc(fp)) == '/' && star)) { 580: if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment"); 581: prcnt = (c == '%'); 582: star = (c == '*'); 583: if (c == '\n') lines++; 584: } 585: } 586: 587: /* 588: * symbol table management for wart 589: * 590: * entry points: 591: * clrhash - empty hash table. 592: * enter - enter a name into the symbol table 593: * lkup - find a name's value in the symbol table. 594: * 595: */ 596: 597: #define HASHSIZE 101 /* # of entries in hash table */ 598: 599: struct sym { 600: char *name; /* symbol name */ 601: int val; /* value */ 602: struct sym *hnxt; /* next on collision chain */ 603: } *htab[HASHSIZE]; /* the hash table */ 604: 605: /* 606: * empty the hash table before using it... 607: * 608: */ 609: VOID 610: clrhash() { 611: int i; 612: for (i=0; i<HASHSIZE; i++) htab[i] = NULL; 613: } 614: 615: /* 616: * compute the value of the hash for a symbol 617: * 618: */ 619: int 620: hash(name) char *name; { 621: int sum; 622: for (sum = 0; *name != '\0'; name++) sum += (sum + *name); 623: sum %= HASHSIZE; /* take sum mod hashsize */ 624: if (sum < 0) sum += HASHSIZE; /* disallow negative hash value */ 625: return(sum); 626: } 627: 628: /* 629: * make a private copy of a string... 630: * 631: */ 632: static char* 633: copy(s) char *s; { 634: char *new; 635: new = (char *) malloc((int)strlen(s) + 1); 636: strcpy(new,s); 637: return(new); 638: } 639: 640: /* 641: * enter state name into the hash table 642: * 643: */ 644: VOID 645: enter(name,svalue) char *name; int svalue; { 646: int h; 647: struct sym *cur; 648: if (lkup(name) != -1) { 649: fprintf(stderr,"state \"%s\" appears twice...\n", name); 650: exit(BAD_EXIT); 651: } 652: h = hash(name); 653: cur = (struct sym *)malloc(sizeof (struct sym)); 654: cur->name = copy(name); 655: cur->val = svalue; 656: cur->hnxt = htab[h]; 657: htab[h] = cur; 658: } 659: 660: /* 661: * find name in the symbol table, return its value. Returns -1 662: * if not found. 663: * 664: */ 665: int 666: lkup(name) char *name; { 667: struct sym *cur; 668: for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt) 669: if (strcmp(cur->name,name) == 0) return(cur->val); 670: return(-1); 671: }