1: char *wartv = "Wart Version 1A(003) 27 May 85"; 2: 3: /* W A R T */ 4: 5: /* 6: pre-process a lex-like file into a C program. 7: 8: Author:Jeff Damens, Columbia University Center for Computing Activites, 11/84. 9: Copyright (C) 1985, Trustees of Columbia University in the City of New York. 10: Permission is granted to any individual or institution to use, copy, or 11: redistribute this software so long as it is not sold for profit, provided this 12: copyright notice is retained. 13: 14: * input format is: 15: * lines to be copied | %state <state names...> 16: * %% 17: * <state> | <state,state,...> CHAR { actions } 18: * ... 19: * %% 20: */ 21: 22: #include "ckcdeb.h" /* Includes */ 23: #include <stdio.h> 24: #include <ctype.h> 25: 26: #define C_L 014 /* Formfeed */ 27: 28: #define SEP 1 /* Token types */ 29: #define LBRACK 2 30: #define RBRACK 3 31: #define WORD 4 32: #define COMMA 5 33: 34: /* Storage sizes */ 35: 36: #define MAXSTATES 50 /* max number of states */ 37: #define MAXWORD 50 /* max # of chars/word */ 38: #define SBYTES ((MAXSTATES+7)/8) /* # of bytes for state bitmask */ 39: 40: /* Name of wart function in generated program */ 41: 42: #ifndef FNAME 43: #define FNAME "wart" 44: #endif 45: 46: /* Structure for state information */ 47: 48: struct trans { CHAR states[SBYTES]; /* included states */ 49: int anyst; /* true if this good from any state */ 50: CHAR inchr; /* input character */ 51: int actno; /* associated action */ 52: struct trans *nxt; }; /* next transition */ 53: 54: typedef struct trans *Trans; 55: 56: char *malloc(); /* Returns pointer (not int) */ 57: 58: 59: /* Variables and tables */ 60: 61: int lines,nstates,nacts; 62: 63: char tokval[MAXWORD]; 64: 65: int tbl[MAXSTATES*128]; 66: 67: 68: 69: char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\n"; 70: 71: char *fname = FNAME; /* function name goes here */ 72: 73: /* rest of program... */ 74: 75: char *txt2 = "()\n\ 76: {\n\ 77: int c,actno;\n\ 78: extern int tbl[];\n\ 79: while (1) {\n\ 80: c = input();\n\ 81: if ((actno = tbl[c + state*128]) != -1)\n\ 82: switch(actno) {\n"; 83: 84: /* this program's output goes here, followed by final text... */ 85: 86: char *txt3 = "\n }\n }\n\}\n\n"; 87: 88: 89: /* 90: * turn on the bit associated with the given state 91: * 92: */ 93: setstate(state,t) 94: int state; 95: Trans t; 96: { 97: int idx,msk; 98: idx = state/8; /* byte associated with state */ 99: msk = 0x80 >> (state % 8); /* bit mask for state */ 100: t->states[idx] |= msk; 101: } 102: 103: /* 104: * see if the state is involved in the transition 105: * 106: */ 107: 108: teststate(state,t) 109: int state; 110: Trans t; 111: { 112: int idx,msk; 113: idx = state/8; 114: msk = 0x80 >> (state % 8); 115: return(t->states[idx] & msk); 116: } 117: 118: 119: /* 120: * read input from here... 121: * 122: */ 123: 124: Trans 125: rdinput(infp,outfp) 126: FILE *infp,*outfp; 127: { 128: Trans x,rdrules(); 129: lines = 1; /* line counter */ 130: nstates = 0; /* no states */ 131: nacts = 0; /* no actions yet */ 132: fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/'); 133: fprintf(outfp,"Wart preprocessor. */\n"); 134: fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/'); 135: fprintf(outfp,"source file instead, */\n"); 136: fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/'); 137: fprintf(outfp,"C source file. */\n\n"); 138: fprintf(outfp,"%c* Wart Version Info: */\n",'/'); 139: fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv); 140: 141: initial(infp,outfp); /* read state names, initial defs */ 142: prolog(outfp); /* write out our initial code */ 143: x = rdrules(infp,outfp); /* read rules */ 144: epilogue(outfp); /* write out epilogue code */ 145: return(x); 146: } 147: 148: 149: /* 150: * initial - read initial definitions and state names. Returns 151: * on EOF or %%. 152: * 153: */ 154: 155: initial(infp,outfp) 156: FILE *infp,*outfp; 157: { 158: int c; 159: char wordbuf[MAXWORD]; 160: while ((c = getc(infp)) != EOF) { 161: if (c == '%') { 162: rdword(infp,wordbuf); 163: if (strcmp(wordbuf,"states") == 0) 164: rdstates(infp,outfp); 165: else if (strcmp(wordbuf,"%") == 0) return; 166: else fprintf(outfp,"%%%s",wordbuf); 167: } 168: else putc(c,outfp); 169: if (c == '\n') lines++; 170: } 171: } 172: 173: /* 174: * boolean function to tell if the given character can be part of 175: * a word. 176: * 177: */ 178: isin(s,c) char *s; int c; { 179: for (; *s != '\0'; s++) 180: if (*s == c) return(1); 181: return(0); 182: } 183: isword(c) 184: int c; 185: { 186: static char special[] = ".%_-$@"; /* these are allowable */ 187: return(isalnum(c) || isin(special,c)); 188: } 189: 190: /* 191: * read the next word into the given buffer. 192: * 193: */ 194: rdword(fp,buf) 195: FILE *fp; 196: char *buf; 197: { 198: int len = 0,c; 199: while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = c; 200: *buf++ = '\0'; /* tie off word */ 201: ungetc(c,fp); /* put break char back */ 202: } 203: 204: 205: /* 206: * read state names, up to a newline. 207: * 208: */ 209: 210: rdstates(fp,ofp) 211: FILE *fp,*ofp; 212: { 213: int c; 214: char wordbuf[MAXWORD]; 215: while ((c = getc(fp)) != EOF && c != '\n') 216: { 217: if (isspace(c) || c == C_L) continue; /* skip whitespace */ 218: ungetc(c,fp); /* put char back */ 219: rdword(fp,wordbuf); /* read the whole word */ 220: enter(wordbuf,++nstates); /* put into symbol tbl */ 221: fprintf(ofp,"#define %s %d\n",wordbuf,nstates); 222: } 223: lines++; 224: } 225: 226: /* 227: * allocate a new, empty transition node 228: * 229: */ 230: 231: Trans 232: newtrans() 233: { 234: Trans new; 235: int i; 236: new = (Trans) malloc(sizeof (struct trans)); 237: for (i=0; i<SBYTES; i++) new->states[i] = 0; 238: new->anyst = 0; 239: new->nxt = NULL; 240: return(new); 241: } 242: 243: 244: /* 245: * read all the rules. 246: * 247: */ 248: 249: Trans 250: rdrules(fp,out) 251: FILE *fp,*out; 252: { 253: Trans head,cur,prev; 254: int curtok,i; 255: head = cur = NULL; 256: while ((curtok = gettoken(fp)) != SEP) 257: 258: switch(curtok) { 259: case LBRACK: if (cur == NULL) cur = newtrans(); 260: else fatal("duplicate state list"); 261: statelist(fp,cur);/* set states */ 262: continue; /* prepare to read char */ 263: 264: case WORD: if (strlen(tokval) != 1) 265: fatal("multiple chars in state"); 266: if (cur == NULL) { 267: cur = newtrans(); 268: cur->anyst = 1; 269: } 270: cur->actno = ++nacts; 271: cur->inchr = tokval[0]; 272: if (head == NULL) head = cur; 273: else prev->nxt = cur; 274: prev = cur; 275: cur = NULL; 276: copyact(fp,out,nacts); 277: break; 278: default: fatal("bad input format"); 279: } 280: 281: return(head); 282: } 283: 284: 285: /* 286: * read a list of (comma-separated) states, set them in the 287: * given transition. 288: * 289: */ 290: statelist(fp,t) 291: FILE *fp; 292: Trans t; 293: { 294: int curtok,sval; 295: curtok = COMMA; 296: while (curtok != RBRACK) { 297: if (curtok != COMMA) fatal("missing comma"); 298: if ((curtok = gettoken(fp)) != WORD) fatal("missing state name"); 299: if ((sval = lkup(tokval)) == -1) { 300: fprintf(stderr,"state %s undefined\n",tokval); 301: fatal("undefined state"); 302: } 303: setstate(sval,t); 304: curtok = gettoken(fp); 305: } 306: } 307: 308: /* 309: * copy an action from the input to the output file 310: * 311: */ 312: copyact(inp,outp,actno) 313: FILE *inp,*outp; 314: int actno; 315: { 316: int c,bcnt; 317: fprintf(outp,"case %d:\n",actno); 318: while (((c = getc(inp)) != '\n') && (isspace(c) || c == C_L)); 319: if (c == '{') { 320: bcnt = 1; 321: putc(c,outp); 322: while (bcnt > 0 && (c = getc(inp)) != EOF) { 323: if (c == '{') bcnt++; 324: else if (c == '}') bcnt--; 325: else if (c == '\n') lines++; 326: putc(c,outp); 327: } 328: if (bcnt > 0) fatal("action doesn't end"); 329: } 330: else { 331: while (c != '\n' && c != EOF) { 332: putc(c,outp); 333: c = getc(inp); 334: } 335: lines++; 336: } 337: fprintf(outp,"\nbreak;\n"); 338: } 339: 340: 341: /* 342: * find the action associated with a given character and state. 343: * returns -1 if one can't be found. 344: * 345: */ 346: faction(hd,state,chr) 347: Trans hd; 348: int state,chr; 349: { 350: while (hd != NULL) { 351: if (hd->anyst || teststate(state,hd)) 352: if (hd->inchr == '.' || hd->inchr == chr) return(hd->actno); 353: hd = hd->nxt; 354: } 355: return(-1); 356: } 357: 358: 359: /* 360: * empty the table... 361: * 362: */ 363: emptytbl() 364: { 365: int i; 366: for (i=0; i<nstates*128; i++) tbl[i] = -1; 367: } 368: 369: /* 370: * add the specified action to the output for the given state and chr. 371: * 372: */ 373: 374: addaction(act,state,chr) 375: int act,state,chr; 376: { 377: tbl[state*128 + chr] = act; 378: } 379: 380: writetbl(fp) 381: FILE *fp; 382: { 383: warray(fp,"tbl",tbl,128*(nstates+1)); 384: } 385: 386: 387: /* 388: * write an array to the output file, given its name and size. 389: * 390: */ 391: warray(fp,nam,cont,siz) 392: FILE *fp; 393: char *nam; 394: int cont[],siz; 395: { 396: int i; 397: fprintf(fp,"int %s[] = {\n",nam); 398: for (i = 0; i < siz; i++) { 399: fprintf(fp,"%d, ",cont[i]); 400: if ((i % 20) == 0) putc('\n',fp); 401: } 402: fprintf(fp,"};\n"); 403: } 404: 405: main(argc,argv) 406: int argc; 407: char *argv[]; 408: { 409: Trans head; 410: int state,c; 411: FILE *infile,*outfile; 412: 413: if (argc > 1) { 414: if ((infile = fopen(argv[1],"r")) == NULL) { 415: fprintf(stderr,"Can't open %s\n",argv[1]); 416: fatal("unreadable input file"); } } 417: else infile = stdin; 418: 419: if (argc > 2) { 420: if ((outfile = fopen(argv[2],"w")) == NULL) { 421: fprintf(stderr,"Can't write to %s\n",argv[2]); 422: fatal("bad output file"); } } 423: else outfile = stdout; 424: 425: clrhash(); /* empty hash table */ 426: head = rdinput(infile,outfile); /* read input file */ 427: emptytbl(); /* empty our tables */ 428: for (state = 0; state <= nstates; state++) 429: for (c = 1; c < 128; c++) 430: addaction(faction(head,state,c),state,c); /* find actions, add to tbl */ 431: writetbl(outfile); 432: copyrest(infile,outfile); 433: fprintf(stderr,"%d states, %d actions\n",nstates,nacts); 434: #ifdef undef 435: for (state = 1; state <= nstates; state ++) 436: for (c = 1; c < 128; c++) 437: if (tbl[state*128 + c] != -1) printf("state %d, chr %d, act %d\n", 438: state,c,tbl[state*128 + c]); 439: #endif 440: exit(GOOD_EXIT); 441: } 442: 443: 444: /* 445: * fatal error handler 446: * 447: */ 448: 449: fatal(msg) 450: char *msg; 451: { 452: fprintf(stderr,"error in line %d: %s\n",lines,msg); 453: exit(BAD_EXIT); 454: } 455: 456: prolog(outfp) 457: FILE *outfp; 458: { 459: int c; 460: while ((c = *txt1++) != '\0') putc(c,outfp); 461: while ((c = *fname++) != '\0') putc(c,outfp); 462: while ((c = *txt2++) != '\0') putc(c,outfp); 463: } 464: 465: epilogue(outfp) 466: FILE *outfp; 467: { 468: int c; 469: while ((c = *txt3++) != '\0') putc(c,outfp); 470: } 471: 472: copyrest(in,out) 473: FILE *in,*out; 474: { 475: int c; 476: while ((c = getc(in)) != EOF) putc(c,out); 477: } 478: 479: 480: /* 481: * gettoken - returns token type of next token, sets tokval 482: * to the string value of the token if appropriate. 483: * 484: */ 485: 486: gettoken(fp) 487: FILE *fp; 488: { 489: int c; 490: while (1) { /* loop if reading comments... */ 491: do { 492: c = getc(fp); 493: if (c == '\n') lines++; 494: } while ((isspace(c) || c == C_L)); /* skip whitespace */ 495: switch(c) { 496: case EOF: return(SEP); 497: case '%': if ((c = getc(fp)) == '%') return(SEP); 498: tokval[0] = '%'; 499: tokval[1] = c; 500: rdword(fp,tokval+2); 501: return(WORD); 502: case '<': return(LBRACK); 503: case '>': return(RBRACK); 504: case ',': return(COMMA); 505: case '/': if ((c = getc(fp)) == '*') { 506: rdcmnt(fp); /* skip over the comment */ 507: continue; } /* and keep looping */ 508: else { 509: ungetc(c); /* put this back into input */ 510: c = '/'; } /* put character back, fall thru */ 511: 512: default: if (isword(c)) { 513: ungetc(c,fp); 514: rdword(fp,tokval); 515: return(WORD); 516: } 517: else fatal("Invalid character in input"); 518: } 519: } 520: } 521: 522: /* 523: * skip over a comment 524: * 525: */ 526: 527: rdcmnt(fp) 528: FILE *fp; 529: { 530: int c,star,prcnt; 531: prcnt = star = 0; /* no star seen yet */ 532: while (!((c = getc(fp)) == '/' && star)) { 533: if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment"); 534: prcnt = (c == '%'); 535: star = (c == '*'); 536: if (c == '\n') lines++; } 537: } 538: 539: 540: 541: /* 542: * symbol table management for wart 543: * 544: * entry points: 545: * clrhash - empty hash table. 546: * enter - enter a name into the symbol table 547: * lkup - find a name's value in the symbol table. 548: * 549: */ 550: 551: #define HASHSIZE 101 /* # of entries in hash table */ 552: 553: struct sym { char *name; /* symbol name */ 554: int val; /* value */ 555: struct sym *hnxt; } /* next on collision chain */ 556: *htab[HASHSIZE]; /* the hash table */ 557: 558: 559: /* 560: * empty the hash table before using it... 561: * 562: */ 563: clrhash() 564: { 565: int i; 566: for (i=0; i<HASHSIZE; i++) htab[i] = NULL; 567: } 568: 569: /* 570: * compute the value of the hash for a symbol 571: * 572: */ 573: hash(name) 574: char *name; 575: { 576: int sum; 577: for (sum = 0; *name != '\0'; name++) sum += (sum + *name); 578: sum %= HASHSIZE; /* take sum mod hashsize */ 579: if (sum < 0) sum += HASHSIZE; /* disallow negative hash value */ 580: return(sum); 581: } 582: 583: /* 584: * make a private copy of a string... 585: * 586: */ 587: char * 588: copy(s) 589: char *s; 590: { 591: char *new; 592: new = (char *) malloc(strlen(s) + 1); 593: strcpy(new,s); 594: return(new); 595: } 596: 597: 598: /* 599: * enter state name into the hash table 600: * 601: */ 602: enter(name,svalue) 603: char *name; 604: int svalue; 605: { 606: int h; 607: struct sym *cur; 608: if (lkup(name) != -1) { 609: fprintf(stderr,"state %s appears twice...\n"); 610: exit(BAD_EXIT); } 611: h = hash(name); 612: cur = (struct sym *)malloc(sizeof (struct sym)); 613: cur->name = copy(name); 614: cur->val = svalue; 615: cur->hnxt = htab[h]; 616: htab[h] = cur; 617: } 618: 619: /* 620: * find name in the symbol table, return its value. Returns -1 621: * if not found. 622: * 623: */ 624: lkup(name) 625: char *name; 626: { 627: struct sym *cur; 628: for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt) 629: if (strcmp(cur->name,name) == 0) return(cur->val); 630: return(-1); 631: }