1: /* $Header: a2py.c,v 1.0.1.1 88/01/28 11:07:08 root Exp $ 2: * 3: * $Log: a2py.c,v $ 4: * Revision 1.0.1.1 88/01/28 11:07:08 root 5: * patch8: added support for FOO=bar switches using eval. 6: * 7: * Revision 1.0 87/12/18 17:50:33 root 8: * Initial revision 9: * 10: */ 11: 12: #include "util.h" 13: char *index(); 14: 15: char *filename; 16: 17: main(argc,argv,env) 18: register int argc; 19: register char **argv; 20: register char **env; 21: { 22: register STR *str; 23: register char *s; 24: int i; 25: STR *walk(); 26: STR *tmpstr; 27: 28: linestr = str_new(80); 29: str = str_new(0); /* first used for -I flags */ 30: for (argc--,argv++; argc; argc--,argv++) { 31: if (argv[0][0] != '-' || !argv[0][1]) 32: break; 33: reswitch: 34: switch (argv[0][1]) { 35: #ifdef DEBUGGING 36: case 'D': 37: debug = atoi(argv[0]+2); 38: #ifdef YYDEBUG 39: yydebug = (debug & 1); 40: #endif 41: break; 42: #endif 43: case '0': case '1': case '2': case '3': case '4': 44: case '5': case '6': case '7': case '8': case '9': 45: maxfld = atoi(argv[0]+1); 46: absmaxfld = TRUE; 47: break; 48: case 'F': 49: fswitch = argv[0][2]; 50: break; 51: case 'n': 52: namelist = savestr(argv[0]+2); 53: break; 54: case '-': 55: argc--,argv++; 56: goto switch_end; 57: case 0: 58: break; 59: default: 60: fatal("Unrecognized switch: %s\n",argv[0]); 61: } 62: } 63: switch_end: 64: 65: /* open script */ 66: 67: if (argv[0] == Nullch) 68: argv[0] = "-"; 69: filename = savestr(argv[0]); 70: if (strEQ(filename,"-")) 71: argv[0] = ""; 72: if (!*argv[0]) 73: rsfp = stdin; 74: else 75: rsfp = fopen(argv[0],"r"); 76: if (rsfp == Nullfp) 77: fatal("Awk script \"%s\" doesn't seem to exist.\n",filename); 78: 79: /* init tokener */ 80: 81: bufptr = str_get(linestr); 82: symtab = hnew(); 83: 84: /* now parse the report spec */ 85: 86: if (yyparse()) 87: fatal("Translation aborted due to syntax errors.\n"); 88: 89: #ifdef DEBUGGING 90: if (debug & 2) { 91: int type, len; 92: 93: for (i=1; i<mop;) { 94: type = ops[i].ival; 95: len = type >> 8; 96: type &= 255; 97: printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]); 98: if (type == OSTRING) 99: printf("\t\"%s\"\n",ops[i].cval),i++; 100: else { 101: while (len--) { 102: printf("\t%d",ops[i].ival),i++; 103: } 104: putchar('\n'); 105: } 106: } 107: } 108: if (debug & 8) 109: dump(root); 110: #endif 111: 112: /* first pass to look for numeric variables */ 113: 114: prewalk(0,0,root,&i); 115: 116: /* second pass to produce new program */ 117: 118: tmpstr = walk(0,0,root,&i); 119: str = str_make("#!/bin/perl\n\n"); 120: str_cat(str, 121: "eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_]+=)(.*)/ && shift;\n"); 122: str_cat(str, 123: " # process any FOO=bar switches\n\n"); 124: if (do_opens && opens) { 125: str_scat(str,opens); 126: str_free(opens); 127: str_cat(str,"\n"); 128: } 129: str_scat(str,tmpstr); 130: str_free(tmpstr); 131: #ifdef DEBUGGING 132: if (!(debug & 16)) 133: #endif 134: fixup(str); 135: putlines(str); 136: exit(0); 137: } 138: 139: #define RETURN(retval) return (bufptr = s,retval) 140: #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval) 141: #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval) 142: #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,VAR) 143: 144: yylex() 145: { 146: register char *s = bufptr; 147: register char *d; 148: register int tmp; 149: 150: retry: 151: #ifdef YYDEBUG 152: if (yydebug) 153: if (index(s,'\n')) 154: fprintf(stderr,"Tokener at %s",s); 155: else 156: fprintf(stderr,"Tokener at %s\n",s); 157: #endif 158: switch (*s) { 159: default: 160: fprintf(stderr, 161: "Unrecognized character %c in file %s line %d--ignoring.\n", 162: *s++,filename,line); 163: goto retry; 164: case '\\': 165: case 0: 166: s = str_get(linestr); 167: *s = '\0'; 168: if (!rsfp) 169: RETURN(0); 170: line++; 171: if ((s = str_gets(linestr, rsfp)) == Nullch) { 172: if (rsfp != stdin) 173: fclose(rsfp); 174: rsfp = Nullfp; 175: s = str_get(linestr); 176: RETURN(0); 177: } 178: goto retry; 179: case ' ': case '\t': 180: s++; 181: goto retry; 182: case '\n': 183: *s = '\0'; 184: XTERM(NEWLINE); 185: case '#': 186: yylval = string(s,0); 187: *s = '\0'; 188: XTERM(COMMENT); 189: case ';': 190: tmp = *s++; 191: if (*s == '\n') { 192: s++; 193: XTERM(SEMINEW); 194: } 195: XTERM(tmp); 196: case '(': 197: case '{': 198: case '[': 199: case ')': 200: case ']': 201: tmp = *s++; 202: XOP(tmp); 203: case 127: 204: s++; 205: XTERM('}'); 206: case '}': 207: for (d = s + 1; isspace(*d); d++) ; 208: if (!*d) 209: s = d - 1; 210: *s = 127; 211: XTERM(';'); 212: case ',': 213: tmp = *s++; 214: XTERM(tmp); 215: case '~': 216: s++; 217: XTERM(MATCHOP); 218: case '+': 219: case '-': 220: if (s[1] == *s) { 221: s++; 222: if (*s++ == '+') 223: XTERM(INCR); 224: else 225: XTERM(DECR); 226: } 227: /* FALL THROUGH */ 228: case '*': 229: case '%': 230: tmp = *s++; 231: if (*s == '=') { 232: yylval = string(s-1,2); 233: s++; 234: XTERM(ASGNOP); 235: } 236: XTERM(tmp); 237: case '&': 238: s++; 239: tmp = *s++; 240: if (tmp == '&') 241: XTERM(ANDAND); 242: s--; 243: XTERM('&'); 244: case '|': 245: s++; 246: tmp = *s++; 247: if (tmp == '|') 248: XTERM(OROR); 249: s--; 250: XTERM('|'); 251: case '=': 252: s++; 253: tmp = *s++; 254: if (tmp == '=') { 255: yylval = string("==",2); 256: XTERM(RELOP); 257: } 258: s--; 259: yylval = string("=",1); 260: XTERM(ASGNOP); 261: case '!': 262: s++; 263: tmp = *s++; 264: if (tmp == '=') { 265: yylval = string("!=",2); 266: XTERM(RELOP); 267: } 268: if (tmp == '~') { 269: yylval = string("!~",2); 270: XTERM(MATCHOP); 271: } 272: s--; 273: XTERM(NOT); 274: case '<': 275: s++; 276: tmp = *s++; 277: if (tmp == '=') { 278: yylval = string("<=",2); 279: XTERM(RELOP); 280: } 281: s--; 282: yylval = string("<",1); 283: XTERM(RELOP); 284: case '>': 285: s++; 286: tmp = *s++; 287: if (tmp == '=') { 288: yylval = string(">=",2); 289: XTERM(RELOP); 290: } 291: s--; 292: yylval = string(">",1); 293: XTERM(RELOP); 294: 295: #define SNARFWORD \ 296: d = tokenbuf; \ 297: while (isalpha(*s) || isdigit(*s) || *s == '_') \ 298: *d++ = *s++; \ 299: *d = '\0'; \ 300: d = tokenbuf; 301: 302: case '$': 303: s++; 304: if (*s == '0') { 305: s++; 306: do_chop = TRUE; 307: need_entire = TRUE; 308: ID("0"); 309: } 310: do_split = TRUE; 311: if (isdigit(*s)) { 312: for (d = s; isdigit(*s); s++) ; 313: yylval = string(d,s-d); 314: tmp = atoi(d); 315: if (tmp > maxfld) 316: maxfld = tmp; 317: XOP(FIELD); 318: } 319: split_to_array = set_array_base = TRUE; 320: XOP(VFIELD); 321: 322: case '/': /* may either be division or pattern */ 323: if (expectterm) { 324: s = scanpat(s); 325: XTERM(REGEX); 326: } 327: tmp = *s++; 328: if (*s == '=') { 329: yylval = string("/=",2); 330: s++; 331: XTERM(ASGNOP); 332: } 333: XTERM(tmp); 334: 335: case '0': case '1': case '2': case '3': case '4': 336: case '5': case '6': case '7': case '8': case '9': 337: s = scannum(s); 338: XOP(NUMBER); 339: case '"': 340: s++; 341: s = cpy2(tokenbuf,s,s[-1]); 342: if (!*s) 343: fatal("String not terminated:\n%s",str_get(linestr)); 344: s++; 345: yylval = string(tokenbuf,0); 346: XOP(STRING); 347: 348: case 'a': case 'A': 349: SNARFWORD; 350: ID(d); 351: case 'b': case 'B': 352: SNARFWORD; 353: if (strEQ(d,"break")) 354: XTERM(BREAK); 355: if (strEQ(d,"BEGIN")) 356: XTERM(BEGIN); 357: ID(d); 358: case 'c': case 'C': 359: SNARFWORD; 360: if (strEQ(d,"continue")) 361: XTERM(CONTINUE); 362: ID(d); 363: case 'd': case 'D': 364: SNARFWORD; 365: ID(d); 366: case 'e': case 'E': 367: SNARFWORD; 368: if (strEQ(d,"END")) 369: XTERM(END); 370: if (strEQ(d,"else")) 371: XTERM(ELSE); 372: if (strEQ(d,"exit")) { 373: saw_line_op = TRUE; 374: XTERM(EXIT); 375: } 376: if (strEQ(d,"exp")) { 377: yylval = OEXP; 378: XTERM(FUN1); 379: } 380: ID(d); 381: case 'f': case 'F': 382: SNARFWORD; 383: if (strEQ(d,"FS")) { 384: saw_FS++; 385: if (saw_FS == 1 && in_begin) { 386: for (d = s; *d && isspace(*d); d++) ; 387: if (*d == '=') { 388: for (d++; *d && isspace(*d); d++) ; 389: if (*d == '"' && d[2] == '"') 390: const_FS = d[1]; 391: } 392: } 393: ID(tokenbuf); 394: } 395: if (strEQ(d,"FILENAME")) 396: d = "ARGV"; 397: if (strEQ(d,"for")) 398: XTERM(FOR); 399: ID(d); 400: case 'g': case 'G': 401: SNARFWORD; 402: if (strEQ(d,"getline")) 403: XTERM(GETLINE); 404: ID(d); 405: case 'h': case 'H': 406: SNARFWORD; 407: ID(d); 408: case 'i': case 'I': 409: SNARFWORD; 410: if (strEQ(d,"if")) 411: XTERM(IF); 412: if (strEQ(d,"in")) 413: XTERM(IN); 414: if (strEQ(d,"index")) { 415: set_array_base = TRUE; 416: XTERM(INDEX); 417: } 418: if (strEQ(d,"int")) { 419: yylval = OINT; 420: XTERM(FUN1); 421: } 422: ID(d); 423: case 'j': case 'J': 424: SNARFWORD; 425: ID(d); 426: case 'k': case 'K': 427: SNARFWORD; 428: ID(d); 429: case 'l': case 'L': 430: SNARFWORD; 431: if (strEQ(d,"length")) { 432: yylval = OLENGTH; 433: XTERM(FUN1); 434: } 435: if (strEQ(d,"log")) { 436: yylval = OLOG; 437: XTERM(FUN1); 438: } 439: ID(d); 440: case 'm': case 'M': 441: SNARFWORD; 442: ID(d); 443: case 'n': case 'N': 444: SNARFWORD; 445: if (strEQ(d,"NF")) 446: do_split = split_to_array = set_array_base = TRUE; 447: if (strEQ(d,"next")) { 448: saw_line_op = TRUE; 449: XTERM(NEXT); 450: } 451: ID(d); 452: case 'o': case 'O': 453: SNARFWORD; 454: if (strEQ(d,"ORS")) { 455: saw_ORS = TRUE; 456: d = "$\\"; 457: } 458: if (strEQ(d,"OFS")) { 459: saw_OFS = TRUE; 460: d = "$,"; 461: } 462: if (strEQ(d,"OFMT")) { 463: d = "$#"; 464: } 465: ID(d); 466: case 'p': case 'P': 467: SNARFWORD; 468: if (strEQ(d,"print")) { 469: XTERM(PRINT); 470: } 471: if (strEQ(d,"printf")) { 472: XTERM(PRINTF); 473: } 474: ID(d); 475: case 'q': case 'Q': 476: SNARFWORD; 477: ID(d); 478: case 'r': case 'R': 479: SNARFWORD; 480: if (strEQ(d,"RS")) { 481: d = "$/"; 482: saw_RS = TRUE; 483: } 484: ID(d); 485: case 's': case 'S': 486: SNARFWORD; 487: if (strEQ(d,"split")) { 488: set_array_base = TRUE; 489: XOP(SPLIT); 490: } 491: if (strEQ(d,"substr")) { 492: set_array_base = TRUE; 493: XTERM(SUBSTR); 494: } 495: if (strEQ(d,"sprintf")) 496: XTERM(SPRINTF); 497: if (strEQ(d,"sqrt")) { 498: yylval = OSQRT; 499: XTERM(FUN1); 500: } 501: ID(d); 502: case 't': case 'T': 503: SNARFWORD; 504: ID(d); 505: case 'u': case 'U': 506: SNARFWORD; 507: ID(d); 508: case 'v': case 'V': 509: SNARFWORD; 510: ID(d); 511: case 'w': case 'W': 512: SNARFWORD; 513: if (strEQ(d,"while")) 514: XTERM(WHILE); 515: ID(d); 516: case 'x': case 'X': 517: SNARFWORD; 518: ID(d); 519: case 'y': case 'Y': 520: SNARFWORD; 521: ID(d); 522: case 'z': case 'Z': 523: SNARFWORD; 524: ID(d); 525: } 526: } 527: 528: char * 529: scanpat(s) 530: register char *s; 531: { 532: register char *d; 533: 534: switch (*s++) { 535: case '/': 536: break; 537: default: 538: fatal("Search pattern not found:\n%s",str_get(linestr)); 539: } 540: s = cpytill(tokenbuf,s,s[-1]); 541: if (!*s) 542: fatal("Search pattern not terminated:\n%s",str_get(linestr)); 543: s++; 544: yylval = string(tokenbuf,0); 545: return s; 546: } 547: 548: yyerror(s) 549: char *s; 550: { 551: fprintf(stderr,"%s in file %s at line %d\n", 552: s,filename,line); 553: } 554: 555: char * 556: scannum(s) 557: register char *s; 558: { 559: register char *d; 560: 561: switch (*s) { 562: case '1': case '2': case '3': case '4': case '5': 563: case '6': case '7': case '8': case '9': case '0' : case '.': 564: d = tokenbuf; 565: while (isdigit(*s) || *s == '_') 566: *d++ = *s++; 567: if (*s == '.' && index("0123456789eE",s[1])) 568: *d++ = *s++; 569: while (isdigit(*s) || *s == '_') 570: *d++ = *s++; 571: if (index("eE",*s) && index("+-0123456789",s[1])) 572: *d++ = *s++; 573: if (*s == '+' || *s == '-') 574: *d++ = *s++; 575: while (isdigit(*s)) 576: *d++ = *s++; 577: *d = '\0'; 578: yylval = string(tokenbuf,0); 579: break; 580: } 581: return s; 582: } 583: 584: string(ptr,len) 585: char *ptr; 586: { 587: int retval = mop; 588: 589: ops[mop++].ival = OSTRING + (1<<8); 590: if (!len) 591: len = strlen(ptr); 592: ops[mop].cval = safemalloc(len+1); 593: strncpy(ops[mop].cval,ptr,len); 594: ops[mop++].cval[len] = '\0'; 595: return retval; 596: } 597: 598: oper0(type) 599: int type; 600: { 601: int retval = mop; 602: 603: if (type > 255) 604: fatal("type > 255 (%d)\n",type); 605: ops[mop++].ival = type; 606: return retval; 607: } 608: 609: oper1(type,arg1) 610: int type; 611: int arg1; 612: { 613: int retval = mop; 614: 615: if (type > 255) 616: fatal("type > 255 (%d)\n",type); 617: ops[mop++].ival = type + (1<<8); 618: ops[mop++].ival = arg1; 619: return retval; 620: } 621: 622: oper2(type,arg1,arg2) 623: int type; 624: int arg1; 625: int arg2; 626: { 627: int retval = mop; 628: 629: if (type > 255) 630: fatal("type > 255 (%d)\n",type); 631: ops[mop++].ival = type + (2<<8); 632: ops[mop++].ival = arg1; 633: ops[mop++].ival = arg2; 634: return retval; 635: } 636: 637: oper3(type,arg1,arg2,arg3) 638: int type; 639: int arg1; 640: int arg2; 641: int arg3; 642: { 643: int retval = mop; 644: 645: if (type > 255) 646: fatal("type > 255 (%d)\n",type); 647: ops[mop++].ival = type + (3<<8); 648: ops[mop++].ival = arg1; 649: ops[mop++].ival = arg2; 650: ops[mop++].ival = arg3; 651: return retval; 652: } 653: 654: oper4(type,arg1,arg2,arg3,arg4) 655: int type; 656: int arg1; 657: int arg2; 658: int arg3; 659: int arg4; 660: { 661: int retval = mop; 662: 663: if (type > 255) 664: fatal("type > 255 (%d)\n",type); 665: ops[mop++].ival = type + (4<<8); 666: ops[mop++].ival = arg1; 667: ops[mop++].ival = arg2; 668: ops[mop++].ival = arg3; 669: ops[mop++].ival = arg4; 670: return retval; 671: } 672: 673: oper5(type,arg1,arg2,arg3,arg4,arg5) 674: int type; 675: int arg1; 676: int arg2; 677: int arg3; 678: int arg4; 679: int arg5; 680: { 681: int retval = mop; 682: 683: if (type > 255) 684: fatal("type > 255 (%d)\n",type); 685: ops[mop++].ival = type + (5<<8); 686: ops[mop++].ival = arg1; 687: ops[mop++].ival = arg2; 688: ops[mop++].ival = arg3; 689: ops[mop++].ival = arg4; 690: ops[mop++].ival = arg5; 691: return retval; 692: } 693: 694: int depth = 0; 695: 696: dump(branch) 697: int branch; 698: { 699: register int type; 700: register int len; 701: register int i; 702: 703: type = ops[branch].ival; 704: len = type >> 8; 705: type &= 255; 706: for (i=depth; i; i--) 707: printf(" "); 708: if (type == OSTRING) { 709: printf("%-5d\"%s\"\n",branch,ops[branch+1].cval); 710: } 711: else { 712: printf("(%-5d%s %d\n",branch,opname[type],len); 713: depth++; 714: for (i=1; i<=len; i++) 715: dump(ops[branch+i].ival); 716: depth--; 717: for (i=depth; i; i--) 718: printf(" "); 719: printf(")\n"); 720: } 721: } 722: 723: bl(arg,maybe) 724: int arg; 725: int maybe; 726: { 727: if (!arg) 728: return 0; 729: else if ((ops[arg].ival & 255) != OBLOCK) 730: return oper2(OBLOCK,arg,maybe); 731: else if ((ops[arg].ival >> 8) != 2) 732: return oper2(OBLOCK,ops[arg+1].ival,maybe); 733: else 734: return arg; 735: } 736: 737: fixup(str) 738: STR *str; 739: { 740: register char *s; 741: register char *t; 742: 743: for (s = str->str_ptr; *s; s++) { 744: if (*s == ';' && s[1] == ' ' && s[2] == '\n') { 745: strcpy(s+1,s+2); 746: s++; 747: } 748: else if (*s == '\n') { 749: for (t = s+1; isspace(*t & 127); t++) ; 750: t--; 751: while (isspace(*t & 127) && *t != '\n') t--; 752: if (*t == '\n' && t-s > 1) { 753: if (s[-1] == '{') 754: s--; 755: strcpy(s+1,t); 756: } 757: s++; 758: } 759: } 760: } 761: 762: putlines(str) 763: STR *str; 764: { 765: register char *d, *s, *t, *e; 766: register int pos, newpos; 767: 768: d = tokenbuf; 769: pos = 0; 770: for (s = str->str_ptr; *s; s++) { 771: *d++ = *s; 772: pos++; 773: if (*s == '\n') { 774: *d = '\0'; 775: d = tokenbuf; 776: pos = 0; 777: putone(); 778: } 779: else if (*s == '\t') 780: pos += 7; 781: if (pos > 78) { /* split a long line? */ 782: *d-- = '\0'; 783: newpos = 0; 784: for (t = tokenbuf; isspace(*t & 127); t++) { 785: if (*t == '\t') 786: newpos += 8; 787: else 788: newpos += 1; 789: } 790: e = d; 791: while (d > tokenbuf && (*d != ' ' || d[-1] != ';')) 792: d--; 793: if (d < t+10) { 794: d = e; 795: while (d > tokenbuf && 796: (*d != ' ' || d[-1] != '|' || d[-2] != '|') ) 797: d--; 798: } 799: if (d < t+10) { 800: d = e; 801: while (d > tokenbuf && 802: (*d != ' ' || d[-1] != '&' || d[-2] != '&') ) 803: d--; 804: } 805: if (d < t+10) { 806: d = e; 807: while (d > tokenbuf && (*d != ' ' || d[-1] != ',')) 808: d--; 809: } 810: if (d < t+10) { 811: d = e; 812: while (d > tokenbuf && *d != ' ') 813: d--; 814: } 815: if (d > t+3) { 816: *d = '\0'; 817: putone(); 818: putchar('\n'); 819: if (d[-1] != ';' && !(newpos % 4)) { 820: *t++ = ' '; 821: *t++ = ' '; 822: newpos += 2; 823: } 824: strcpy(t,d+1); 825: newpos += strlen(t); 826: d = t + strlen(t); 827: pos = newpos; 828: } 829: else 830: d = e + 1; 831: } 832: } 833: } 834: 835: putone() 836: { 837: register char *t; 838: 839: for (t = tokenbuf; *t; t++) { 840: *t &= 127; 841: if (*t == 127) { 842: *t = ' '; 843: strcpy(t+strlen(t)-1, "\t#???\n"); 844: } 845: } 846: t = tokenbuf; 847: if (*t == '#') { 848: if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11)) 849: return; 850: } 851: fputs(tokenbuf,stdout); 852: } 853: 854: numary(arg) 855: int arg; 856: { 857: STR *key; 858: int dummy; 859: 860: key = walk(0,0,arg,&dummy); 861: str_cat(key,"[]"); 862: hstore(symtab,key->str_ptr,str_make("1")); 863: str_free(key); 864: set_array_base = TRUE; 865: return arg; 866: }