1: /* $Header: a2py.c,v 1.0 87/12/18 17:50:33 root Exp $ 2: * 3: * $Log: a2py.c,v $ 4: * Revision 1.0 87/12/18 17:50:33 root 5: * Initial revision 6: * 7: */ 8: 9: #include "util.h" 10: char *index(); 11: 12: char *filename; 13: 14: main(argc,argv,env) 15: register int argc; 16: register char **argv; 17: register char **env; 18: { 19: register STR *str; 20: register char *s; 21: int i; 22: STR *walk(); 23: STR *tmpstr; 24: 25: linestr = str_new(80); 26: str = str_new(0); /* first used for -I flags */ 27: for (argc--,argv++; argc; argc--,argv++) { 28: if (argv[0][0] != '-' || !argv[0][1]) 29: break; 30: reswitch: 31: switch (argv[0][1]) { 32: #ifdef DEBUGGING 33: case 'D': 34: debug = atoi(argv[0]+2); 35: #ifdef YYDEBUG 36: yydebug = (debug & 1); 37: #endif 38: break; 39: #endif 40: case '0': case '1': case '2': case '3': case '4': 41: case '5': case '6': case '7': case '8': case '9': 42: maxfld = atoi(argv[0]+1); 43: absmaxfld = TRUE; 44: break; 45: case 'F': 46: fswitch = argv[0][2]; 47: break; 48: case 'n': 49: namelist = savestr(argv[0]+2); 50: break; 51: case '-': 52: argc--,argv++; 53: goto switch_end; 54: case 0: 55: break; 56: default: 57: fatal("Unrecognized switch: %s\n",argv[0]); 58: } 59: } 60: switch_end: 61: 62: /* open script */ 63: 64: if (argv[0] == Nullch) 65: argv[0] = "-"; 66: filename = savestr(argv[0]); 67: if (strEQ(filename,"-")) 68: argv[0] = ""; 69: if (!*argv[0]) 70: rsfp = stdin; 71: else 72: rsfp = fopen(argv[0],"r"); 73: if (rsfp == Nullfp) 74: fatal("Awk script \"%s\" doesn't seem to exist.\n",filename); 75: 76: /* init tokener */ 77: 78: bufptr = str_get(linestr); 79: symtab = hnew(); 80: 81: /* now parse the report spec */ 82: 83: if (yyparse()) 84: fatal("Translation aborted due to syntax errors.\n"); 85: 86: #ifdef DEBUGGING 87: if (debug & 2) { 88: int type, len; 89: 90: for (i=1; i<mop;) { 91: type = ops[i].ival; 92: len = type >> 8; 93: type &= 255; 94: printf("%d\t%d\t%d\t%-10s",i++,type,len,opname[type]); 95: if (type == OSTRING) 96: printf("\t\"%s\"\n",ops[i].cval),i++; 97: else { 98: while (len--) { 99: printf("\t%d",ops[i].ival),i++; 100: } 101: putchar('\n'); 102: } 103: } 104: } 105: if (debug & 8) 106: dump(root); 107: #endif 108: 109: /* first pass to look for numeric variables */ 110: 111: prewalk(0,0,root,&i); 112: 113: /* second pass to produce new program */ 114: 115: tmpstr = walk(0,0,root,&i); 116: str = str_make("#!/bin/perl\n\n"); 117: if (do_opens && opens) { 118: str_scat(str,opens); 119: str_free(opens); 120: str_cat(str,"\n"); 121: } 122: str_scat(str,tmpstr); 123: str_free(tmpstr); 124: #ifdef DEBUGGING 125: if (!(debug & 16)) 126: #endif 127: fixup(str); 128: putlines(str); 129: exit(0); 130: } 131: 132: #define RETURN(retval) return (bufptr = s,retval) 133: #define XTERM(retval) return (expectterm = TRUE,bufptr = s,retval) 134: #define XOP(retval) return (expectterm = FALSE,bufptr = s,retval) 135: #define ID(x) return (yylval=string(x,0),expectterm = FALSE,bufptr = s,VAR) 136: 137: yylex() 138: { 139: register char *s = bufptr; 140: register char *d; 141: register int tmp; 142: 143: retry: 144: #ifdef YYDEBUG 145: if (yydebug) 146: if (index(s,'\n')) 147: fprintf(stderr,"Tokener at %s",s); 148: else 149: fprintf(stderr,"Tokener at %s\n",s); 150: #endif 151: switch (*s) { 152: default: 153: fprintf(stderr, 154: "Unrecognized character %c in file %s line %d--ignoring.\n", 155: *s++,filename,line); 156: goto retry; 157: case '\\': 158: case 0: 159: s = str_get(linestr); 160: *s = '\0'; 161: if (!rsfp) 162: RETURN(0); 163: line++; 164: if ((s = str_gets(linestr, rsfp)) == Nullch) { 165: if (rsfp != stdin) 166: fclose(rsfp); 167: rsfp = Nullfp; 168: s = str_get(linestr); 169: RETURN(0); 170: } 171: goto retry; 172: case ' ': case '\t': 173: s++; 174: goto retry; 175: case '\n': 176: *s = '\0'; 177: XTERM(NEWLINE); 178: case '#': 179: yylval = string(s,0); 180: *s = '\0'; 181: XTERM(COMMENT); 182: case ';': 183: tmp = *s++; 184: if (*s == '\n') { 185: s++; 186: XTERM(SEMINEW); 187: } 188: XTERM(tmp); 189: case '(': 190: case '{': 191: case '[': 192: case ')': 193: case ']': 194: tmp = *s++; 195: XOP(tmp); 196: case 127: 197: s++; 198: XTERM('}'); 199: case '}': 200: for (d = s + 1; isspace(*d); d++) ; 201: if (!*d) 202: s = d - 1; 203: *s = 127; 204: XTERM(';'); 205: case ',': 206: tmp = *s++; 207: XTERM(tmp); 208: case '~': 209: s++; 210: XTERM(MATCHOP); 211: case '+': 212: case '-': 213: if (s[1] == *s) { 214: s++; 215: if (*s++ == '+') 216: XTERM(INCR); 217: else 218: XTERM(DECR); 219: } 220: /* FALL THROUGH */ 221: case '*': 222: case '%': 223: tmp = *s++; 224: if (*s == '=') { 225: yylval = string(s-1,2); 226: s++; 227: XTERM(ASGNOP); 228: } 229: XTERM(tmp); 230: case '&': 231: s++; 232: tmp = *s++; 233: if (tmp == '&') 234: XTERM(ANDAND); 235: s--; 236: XTERM('&'); 237: case '|': 238: s++; 239: tmp = *s++; 240: if (tmp == '|') 241: XTERM(OROR); 242: s--; 243: XTERM('|'); 244: case '=': 245: s++; 246: tmp = *s++; 247: if (tmp == '=') { 248: yylval = string("==",2); 249: XTERM(RELOP); 250: } 251: s--; 252: yylval = string("=",1); 253: XTERM(ASGNOP); 254: case '!': 255: s++; 256: tmp = *s++; 257: if (tmp == '=') { 258: yylval = string("!=",2); 259: XTERM(RELOP); 260: } 261: if (tmp == '~') { 262: yylval = string("!~",2); 263: XTERM(MATCHOP); 264: } 265: s--; 266: XTERM(NOT); 267: case '<': 268: s++; 269: tmp = *s++; 270: if (tmp == '=') { 271: yylval = string("<=",2); 272: XTERM(RELOP); 273: } 274: s--; 275: yylval = string("<",1); 276: XTERM(RELOP); 277: case '>': 278: s++; 279: tmp = *s++; 280: if (tmp == '=') { 281: yylval = string(">=",2); 282: XTERM(RELOP); 283: } 284: s--; 285: yylval = string(">",1); 286: XTERM(RELOP); 287: 288: #define SNARFWORD \ 289: d = tokenbuf; \ 290: while (isalpha(*s) || isdigit(*s) || *s == '_') \ 291: *d++ = *s++; \ 292: *d = '\0'; \ 293: d = tokenbuf; 294: 295: case '$': 296: s++; 297: if (*s == '0') { 298: s++; 299: do_chop = TRUE; 300: need_entire = TRUE; 301: ID("0"); 302: } 303: do_split = TRUE; 304: if (isdigit(*s)) { 305: for (d = s; isdigit(*s); s++) ; 306: yylval = string(d,s-d); 307: tmp = atoi(d); 308: if (tmp > maxfld) 309: maxfld = tmp; 310: XOP(FIELD); 311: } 312: split_to_array = set_array_base = TRUE; 313: XOP(VFIELD); 314: 315: case '/': /* may either be division or pattern */ 316: if (expectterm) { 317: s = scanpat(s); 318: XTERM(REGEX); 319: } 320: tmp = *s++; 321: if (*s == '=') { 322: yylval = string("/=",2); 323: s++; 324: XTERM(ASGNOP); 325: } 326: XTERM(tmp); 327: 328: case '0': case '1': case '2': case '3': case '4': 329: case '5': case '6': case '7': case '8': case '9': 330: s = scannum(s); 331: XOP(NUMBER); 332: case '"': 333: s++; 334: s = cpy2(tokenbuf,s,s[-1]); 335: if (!*s) 336: fatal("String not terminated:\n%s",str_get(linestr)); 337: s++; 338: yylval = string(tokenbuf,0); 339: XOP(STRING); 340: 341: case 'a': case 'A': 342: SNARFWORD; 343: ID(d); 344: case 'b': case 'B': 345: SNARFWORD; 346: if (strEQ(d,"break")) 347: XTERM(BREAK); 348: if (strEQ(d,"BEGIN")) 349: XTERM(BEGIN); 350: ID(d); 351: case 'c': case 'C': 352: SNARFWORD; 353: if (strEQ(d,"continue")) 354: XTERM(CONTINUE); 355: ID(d); 356: case 'd': case 'D': 357: SNARFWORD; 358: ID(d); 359: case 'e': case 'E': 360: SNARFWORD; 361: if (strEQ(d,"END")) 362: XTERM(END); 363: if (strEQ(d,"else")) 364: XTERM(ELSE); 365: if (strEQ(d,"exit")) { 366: saw_line_op = TRUE; 367: XTERM(EXIT); 368: } 369: if (strEQ(d,"exp")) { 370: yylval = OEXP; 371: XTERM(FUN1); 372: } 373: ID(d); 374: case 'f': case 'F': 375: SNARFWORD; 376: if (strEQ(d,"FS")) { 377: saw_FS++; 378: if (saw_FS == 1 && in_begin) { 379: for (d = s; *d && isspace(*d); d++) ; 380: if (*d == '=') { 381: for (d++; *d && isspace(*d); d++) ; 382: if (*d == '"' && d[2] == '"') 383: const_FS = d[1]; 384: } 385: } 386: ID(tokenbuf); 387: } 388: if (strEQ(d,"FILENAME")) 389: d = "ARGV"; 390: if (strEQ(d,"for")) 391: XTERM(FOR); 392: ID(d); 393: case 'g': case 'G': 394: SNARFWORD; 395: if (strEQ(d,"getline")) 396: XTERM(GETLINE); 397: ID(d); 398: case 'h': case 'H': 399: SNARFWORD; 400: ID(d); 401: case 'i': case 'I': 402: SNARFWORD; 403: if (strEQ(d,"if")) 404: XTERM(IF); 405: if (strEQ(d,"in")) 406: XTERM(IN); 407: if (strEQ(d,"index")) { 408: set_array_base = TRUE; 409: XTERM(INDEX); 410: } 411: if (strEQ(d,"int")) { 412: yylval = OINT; 413: XTERM(FUN1); 414: } 415: ID(d); 416: case 'j': case 'J': 417: SNARFWORD; 418: ID(d); 419: case 'k': case 'K': 420: SNARFWORD; 421: ID(d); 422: case 'l': case 'L': 423: SNARFWORD; 424: if (strEQ(d,"length")) { 425: yylval = OLENGTH; 426: XTERM(FUN1); 427: } 428: if (strEQ(d,"log")) { 429: yylval = OLOG; 430: XTERM(FUN1); 431: } 432: ID(d); 433: case 'm': case 'M': 434: SNARFWORD; 435: ID(d); 436: case 'n': case 'N': 437: SNARFWORD; 438: if (strEQ(d,"NF")) 439: do_split = split_to_array = set_array_base = TRUE; 440: if (strEQ(d,"next")) { 441: saw_line_op = TRUE; 442: XTERM(NEXT); 443: } 444: ID(d); 445: case 'o': case 'O': 446: SNARFWORD; 447: if (strEQ(d,"ORS")) { 448: saw_ORS = TRUE; 449: d = "$\\"; 450: } 451: if (strEQ(d,"OFS")) { 452: saw_OFS = TRUE; 453: d = "$,"; 454: } 455: if (strEQ(d,"OFMT")) { 456: d = "$#"; 457: } 458: ID(d); 459: case 'p': case 'P': 460: SNARFWORD; 461: if (strEQ(d,"print")) { 462: XTERM(PRINT); 463: } 464: if (strEQ(d,"printf")) { 465: XTERM(PRINTF); 466: } 467: ID(d); 468: case 'q': case 'Q': 469: SNARFWORD; 470: ID(d); 471: case 'r': case 'R': 472: SNARFWORD; 473: if (strEQ(d,"RS")) { 474: d = "$/"; 475: saw_RS = TRUE; 476: } 477: ID(d); 478: case 's': case 'S': 479: SNARFWORD; 480: if (strEQ(d,"split")) { 481: set_array_base = TRUE; 482: XOP(SPLIT); 483: } 484: if (strEQ(d,"substr")) { 485: set_array_base = TRUE; 486: XTERM(SUBSTR); 487: } 488: if (strEQ(d,"sprintf")) 489: XTERM(SPRINTF); 490: if (strEQ(d,"sqrt")) { 491: yylval = OSQRT; 492: XTERM(FUN1); 493: } 494: ID(d); 495: case 't': case 'T': 496: SNARFWORD; 497: ID(d); 498: case 'u': case 'U': 499: SNARFWORD; 500: ID(d); 501: case 'v': case 'V': 502: SNARFWORD; 503: ID(d); 504: case 'w': case 'W': 505: SNARFWORD; 506: if (strEQ(d,"while")) 507: XTERM(WHILE); 508: ID(d); 509: case 'x': case 'X': 510: SNARFWORD; 511: ID(d); 512: case 'y': case 'Y': 513: SNARFWORD; 514: ID(d); 515: case 'z': case 'Z': 516: SNARFWORD; 517: ID(d); 518: } 519: } 520: 521: char * 522: scanpat(s) 523: register char *s; 524: { 525: register char *d; 526: 527: switch (*s++) { 528: case '/': 529: break; 530: default: 531: fatal("Search pattern not found:\n%s",str_get(linestr)); 532: } 533: s = cpytill(tokenbuf,s,s[-1]); 534: if (!*s) 535: fatal("Search pattern not terminated:\n%s",str_get(linestr)); 536: s++; 537: yylval = string(tokenbuf,0); 538: return s; 539: } 540: 541: yyerror(s) 542: char *s; 543: { 544: fprintf(stderr,"%s in file %s at line %d\n", 545: s,filename,line); 546: } 547: 548: char * 549: scannum(s) 550: register char *s; 551: { 552: register char *d; 553: 554: switch (*s) { 555: case '1': case '2': case '3': case '4': case '5': 556: case '6': case '7': case '8': case '9': case '0' : case '.': 557: d = tokenbuf; 558: while (isdigit(*s) || *s == '_') 559: *d++ = *s++; 560: if (*s == '.' && index("0123456789eE",s[1])) 561: *d++ = *s++; 562: while (isdigit(*s) || *s == '_') 563: *d++ = *s++; 564: if (index("eE",*s) && index("+-0123456789",s[1])) 565: *d++ = *s++; 566: if (*s == '+' || *s == '-') 567: *d++ = *s++; 568: while (isdigit(*s)) 569: *d++ = *s++; 570: *d = '\0'; 571: yylval = string(tokenbuf,0); 572: break; 573: } 574: return s; 575: } 576: 577: string(ptr,len) 578: char *ptr; 579: { 580: int retval = mop; 581: 582: ops[mop++].ival = OSTRING + (1<<8); 583: if (!len) 584: len = strlen(ptr); 585: ops[mop].cval = safemalloc(len+1); 586: strncpy(ops[mop].cval,ptr,len); 587: ops[mop++].cval[len] = '\0'; 588: return retval; 589: } 590: 591: oper0(type) 592: int type; 593: { 594: int retval = mop; 595: 596: if (type > 255) 597: fatal("type > 255 (%d)\n",type); 598: ops[mop++].ival = type; 599: return retval; 600: } 601: 602: oper1(type,arg1) 603: int type; 604: int arg1; 605: { 606: int retval = mop; 607: 608: if (type > 255) 609: fatal("type > 255 (%d)\n",type); 610: ops[mop++].ival = type + (1<<8); 611: ops[mop++].ival = arg1; 612: return retval; 613: } 614: 615: oper2(type,arg1,arg2) 616: int type; 617: int arg1; 618: int arg2; 619: { 620: int retval = mop; 621: 622: if (type > 255) 623: fatal("type > 255 (%d)\n",type); 624: ops[mop++].ival = type + (2<<8); 625: ops[mop++].ival = arg1; 626: ops[mop++].ival = arg2; 627: return retval; 628: } 629: 630: oper3(type,arg1,arg2,arg3) 631: int type; 632: int arg1; 633: int arg2; 634: int arg3; 635: { 636: int retval = mop; 637: 638: if (type > 255) 639: fatal("type > 255 (%d)\n",type); 640: ops[mop++].ival = type + (3<<8); 641: ops[mop++].ival = arg1; 642: ops[mop++].ival = arg2; 643: ops[mop++].ival = arg3; 644: return retval; 645: } 646: 647: oper4(type,arg1,arg2,arg3,arg4) 648: int type; 649: int arg1; 650: int arg2; 651: int arg3; 652: int arg4; 653: { 654: int retval = mop; 655: 656: if (type > 255) 657: fatal("type > 255 (%d)\n",type); 658: ops[mop++].ival = type + (4<<8); 659: ops[mop++].ival = arg1; 660: ops[mop++].ival = arg2; 661: ops[mop++].ival = arg3; 662: ops[mop++].ival = arg4; 663: return retval; 664: } 665: 666: oper5(type,arg1,arg2,arg3,arg4,arg5) 667: int type; 668: int arg1; 669: int arg2; 670: int arg3; 671: int arg4; 672: int arg5; 673: { 674: int retval = mop; 675: 676: if (type > 255) 677: fatal("type > 255 (%d)\n",type); 678: ops[mop++].ival = type + (5<<8); 679: ops[mop++].ival = arg1; 680: ops[mop++].ival = arg2; 681: ops[mop++].ival = arg3; 682: ops[mop++].ival = arg4; 683: ops[mop++].ival = arg5; 684: return retval; 685: } 686: 687: int depth = 0; 688: 689: dump(branch) 690: int branch; 691: { 692: register int type; 693: register int len; 694: register int i; 695: 696: type = ops[branch].ival; 697: len = type >> 8; 698: type &= 255; 699: for (i=depth; i; i--) 700: printf(" "); 701: if (type == OSTRING) { 702: printf("%-5d\"%s\"\n",branch,ops[branch+1].cval); 703: } 704: else { 705: printf("(%-5d%s %d\n",branch,opname[type],len); 706: depth++; 707: for (i=1; i<=len; i++) 708: dump(ops[branch+i].ival); 709: depth--; 710: for (i=depth; i; i--) 711: printf(" "); 712: printf(")\n"); 713: } 714: } 715: 716: bl(arg,maybe) 717: int arg; 718: int maybe; 719: { 720: if (!arg) 721: return 0; 722: else if ((ops[arg].ival & 255) != OBLOCK) 723: return oper2(OBLOCK,arg,maybe); 724: else if ((ops[arg].ival >> 8) != 2) 725: return oper2(OBLOCK,ops[arg+1].ival,maybe); 726: else 727: return arg; 728: } 729: 730: fixup(str) 731: STR *str; 732: { 733: register char *s; 734: register char *t; 735: 736: for (s = str->str_ptr; *s; s++) { 737: if (*s == ';' && s[1] == ' ' && s[2] == '\n') { 738: strcpy(s+1,s+2); 739: s++; 740: } 741: else if (*s == '\n') { 742: for (t = s+1; isspace(*t & 127); t++) ; 743: t--; 744: while (isspace(*t & 127) && *t != '\n') t--; 745: if (*t == '\n' && t-s > 1) { 746: if (s[-1] == '{') 747: s--; 748: strcpy(s+1,t); 749: } 750: s++; 751: } 752: } 753: } 754: 755: putlines(str) 756: STR *str; 757: { 758: register char *d, *s, *t, *e; 759: register int pos, newpos; 760: 761: d = tokenbuf; 762: pos = 0; 763: for (s = str->str_ptr; *s; s++) { 764: *d++ = *s; 765: pos++; 766: if (*s == '\n') { 767: *d = '\0'; 768: d = tokenbuf; 769: pos = 0; 770: putone(); 771: } 772: else if (*s == '\t') 773: pos += 7; 774: if (pos > 78) { /* split a long line? */ 775: *d-- = '\0'; 776: newpos = 0; 777: for (t = tokenbuf; isspace(*t & 127); t++) { 778: if (*t == '\t') 779: newpos += 8; 780: else 781: newpos += 1; 782: } 783: e = d; 784: while (d > tokenbuf && (*d != ' ' || d[-1] != ';')) 785: d--; 786: if (d < t+10) { 787: d = e; 788: while (d > tokenbuf && 789: (*d != ' ' || d[-1] != '|' || d[-2] != '|') ) 790: d--; 791: } 792: if (d < t+10) { 793: d = e; 794: while (d > tokenbuf && 795: (*d != ' ' || d[-1] != '&' || d[-2] != '&') ) 796: d--; 797: } 798: if (d < t+10) { 799: d = e; 800: while (d > tokenbuf && (*d != ' ' || d[-1] != ',')) 801: d--; 802: } 803: if (d < t+10) { 804: d = e; 805: while (d > tokenbuf && *d != ' ') 806: d--; 807: } 808: if (d > t+3) { 809: *d = '\0'; 810: putone(); 811: putchar('\n'); 812: if (d[-1] != ';' && !(newpos % 4)) { 813: *t++ = ' '; 814: *t++ = ' '; 815: newpos += 2; 816: } 817: strcpy(t,d+1); 818: newpos += strlen(t); 819: d = t + strlen(t); 820: pos = newpos; 821: } 822: else 823: d = e + 1; 824: } 825: } 826: } 827: 828: putone() 829: { 830: register char *t; 831: 832: for (t = tokenbuf; *t; t++) { 833: *t &= 127; 834: if (*t == 127) { 835: *t = ' '; 836: strcpy(t+strlen(t)-1, "\t#???\n"); 837: } 838: } 839: t = tokenbuf; 840: if (*t == '#') { 841: if (strnEQ(t,"#!/bin/awk",10) || strnEQ(t,"#! /bin/awk",11)) 842: return; 843: } 844: fputs(tokenbuf,stdout); 845: } 846: 847: numary(arg) 848: int arg; 849: { 850: STR *key; 851: int dummy; 852: 853: key = walk(0,0,arg,&dummy); 854: str_cat(key,"[]"); 855: hstore(symtab,key->str_ptr,str_make("1")); 856: str_free(key); 857: set_array_base = TRUE; 858: return arg; 859: }