1: /* Copyright (c) 1979 Regents of the University of California */ 2: #include "ex.h" 3: #include "ex_re.h" 4: 5: /* 6: * Global, substitute and regular expressions. 7: * Very similar to ed, with some re extensions and 8: * confirmed substitute. 9: */ 10: global(k) 11: bool k; 12: { 13: register char *gp; 14: register int c; 15: register line *a1; 16: char globuf[GBSIZE], *Cwas; 17: int lines = lineDOL(); 18: int oinglobal = inglobal; 19: char *oglobp = globp; 20: 21: Cwas = Command; 22: /* 23: * States of inglobal: 24: * 0: ordinary - not in a global command. 25: * 1: text coming from some buffer, not tty. 26: * 2: like 1, but the source of the buffer is a global command. 27: * Hence you're only in a global command if inglobal==2. This 28: * strange sounding convention is historically derived from 29: * everybody simulating a global command. 30: */ 31: if (inglobal==2) 32: error("Global within global@not allowed"); 33: markDOT(); 34: setall(); 35: nonzero(); 36: if (skipend()) 37: error("Global needs re|Missing regular expression for global"); 38: c = getchar(); 39: ignore(compile(c, 1)); 40: savere(scanre); 41: gp = globuf; 42: while ((c = getchar()) != '\n') { 43: switch (c) { 44: 45: case EOF: 46: c = '\n'; 47: goto brkwh; 48: 49: case '\\': 50: c = getchar(); 51: switch (c) { 52: 53: case '\\': 54: ungetchar(c); 55: break; 56: 57: case '\n': 58: break; 59: 60: default: 61: *gp++ = '\\'; 62: break; 63: } 64: break; 65: } 66: *gp++ = c; 67: if (gp >= &globuf[GBSIZE - 2]) 68: error("Global command too long"); 69: } 70: brkwh: 71: ungetchar(c); 72: out: 73: newline(); 74: *gp++ = c; 75: *gp++ = 0; 76: saveall(); 77: inglobal = 2; 78: for (a1 = one; a1 <= dol; a1++) { 79: *a1 &= ~01; 80: if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 81: *a1 |= 01; 82: } 83: /* should use gdelete from ed to avoid n**2 here on g/.../d */ 84: if (inopen) 85: inopen = -1; 86: for (a1 = one; a1 <= dol; a1++) { 87: if (*a1 & 01) { 88: *a1 &= ~01; 89: dot = a1; 90: globp = globuf; 91: commands(1, 1); 92: a1 = zero; 93: } 94: } 95: globp = oglobp; 96: inglobal = oinglobal; 97: endline = 1; 98: Command = Cwas; 99: netchHAD(lines); 100: setlastchar(EOF); 101: if (inopen) { 102: ungetchar(EOF); 103: inopen = 1; 104: } 105: } 106: 107: bool cflag; 108: int scount, slines, stotal; 109: 110: substitute(c) 111: int c; 112: { 113: register line *addr; 114: register int n; 115: int gsubf; 116: 117: gsubf = compsub(c); 118: if (!inglobal) 119: save12(), undkind = UNDCHANGE; 120: stotal = 0; 121: slines = 0; 122: for (addr = addr1; addr <= addr2; addr++) { 123: scount = 0; 124: if (dosubcon(0, addr) == 0) 125: continue; 126: if (gsubf) { 127: #ifdef notdef 128: /* 129: * should check but loc2 is already munged. 130: * This needs a fancier check later. 131: */ 132: if (loc1 == loc2) 133: error("substitution loop"); 134: #endif 135: while (*loc2) 136: if (dosubcon(1, addr) == 0) 137: break; 138: } 139: if (scount) { 140: stotal += scount; 141: slines++; 142: putmark(addr); 143: n = append(getsub, addr); 144: addr += n; 145: addr2 += n; 146: } 147: } 148: if (stotal == 0 && !inglobal && !cflag) 149: error("Fail|Substitute pattern match failed"); 150: snote(stotal, slines); 151: return (stotal); 152: } 153: 154: compsub(ch) 155: { 156: register int seof, c; 157: int gsubf; 158: 159: gsubf = 0; 160: cflag = 0; 161: switch (ch) { 162: 163: case 's': 164: ignore(skipwh()); 165: seof = getchar(); 166: if (endcmd(seof)) 167: error("Substitute needs re|Missing regular expression for substitute"); 168: seof = compile(seof, 1); 169: savere(subre); 170: comprhs(seof); 171: break; 172: 173: case '&': 174: if (subre.Expbuf[0] == 0) 175: error("No previous substitute re|No previous substitute to repeat"); 176: resre(subre); 177: break; 178: 179: case '~': 180: if (re.Expbuf[0] == 0) 181: error("No previous re|No previous regular expression"); 182: savere(subre); 183: break; 184: } 185: for (;;) { 186: c = getchar(); 187: switch (c) { 188: 189: case 'g': 190: gsubf++; 191: continue; 192: 193: case 'c': 194: cflag++; 195: continue; 196: 197: default: 198: ungetchar(c); 199: setcount(); 200: newline(); 201: return (gsubf); 202: } 203: } 204: } 205: 206: comprhs(seof) 207: int seof; 208: { 209: register char *rp, *orp; 210: register int c; 211: char orhsbuf[LBSIZE / 2]; 212: 213: rp = rhsbuf; 214: CP(orhsbuf, rp); 215: for (;;) { 216: c = getchar(); 217: if (c == seof) 218: break; 219: switch (c) { 220: 221: case '\\': 222: c = getchar(); 223: if (c == EOF) { 224: ungetchar(c); 225: break; 226: } 227: if (value(MAGIC)) { 228: /* 229: * When "magic", \& turns into a plain &, 230: * and all other chars work fine quoted. 231: */ 232: if (c != '&') 233: c |= QUOTE; 234: break; 235: } 236: magic: 237: if (c == '~') { 238: for (orp = orhsbuf; *orp; *rp++ = *orp++) 239: if (rp >= &rhsbuf[LBSIZE / 2 + 1]) 240: goto toobig; 241: continue; 242: } 243: c |= QUOTE; 244: break; 245: 246: case '\n': 247: case EOF: 248: if (!(globp && globp[0])) { 249: ungetchar(c); 250: goto endrhs; 251: } 252: 253: case '~': 254: case '&': 255: if (value(MAGIC)) 256: goto magic; 257: break; 258: } 259: if (rp >= &rhsbuf[LBSIZE / 2 - 1]) 260: toobig: 261: error("Replacement pattern too long@- limit 256 characters"); 262: *rp++ = c; 263: } 264: endrhs: 265: *rp++ = 0; 266: } 267: 268: getsub() 269: { 270: register char *p; 271: 272: if ((p = linebp) == 0) 273: return (EOF); 274: strcLIN(p); 275: linebp = 0; 276: return (0); 277: } 278: 279: dosubcon(f, a) 280: bool f; 281: line *a; 282: { 283: 284: if (execute(f, a) == 0) 285: return (0); 286: if (confirmed(a)) { 287: dosub(); 288: scount++; 289: } 290: return (1); 291: } 292: 293: confirmed(a) 294: line *a; 295: { 296: register int c, ch; 297: 298: if (cflag == 0) 299: return (1); 300: pofix(); 301: pline(lineno(a)); 302: if (inopen) 303: putchar('\n' | QUOTE); 304: c = column(loc1 - 1); 305: ugo(c - 1 + (inopen ? 1 : 0), ' '); 306: ugo(column(loc2 - 1) - c, '^'); 307: flush(); 308: ch = c = getkey(); 309: again: 310: if (c == '\r') 311: c = '\n'; 312: if (inopen) 313: putchar(c), flush(); 314: if (c != '\n' && c != EOF) { 315: c = getkey(); 316: goto again; 317: } 318: noteinp(); 319: return (ch == 'y'); 320: } 321: 322: getch() 323: { 324: char c; 325: 326: if (read(2, &c, 1) != 1) 327: return (EOF); 328: return (c & TRIM); 329: } 330: 331: ugo(cnt, with) 332: int with; 333: int cnt; 334: { 335: 336: if (cnt > 0) 337: do 338: putchar(with); 339: while (--cnt > 0); 340: } 341: 342: int casecnt; 343: bool destuc; 344: 345: dosub() 346: { 347: register char *lp, *sp, *rp; 348: int c; 349: 350: lp = linebuf; 351: sp = genbuf; 352: rp = rhsbuf; 353: while (lp < loc1) 354: *sp++ = *lp++; 355: casecnt = 0; 356: while (c = *rp++) { 357: if (c & QUOTE) 358: switch (c & TRIM) { 359: 360: case '&': 361: sp = place(sp, loc1, loc2); 362: if (sp == 0) 363: goto ovflo; 364: continue; 365: 366: case 'l': 367: casecnt = 1; 368: destuc = 0; 369: continue; 370: 371: case 'L': 372: casecnt = LBSIZE; 373: destuc = 0; 374: continue; 375: 376: case 'u': 377: casecnt = 1; 378: destuc = 1; 379: continue; 380: 381: case 'U': 382: casecnt = LBSIZE; 383: destuc = 1; 384: continue; 385: 386: case 'E': 387: case 'e': 388: casecnt = 0; 389: continue; 390: } 391: if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { 392: sp = place(sp, braslist[c - '1'], braelist[c - '1']); 393: if (sp == 0) 394: goto ovflo; 395: continue; 396: } 397: if (casecnt) 398: *sp++ = fixcase(c & TRIM); 399: else 400: *sp++ = c & TRIM; 401: if (sp >= &genbuf[LBSIZE]) 402: ovflo: 403: error("Line overflow@in substitute - limit 512 chars"); 404: } 405: lp = loc2; 406: loc2 = sp + (linebuf - genbuf); 407: while (*sp++ = *lp++) 408: if (sp >= &genbuf[LBSIZE]) 409: goto ovflo; 410: strcLIN(genbuf); 411: } 412: 413: fixcase(c) 414: register int c; 415: { 416: 417: if (casecnt == 0) 418: return (c); 419: casecnt--; 420: if (destuc) { 421: if (islower(c)) 422: c = toupper(c); 423: } else 424: if (isupper(c)) 425: c = tolower(c); 426: return (c); 427: } 428: 429: char * 430: place(sp, l1, l2) 431: register char *sp, *l1, *l2; 432: { 433: 434: while (l1 < l2) { 435: *sp++ = fixcase(*l1++); 436: if (sp >= &genbuf[LBSIZE]) 437: return (0); 438: } 439: return (sp); 440: } 441: 442: snote(total, lines) 443: register int total, lines; 444: { 445: 446: if (!notable(total)) 447: return; 448: printf(mesg("%d subs|%d substitutions"), total); 449: if (lines != 1 && lines != total) 450: printf(" on %d lines", lines); 451: noonl(); 452: flush(); 453: } 454: 455: compile(eof, oknl) 456: int eof; 457: int oknl; 458: { 459: register int c; 460: register char *ep; 461: char *lastep; 462: char bracket[NBRA], *bracketp, *rhsp; 463: int cclcnt; 464: 465: if (isalpha(eof) || isdigit(eof)) 466: error("Regular expressions cannot be delimited by letters or digits"); 467: ep = expbuf; 468: c = getchar(); 469: if (eof == '\\') 470: switch (c) { 471: 472: case '/': 473: case '?': 474: if (scanre.Expbuf[0] == 0) 475: error("No previous scan re|No previous scanning regular expression"); 476: resre(scanre); 477: return (c); 478: 479: case '&': 480: if (subre.Expbuf[0] == 0) 481: error("No previous substitute re|No previous substitute regular expression"); 482: resre(subre); 483: return (c); 484: 485: default: 486: error("Badly formed re|Regular expression \\ must be followed by / or ?"); 487: } 488: if (c == eof || c == '\n' || c == EOF) { 489: if (*ep == 0) 490: error("No previous re|No previous regular expression"); 491: if (c == '\n' && oknl == 0) 492: error("Missing closing delimiter@for regular expression"); 493: if (c != eof) 494: ungetchar(c); 495: return (eof); 496: } 497: bracketp = bracket; 498: nbra = 0; 499: circfl = 0; 500: if (c == '^') { 501: c = getchar(); 502: circfl++; 503: } 504: ungetchar(c); 505: for (;;) { 506: if (ep >= &expbuf[ESIZE - 2]) 507: complex: 508: cerror("Re too complex|Regular expression too complicated"); 509: c = getchar(); 510: if (c == eof || c == EOF) { 511: if (bracketp != bracket) 512: cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); 513: *ep++ = CEOFC; 514: if (c == EOF) 515: ungetchar(c); 516: return (eof); 517: } 518: if (value(MAGIC)) { 519: if (c != '*' || ep == expbuf) 520: lastep = ep; 521: } else 522: if (c != '\\' || peekchar() != '*' || ep == expbuf) 523: lastep = ep; 524: switch (c) { 525: 526: case '\\': 527: c = getchar(); 528: switch (c) { 529: 530: case '(': 531: if (nbra >= NBRA) 532: cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); 533: *bracketp++ = nbra; 534: *ep++ = CBRA; 535: *ep++ = nbra++; 536: continue; 537: 538: case ')': 539: if (bracketp <= bracket) 540: cerror("Extra \\)|More \\)'s than \\('s in regular expression"); 541: *ep++ = CKET; 542: *ep++ = *--bracketp; 543: continue; 544: 545: case '<': 546: *ep++ = CBRC; 547: continue; 548: 549: case '>': 550: *ep++ = CLET; 551: continue; 552: } 553: if (value(MAGIC) == 0) 554: magic: 555: switch (c) { 556: 557: case '.': 558: *ep++ = CDOT; 559: continue; 560: 561: case '~': 562: rhsp = rhsbuf; 563: while (*rhsp) { 564: if (*rhsp & QUOTE) { 565: c = *rhsp & TRIM; 566: if (c == '&') 567: error("Replacement pattern contains &@- cannot use in re"); 568: if (c >= '1' && c <= '9') 569: error("Replacement pattern contains \\d@- cannot use in re"); 570: } 571: if (ep >= &expbuf[ESIZE-2]) 572: goto complex; 573: *ep++ = CCHR; 574: *ep++ = *rhsp++ & TRIM; 575: } 576: continue; 577: 578: case '*': 579: if (ep == expbuf) 580: break; 581: if (*lastep == CBRA || *lastep == CKET) 582: cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); 583: if (*lastep == CCHR && (lastep[1] & QUOTE)) 584: cerror("Illegal *|Can't * a \\n in regular expression"); 585: *lastep |= STAR; 586: continue; 587: 588: case '[': 589: *ep++ = CCL; 590: *ep++ = 0; 591: cclcnt = 1; 592: c = getchar(); 593: if (c == '^') { 594: c = getchar(); 595: ep[-2] = NCCL; 596: } 597: if (c == ']') 598: cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); 599: while (c != ']') { 600: if (c == '\\' && any(peekchar(), "]-^\\")) 601: c = getchar() | QUOTE; 602: if (c == '\n' || c == EOF) 603: cerror("Missing ]"); 604: *ep++ = c; 605: cclcnt++; 606: if (ep >= &expbuf[ESIZE]) 607: goto complex; 608: c = getchar(); 609: } 610: lastep[1] = cclcnt; 611: continue; 612: } 613: if (c == EOF) { 614: ungetchar(EOF); 615: c = '\\'; 616: goto defchar; 617: } 618: *ep++ = CCHR; 619: if (c == '\n') 620: cerror("No newlines in re's|Can't escape newlines into regular expressions"); 621: /* 622: if (c < '1' || c > NBRA + '1') { 623: */ 624: *ep++ = c; 625: continue; 626: /* 627: } 628: c -= '1'; 629: if (c >= nbra) 630: cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); 631: *ep++ = c | QUOTE; 632: continue; 633: */ 634: 635: case '\n': 636: if (oknl) { 637: ungetchar(c); 638: *ep++ = CEOFC; 639: return (eof); 640: } 641: cerror("Badly formed re|Missing closing delimiter for regular expression"); 642: 643: case '$': 644: if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { 645: *ep++ = CDOL; 646: continue; 647: } 648: goto defchar; 649: 650: case '.': 651: case '~': 652: case '*': 653: case '[': 654: if (value(MAGIC)) 655: goto magic; 656: defchar: 657: default: 658: *ep++ = CCHR; 659: *ep++ = c; 660: continue; 661: } 662: } 663: } 664: 665: cerror(s) 666: char *s; 667: { 668: 669: expbuf[0] = 0; 670: error(s); 671: } 672: 673: same(a, b) 674: register int a, b; 675: { 676: 677: return (a == b || value(IGNORECASE) && 678: ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); 679: } 680: 681: char *locs; 682: 683: execute(gf, addr) 684: line *addr; 685: { 686: register char *p1, *p2; 687: register int c; 688: 689: if (gf) { 690: if (circfl) 691: return (0); 692: #ifdef notdef 693: if (loc1 == loc2) 694: loc2++; 695: #endif 696: locs = p1 = loc2; 697: } else { 698: if (addr == zero) 699: return (0); 700: p1 = linebuf; 701: getline(*addr); 702: locs = 0; 703: } 704: p2 = expbuf; 705: if (circfl) { 706: loc1 = p1; 707: return (advance(p1, p2)); 708: } 709: /* fast check for first character */ 710: if (*p2 == CCHR) { 711: c = p2[1]; 712: do { 713: if (c != *p1 && (!value(IGNORECASE) || 714: !((islower(c) && toupper(c) == *p1) || 715: (islower(*p1) && toupper(*p1) == c)))) 716: continue; 717: if (advance(p1, p2)) { 718: loc1 = p1; 719: return (1); 720: } 721: } while (*p1++); 722: return (0); 723: } 724: /* regular algorithm */ 725: do { 726: if (advance(p1, p2)) { 727: loc1 = p1; 728: return (1); 729: } 730: } while (*p1++); 731: return (0); 732: } 733: 734: #define uletter(c) (isalpha(c) || c == '_') 735: 736: advance(lp, ep) 737: register char *lp, *ep; 738: { 739: register char *curlp; 740: char *sp, *sp1; 741: int c; 742: 743: for (;;) switch (*ep++) { 744: 745: case CCHR: 746: /* useless 747: if (*ep & QUOTE) { 748: c = *ep++ & TRIM; 749: sp = braslist[c]; 750: sp1 = braelist[c]; 751: while (sp < sp1) { 752: if (!same(*sp, *lp)) 753: return (0); 754: sp++, lp++; 755: } 756: continue; 757: } 758: */ 759: if (!same(*ep, *lp)) 760: return (0); 761: ep++, lp++; 762: continue; 763: 764: case CDOT: 765: if (*lp++) 766: continue; 767: return (0); 768: 769: case CDOL: 770: if (*lp == 0) 771: continue; 772: return (0); 773: 774: case CEOFC: 775: loc2 = lp; 776: return (1); 777: 778: case CCL: 779: if (cclass(ep, *lp++, 1)) { 780: ep += *ep; 781: continue; 782: } 783: return (0); 784: 785: case NCCL: 786: if (cclass(ep, *lp++, 0)) { 787: ep += *ep; 788: continue; 789: } 790: return (0); 791: 792: case CBRA: 793: braslist[*ep++] = lp; 794: continue; 795: 796: case CKET: 797: braelist[*ep++] = lp; 798: continue; 799: 800: case CDOT|STAR: 801: curlp = lp; 802: while (*lp++) 803: continue; 804: goto star; 805: 806: case CCHR|STAR: 807: curlp = lp; 808: while (same(*lp, *ep)) 809: lp++; 810: lp++; 811: ep++; 812: goto star; 813: 814: case CCL|STAR: 815: case NCCL|STAR: 816: curlp = lp; 817: while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) 818: continue; 819: ep += *ep; 820: goto star; 821: star: 822: do { 823: lp--; 824: if (lp == locs) 825: break; 826: if (advance(lp, ep)) 827: return (1); 828: } while (lp > curlp); 829: return (0); 830: 831: case CBRC: 832: if (lp == expbuf) 833: continue; 834: if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1])) 835: continue; 836: return (0); 837: 838: case CLET: 839: if (!uletter(*lp) && !isdigit(*lp)) 840: continue; 841: return (0); 842: 843: default: 844: error("Re internal error"); 845: } 846: } 847: 848: cclass(set, c, af) 849: register char *set; 850: register int c; 851: int af; 852: { 853: register int n; 854: 855: if (c == 0) 856: return (0); 857: if (value(IGNORECASE) && isupper(c)) 858: c = tolower(c); 859: n = *set++; 860: while (--n) 861: if (n > 2 && set[1] == '-') { 862: if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) 863: return (af); 864: set += 3; 865: n -= 2; 866: } else 867: if ((*set++ & TRIM) == c) 868: return (af); 869: return (!af); 870: }