1: /* Copyright (c) 1981 Regents of the University of California */ 2: static char *sccsid = "@(#)ex_re.c 7.2 10/16/81"; 3: #include "ex.h" 4: #include "ex_re.h" 5: 6: /* 7: * Global, substitute and regular expressions. 8: * Very similar to ed, with some re extensions and 9: * confirmed substitute. 10: */ 11: global(k) 12: bool k; 13: { 14: register char *gp; 15: register int c; 16: register line *a1; 17: char globuf[GBSIZE], *Cwas; 18: int lines = lineDOL(); 19: int oinglobal = inglobal; 20: char *oglobp = globp; 21: 22: Cwas = Command; 23: /* 24: * States of inglobal: 25: * 0: ordinary - not in a global command. 26: * 1: text coming from some buffer, not tty. 27: * 2: like 1, but the source of the buffer is a global command. 28: * Hence you're only in a global command if inglobal==2. This 29: * strange sounding convention is historically derived from 30: * everybody simulating a global command. 31: */ 32: if (inglobal==2) 33: error("Global within global@not allowed"); 34: markDOT(); 35: setall(); 36: nonzero(); 37: if (skipend()) 38: error("Global needs re|Missing regular expression for global"); 39: c = getchar(); 40: ignore(compile(c, 1)); 41: savere(scanre); 42: gp = globuf; 43: while ((c = getchar()) != '\n') { 44: switch (c) { 45: 46: case EOF: 47: c = '\n'; 48: goto brkwh; 49: 50: case '\\': 51: c = getchar(); 52: switch (c) { 53: 54: case '\\': 55: ungetchar(c); 56: break; 57: 58: case '\n': 59: break; 60: 61: default: 62: *gp++ = '\\'; 63: break; 64: } 65: break; 66: } 67: *gp++ = c; 68: if (gp >= &globuf[GBSIZE - 2]) 69: error("Global command too long"); 70: } 71: brkwh: 72: ungetchar(c); 73: out: 74: newline(); 75: *gp++ = c; 76: *gp++ = 0; 77: saveall(); 78: inglobal = 2; 79: for (a1 = one; a1 <= dol; a1++) { 80: *a1 &= ~01; 81: if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 82: *a1 |= 01; 83: } 84: #ifdef notdef 85: /* 86: * This code is commented out for now. The problem is that we don't 87: * fix up the undo area the way we should. Basically, I think what has 88: * to be done is to copy the undo area down (since we shrunk everything) 89: * and move the various pointers into it down too. I will do this later 90: * when I have time. (Mark, 10-20-80) 91: */ 92: /* 93: * Special case: g/.../d (avoid n^2 algorithm) 94: */ 95: if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 96: gdelete(); 97: return; 98: } 99: #endif 100: if (inopen) 101: inopen = -1; 102: /* 103: * Now for each marked line, set dot there and do the commands. 104: * Note the n^2 behavior here for lots of lines matching. 105: * This is really needed: in some cases you could delete lines, 106: * causing a marked line to be moved before a1 and missed if 107: * we didn't restart at zero each time. 108: */ 109: for (a1 = one; a1 <= dol; a1++) { 110: if (*a1 & 01) { 111: *a1 &= ~01; 112: dot = a1; 113: globp = globuf; 114: commands(1, 1); 115: a1 = zero; 116: } 117: } 118: globp = oglobp; 119: inglobal = oinglobal; 120: endline = 1; 121: Command = Cwas; 122: netchHAD(lines); 123: setlastchar(EOF); 124: if (inopen) { 125: ungetchar(EOF); 126: inopen = 1; 127: } 128: } 129: 130: /* 131: * gdelete: delete inside a global command. Handles the 132: * special case g/r.e./d. All lines to be deleted have 133: * already been marked. Squeeze the remaining lines together. 134: * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 135: * and g/r.e./.,/r.e.2/d are not treated specially. There is no 136: * good reason for this except the question: where to you draw the line? 137: */ 138: gdelete() 139: { 140: register line *a1, *a2, *a3; 141: 142: a3 = dol; 143: /* find first marked line. can skip all before it */ 144: for (a1=zero; (*a1&01)==0; a1++) 145: if (a1>=a3) 146: return; 147: /* copy down unmarked lines, compacting as we go. */ 148: for (a2=a1+1; a2<=a3;) { 149: if (*a2&01) { 150: a2++; /* line is marked, skip it */ 151: dot = a1; /* dot left after line deletion */ 152: } else 153: *a1++ = *a2++; /* unmarked, copy it */ 154: } 155: dol = a1-1; 156: if (dot>dol) 157: dot = dol; 158: change(); 159: } 160: 161: bool cflag; 162: int scount, slines, stotal; 163: 164: substitute(c) 165: int c; 166: { 167: register line *addr; 168: register int n; 169: int gsubf, hopcount; 170: 171: gsubf = compsub(c); 172: if(FIXUNDO) 173: save12(), undkind = UNDCHANGE; 174: stotal = 0; 175: slines = 0; 176: for (addr = addr1; addr <= addr2; addr++) { 177: scount = hopcount = 0; 178: if (dosubcon(0, addr) == 0) 179: continue; 180: if (gsubf) { 181: /* 182: * The loop can happen from s/\</&/g 183: * but we don't want to break other, reasonable cases. 184: */ 185: while (*loc2) { 186: if (++hopcount > sizeof linebuf) 187: error("substitution loop"); 188: if (dosubcon(1, addr) == 0) 189: break; 190: } 191: } 192: if (scount) { 193: stotal += scount; 194: slines++; 195: putmark(addr); 196: n = append(getsub, addr); 197: addr += n; 198: addr2 += n; 199: } 200: } 201: if (stotal == 0 && !inglobal && !cflag) 202: error("Fail|Substitute pattern match failed"); 203: snote(stotal, slines); 204: return (stotal); 205: } 206: 207: compsub(ch) 208: { 209: register int seof, c, uselastre; 210: static int gsubf; 211: 212: if (!value(EDCOMPATIBLE)) 213: gsubf = cflag = 0; 214: uselastre = 0; 215: switch (ch) { 216: 217: case 's': 218: ignore(skipwh()); 219: seof = getchar(); 220: if (endcmd(seof) || any(seof, "gcr")) { 221: ungetchar(seof); 222: goto redo; 223: } 224: if (isalpha(seof) || isdigit(seof)) 225: error("Substitute needs re|Missing regular expression for substitute"); 226: seof = compile(seof, 1); 227: uselastre = 1; 228: comprhs(seof); 229: gsubf = 0; 230: cflag = 0; 231: break; 232: 233: case '~': 234: uselastre = 1; 235: /* fall into ... */ 236: case '&': 237: redo: 238: if (re.Expbuf[0] == 0) 239: error("No previous re|No previous regular expression"); 240: if (subre.Expbuf[0] == 0) 241: error("No previous substitute re|No previous substitute to repeat"); 242: break; 243: } 244: for (;;) { 245: c = getchar(); 246: switch (c) { 247: 248: case 'g': 249: gsubf = !gsubf; 250: continue; 251: 252: case 'c': 253: cflag = !cflag; 254: continue; 255: 256: case 'r': 257: uselastre = 1; 258: continue; 259: 260: default: 261: ungetchar(c); 262: setcount(); 263: newline(); 264: if (uselastre) 265: savere(subre); 266: else 267: resre(subre); 268: return (gsubf); 269: } 270: } 271: } 272: 273: comprhs(seof) 274: int seof; 275: { 276: register char *rp, *orp; 277: register int c; 278: char orhsbuf[RHSSIZE]; 279: 280: rp = rhsbuf; 281: CP(orhsbuf, rp); 282: for (;;) { 283: c = getchar(); 284: if (c == seof) 285: break; 286: switch (c) { 287: 288: case '\\': 289: c = getchar(); 290: if (c == EOF) { 291: ungetchar(c); 292: break; 293: } 294: if (value(MAGIC)) { 295: /* 296: * When "magic", \& turns into a plain &, 297: * and all other chars work fine quoted. 298: */ 299: if (c != '&') 300: c |= QUOTE; 301: break; 302: } 303: magic: 304: if (c == '~') { 305: for (orp = orhsbuf; *orp; *rp++ = *orp++) 306: if (rp >= &rhsbuf[RHSSIZE - 1]) 307: goto toobig; 308: continue; 309: } 310: c |= QUOTE; 311: break; 312: 313: case '\n': 314: case EOF: 315: if (!(globp && globp[0])) { 316: ungetchar(c); 317: goto endrhs; 318: } 319: 320: case '~': 321: case '&': 322: if (value(MAGIC)) 323: goto magic; 324: break; 325: } 326: if (rp >= &rhsbuf[RHSSIZE - 1]) { 327: toobig: 328: *rp = 0; 329: error("Replacement pattern too long@- limit 256 characters"); 330: } 331: *rp++ = c; 332: } 333: endrhs: 334: *rp++ = 0; 335: } 336: 337: getsub() 338: { 339: register char *p; 340: 341: if ((p = linebp) == 0) 342: return (EOF); 343: strcLIN(p); 344: linebp = 0; 345: return (0); 346: } 347: 348: dosubcon(f, a) 349: bool f; 350: line *a; 351: { 352: 353: if (execute(f, a) == 0) 354: return (0); 355: if (confirmed(a)) { 356: dosub(); 357: scount++; 358: } 359: return (1); 360: } 361: 362: confirmed(a) 363: line *a; 364: { 365: register int c, ch; 366: 367: if (cflag == 0) 368: return (1); 369: pofix(); 370: pline(lineno(a)); 371: if (inopen) 372: putchar('\n' | QUOTE); 373: c = column(loc1 - 1); 374: ugo(c - 1 + (inopen ? 1 : 0), ' '); 375: ugo(column(loc2 - 1) - c, '^'); 376: flush(); 377: ch = c = getkey(); 378: again: 379: if (c == '\r') 380: c = '\n'; 381: if (inopen) 382: putchar(c), flush(); 383: if (c != '\n' && c != EOF) { 384: c = getkey(); 385: goto again; 386: } 387: noteinp(); 388: return (ch == 'y'); 389: } 390: 391: getch() 392: { 393: char c; 394: 395: if (read(2, &c, 1) != 1) 396: return (EOF); 397: return (c & TRIM); 398: } 399: 400: ugo(cnt, with) 401: int with; 402: int cnt; 403: { 404: 405: if (cnt > 0) 406: do 407: putchar(with); 408: while (--cnt > 0); 409: } 410: 411: int casecnt; 412: bool destuc; 413: 414: dosub() 415: { 416: register char *lp, *sp, *rp; 417: int c; 418: 419: lp = linebuf; 420: sp = genbuf; 421: rp = rhsbuf; 422: while (lp < loc1) 423: *sp++ = *lp++; 424: casecnt = 0; 425: while (c = *rp++) { 426: /* ^V <return> from vi to split lines */ 427: if (c == '\r') 428: c = '\n'; 429: 430: if (c & QUOTE) 431: switch (c & TRIM) { 432: 433: case '&': 434: sp = place(sp, loc1, loc2); 435: if (sp == 0) 436: goto ovflo; 437: continue; 438: 439: case 'l': 440: casecnt = 1; 441: destuc = 0; 442: continue; 443: 444: case 'L': 445: casecnt = LBSIZE; 446: destuc = 0; 447: continue; 448: 449: case 'u': 450: casecnt = 1; 451: destuc = 1; 452: continue; 453: 454: case 'U': 455: casecnt = LBSIZE; 456: destuc = 1; 457: continue; 458: 459: case 'E': 460: case 'e': 461: casecnt = 0; 462: continue; 463: } 464: if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { 465: sp = place(sp, braslist[c - '1'], braelist[c - '1']); 466: if (sp == 0) 467: goto ovflo; 468: continue; 469: } 470: if (casecnt) 471: *sp++ = fixcase(c & TRIM); 472: else 473: *sp++ = c & TRIM; 474: if (sp >= &genbuf[LBSIZE]) 475: ovflo: 476: error("Line overflow@in substitute"); 477: } 478: lp = loc2; 479: loc2 = sp + (linebuf - genbuf); 480: while (*sp++ = *lp++) 481: if (sp >= &genbuf[LBSIZE]) 482: goto ovflo; 483: strcLIN(genbuf); 484: } 485: 486: fixcase(c) 487: register int c; 488: { 489: 490: if (casecnt == 0) 491: return (c); 492: casecnt--; 493: if (destuc) { 494: if (islower(c)) 495: c = toupper(c); 496: } else 497: if (isupper(c)) 498: c = tolower(c); 499: return (c); 500: } 501: 502: char * 503: place(sp, l1, l2) 504: register char *sp, *l1, *l2; 505: { 506: 507: while (l1 < l2) { 508: *sp++ = fixcase(*l1++); 509: if (sp >= &genbuf[LBSIZE]) 510: return (0); 511: } 512: return (sp); 513: } 514: 515: snote(total, lines) 516: register int total, lines; 517: { 518: 519: if (!notable(total)) 520: return; 521: printf(mesg("%d subs|%d substitutions"), total); 522: if (lines != 1 && lines != total) 523: printf(" on %d lines", lines); 524: noonl(); 525: flush(); 526: } 527: 528: compile(eof, oknl) 529: int eof; 530: int oknl; 531: { 532: register int c; 533: register char *ep; 534: char *lastep; 535: char bracket[NBRA], *bracketp, *rhsp; 536: int cclcnt; 537: 538: if (isalpha(eof) || isdigit(eof)) 539: error("Regular expressions cannot be delimited by letters or digits"); 540: ep = expbuf; 541: c = getchar(); 542: if (eof == '\\') 543: switch (c) { 544: 545: case '/': 546: case '?': 547: if (scanre.Expbuf[0] == 0) 548: error("No previous scan re|No previous scanning regular expression"); 549: resre(scanre); 550: return (c); 551: 552: case '&': 553: if (subre.Expbuf[0] == 0) 554: error("No previous substitute re|No previous substitute regular expression"); 555: resre(subre); 556: return (c); 557: 558: default: 559: error("Badly formed re|Regular expression \\ must be followed by / or ?"); 560: } 561: if (c == eof || c == '\n' || c == EOF) { 562: if (*ep == 0) 563: error("No previous re|No previous regular expression"); 564: if (c == '\n' && oknl == 0) 565: error("Missing closing delimiter@for regular expression"); 566: if (c != eof) 567: ungetchar(c); 568: return (eof); 569: } 570: bracketp = bracket; 571: nbra = 0; 572: circfl = 0; 573: if (c == '^') { 574: c = getchar(); 575: circfl++; 576: } 577: ungetchar(c); 578: for (;;) { 579: if (ep >= &expbuf[ESIZE - 2]) 580: complex: 581: cerror("Re too complex|Regular expression too complicated"); 582: c = getchar(); 583: if (c == eof || c == EOF) { 584: if (bracketp != bracket) 585: cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); 586: *ep++ = CEOFC; 587: if (c == EOF) 588: ungetchar(c); 589: return (eof); 590: } 591: if (value(MAGIC)) { 592: if (c != '*' || ep == expbuf) 593: lastep = ep; 594: } else 595: if (c != '\\' || peekchar() != '*' || ep == expbuf) 596: lastep = ep; 597: switch (c) { 598: 599: case '\\': 600: c = getchar(); 601: switch (c) { 602: 603: case '(': 604: if (nbra >= NBRA) 605: cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); 606: *bracketp++ = nbra; 607: *ep++ = CBRA; 608: *ep++ = nbra++; 609: continue; 610: 611: case ')': 612: if (bracketp <= bracket) 613: cerror("Extra \\)|More \\)'s than \\('s in regular expression"); 614: *ep++ = CKET; 615: *ep++ = *--bracketp; 616: continue; 617: 618: case '<': 619: *ep++ = CBRC; 620: continue; 621: 622: case '>': 623: *ep++ = CLET; 624: continue; 625: } 626: if (value(MAGIC) == 0) 627: magic: 628: switch (c) { 629: 630: case '.': 631: *ep++ = CDOT; 632: continue; 633: 634: case '~': 635: rhsp = rhsbuf; 636: while (*rhsp) { 637: if (*rhsp & QUOTE) { 638: c = *rhsp & TRIM; 639: if (c == '&') 640: error("Replacement pattern contains &@- cannot use in re"); 641: if (c >= '1' && c <= '9') 642: error("Replacement pattern contains \\d@- cannot use in re"); 643: } 644: if (ep >= &expbuf[ESIZE-2]) 645: goto complex; 646: *ep++ = CCHR; 647: *ep++ = *rhsp++ & TRIM; 648: } 649: continue; 650: 651: case '*': 652: if (ep == expbuf) 653: break; 654: if (*lastep == CBRA || *lastep == CKET) 655: cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); 656: if (*lastep == CCHR && (lastep[1] & QUOTE)) 657: cerror("Illegal *|Can't * a \\n in regular expression"); 658: *lastep |= STAR; 659: continue; 660: 661: case '[': 662: *ep++ = CCL; 663: *ep++ = 0; 664: cclcnt = 1; 665: c = getchar(); 666: if (c == '^') { 667: c = getchar(); 668: ep[-2] = NCCL; 669: } 670: if (c == ']') 671: cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); 672: while (c != ']') { 673: if (c == '\\' && any(peekchar(), "]-^\\")) 674: c = getchar() | QUOTE; 675: if (c == '\n' || c == EOF) 676: cerror("Missing ]"); 677: *ep++ = c; 678: cclcnt++; 679: if (ep >= &expbuf[ESIZE]) 680: goto complex; 681: c = getchar(); 682: } 683: lastep[1] = cclcnt; 684: continue; 685: } 686: if (c == EOF) { 687: ungetchar(EOF); 688: c = '\\'; 689: goto defchar; 690: } 691: *ep++ = CCHR; 692: if (c == '\n') 693: cerror("No newlines in re's|Can't escape newlines into regular expressions"); 694: /* 695: if (c < '1' || c > NBRA + '1') { 696: */ 697: *ep++ = c; 698: continue; 699: /* 700: } 701: c -= '1'; 702: if (c >= nbra) 703: cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); 704: *ep++ = c | QUOTE; 705: continue; 706: */ 707: 708: case '\n': 709: if (oknl) { 710: ungetchar(c); 711: *ep++ = CEOFC; 712: return (eof); 713: } 714: cerror("Badly formed re|Missing closing delimiter for regular expression"); 715: 716: case '$': 717: if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { 718: *ep++ = CDOL; 719: continue; 720: } 721: goto defchar; 722: 723: case '.': 724: case '~': 725: case '*': 726: case '[': 727: if (value(MAGIC)) 728: goto magic; 729: defchar: 730: default: 731: *ep++ = CCHR; 732: *ep++ = c; 733: continue; 734: } 735: } 736: } 737: 738: cerror(s) 739: char *s; 740: { 741: 742: expbuf[0] = 0; 743: error(s); 744: } 745: 746: same(a, b) 747: register int a, b; 748: { 749: 750: return (a == b || value(IGNORECASE) && 751: ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); 752: } 753: 754: char *locs; 755: 756: execute(gf, addr) 757: line *addr; 758: { 759: register char *p1, *p2; 760: register int c; 761: 762: if (gf) { 763: if (circfl) 764: return (0); 765: locs = p1 = loc2; 766: } else { 767: if (addr == zero) 768: return (0); 769: p1 = linebuf; 770: getline(*addr); 771: locs = 0; 772: } 773: p2 = expbuf; 774: if (circfl) { 775: loc1 = p1; 776: return (advance(p1, p2)); 777: } 778: /* fast check for first character */ 779: if (*p2 == CCHR) { 780: c = p2[1]; 781: do { 782: if (c != *p1 && (!value(IGNORECASE) || 783: !((islower(c) && toupper(c) == *p1) || 784: (islower(*p1) && toupper(*p1) == c)))) 785: continue; 786: if (advance(p1, p2)) { 787: loc1 = p1; 788: return (1); 789: } 790: } while (*p1++); 791: return (0); 792: } 793: /* regular algorithm */ 794: do { 795: if (advance(p1, p2)) { 796: loc1 = p1; 797: return (1); 798: } 799: } while (*p1++); 800: return (0); 801: } 802: 803: #define uletter(c) (isalpha(c) || c == '_') 804: 805: advance(lp, ep) 806: register char *lp, *ep; 807: { 808: register char *curlp; 809: char *sp, *sp1; 810: int c; 811: 812: for (;;) switch (*ep++) { 813: 814: case CCHR: 815: /* useless 816: if (*ep & QUOTE) { 817: c = *ep++ & TRIM; 818: sp = braslist[c]; 819: sp1 = braelist[c]; 820: while (sp < sp1) { 821: if (!same(*sp, *lp)) 822: return (0); 823: sp++, lp++; 824: } 825: continue; 826: } 827: */ 828: if (!same(*ep, *lp)) 829: return (0); 830: ep++, lp++; 831: continue; 832: 833: case CDOT: 834: if (*lp++) 835: continue; 836: return (0); 837: 838: case CDOL: 839: if (*lp == 0) 840: continue; 841: return (0); 842: 843: case CEOFC: 844: loc2 = lp; 845: return (1); 846: 847: case CCL: 848: if (cclass(ep, *lp++, 1)) { 849: ep += *ep; 850: continue; 851: } 852: return (0); 853: 854: case NCCL: 855: if (cclass(ep, *lp++, 0)) { 856: ep += *ep; 857: continue; 858: } 859: return (0); 860: 861: case CBRA: 862: braslist[*ep++] = lp; 863: continue; 864: 865: case CKET: 866: braelist[*ep++] = lp; 867: continue; 868: 869: case CDOT|STAR: 870: curlp = lp; 871: while (*lp++) 872: continue; 873: goto star; 874: 875: case CCHR|STAR: 876: curlp = lp; 877: while (same(*lp, *ep)) 878: lp++; 879: lp++; 880: ep++; 881: goto star; 882: 883: case CCL|STAR: 884: case NCCL|STAR: 885: curlp = lp; 886: while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) 887: continue; 888: ep += *ep; 889: goto star; 890: star: 891: do { 892: lp--; 893: if (lp == locs) 894: break; 895: if (advance(lp, ep)) 896: return (1); 897: } while (lp > curlp); 898: return (0); 899: 900: case CBRC: 901: if (lp == expbuf) 902: continue; 903: if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1])) 904: continue; 905: return (0); 906: 907: case CLET: 908: if (!uletter(*lp) && !isdigit(*lp)) 909: continue; 910: return (0); 911: 912: default: 913: error("Re internal error"); 914: } 915: } 916: 917: cclass(set, c, af) 918: register char *set; 919: register int c; 920: int af; 921: { 922: register int n; 923: 924: if (c == 0) 925: return (0); 926: if (value(IGNORECASE) && isupper(c)) 927: c = tolower(c); 928: n = *set++; 929: while (--n) 930: if (n > 2 && set[1] == '-') { 931: if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) 932: return (af); 933: set += 3; 934: n -= 2; 935: } else 936: if ((*set++ & TRIM) == c) 937: return (af); 938: return (!af); 939: }