1: /* 2: * Copyright (c) 1980 Regents of the University of California. 3: * All rights reserved. The Berkeley software License Agreement 4: * specifies the terms and conditions for redistribution. 5: */ 6: 7: #ifndef lint 8: static char *sccsid = "@(#)ex_re.c 7.5 (Berkeley) 6/7/85"; 9: #endif not lint 10: 11: #include "ex.h" 12: #include "ex_re.h" 13: 14: /* 15: * Global, substitute and regular expressions. 16: * Very similar to ed, with some re extensions and 17: * confirmed substitute. 18: */ 19: global(k) 20: bool k; 21: { 22: register char *gp; 23: register int c; 24: register line *a1; 25: char globuf[GBSIZE], *Cwas; 26: int lines = lineDOL(); 27: int oinglobal = inglobal; 28: char *oglobp = globp; 29: 30: Cwas = Command; 31: /* 32: * States of inglobal: 33: * 0: ordinary - not in a global command. 34: * 1: text coming from some buffer, not tty. 35: * 2: like 1, but the source of the buffer is a global command. 36: * Hence you're only in a global command if inglobal==2. This 37: * strange sounding convention is historically derived from 38: * everybody simulating a global command. 39: */ 40: if (inglobal==2) 41: error("Global within global@not allowed"); 42: markDOT(); 43: setall(); 44: nonzero(); 45: if (skipend()) 46: error("Global needs re|Missing regular expression for global"); 47: c = getchar(); 48: ignore(compile(c, 1)); 49: savere(scanre); 50: gp = globuf; 51: while ((c = getchar()) != '\n') { 52: switch (c) { 53: 54: case EOF: 55: c = '\n'; 56: goto brkwh; 57: 58: case '\\': 59: c = getchar(); 60: switch (c) { 61: 62: case '\\': 63: ungetchar(c); 64: break; 65: 66: case '\n': 67: break; 68: 69: default: 70: *gp++ = '\\'; 71: break; 72: } 73: break; 74: } 75: *gp++ = c; 76: if (gp >= &globuf[GBSIZE - 2]) 77: error("Global command too long"); 78: } 79: brkwh: 80: ungetchar(c); 81: out: 82: newline(); 83: *gp++ = c; 84: *gp++ = 0; 85: saveall(); 86: inglobal = 2; 87: for (a1 = one; a1 <= dol; a1++) { 88: *a1 &= ~01; 89: if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 90: *a1 |= 01; 91: } 92: #ifdef notdef 93: /* 94: * This code is commented out for now. The problem is that we don't 95: * fix up the undo area the way we should. Basically, I think what has 96: * to be done is to copy the undo area down (since we shrunk everything) 97: * and move the various pointers into it down too. I will do this later 98: * when I have time. (Mark, 10-20-80) 99: */ 100: /* 101: * Special case: g/.../d (avoid n^2 algorithm) 102: */ 103: if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 104: gdelete(); 105: return; 106: } 107: #endif 108: if (inopen) 109: inopen = -1; 110: /* 111: * Now for each marked line, set dot there and do the commands. 112: * Note the n^2 behavior here for lots of lines matching. 113: * This is really needed: in some cases you could delete lines, 114: * causing a marked line to be moved before a1 and missed if 115: * we didn't restart at zero each time. 116: */ 117: for (a1 = one; a1 <= dol; a1++) { 118: if (*a1 & 01) { 119: *a1 &= ~01; 120: dot = a1; 121: globp = globuf; 122: commands(1, 1); 123: a1 = zero; 124: } 125: } 126: globp = oglobp; 127: inglobal = oinglobal; 128: endline = 1; 129: Command = Cwas; 130: netchHAD(lines); 131: setlastchar(EOF); 132: if (inopen) { 133: ungetchar(EOF); 134: inopen = 1; 135: } 136: } 137: 138: /* 139: * gdelete: delete inside a global command. Handles the 140: * special case g/r.e./d. All lines to be deleted have 141: * already been marked. Squeeze the remaining lines together. 142: * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 143: * and g/r.e./.,/r.e.2/d are not treated specially. There is no 144: * good reason for this except the question: where to you draw the line? 145: */ 146: gdelete() 147: { 148: register line *a1, *a2, *a3; 149: 150: a3 = dol; 151: /* find first marked line. can skip all before it */ 152: for (a1=zero; (*a1&01)==0; a1++) 153: if (a1>=a3) 154: return; 155: /* copy down unmarked lines, compacting as we go. */ 156: for (a2=a1+1; a2<=a3;) { 157: if (*a2&01) { 158: a2++; /* line is marked, skip it */ 159: dot = a1; /* dot left after line deletion */ 160: } else 161: *a1++ = *a2++; /* unmarked, copy it */ 162: } 163: dol = a1-1; 164: if (dot>dol) 165: dot = dol; 166: change(); 167: } 168: 169: bool cflag; 170: int scount, slines, stotal; 171: 172: substitute(c) 173: int c; 174: { 175: register line *addr; 176: register int n; 177: int gsubf, hopcount; 178: 179: gsubf = compsub(c); 180: if(FIXUNDO) 181: save12(), undkind = UNDCHANGE; 182: stotal = 0; 183: slines = 0; 184: for (addr = addr1; addr <= addr2; addr++) { 185: scount = hopcount = 0; 186: if (dosubcon(0, addr) == 0) 187: continue; 188: if (gsubf) { 189: /* 190: * The loop can happen from s/\</&/g 191: * but we don't want to break other, reasonable cases. 192: */ 193: while (*loc2) { 194: if (++hopcount > sizeof linebuf) 195: error("substitution loop"); 196: if (dosubcon(1, addr) == 0) 197: break; 198: } 199: } 200: if (scount) { 201: stotal += scount; 202: slines++; 203: putmark(addr); 204: n = append(getsub, addr); 205: addr += n; 206: addr2 += n; 207: } 208: } 209: if (stotal == 0 && !inglobal && !cflag) 210: error("Fail|Substitute pattern match failed"); 211: snote(stotal, slines); 212: return (stotal); 213: } 214: 215: compsub(ch) 216: { 217: register int seof, c, uselastre; 218: static int gsubf; 219: 220: if (!value(EDCOMPATIBLE)) 221: gsubf = cflag = 0; 222: uselastre = 0; 223: switch (ch) { 224: 225: case 's': 226: ignore(skipwh()); 227: seof = getchar(); 228: if (endcmd(seof) || any(seof, "gcr")) { 229: ungetchar(seof); 230: goto redo; 231: } 232: if (isalpha(seof) || isdigit(seof)) 233: error("Substitute needs re|Missing regular expression for substitute"); 234: seof = compile(seof, 1); 235: uselastre = 1; 236: comprhs(seof); 237: gsubf = 0; 238: cflag = 0; 239: break; 240: 241: case '~': 242: uselastre = 1; 243: /* fall into ... */ 244: case '&': 245: redo: 246: if (re.Expbuf[0] == 0) 247: error("No previous re|No previous regular expression"); 248: if (subre.Expbuf[0] == 0) 249: error("No previous substitute re|No previous substitute to repeat"); 250: break; 251: } 252: for (;;) { 253: c = getchar(); 254: switch (c) { 255: 256: case 'g': 257: gsubf = !gsubf; 258: continue; 259: 260: case 'c': 261: cflag = !cflag; 262: continue; 263: 264: case 'r': 265: uselastre = 1; 266: continue; 267: 268: default: 269: ungetchar(c); 270: setcount(); 271: newline(); 272: if (uselastre) 273: savere(subre); 274: else 275: resre(subre); 276: return (gsubf); 277: } 278: } 279: } 280: 281: comprhs(seof) 282: int seof; 283: { 284: register char *rp, *orp; 285: register int c; 286: char orhsbuf[RHSSIZE]; 287: 288: rp = rhsbuf; 289: CP(orhsbuf, rp); 290: for (;;) { 291: c = getchar(); 292: if (c == seof) 293: break; 294: switch (c) { 295: 296: case '\\': 297: c = getchar(); 298: if (c == EOF) { 299: ungetchar(c); 300: break; 301: } 302: if (value(MAGIC)) { 303: /* 304: * When "magic", \& turns into a plain &, 305: * and all other chars work fine quoted. 306: */ 307: if (c != '&') 308: c |= QUOTE; 309: break; 310: } 311: magic: 312: if (c == '~') { 313: for (orp = orhsbuf; *orp; *rp++ = *orp++) 314: if (rp >= &rhsbuf[RHSSIZE - 1]) 315: goto toobig; 316: continue; 317: } 318: c |= QUOTE; 319: break; 320: 321: case '\n': 322: case EOF: 323: if (!(globp && globp[0])) { 324: ungetchar(c); 325: goto endrhs; 326: } 327: 328: case '~': 329: case '&': 330: if (value(MAGIC)) 331: goto magic; 332: break; 333: } 334: if (rp >= &rhsbuf[RHSSIZE - 1]) { 335: toobig: 336: *rp = 0; 337: error("Replacement pattern too long@- limit 256 characters"); 338: } 339: *rp++ = c; 340: } 341: endrhs: 342: *rp++ = 0; 343: } 344: 345: getsub() 346: { 347: register char *p; 348: 349: if ((p = linebp) == 0) 350: return (EOF); 351: strcLIN(p); 352: linebp = 0; 353: return (0); 354: } 355: 356: dosubcon(f, a) 357: bool f; 358: line *a; 359: { 360: 361: if (execute(f, a) == 0) 362: return (0); 363: if (confirmed(a)) { 364: dosub(); 365: scount++; 366: } 367: return (1); 368: } 369: 370: confirmed(a) 371: line *a; 372: { 373: register int c, ch; 374: 375: if (cflag == 0) 376: return (1); 377: pofix(); 378: pline(lineno(a)); 379: if (inopen) 380: putchar('\n' | QUOTE); 381: c = column(loc1 - 1); 382: ugo(c - 1 + (inopen ? 1 : 0), ' '); 383: ugo(column(loc2 - 1) - c, '^'); 384: flush(); 385: ch = c = getkey(); 386: again: 387: if (c == '\r') 388: c = '\n'; 389: if (inopen) 390: putchar(c), flush(); 391: if (c != '\n' && c != EOF) { 392: c = getkey(); 393: goto again; 394: } 395: noteinp(); 396: return (ch == 'y'); 397: } 398: 399: getch() 400: { 401: char c; 402: 403: if (read(2, &c, 1) != 1) 404: return (EOF); 405: return (c & TRIM); 406: } 407: 408: ugo(cnt, with) 409: int with; 410: int cnt; 411: { 412: 413: if (cnt > 0) 414: do 415: putchar(with); 416: while (--cnt > 0); 417: } 418: 419: int casecnt; 420: bool destuc; 421: 422: dosub() 423: { 424: register char *lp, *sp, *rp; 425: int c; 426: 427: lp = linebuf; 428: sp = genbuf; 429: rp = rhsbuf; 430: while (lp < loc1) 431: *sp++ = *lp++; 432: casecnt = 0; 433: while (c = *rp++) { 434: /* ^V <return> from vi to split lines */ 435: if (c == '\r') 436: c = '\n'; 437: 438: if (c & QUOTE) 439: switch (c & TRIM) { 440: 441: case '&': 442: sp = place(sp, loc1, loc2); 443: if (sp == 0) 444: goto ovflo; 445: continue; 446: 447: case 'l': 448: casecnt = 1; 449: destuc = 0; 450: continue; 451: 452: case 'L': 453: casecnt = LBSIZE; 454: destuc = 0; 455: continue; 456: 457: case 'u': 458: casecnt = 1; 459: destuc = 1; 460: continue; 461: 462: case 'U': 463: casecnt = LBSIZE; 464: destuc = 1; 465: continue; 466: 467: case 'E': 468: case 'e': 469: casecnt = 0; 470: continue; 471: } 472: if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { 473: sp = place(sp, braslist[c - '1'], braelist[c - '1']); 474: if (sp == 0) 475: goto ovflo; 476: continue; 477: } 478: if (casecnt) 479: *sp++ = fixcase(c & TRIM); 480: else 481: *sp++ = c & TRIM; 482: if (sp >= &genbuf[LBSIZE]) 483: ovflo: 484: error("Line overflow@in substitute"); 485: } 486: lp = loc2; 487: loc2 = sp + (linebuf - genbuf); 488: while (*sp++ = *lp++) 489: if (sp >= &genbuf[LBSIZE]) 490: goto ovflo; 491: strcLIN(genbuf); 492: } 493: 494: fixcase(c) 495: register int c; 496: { 497: 498: if (casecnt == 0) 499: return (c); 500: casecnt--; 501: if (destuc) { 502: if (islower(c)) 503: c = toupper(c); 504: } else 505: if (isupper(c)) 506: c = tolower(c); 507: return (c); 508: } 509: 510: char * 511: place(sp, l1, l2) 512: register char *sp, *l1, *l2; 513: { 514: 515: while (l1 < l2) { 516: *sp++ = fixcase(*l1++); 517: if (sp >= &genbuf[LBSIZE]) 518: return (0); 519: } 520: return (sp); 521: } 522: 523: snote(total, lines) 524: register int total, lines; 525: { 526: 527: if (!notable(total)) 528: return; 529: printf(mesg("%d subs|%d substitutions"), total); 530: if (lines != 1 && lines != total) 531: printf(" on %d lines", lines); 532: noonl(); 533: flush(); 534: } 535: 536: compile(eof, oknl) 537: int eof; 538: int oknl; 539: { 540: register int c; 541: register char *ep; 542: char *lastep; 543: char bracket[NBRA], *bracketp, *rhsp; 544: int cclcnt; 545: 546: if (isalpha(eof) || isdigit(eof)) 547: error("Regular expressions cannot be delimited by letters or digits"); 548: ep = expbuf; 549: c = getchar(); 550: if (eof == '\\') 551: switch (c) { 552: 553: case '/': 554: case '?': 555: if (scanre.Expbuf[0] == 0) 556: error("No previous scan re|No previous scanning regular expression"); 557: resre(scanre); 558: return (c); 559: 560: case '&': 561: if (subre.Expbuf[0] == 0) 562: error("No previous substitute re|No previous substitute regular expression"); 563: resre(subre); 564: return (c); 565: 566: default: 567: error("Badly formed re|Regular expression \\ must be followed by / or ?"); 568: } 569: if (c == eof || c == '\n' || c == EOF) { 570: if (*ep == 0) 571: error("No previous re|No previous regular expression"); 572: if (c == '\n' && oknl == 0) 573: error("Missing closing delimiter@for regular expression"); 574: if (c != eof) 575: ungetchar(c); 576: return (eof); 577: } 578: bracketp = bracket; 579: nbra = 0; 580: circfl = 0; 581: if (c == '^') { 582: c = getchar(); 583: circfl++; 584: } 585: ungetchar(c); 586: for (;;) { 587: if (ep >= &expbuf[ESIZE - 2]) 588: complex: 589: cerror("Re too complex|Regular expression too complicated"); 590: c = getchar(); 591: if (c == eof || c == EOF) { 592: if (bracketp != bracket) 593: cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); 594: *ep++ = CEOFC; 595: if (c == EOF) 596: ungetchar(c); 597: return (eof); 598: } 599: if (value(MAGIC)) { 600: if (c != '*' || ep == expbuf) 601: lastep = ep; 602: } else 603: if (c != '\\' || peekchar() != '*' || ep == expbuf) 604: lastep = ep; 605: switch (c) { 606: 607: case '\\': 608: c = getchar(); 609: switch (c) { 610: 611: case '(': 612: if (nbra >= NBRA) 613: cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); 614: *bracketp++ = nbra; 615: *ep++ = CBRA; 616: *ep++ = nbra++; 617: continue; 618: 619: case ')': 620: if (bracketp <= bracket) 621: cerror("Extra \\)|More \\)'s than \\('s in regular expression"); 622: *ep++ = CKET; 623: *ep++ = *--bracketp; 624: continue; 625: 626: case '<': 627: *ep++ = CBRC; 628: continue; 629: 630: case '>': 631: *ep++ = CLET; 632: continue; 633: } 634: if (value(MAGIC) == 0) 635: magic: 636: switch (c) { 637: 638: case '.': 639: *ep++ = CDOT; 640: continue; 641: 642: case '~': 643: rhsp = rhsbuf; 644: while (*rhsp) { 645: if (*rhsp & QUOTE) { 646: c = *rhsp & TRIM; 647: if (c == '&') 648: error("Replacement pattern contains &@- cannot use in re"); 649: if (c >= '1' && c <= '9') 650: error("Replacement pattern contains \\d@- cannot use in re"); 651: } 652: if (ep >= &expbuf[ESIZE-2]) 653: goto complex; 654: *ep++ = CCHR; 655: *ep++ = *rhsp++ & TRIM; 656: } 657: continue; 658: 659: case '*': 660: if (ep == expbuf) 661: break; 662: if (*lastep == CBRA || *lastep == CKET) 663: cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); 664: if (*lastep == CCHR && (lastep[1] & QUOTE)) 665: cerror("Illegal *|Can't * a \\n in regular expression"); 666: *lastep |= STAR; 667: continue; 668: 669: case '[': 670: *ep++ = CCL; 671: *ep++ = 0; 672: cclcnt = 1; 673: c = getchar(); 674: if (c == '^') { 675: c = getchar(); 676: ep[-2] = NCCL; 677: } 678: if (c == ']') 679: cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); 680: while (c != ']') { 681: if (c == '\\' && any(peekchar(), "]-^\\")) 682: c = getchar() | QUOTE; 683: if (c == '\n' || c == EOF) 684: cerror("Missing ]"); 685: *ep++ = c; 686: cclcnt++; 687: if (ep >= &expbuf[ESIZE]) 688: goto complex; 689: c = getchar(); 690: } 691: lastep[1] = cclcnt; 692: continue; 693: } 694: if (c == EOF) { 695: ungetchar(EOF); 696: c = '\\'; 697: goto defchar; 698: } 699: *ep++ = CCHR; 700: if (c == '\n') 701: cerror("No newlines in re's|Can't escape newlines into regular expressions"); 702: /* 703: if (c < '1' || c > NBRA + '1') { 704: */ 705: *ep++ = c; 706: continue; 707: /* 708: } 709: c -= '1'; 710: if (c >= nbra) 711: cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); 712: *ep++ = c | QUOTE; 713: continue; 714: */ 715: 716: case '\n': 717: if (oknl) { 718: ungetchar(c); 719: *ep++ = CEOFC; 720: return (eof); 721: } 722: cerror("Badly formed re|Missing closing delimiter for regular expression"); 723: 724: case '$': 725: if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { 726: *ep++ = CDOL; 727: continue; 728: } 729: goto defchar; 730: 731: case '.': 732: case '~': 733: case '*': 734: case '[': 735: if (value(MAGIC)) 736: goto magic; 737: defchar: 738: default: 739: *ep++ = CCHR; 740: *ep++ = c; 741: continue; 742: } 743: } 744: } 745: 746: cerror(s) 747: char *s; 748: { 749: 750: expbuf[0] = 0; 751: error(s); 752: } 753: 754: same(a, b) 755: register int a, b; 756: { 757: 758: return (a == b || value(IGNORECASE) && 759: ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); 760: } 761: 762: char *locs; 763: 764: execute(gf, addr) 765: line *addr; 766: { 767: register char *p1, *p2; 768: register int c; 769: 770: if (gf) { 771: if (circfl) 772: return (0); 773: locs = p1 = loc2; 774: } else { 775: if (addr == zero) 776: return (0); 777: p1 = linebuf; 778: getline(*addr); 779: locs = 0; 780: } 781: p2 = expbuf; 782: if (circfl) { 783: loc1 = p1; 784: return (advance(p1, p2)); 785: } 786: /* fast check for first character */ 787: if (*p2 == CCHR) { 788: c = p2[1]; 789: do { 790: if (c != *p1 && (!value(IGNORECASE) || 791: !((islower(c) && toupper(c) == *p1) || 792: (islower(*p1) && toupper(*p1) == c)))) 793: continue; 794: if (advance(p1, p2)) { 795: loc1 = p1; 796: return (1); 797: } 798: } while (*p1++); 799: return (0); 800: } 801: /* regular algorithm */ 802: do { 803: if (advance(p1, p2)) { 804: loc1 = p1; 805: return (1); 806: } 807: } while (*p1++); 808: return (0); 809: } 810: 811: #define uletter(c) (isalpha(c) || c == '_') 812: 813: advance(lp, ep) 814: register char *lp, *ep; 815: { 816: register char *curlp; 817: char *sp, *sp1; 818: int c; 819: 820: for (;;) switch (*ep++) { 821: 822: case CCHR: 823: /* useless 824: if (*ep & QUOTE) { 825: c = *ep++ & TRIM; 826: sp = braslist[c]; 827: sp1 = braelist[c]; 828: while (sp < sp1) { 829: if (!same(*sp, *lp)) 830: return (0); 831: sp++, lp++; 832: } 833: continue; 834: } 835: */ 836: if (!same(*ep, *lp)) 837: return (0); 838: ep++, lp++; 839: continue; 840: 841: case CDOT: 842: if (*lp++) 843: continue; 844: return (0); 845: 846: case CDOL: 847: if (*lp == 0) 848: continue; 849: return (0); 850: 851: case CEOFC: 852: loc2 = lp; 853: return (1); 854: 855: case CCL: 856: if (cclass(ep, *lp++, 1)) { 857: ep += *ep; 858: continue; 859: } 860: return (0); 861: 862: case NCCL: 863: if (cclass(ep, *lp++, 0)) { 864: ep += *ep; 865: continue; 866: } 867: return (0); 868: 869: case CBRA: 870: braslist[*ep++] = lp; 871: continue; 872: 873: case CKET: 874: braelist[*ep++] = lp; 875: continue; 876: 877: case CDOT|STAR: 878: curlp = lp; 879: while (*lp++) 880: continue; 881: goto star; 882: 883: case CCHR|STAR: 884: curlp = lp; 885: while (same(*lp, *ep)) 886: lp++; 887: lp++; 888: ep++; 889: goto star; 890: 891: case CCL|STAR: 892: case NCCL|STAR: 893: curlp = lp; 894: while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) 895: continue; 896: ep += *ep; 897: goto star; 898: star: 899: do { 900: lp--; 901: if (lp == locs) 902: break; 903: if (advance(lp, ep)) 904: return (1); 905: } while (lp > curlp); 906: return (0); 907: 908: case CBRC: 909: if (lp == linebuf) 910: continue; 911: if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1])) 912: continue; 913: return (0); 914: 915: case CLET: 916: if (!uletter(*lp) && !isdigit(*lp)) 917: continue; 918: return (0); 919: 920: default: 921: error("Re internal error"); 922: } 923: } 924: 925: cclass(set, c, af) 926: register char *set; 927: register int c; 928: int af; 929: { 930: register int n; 931: 932: if (c == 0) 933: return (0); 934: if (value(IGNORECASE) && isupper(c)) 935: c = tolower(c); 936: n = *set++; 937: while (--n) 938: if (n > 2 && set[1] == '-') { 939: if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) 940: return (af); 941: set += 3; 942: n -= 2; 943: } else 944: if ((*set++ & TRIM) == c) 945: return (af); 946: return (!af); 947: }