1: /************************************************************************* 2: * This program is copyright (C) 1985, 1986 by Jonathan Payne. It is * 3: * provided to you without charge for use only on a licensed Unix * 4: * system. You may copy JOVE provided that this notice is included with * 5: * the copy. You may not sell copies of this program or versions * 6: * modified for use on microcomputer systems, unless the copies are * 7: * included with a Unix system distribution and the source is provided. * 8: *************************************************************************/ 9: 10: /* Search package. */ 11: 12: #include "jove.h" 13: #include "ctype.h" 14: 15: #define NALTS 10 /* number of alternate search strings */ 16: 17: char searchstr[128], 18: compbuf[128], /* global default compbuf */ 19: rep_search[128], /* replace search string */ 20: rep_str[128], /* contains replacement string */ 21: *cur_compb, /* usually points at compbuf */ 22: REbuf[LBSIZE], /* points at line we're scanning */ 23: *alternates[NALTS]; 24: 25: int REdirection; 26: 27: int CaseIgnore = 0, 28: WrapScan = 0, 29: UseRE = 0; 30: 31: #define cind_cmp(a, b) (Upper(a) == Upper(b)) 32: 33: private int REpeekc; 34: private char *REptr; 35: 36: private 37: REgetc() 38: { 39: int c; 40: 41: if ((c = REpeekc) != -1) 42: REpeekc = -1; 43: else if (*REptr) 44: c = *REptr++; 45: else 46: c = 0; 47: 48: return c; 49: } 50: 51: #define STAR 01 /* Match any number of last RE. */ 52: #define AT_BOL 2 /* ^ */ 53: #define AT_EOL 4 /* $ */ 54: #define AT_BOW 6 /* \< */ 55: #define AT_EOW 8 /* \> */ 56: #define OPENP 10 /* \( */ 57: #define CLOSEP 12 /* \) */ 58: #define CURLYB 14 /* \{ */ 59: 60: #define NOSTR 14 /* Codes <= NOSTR can't be *'d. */ 61: 62: #define ANYC NOSTR+2 /* . */ 63: #define NORMC ANYC+2 /* normal character */ 64: #define CINDC NORMC+2 /* case independent character */ 65: #define ONE_OF CINDC+2 /* [xxx] */ 66: #define NONE_OF ONE_OF+2 /* [^xxx] */ 67: #define BACKREF NONE_OF+2 /* \# */ 68: #define EOP BACKREF+2 /* end of pattern */ 69: 70: #define NPAR 9 /* [1-9] */ 71: private int nparens; 72: private char *comp_p, 73: **alt_p, 74: **alt_endp; 75: 76: REcompile(pattern, re, into_buf, alt_bufp) 77: char *pattern, 78: *into_buf, 79: **alt_bufp; 80: { 81: REptr = pattern; 82: REpeekc = -1; 83: comp_p = cur_compb = into_buf; 84: alt_p = alt_bufp; 85: alt_endp = alt_p + NALTS; 86: *alt_p++ = comp_p; 87: nparens = 0; 88: (void) do_comp(re ? OKAY_RE : NORM); 89: *alt_p = 0; 90: } 91: 92: /* Compile the pattern into an internal code. */ 93: 94: private 95: do_comp(kind) 96: { 97: char *last_p; 98: int parens[NPAR], 99: *parenp, 100: c, 101: ret_code; 102: 103: parenp = parens; 104: last_p = 0; 105: ret_code = 1; 106: 107: while (c = REgetc()) { 108: if (comp_p > &cur_compb[(sizeof compbuf) - 4]) 109: complain("Search string too long."); 110: if (c != '*') 111: last_p = comp_p; 112: if (kind == NORM && index(".[*", c) != 0) 113: goto defchar; 114: switch (c) { 115: case '\\': 116: switch (c = REgetc()) { 117: case 0: 118: complain("Premature end of pattern."); 119: 120: case '{': 121: { 122: char *wcntp; /* Word count. */ 123: 124: *comp_p++ = CURLYB; 125: wcntp = comp_p; 126: *comp_p++ = 0; 127: for (;;) { 128: int comp_val; 129: char *comp_len; 130: 131: comp_len = comp_p++; 132: comp_val = do_comp(IN_CB); 133: *comp_len = comp_p - comp_len; 134: (*wcntp)++; 135: if (comp_val == 0) 136: break; 137: } 138: continue; 139: } 140: 141: case '}': 142: if (kind != IN_CB) 143: complain("Unexpected \}."); 144: ret_code = 0; 145: goto outahere; 146: 147: case '(': 148: if (nparens >= NPAR) 149: complain("Too many ('s; max is %d.", NPAR); 150: *comp_p++ = OPENP; 151: *comp_p++ = nparens; 152: *parenp++ = nparens++; 153: continue; 154: 155: case ')': 156: if (parenp == parens) 157: complain("Too many )'s."); 158: *comp_p++ = CLOSEP; 159: *comp_p++ = *--parenp; 160: continue; 161: 162: case '|': 163: if (alt_p >= alt_endp) 164: complain("Too many alternates; max %d.", NALTS); 165: *comp_p++ = EOP; 166: *alt_p++ = comp_p; 167: continue; 168: 169: case '1': 170: case '2': 171: case '3': 172: case '4': 173: case '5': 174: case '6': 175: case '7': 176: case '8': 177: case '9': 178: *comp_p++ = BACKREF; 179: *comp_p++ = c - '1'; 180: continue; 181: 182: case '<': 183: *comp_p++ = AT_BOW; 184: continue; 185: 186: case '>': 187: *comp_p++ = AT_EOW; 188: continue; 189: 190: default: 191: goto defchar; 192: } 193: 194: case ',': 195: if (kind != IN_CB) 196: goto defchar; 197: goto outahere; 198: 199: case '.': 200: *comp_p++ = ANYC; 201: continue; 202: 203: case '^': 204: if (comp_p == cur_compb || comp_p[-1] == EOP) { 205: *comp_p++ = AT_BOL; 206: continue; 207: } 208: goto defchar; 209: 210: case '$': 211: if ((REpeekc = REgetc()) != 0 && REpeekc != '\\') 212: goto defchar; 213: *comp_p++ = AT_EOL; 214: continue; 215: 216: case '[': 217: { 218: int chrcnt; 219: 220: *comp_p++ = ONE_OF; 221: bzero(comp_p, 16); 222: if ((REpeekc = REgetc()) == '^') { 223: *last_p = NONE_OF; 224: /* Get it for real this time. */ 225: (void) REgetc(); 226: } 227: chrcnt = 1; 228: while ((c = REgetc()) != ']' && c != 0) { 229: if (c == '\\') 230: c = REgetc(); 231: else if ((REpeekc = REgetc()) == '-') { 232: int c2; 233: 234: (void) REgetc(); /* read '-' */ 235: c2 = REgetc(); 236: while (c < c2) { 237: comp_p[c/8] |= (1 << (c%8)); 238: c++; 239: } 240: } 241: comp_p[c/8] |= (1 << (c%8)); 242: chrcnt++; 243: } 244: if (c == 0) 245: complain("Missing ]."); 246: if (chrcnt == 1) 247: complain("Empty []."); 248: comp_p += 16; 249: continue; 250: } 251: 252: case '*': 253: if (last_p == 0 || *last_p <= NOSTR) 254: goto defchar; 255: *last_p |= STAR; 256: continue; 257: 258: default: 259: defchar: *comp_p++ = (CaseIgnore) ? CINDC : NORMC; 260: *comp_p++ = c; 261: } 262: } 263: outahere: 264: /* End of pattern, let's do some error checking. */ 265: if (parenp != parens) 266: complain("Unmatched ()'s."); 267: if (kind == IN_CB && c == 0) /* End of pattern with \}. */ 268: complain("Missing \}."); 269: *comp_p++ = EOP; 270: 271: return ret_code; 272: } 273: 274: private char *pstrtlst[NPAR], /* index into REbuf */ 275: *pendlst[NPAR], 276: *REbolp, 277: *locs, 278: *loc1, 279: *loc2; 280: 281: int REbom, 282: REeom, /* beginning and end of match */ 283: REalt_num; /* if alternatives, which one matched? */ 284: 285: private 286: backref(n, linep) 287: register char *linep; 288: { 289: register char *backsp, 290: *backep; 291: 292: backsp = pstrtlst[n]; 293: backep = pendlst[n]; 294: while (*backsp++ == *linep++) 295: if (backsp >= backep) 296: return 1; 297: return 0; 298: } 299: 300: private 301: member(comp_p, c, af) 302: register char *comp_p; 303: register int c; 304: { 305: register int n; 306: char *base; 307: 308: if (c == 0) 309: return 0; /* try to match EOL always fails */ 310: if (comp_p[c/8] & (1 << (c%8))) 311: return af; 312: return !af; 313: } 314: 315: private 316: REmatch(linep, comp_p) 317: register char *linep, 318: *comp_p; 319: { 320: char *first_p = linep; 321: register int n; 322: 323: for (;;) switch (*comp_p++) { 324: case NORMC: 325: if (*linep++ == *comp_p++) 326: continue; 327: return 0; 328: 329: case CINDC: /* case independent comparison */ 330: if (cind_cmp(*linep++, *comp_p++)) 331: continue; 332: return 0; 333: 334: case EOP: 335: loc2 = linep; 336: REeom = (loc2 - REbolp); 337: return 1; /* Success! */ 338: 339: case AT_BOL: 340: if (linep == REbolp) 341: continue; 342: return 0; 343: 344: case AT_EOL: 345: if (*linep == 0) 346: continue; 347: return 0; 348: 349: case ANYC: 350: if (*linep++ != 0) 351: continue; 352: return 0; 353: 354: case AT_BOW: 355: if (ismword(*linep) && (linep == REbolp || !ismword(linep[-1]))) 356: continue; 357: return 0; 358: 359: case AT_EOW: 360: if ((*linep == 0 || !ismword(*linep)) && 361: (linep != REbolp && ismword(linep[-1]))) 362: continue; 363: return 0; 364: 365: case ONE_OF: 366: case NONE_OF: 367: if (member(comp_p, *linep++, comp_p[-1] == ONE_OF)) { 368: comp_p += 16; 369: continue; 370: } 371: return 0; 372: 373: case OPENP: 374: pstrtlst[*comp_p++] = linep; 375: continue; 376: 377: case CLOSEP: 378: pendlst[*comp_p++] = linep; 379: continue; 380: 381: case BACKREF: 382: if (pstrtlst[n = *comp_p++] == 0) 383: complain("\\%d was not specified.", n + 1); 384: if (backref(n, linep)) { 385: linep += pendlst[n] - pstrtlst[n]; 386: continue; 387: } 388: return 0; 389: 390: case CURLYB: 391: { 392: int wcnt, 393: any; 394: 395: wcnt = *comp_p++; 396: any = 0; 397: 398: while (--wcnt >= 0) { 399: if (any == 0) 400: any = REmatch(linep, comp_p + 1); 401: comp_p += *comp_p; 402: } 403: if (any == 0) 404: return 0; 405: linep = loc2; 406: continue; 407: } 408: 409: case ANYC | STAR: 410: first_p = linep; 411: while (*linep++) 412: ; 413: goto star; 414: 415: case NORMC | STAR: 416: first_p = linep; 417: while (*comp_p == *linep++) 418: ; 419: comp_p++; 420: goto star; 421: 422: case CINDC | STAR: 423: first_p = linep; 424: while (cind_cmp(*comp_p, *linep++)) 425: ; 426: comp_p++; 427: goto star; 428: 429: case ONE_OF | STAR: 430: case NONE_OF | STAR: 431: first_p = linep; 432: while (member(comp_p, *linep++, comp_p[-1] == (ONE_OF | STAR))) 433: ; 434: comp_p += 16; 435: goto star; 436: 437: case BACKREF | STAR: 438: first_p = linep; 439: n = *comp_p++; 440: while (backref(n, linep)) 441: linep += pendlst[n] - pstrtlst[n]; 442: while (linep >= first_p) { 443: if (REmatch(linep, comp_p)) 444: return 1; 445: linep -= pendlst[n] - pstrtlst[n]; 446: } 447: continue; 448: 449: star: do { 450: linep--; 451: if (linep < locs) 452: break; 453: if (REmatch(linep, comp_p)) 454: return 1; 455: } while (linep > first_p); 456: return 0; 457: 458: default: 459: complain("RE error match (%d).", comp_p[-1]); 460: } 461: /* NOTREACHED. */ 462: } 463: 464: private 465: REreset() 466: { 467: register int i; 468: 469: for (i = 0; i < NPAR; i++) 470: pstrtlst[i] = pendlst[i] = 0; 471: } 472: 473: /* Index LINE at OFFSET, the compiled EXPR, with alternates ALTS. If 474: lbuf_okay is nonzero it's okay to use linebuf if LINE is the current 475: line. This should save lots of time in things like paren matching in 476: LISP mode. Saves all that copying from linebuf to REbuf. substitute() 477: is the guy who calls re_lindex with lbuf_okay as 0, since the substitution 478: gets placed in linebuf ... doesn't work too well when the source and 479: destination strings are the same. I hate all these arguments! */ 480: 481: re_lindex(line, offset, expr, alts, lbuf_okay) 482: Line *line; 483: char *expr, 484: **alts; 485: { 486: int isquick; 487: register int firstc, 488: c; 489: register char *resp; 490: 491: REreset(); 492: if (lbuf_okay) { 493: REbolp = lbptr(line); 494: if (offset == -1) 495: offset = strlen(REbolp); /* arg! */ 496: } else { 497: REbolp = ltobuf(line, REbuf); 498: if (offset == -1) { /* Reverse search, find end of line. */ 499: extern int Jr_Len; 500: 501: offset = Jr_Len; /* Just Read Len. */ 502: } 503: } 504: resp = REbolp; 505: isquick = (expr[0] == NORMC && alternates[1] == 0); 506: if (isquick) 507: firstc = expr[1]; 508: locs = REbolp + offset; 509: 510: if (REdirection == FORWARD) { 511: do { 512: char **altp = alts; 513: 514: if (isquick) { 515: while ((c = *locs++) != 0 && c != firstc) 516: ; 517: if (*--locs == 0) 518: break; 519: } 520: REalt_num = 1; 521: while (*altp) { 522: if (REmatch(locs, *altp++)) { 523: loc1 = locs; 524: REbom = loc1 - REbolp; 525: return 1; 526: } 527: REalt_num++; 528: } 529: } while (*locs++); 530: } else { 531: do { 532: char **altp = alts; 533: 534: if (isquick) { 535: while (locs >= REbolp && *locs-- != firstc) 536: ; 537: if (*++locs != firstc) 538: break; 539: } 540: REalt_num = 1; 541: while (*altp) { 542: if (REmatch(locs, *altp++)) { 543: loc1 = locs; 544: REbom = loc1 - REbolp; 545: return 1; 546: } 547: REalt_num++; 548: } 549: } while (--locs >= resp); 550: } 551: 552: return 0; 553: } 554: 555: int okay_wrap = 0; /* Do a wrap search ... not when we're 556: parsing errors ... */ 557: 558: Bufpos * 559: dosearch(pattern, dir, re) 560: char *pattern; 561: { 562: Bufpos *pos; 563: 564: if (bobp() && eobp()) /* Can't match! There's no buffer. */ 565: return 0; 566: 567: REcompile(pattern, re, compbuf, alternates); 568: 569: okay_wrap++; 570: pos = docompiled(dir, compbuf, alternates); 571: okay_wrap = 0; 572: return pos; 573: } 574: 575: Bufpos * 576: docompiled(dir, expr, alts) 577: char *expr, 578: **alts; 579: { 580: static Bufpos ret; 581: Bufpos push_dot; 582: register Line *lp; 583: register int offset; 584: int we_wrapped = 0; 585: 586: lsave(); 587: /* Search now lsave()'s so it doesn't make any assumptions on 588: whether the the contents of curline/curchar are in linebuf. 589: Nowhere does search write all over linebuf. However, we have to 590: be careful about what calls we make here, because many of them 591: assume (and rightly so) that curline is in linebuf. */ 592: 593: REdirection = dir; 594: DOTsave(&push_dot); 595: if (dir == BACKWARD) { 596: if (bobp()) { 597: if (okay_wrap && WrapScan) { 598: lp = curline; 599: goto doit; 600: } 601: return 0; 602: } 603: /* here we simulate BackChar() */ 604: if (bolp()) { 605: curline = curline->l_prev; 606: curchar = strlen(lbptr(curline)); 607: } else 608: --curchar; 609: } else if (dir == FORWARD && (lbptr(curline)[curchar] == '\0') && !lastp(curline)) { 610: curline = curline->l_next; 611: curchar = 0; 612: } 613: lp = curline; 614: offset = curchar; 615: 616: do { 617: if (re_lindex(lp, offset, expr, alts, 1)) 618: break; 619: doit: lp = (dir == FORWARD) ? lp->l_next : lp->l_prev; 620: if (lp == 0) { 621: if (okay_wrap && WrapScan) { 622: lp = (dir == FORWARD) ? 623: curbuf->b_first : curbuf->b_last; 624: we_wrapped++; 625: } else 626: break; 627: } 628: if (dir == FORWARD) 629: offset = 0; 630: else 631: offset = -1; /* Signals re_lindex ... */ 632: 633: } while (lp != push_dot.p_line); 634: 635: if (lp == push_dot.p_line && we_wrapped) 636: lp = 0; 637: curline = push_dot.p_line; 638: curchar = push_dot.p_char; 639: if (lp == 0) 640: return 0; 641: ret.p_line = lp; 642: ret.p_char = (dir == FORWARD) ? REeom : REbom; 643: return &ret; 644: } 645: 646: private char * 647: insert(off, endp, which) 648: char *off, 649: *endp; 650: { 651: register char *pp; 652: register int n; 653: 654: n = pendlst[which] - pstrtlst[which]; 655: pp = pstrtlst[which]; 656: while (--n >= 0) { 657: *off++ = *pp++; 658: if (off >= endp) 659: len_error(ERROR); 660: } 661: return off; 662: } 663: 664: /* Perform the substitution. If DELP is nonzero the matched string is 665: deleted, i.e., the substitution string is not inserted. */ 666: 667: re_dosub(tobuf, delp) 668: char *tobuf; 669: { 670: register char *tp, 671: *rp, 672: *repp; 673: int c; 674: char *endp; 675: 676: tp = tobuf; 677: endp = tp + LBSIZE; 678: rp = REbuf; 679: repp = rep_str; 680: 681: while (rp < loc1) 682: *tp++ = *rp++; 683: 684: if (!delp) while (c = *repp++) { 685: if (c == '\\' && (c = *repp++) >= '1' && c <= nparens + '1') { 686: tp = insert(tp, endp, c - '1'); 687: continue; 688: } 689: *tp++ = c; 690: if (tp >= endp) 691: len_error(ERROR); 692: } 693: rp = loc2; 694: loc2 = REbuf + max(1, tp - tobuf); 695: REeom = loc2 - REbuf; 696: /* At least one character past the match, to prevent an infinite 697: number of replacements in the same position, e.g., 698: replace "^" with "". */ 699: while (*tp++ = *rp++) 700: if (tp >= endp) 701: len_error(ERROR); 702: } 703: 704: putmatch(which, buf, size) 705: char *buf; 706: { 707: *(insert(buf, buf + size, which - 1)) = 0; 708: } 709: 710: setsearch(str) 711: char *str; 712: { 713: strcpy(searchstr, str); 714: } 715: 716: char * 717: getsearch() 718: { 719: return searchstr; 720: } 721: 722: RErecur() 723: { 724: char sbuf[sizeof searchstr], 725: cbuf[sizeof compbuf], 726: repbuf[sizeof rep_str], 727: *altbuf[NALTS]; 728: int npars; 729: Mark *m = MakeMark(curline, REbom, FLOATER); 730: 731: message("Type C-X C-C to continue with query replace."); 732: 733: npars = nparens; 734: byte_copy(compbuf, cbuf, sizeof compbuf); 735: byte_copy(searchstr, sbuf, sizeof searchstr); 736: byte_copy(rep_str, repbuf, sizeof rep_str); 737: byte_copy((char *) alternates, (char *) altbuf, sizeof alternates); 738: Recur(); 739: nparens = npars; 740: byte_copy(cbuf, compbuf, sizeof compbuf); 741: byte_copy(sbuf, searchstr, sizeof searchstr); 742: byte_copy(repbuf, rep_str, sizeof rep_str); 743: byte_copy((char *) altbuf, (char *) alternates, sizeof alternates); 744: if (!exp_p) 745: ToMark(m); 746: DelMark(m); 747: } 748: 749: ForSearch() 750: { 751: search(FORWARD, UseRE); 752: } 753: 754: RevSearch() 755: { 756: search(BACKWARD, UseRE); 757: } 758: 759: private 760: search(dir, re) 761: { 762: Bufpos *newdot; 763: 764: setsearch(ask(searchstr, ProcFmt)); 765: if ((newdot = dosearch(searchstr, dir, re)) == 0) { 766: if (WrapScan) 767: complain("No \"%s\" in buffer.", searchstr); 768: else 769: complain("No \"%s\" found to %s.", searchstr, 770: (dir == FORWARD) ? "bottom" : "top"); 771: } 772: PushPntp(newdot->p_line); 773: SetDot(newdot); 774: } 775: 776: /* Do we match PATTERN at OFFSET in BUF? */ 777: 778: LookingAt(pattern, buf, offset) 779: char *pattern, 780: *buf; 781: { 782: register char **alt = alternates; 783: 784: REcompile(pattern, 1, compbuf, alternates); 785: REreset(); 786: locs = buf + offset; 787: REbolp = buf; 788: 789: while (*alt) 790: if (REmatch(locs, *alt++)) 791: return 1; 792: return 0; 793: } 794: 795: look_at(expr) 796: char *expr; 797: { 798: REcompile(expr, 0, compbuf, alternates); 799: REreset(); 800: locs = linebuf + curchar; 801: REbolp = linebuf; 802: if (REmatch(locs, alternates[0])) 803: return 1; 804: return 0; 805: }