1: /* 2: * Copyright (c) 1980 Regents of the University of California. 3: * All rights reserved. The Berkeley Software License Agreement 4: * specifies the terms and conditions for redistribution. 5: */ 6: 7: #ifndef lint 8: static char *sccsid = "@(#)sh.dol.c 5.3 (Berkeley) 3/29/86"; 9: #endif 10: 11: #include "sh.h" 12: 13: /* 14: * C shell 15: */ 16: 17: /* 18: * These routines perform variable substitution and quoting via ' and ". 19: * To this point these constructs have been preserved in the divided 20: * input words. Here we expand variables and turn quoting via ' and " into 21: * QUOTE bits on characters (which prevent further interpretation). 22: * If the `:q' modifier was applied during history expansion, then 23: * some QUOTEing may have occurred already, so we dont "trim()" here. 24: */ 25: 26: int Dpeekc, Dpeekrd; /* Peeks for DgetC and Dreadc */ 27: char *Dcp, **Dvp; /* Input vector for Dreadc */ 28: 29: #define DEOF -1 30: 31: #define unDgetC(c) Dpeekc = c 32: 33: #define QUOTES (_Q|_Q1|_ESC) /* \ ' " ` */ 34: 35: /* 36: * The following variables give the information about the current 37: * $ expansion, recording the current word position, the remaining 38: * words within this expansion, the count of remaining words, and the 39: * information about any : modifier which is being applied. 40: */ 41: char *dolp; /* Remaining chars from this word */ 42: char **dolnxt; /* Further words */ 43: int dolcnt; /* Count of further words */ 44: char dolmod; /* : modifier character */ 45: int dolmcnt; /* :gx -> 10000, else 1 */ 46: 47: /* 48: * Fix up the $ expansions and quotations in the 49: * argument list to command t. 50: */ 51: Dfix(t) 52: register struct command *t; 53: { 54: register char **pp; 55: register char *p; 56: 57: if (noexec) 58: return; 59: /* Note that t_dcom isn't trimmed thus !...:q's aren't lost */ 60: for (pp = t->t_dcom; p = *pp++;) 61: while (*p) 62: if (cmap(*p++, _DOL|QUOTES)) { /* $, \, ', ", ` */ 63: Dfix2(t->t_dcom); /* found one */ 64: blkfree(t->t_dcom); 65: t->t_dcom = gargv; 66: gargv = 0; 67: return; 68: } 69: } 70: 71: /* 72: * $ substitute one word, for i/o redirection 73: */ 74: char * 75: Dfix1(cp) 76: register char *cp; 77: { 78: char *Dv[2]; 79: 80: if (noexec) 81: return (0); 82: Dv[0] = cp; Dv[1] = NOSTR; 83: Dfix2(Dv); 84: if (gargc != 1) { 85: setname(cp); 86: bferr("Ambiguous"); 87: } 88: cp = savestr(gargv[0]); 89: blkfree(gargv), gargv = 0; 90: return (cp); 91: } 92: 93: /* 94: * Subroutine to do actual fixing after state initialization. 95: */ 96: Dfix2(v) 97: char **v; 98: { 99: char *agargv[GAVSIZ]; 100: 101: ginit(agargv); /* Initialize glob's area pointers */ 102: Dvp = v; Dcp = ""; /* Setup input vector for Dreadc */ 103: unDgetC(0); unDredc(0); /* Clear out any old peeks (at error) */ 104: dolp = 0; dolcnt = 0; /* Clear out residual $ expands (...) */ 105: while (Dword()) 106: continue; 107: gargv = copyblk(gargv); 108: } 109: 110: /* 111: * Get a word. This routine is analogous to the routine 112: * word() in sh.lex.c for the main lexical input. One difference 113: * here is that we don't get a newline to terminate our expansion. 114: * Rather, DgetC will return a DEOF when we hit the end-of-input. 115: */ 116: Dword() 117: { 118: register int c, c1; 119: char wbuf[BUFSIZ]; 120: register char *wp = wbuf; 121: register int i = BUFSIZ - 4; 122: register bool dolflg; 123: bool sofar = 0; 124: 125: loop: 126: c = DgetC(DODOL); 127: switch (c) { 128: 129: case DEOF: 130: deof: 131: if (sofar == 0) 132: return (0); 133: /* finish this word and catch the code above the next time */ 134: unDredc(c); 135: /* fall into ... */ 136: 137: case '\n': 138: *wp = 0; 139: goto ret; 140: 141: case ' ': 142: case '\t': 143: goto loop; 144: 145: case '`': 146: /* We preserve ` quotations which are done yet later */ 147: *wp++ = c, --i; 148: case '\'': 149: case '"': 150: /* 151: * Note that DgetC never returns a QUOTES character 152: * from an expansion, so only true input quotes will 153: * get us here or out. 154: */ 155: c1 = c; 156: dolflg = c1 == '"' ? DODOL : 0; 157: for (;;) { 158: c = DgetC(dolflg); 159: if (c == c1) 160: break; 161: if (c == '\n' || c == DEOF) 162: error("Unmatched %c", c1); 163: if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE)) 164: --wp, ++i; 165: if (--i <= 0) 166: goto toochars; 167: switch (c1) { 168: 169: case '"': 170: /* 171: * Leave any `s alone for later. 172: * Other chars are all quoted, thus `...` 173: * can tell it was within "...". 174: */ 175: *wp++ = c == '`' ? '`' : c | QUOTE; 176: break; 177: 178: case '\'': 179: /* Prevent all further interpretation */ 180: *wp++ = c | QUOTE; 181: break; 182: 183: case '`': 184: /* Leave all text alone for later */ 185: *wp++ = c; 186: break; 187: } 188: } 189: if (c1 == '`') 190: *wp++ = '`', --i; 191: goto pack; /* continue the word */ 192: 193: case '\\': 194: c = DgetC(0); /* No $ subst! */ 195: if (c == '\n' || c == DEOF) 196: goto loop; 197: c |= QUOTE; 198: break; 199: } 200: unDgetC(c); 201: pack: 202: sofar = 1; 203: /* pack up more characters in this word */ 204: for (;;) { 205: c = DgetC(DODOL); 206: if (c == '\\') { 207: c = DgetC(0); 208: if (c == DEOF) 209: goto deof; 210: if (c == '\n') 211: c = ' '; 212: else 213: c |= QUOTE; 214: } 215: if (c == DEOF) 216: goto deof; 217: if (cmap(c, _SP|_NL|_Q|_Q1)) { /* sp \t\n'"` */ 218: unDgetC(c); 219: if (cmap(c, QUOTES)) 220: goto loop; 221: *wp++ = 0; 222: goto ret; 223: } 224: if (--i <= 0) 225: toochars: 226: error("Word too long"); 227: *wp++ = c; 228: } 229: ret: 230: Gcat("", wbuf); 231: return (1); 232: } 233: 234: /* 235: * Get a character, performing $ substitution unless flag is 0. 236: * Any QUOTES character which is returned from a $ expansion is 237: * QUOTEd so that it will not be recognized above. 238: */ 239: DgetC(flag) 240: register int flag; 241: { 242: register int c; 243: 244: top: 245: if (c = Dpeekc) { 246: Dpeekc = 0; 247: return (c); 248: } 249: if (lap) { 250: c = *lap++ & (QUOTE|TRIM); 251: if (c == 0) { 252: lap = 0; 253: goto top; 254: } 255: quotspec: 256: if (cmap(c, QUOTES)) 257: return (c | QUOTE); 258: return (c); 259: } 260: if (dolp) { 261: if (c = *dolp++ & (QUOTE|TRIM)) 262: goto quotspec; 263: if (dolcnt > 0) { 264: setDolp(*dolnxt++); 265: --dolcnt; 266: return (' '); 267: } 268: dolp = 0; 269: } 270: if (dolcnt > 0) { 271: setDolp(*dolnxt++); 272: --dolcnt; 273: goto top; 274: } 275: c = Dredc(); 276: if (c == '$' && flag) { 277: Dgetdol(); 278: goto top; 279: } 280: return (c); 281: } 282: 283: char *nulvec[] = { 0 }; 284: struct varent nulargv = { nulvec, "argv", 0 }; 285: 286: /* 287: * Handle the multitudinous $ expansion forms. 288: * Ugh. 289: */ 290: Dgetdol() 291: { 292: register char *np; 293: register struct varent *vp; 294: char name[20]; 295: int c, sc; 296: int subscr = 0, lwb = 1, upb = 0; 297: bool dimen = 0, bitset = 0; 298: char wbuf[BUFSIZ]; 299: 300: dolmod = dolmcnt = 0; 301: c = sc = DgetC(0); 302: if (c == '{') 303: c = DgetC(0); /* sc is { to take } later */ 304: if ((c & TRIM) == '#') 305: dimen++, c = DgetC(0); /* $# takes dimension */ 306: else if (c == '?') 307: bitset++, c = DgetC(0); /* $? tests existence */ 308: switch (c) { 309: 310: case '$': 311: if (dimen || bitset) 312: goto syntax; /* No $?$, $#$ */ 313: setDolp(doldol); 314: goto eatbrac; 315: 316: case '<'|QUOTE: 317: if (dimen || bitset) 318: goto syntax; /* No $?<, $#< */ 319: for (np = wbuf; read(OLDSTD, np, 1) == 1; np++) { 320: if (np >= &wbuf[BUFSIZ-1]) 321: error("$< line too long"); 322: if (*np <= 0 || *np == '\n') 323: break; 324: } 325: *np = 0; 326: /* 327: * KLUDGE: dolmod is set here because it will 328: * cause setDolp to call domod and thus to copy wbuf. 329: * Otherwise setDolp would use it directly. If we saved 330: * it ourselves, no one would know when to free it. 331: * The actual function of the 'q' causes filename 332: * expansion not to be done on the interpolated value. 333: */ 334: dolmod = 'q'; 335: dolmcnt = 10000; 336: setDolp(wbuf); 337: goto eatbrac; 338: 339: case DEOF: 340: case '\n': 341: goto syntax; 342: 343: case '*': 344: (void) strcpy(name, "argv"); 345: vp = adrof("argv"); 346: subscr = -1; /* Prevent eating [...] */ 347: break; 348: 349: default: 350: np = name; 351: if (digit(c)) { 352: if (dimen) 353: goto syntax; /* No $#1, e.g. */ 354: subscr = 0; 355: do { 356: subscr = subscr * 10 + c - '0'; 357: c = DgetC(0); 358: } while (digit(c)); 359: unDredc(c); 360: if (subscr < 0) 361: goto oob; 362: if (subscr == 0) { 363: if (bitset) { 364: dolp = file ? "1" : "0"; 365: goto eatbrac; 366: } 367: if (file == 0) 368: error("No file for $0"); 369: setDolp(file); 370: goto eatbrac; 371: } 372: if (bitset) 373: goto syntax; 374: vp = adrof("argv"); 375: if (vp == 0) { 376: vp = &nulargv; 377: goto eatmod; 378: } 379: break; 380: } 381: if (!alnum(c)) 382: goto syntax; 383: for (;;) { 384: *np++ = c; 385: c = DgetC(0); 386: if (!alnum(c)) 387: break; 388: if (np >= &name[sizeof name - 2]) 389: syntax: 390: error("Variable syntax"); 391: } 392: *np++ = 0; 393: unDredc(c); 394: vp = adrof(name); 395: } 396: if (bitset) { 397: dolp = (vp || getenv(name)) ? "1" : "0"; 398: goto eatbrac; 399: } 400: if (vp == 0) { 401: np = getenv(name); 402: if (np) { 403: addla(np); 404: goto eatbrac; 405: } 406: udvar(name); 407: /*NOTREACHED*/ 408: } 409: c = DgetC(0); 410: upb = blklen(vp->vec); 411: if (dimen == 0 && subscr == 0 && c == '[') { 412: np = name; 413: for (;;) { 414: c = DgetC(DODOL); /* Allow $ expand within [ ] */ 415: if (c == ']') 416: break; 417: if (c == '\n' || c == DEOF) 418: goto syntax; 419: if (np >= &name[sizeof name - 2]) 420: goto syntax; 421: *np++ = c; 422: } 423: *np = 0, np = name; 424: if (dolp || dolcnt) /* $ exp must end before ] */ 425: goto syntax; 426: if (!*np) 427: goto syntax; 428: if (digit(*np)) { 429: register int i = 0; 430: 431: while (digit(*np)) 432: i = i * 10 + *np++ - '0'; 433: if ((i < 0 || i > upb) && !any(*np, "-*")) { 434: oob: 435: setname(vp->v_name); 436: error("Subscript out of range"); 437: } 438: lwb = i; 439: if (!*np) 440: upb = lwb, np = "*"; 441: } 442: if (*np == '*') 443: np++; 444: else if (*np != '-') 445: goto syntax; 446: else { 447: register int i = upb; 448: 449: np++; 450: if (digit(*np)) { 451: i = 0; 452: while (digit(*np)) 453: i = i * 10 + *np++ - '0'; 454: if (i < 0 || i > upb) 455: goto oob; 456: } 457: if (i < lwb) 458: upb = lwb - 1; 459: else 460: upb = i; 461: } 462: if (lwb == 0) { 463: if (upb != 0) 464: goto oob; 465: upb = -1; 466: } 467: if (*np) 468: goto syntax; 469: } else { 470: if (subscr > 0) 471: if (subscr > upb) 472: lwb = 1, upb = 0; 473: else 474: lwb = upb = subscr; 475: unDredc(c); 476: } 477: if (dimen) { 478: char *cp = putn(upb - lwb + 1); 479: 480: addla(cp); 481: xfree(cp); 482: } else { 483: eatmod: 484: c = DgetC(0); 485: if (c == ':') { 486: c = DgetC(0), dolmcnt = 1; 487: if (c == 'g') 488: c = DgetC(0), dolmcnt = 10000; 489: if (!any(c, "htrqxe")) 490: error("Bad : mod in $"); 491: dolmod = c; 492: if (c == 'q') 493: dolmcnt = 10000; 494: } else 495: unDredc(c); 496: dolnxt = &vp->vec[lwb - 1]; 497: dolcnt = upb - lwb + 1; 498: } 499: eatbrac: 500: if (sc == '{') { 501: c = Dredc(); 502: if (c != '}') 503: goto syntax; 504: } 505: } 506: 507: setDolp(cp) 508: register char *cp; 509: { 510: register char *dp; 511: 512: if (dolmod == 0 || dolmcnt == 0) { 513: dolp = cp; 514: return; 515: } 516: dp = domod(cp, dolmod); 517: if (dp) { 518: dolmcnt--; 519: addla(dp); 520: xfree(dp); 521: } else 522: addla(cp); 523: dolp = ""; 524: } 525: 526: unDredc(c) 527: int c; 528: { 529: 530: Dpeekrd = c; 531: } 532: 533: Dredc() 534: { 535: register int c; 536: 537: if (c = Dpeekrd) { 538: Dpeekrd = 0; 539: return (c); 540: } 541: if (Dcp && (c = *Dcp++)) 542: return (c&(QUOTE|TRIM)); 543: if (*Dvp == 0) { 544: Dcp = 0; 545: return (DEOF); 546: } 547: Dcp = *Dvp++; 548: return (' '); 549: } 550: 551: Dtestq(c) 552: register int c; 553: { 554: 555: if (cmap(c, QUOTES)) 556: gflag = 1; 557: } 558: 559: /* 560: * Form a shell temporary file (in unit 0) from the words 561: * of the shell input up to a line the same as "term". 562: * Unit 0 should have been closed before this call. 563: */ 564: heredoc(term) 565: char *term; 566: { 567: register int c; 568: char *Dv[2]; 569: char obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ]; 570: int ocnt, lcnt, mcnt; 571: register char *lbp, *obp, *mbp; 572: char **vp; 573: bool quoted; 574: 575: if (creat(shtemp, 0600) < 0) 576: Perror(shtemp); 577: (void) close(0); 578: if (open(shtemp, 2) < 0) { 579: int oerrno = errno; 580: 581: (void) unlink(shtemp); 582: errno = oerrno; 583: Perror(shtemp); 584: } 585: (void) unlink(shtemp); /* 0 0 inode! */ 586: Dv[0] = term; Dv[1] = NOSTR; gflag = 0; 587: trim(Dv); rscan(Dv, Dtestq); quoted = gflag; 588: ocnt = BUFSIZ; obp = obuf; 589: for (;;) { 590: /* 591: * Read up a line 592: */ 593: lbp = lbuf; lcnt = BUFSIZ - 4; 594: for (;;) { 595: c = readc(1); /* 1 -> Want EOF returns */ 596: if (c < 0) { 597: setname(term); 598: bferr("<< terminator not found"); 599: } 600: if (c == '\n') 601: break; 602: if (c &= TRIM) { 603: *lbp++ = c; 604: if (--lcnt < 0) { 605: setname("<<"); 606: error("Line overflow"); 607: } 608: } 609: } 610: *lbp = 0; 611: 612: /* 613: * Compare to terminator -- before expansion 614: */ 615: if (eq(lbuf, term)) { 616: (void) write(0, obuf, BUFSIZ - ocnt); 617: (void) lseek(0, (off_t)0, 0); 618: return; 619: } 620: 621: /* 622: * If term was quoted or -n just pass it on 623: */ 624: if (quoted || noexec) { 625: *lbp++ = '\n'; *lbp = 0; 626: for (lbp = lbuf; c = *lbp++;) { 627: *obp++ = c; 628: if (--ocnt == 0) { 629: (void) write(0, obuf, BUFSIZ); 630: obp = obuf; ocnt = BUFSIZ; 631: } 632: } 633: continue; 634: } 635: 636: /* 637: * Term wasn't quoted so variable and then command 638: * expand the input line 639: */ 640: Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4; 641: for (;;) { 642: c = DgetC(DODOL); 643: if (c == DEOF) 644: break; 645: if ((c &= TRIM) == 0) 646: continue; 647: /* \ quotes \ $ ` here */ 648: if (c =='\\') { 649: c = DgetC(0); 650: if (!any(c, "$\\`")) 651: unDgetC(c | QUOTE), c = '\\'; 652: else 653: c |= QUOTE; 654: } 655: *mbp++ = c; 656: if (--mcnt == 0) { 657: setname("<<"); 658: bferr("Line overflow"); 659: } 660: } 661: *mbp++ = 0; 662: 663: /* 664: * If any ` in line do command substitution 665: */ 666: mbp = mbuf; 667: if (any('`', mbp)) { 668: /* 669: * 1 arg to dobackp causes substitution to be literal. 670: * Words are broken only at newlines so that all blanks 671: * and tabs are preserved. Blank lines (null words) 672: * are not discarded. 673: */ 674: vp = dobackp(mbuf, 1); 675: } else 676: /* Setup trivial vector similar to return of dobackp */ 677: Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv; 678: 679: /* 680: * Resurrect the words from the command substitution 681: * each separated by a newline. Note that the last 682: * newline of a command substitution will have been 683: * discarded, but we put a newline after the last word 684: * because this represents the newline after the last 685: * input line! 686: */ 687: for (; *vp; vp++) { 688: for (mbp = *vp; *mbp; mbp++) { 689: *obp++ = *mbp & TRIM; 690: if (--ocnt == 0) { 691: (void) write(0, obuf, BUFSIZ); 692: obp = obuf; ocnt = BUFSIZ; 693: } 694: } 695: *obp++ = '\n'; 696: if (--ocnt == 0) { 697: (void) write(0, obuf, BUFSIZ); 698: obp = obuf; ocnt = BUFSIZ; 699: } 700: } 701: if (pargv) 702: blkfree(pargv), pargv = 0; 703: } 704: }