1: # include <stdio.h> 2: # include <ctype.h> 3: # include "dlvrmail.h" 4: 5: static char SccsId[] = "@(#)parse.c 2.5 1/8/81"; 6: 7: /* 8: ** PARSE -- Parse an address 9: ** 10: ** Parses an address and breaks it up into three parts: a 11: ** net to transmit the message on, the host to transmit it 12: ** to, and a user on that host. These are loaded into an 13: ** addrq header with the values squirreled away if necessary. 14: ** The "user" part may not be a real user; the process may 15: ** just reoccur on that machine. For example, on a machine 16: ** with an arpanet connection, the address 17: ** csvax.bill@berkeley 18: ** will break up to a "user" of 'csvax.bill' and a host 19: ** of 'berkeley' -- to be transmitted over the arpanet. 20: ** 21: ** Parameters: 22: ** addr -- the address to parse. 23: ** a -- a pointer to the address descriptor buffer. 24: ** If NULL, a header will be created. 25: ** copyf -- determines what shall be copied: 26: ** -1 -- don't copy anything. The printname 27: ** (q_paddr) is just addr, and the 28: ** user & host are allocated internally 29: ** to parse. 30: ** 0 -- copy out the parsed user & host, but 31: ** don't copy the printname. 32: ** +1 -- copy everything. 33: ** 34: ** Returns: 35: ** A pointer to the address descriptor header (`a' if 36: ** `a' is non-NULL). 37: ** NULL on error. 38: ** 39: ** Side Effects: 40: ** none 41: ** 42: ** Called By: 43: ** main 44: ** sendto 45: ** alias 46: ** savemail 47: */ 48: 49: # define DELIMCHARS "()<>@!.,;:\\\" \t\r\n" /* word delimiters */ 50: # define SPACESUB ('.'|0200) /* substitution for <lwsp> */ 51: 52: addrq * 53: parse(addr, a, copyf) 54: char *addr; 55: register addrq *a; 56: int copyf; 57: { 58: register char *p; 59: register struct parsetab *t; 60: extern struct parsetab ParseTab[]; 61: static char buf[MAXNAME]; 62: register char c; 63: register char *q; 64: bool got_one; 65: extern char *prescan(); 66: extern char *xalloc(); 67: char **pvp; 68: 69: /* 70: ** Initialize and prescan address. 71: */ 72: 73: To = addr; 74: if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 75: return (NULL); 76: 77: /* 78: ** Scan parse table. 79: ** Look for the first entry designating a character 80: ** that is contained in the address. 81: ** Arrange for q to point to that character. 82: ** Check to see that there is only one of the char 83: ** if it must be unique. 84: ** Find the last one if the host is on the RHS. 85: ** Insist that the host name is atomic. 86: ** If just doing a map, do the map and then start all 87: ** over. 88: */ 89: 90: rescan: 91: got_one = FALSE; 92: for (t = ParseTab; t->p_char != '\0'; t++) 93: { 94: q = NULL; 95: for (p = buf; (c = *p) != '\0'; p++) 96: { 97: /* find the end of this token */ 98: while (isalnum(c) || c == '-' || c == '_') 99: c = *++p; 100: if (c == '\0') 101: break; 102: 103: if (c == t->p_char) 104: { 105: got_one = TRUE; 106: 107: /* do mapping as appropriate */ 108: if (flagset(P_MAP, t->p_flags)) 109: { 110: *p = t->p_arg[0]; 111: if (flagset(P_ONE, t->p_flags)) 112: goto rescan; 113: else 114: continue; 115: } 116: 117: /* arrange for q to point to it */ 118: if (q != NULL && flagset(P_ONE, t->p_flags)) 119: { 120: usrerr("multichar error"); 121: ExitStat = EX_USAGE; 122: return (NULL); 123: } 124: if (q == NULL || flagset(P_HLAST, t->p_flags)) 125: q = p; 126: } 127: else 128: { 129: /* insist that host name is atomic */ 130: if (flagset(P_HLAST, t->p_flags)) 131: q = NULL; 132: else 133: break; 134: } 135: } 136: 137: if (q != NULL) 138: break; 139: } 140: 141: /* 142: ** If we matched nothing cleanly, but we did match something 143: ** somewhere in the process of scanning, then we have a 144: ** syntax error. This can happen on things like a@b:c where 145: ** @ has a right host and : has a left host. 146: ** 147: ** We also set `q' to the null string, in case someone forgets 148: ** to put the P_MOVE bit in the local mailer entry of the 149: ** configuration table. 150: */ 151: 152: if (q == NULL) 153: { 154: q = ""; 155: if (got_one) 156: { 157: usrerr("syntax error"); 158: ExitStat = EX_USAGE; 159: return (NULL); 160: } 161: } 162: 163: /* 164: ** Interpret entry. 165: ** t points to the entry for the mailer we will use. 166: ** q points to the significant character. 167: */ 168: 169: if (a == NULL) 170: a = (addrq *) xalloc(sizeof *a); 171: if (copyf > 0) 172: { 173: p = xalloc((unsigned) strlen(addr) + 1); 174: strcpy(p, addr); 175: a->q_paddr = p; 176: } 177: else 178: a->q_paddr = addr; 179: a->q_mailer = &Mailer[t->p_mailer]; 180: 181: if (flagset(P_MOVE, t->p_flags)) 182: { 183: /* send the message to another host & retry */ 184: a->q_host = t->p_arg; 185: if (copyf >= 0) 186: { 187: p = xalloc((unsigned) strlen(buf) + 1); 188: strcpy(p, buf); 189: a->q_user = p; 190: } 191: else 192: a->q_user = buf; 193: } 194: else 195: { 196: /* 197: ** Make local copies of the host & user and then 198: ** transport them out. 199: */ 200: 201: *q++ = '\0'; 202: if (flagset(P_HLAST, t->p_flags)) 203: { 204: a->q_host = q; 205: a->q_user = buf; 206: } 207: else 208: { 209: a->q_host = buf; 210: a->q_user = q; 211: } 212: 213: /* 214: ** Don't go to the net if already on the target host. 215: ** This is important on the berkeley network, since 216: ** it get confused if we ask to send to ourselves. 217: ** For nets like the ARPANET, we probably will have 218: ** the local list set to NULL to simplify testing. 219: ** The canonical representation of the name is also set 220: ** to be just the local name so the duplicate letter 221: ** suppression algorithm will work. 222: */ 223: 224: if ((pvp = a->q_mailer->m_local) != NULL) 225: { 226: while (*pvp != NULL) 227: { 228: auto char buf2[MAXNAME]; 229: 230: strcpy(buf2, a->q_host); 231: if (!flagset(P_HST_UPPER, t->p_flags)) 232: makelower(buf2); 233: if (strcmp(*pvp++, buf2) == 0) 234: { 235: strcpy(buf2, a->q_user); 236: p = a->q_paddr; 237: if (parse(buf2, a, -1) == NULL) 238: { 239: To = addr; 240: return (NULL); 241: } 242: To = a->q_paddr = p; 243: break; 244: } 245: } 246: } 247: 248: /* make copies if specified */ 249: if (copyf >= 0) 250: { 251: p = xalloc((unsigned) strlen(a->q_host) + 1); 252: strcpy(p, a->q_host); 253: a->q_host = p; 254: p = xalloc((unsigned) strlen(a->q_user) + 1); 255: strcpy(p, a->q_user); 256: a->q_user = p; 257: } 258: } 259: 260: /* 261: ** Do UPPER->lower case mapping unless inhibited. 262: */ 263: 264: if (!flagset(P_HST_UPPER, t->p_flags)) 265: makelower(a->q_host); 266: if (!flagset(P_USR_UPPER, t->p_flags)) 267: makelower(a->q_user); 268: 269: /* 270: ** Compute return value. 271: */ 272: 273: # ifdef DEBUG 274: if (Debug) 275: printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 276: addr, a->q_host, a->q_user, t->p_mailer); 277: # endif DEBUG 278: 279: return (a); 280: } 281: /* 282: ** MAKELOWER -- Translate a line into lower case 283: ** 284: ** Parameters: 285: ** p -- the string to translate. If NULL, return is 286: ** immediate. 287: ** 288: ** Returns: 289: ** none. 290: ** 291: ** Side Effects: 292: ** String pointed to by p is translated to lower case. 293: ** 294: ** Called By: 295: ** parse 296: */ 297: 298: makelower(p) 299: register char *p; 300: { 301: register char c; 302: 303: if (p == NULL) 304: return; 305: for (; (c = *p) != '\0'; p++) 306: if ((c & 0200) == 0 && isupper(c)) 307: *p = c - 'A' + 'a'; 308: } 309: /* 310: ** PRESCAN -- Prescan name and make it canonical 311: ** 312: ** Scans a name and turns it into canonical form. This involves 313: ** deleting blanks, comments (in parentheses), and turning the 314: ** word "at" into an at-sign ("@"). The name is copied as this 315: ** is done; it is legal to copy a name onto itself, since this 316: ** process can only make things smaller. 317: ** 318: ** This routine knows about quoted strings and angle brackets. 319: ** 320: ** There are certain subtleties to this routine. The one that 321: ** comes to mind now is that backslashes on the ends of names 322: ** are silently stripped off; this is intentional. The problem 323: ** is that some versions of sndmsg (like at LBL) set the kill 324: ** character to something other than @ when reading addresses; 325: ** so people type "csvax.eric\@berkeley" -- which screws up the 326: ** berknet mailer. 327: ** 328: ** Parameters: 329: ** addr -- the name to chomp. 330: ** buf -- the buffer to copy it into. 331: ** buflim -- the last usable address in the buffer 332: ** (which will old a null byte). Normally 333: ** &buf[sizeof buf - 1]. 334: ** delim -- the delimiter for the address, normally 335: ** '\0' or ','; \0 is accepted in any case. 336: ** are moving in place; set buflim to high core. 337: ** 338: ** Returns: 339: ** A pointer to the terminator of buf. 340: ** NULL on error. 341: ** 342: ** Side Effects: 343: ** buf gets clobbered. 344: ** 345: ** Called By: 346: ** parse 347: ** maketemp 348: */ 349: 350: char * 351: prescan(addr, buf, buflim, delim) 352: char *addr; 353: char *buf; 354: char *buflim; 355: char delim; 356: { 357: register char *p; 358: bool space; 359: bool quotemode; 360: bool bslashmode; 361: bool delimmode; 362: int cmntcnt; 363: int brccnt; 364: register char c; 365: register char *q; 366: extern bool any(); 367: 368: space = FALSE; 369: delimmode = TRUE; 370: q = buf; 371: bslashmode = quotemode = FALSE; 372: cmntcnt = brccnt = 0; 373: for (p = addr; (c = *p++) != '\0'; ) 374: { 375: /* chew up special characters */ 376: *q = '\0'; 377: if (bslashmode) 378: { 379: c |= 0200; 380: bslashmode = FALSE; 381: } 382: else if (c == '"') 383: quotemode = !quotemode; 384: else if (c == '\\') 385: { 386: bslashmode++; 387: continue; 388: } 389: else if (quotemode) 390: c |= 0200; 391: else if (c == delim) 392: break; 393: else if (c == '(') 394: { 395: cmntcnt++; 396: continue; 397: } 398: else if (c == ')') 399: { 400: if (cmntcnt <= 0) 401: { 402: usrerr("Unbalanced ')'"); 403: return (NULL); 404: } 405: else 406: { 407: cmntcnt--; 408: continue; 409: } 410: } 411: if (cmntcnt > 0) 412: continue; 413: else if (isascii(c) && isspace(c) && (space || delimmode)) 414: continue; 415: else if (c == '<') 416: { 417: if (brccnt < 0) 418: { 419: usrerr("multiple < spec"); 420: return (NULL); 421: } 422: brccnt++; 423: delimmode = TRUE; 424: space = FALSE; 425: if (brccnt == 1) 426: { 427: /* we prefer using machine readable name */ 428: q = buf; 429: *q = '\0'; 430: continue; 431: } 432: } 433: else if (c == '>') 434: { 435: if (brccnt <= 0) 436: { 437: usrerr("Unbalanced `>'"); 438: return (NULL); 439: } 440: else 441: brccnt--; 442: if (brccnt <= 0) 443: { 444: brccnt = -1; 445: continue; 446: } 447: } 448: 449: /* 450: ** Turn "at" into "@", 451: ** but only if "at" is a word. 452: ** By the way, I violate the ARPANET RFC-733 453: ** standard here, by assuming that 'space' delimits 454: ** atoms. I assume that is just a mistake, since 455: ** it violates the spirit of the semantics 456: ** of the document..... 457: */ 458: 459: if (delimmode && (c == 'a' || c == 'A') && 460: (p[0] == 't' || p[0] == 'T') && 461: (any(p[1], DELIMCHARS) || p[1] <= 040)) 462: { 463: c = '@'; 464: p++; 465: } 466: 467: if (delimmode = any(c, DELIMCHARS)) 468: space = FALSE; 469: 470: /* if not a space, squirrel it away */ 471: if ((!isascii(c) || !isspace(c)) && brccnt >= 0) 472: { 473: if (q >= buflim-1) 474: { 475: usrerr("Address too long"); 476: return (NULL); 477: } 478: if (space) 479: *q++ = SPACESUB; 480: *q++ = c; 481: } 482: space = isascii(c) && isspace(c); 483: } 484: *q = '\0'; 485: if (c == '\0') 486: p--; 487: if (cmntcnt > 0) 488: usrerr("Unbalanced '('"); 489: else if (quotemode) 490: usrerr("Unbalanced '\"'"); 491: else if (brccnt > 0) 492: usrerr("Unbalanced '<'"); 493: else if (buf[0] != '\0') 494: return (p); 495: return (NULL); 496: }