1: /* mf.c - mail filter subroutines */ 2: 3: #include "mf.h" 4: #include <ctype.h> 5: #include <stdio.h> 6: 7: /* */ 8: 9: static char *getcpy (s) 10: register char *s; 11: { 12: register char *p; 13: 14: if ((p = malloc ((unsigned) (strlen (s) + 2))) != NULL) 15: (void) strcpy (p, s); 16: return p; 17: } 18: 19: 20: static char *add (s1, s2) 21: register char *s1, 22: *s2; 23: { 24: register char *p; 25: 26: if (s2 == NULL) 27: return getcpy (s1); 28: 29: if ((p = malloc ((unsigned) (strlen (s1) + strlen (s2) + 2))) != NULL) 30: (void) sprintf (p, "%s%s", s2, s1); 31: free (s2); 32: return p; 33: } 34: 35: /* */ 36: 37: int isfrom (string) 38: register char *string; 39: { 40: return (strncmp (string, "From ", 5) == 0 41: || strncmp (string, ">From ", 6) == 0); 42: } 43: 44: 45: int lequal (a, b) 46: register char *a, 47: *b; 48: { 49: for (; *a; a++, b++) 50: if (*b == NULL) 51: return FALSE; 52: else { 53: char c1 = islower (*a) ? toupper (*a) : *a; 54: char c2 = islower (*b) ? toupper (*b) : *b; 55: if (c1 != c2) 56: return FALSE; 57: } 58: 59: return (*b == NULL); 60: } 61: 62: /* */ 63: 64: /* 65: * 66: * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style 67: * addresses, so for each list of addresses we see if we can find some 68: * character to give us a hint. 69: * 70: */ 71: 72: 73: #define CHKADR 0 /* undertermined address style */ 74: #define UNIXDR 1 /* UNIX-style address */ 75: #define ARPADR 2 /* ARPAnet-style address */ 76: 77: 78: static char *punctuators = ";<>.()[]"; 79: static char *vp = NULL; 80: static char *tp = NULL; 81: 82: static struct adrx adrxs1; 83: 84: /* */ 85: 86: struct adrx *seekadrx (addrs) 87: register char *addrs; 88: { 89: static int state = CHKADR; 90: register char *cp; 91: register struct adrx *adrxp; 92: 93: if (state == CHKADR) 94: for (state = UNIXDR, cp = addrs; *cp; cp++) 95: if (index (punctuators, *cp)) { 96: state = ARPADR; 97: break; 98: } 99: 100: switch (state) { 101: case UNIXDR: 102: adrxp = uucpadrx (addrs); 103: break; 104: 105: case ARPADR: 106: default: 107: adrxp = getadrx (addrs); 108: break; 109: } 110: 111: if (adrxp == NULL) 112: state = CHKADR; 113: 114: return adrxp; 115: } 116: 117: /* */ 118: 119: /* 120: * 121: * uucpadrx() implements a partial UUCP-style address parser. It's based 122: * on the UUCP notion that addresses are separated by spaces or commas. 123: * 124: */ 125: 126: 127: struct adrx *uucpadrx (addrs) 128: register char *addrs; 129: { 130: register char *cp, 131: *wp, 132: *xp, 133: *yp, 134: *zp; 135: register struct adrx *adrxp = &adrxs1; 136: 137: if (vp == NULL) { 138: vp = tp = getcpy (addrs); 139: compress (addrs, vp); 140: } 141: else 142: if (tp == NULL) { 143: free (vp); 144: vp = NULL; 145: return NULL; 146: } 147: 148: for (cp = tp; isspace (*cp); cp++) 149: continue; 150: if (*cp == NULL) { 151: free (vp); 152: vp = tp = NULL; 153: return NULL; 154: } 155: 156: /* */ 157: 158: if ((wp = index (cp, ',')) == NULL) 159: if ((wp = index (cp, ' ')) != NULL) { 160: xp = wp; 161: while (isspace (*xp)) 162: xp++; 163: if (*xp != NULL && isat (--xp)) { 164: yp = xp + 4; 165: while (isspace (*yp)) 166: yp++; 167: if (*yp != NULL) 168: if ((zp = index (yp, ' ')) != NULL) 169: *zp = NULL, tp = ++zp; 170: else 171: tp = NULL; 172: else 173: *wp = NULL, tp = ++wp; 174: } 175: else 176: *wp = NULL, tp = ++wp; 177: } 178: else 179: tp = NULL; 180: else 181: *wp = NULL, tp = ++wp; 182: 183: if (adrxp -> text) 184: free (adrxp -> text); 185: adrxp -> text = getcpy (cp); 186: adrxp -> mbox = cp; 187: adrxp -> host = adrxp -> path = NULL; 188: if ((wp = rindex (cp, '@')) != NULL) { 189: *wp++ = NULL; 190: adrxp -> host = *wp ? wp : NULL; 191: } 192: else 193: for (wp = cp + strlen (cp) - 4; wp >= cp; wp--) 194: if (isat (wp)) { 195: *wp++ = NULL; 196: adrxp -> host = wp + 3; 197: } 198: 199: adrxp -> pers = adrxp -> grp = adrxp -> note = adrxp -> err = NULL; 200: adrxp -> ingrp = 0; 201: 202: return adrxp; 203: } 204: 205: /* */ 206: 207: static int compress (fp, tp) 208: register char *fp, 209: *tp; 210: { 211: register char c, 212: *cp; 213: 214: for (c = ' ', cp = tp; (*tp = *fp++) != NULL;) 215: if (isspace (*tp)) { 216: if (c != ' ') 217: *tp++ = c = ' '; 218: } 219: else 220: c = *tp++; 221: 222: if (c == ' ' && cp < tp) 223: *--tp = NULL; 224: } 225: 226: 227: static int isat (p) 228: register char *p; 229: { 230: return (strncmp (p, " AT ", 4) 231: && strncmp (p, " At ", 4) 232: && strncmp (p, " aT ", 4) 233: && strncmp (p, " at ", 4) ? FALSE : TRUE); 234: } 235: 236: /* */ 237: 238: /* 239: * 240: * getadrx() implements a partial 822-style address parser. The parser 241: * is neither complete nor correct. It does however recognize nearly all 242: * of the 822 address syntax. In addition it handles the majority of the 243: * 733 syntax as well. Most problems arise from trying to accomodate both. 244: * 245: * In terms of 822, the route-specification in 246: * 247: * "<" [route] local-part "@" domain ">" 248: * 249: * is parsed and returned unchanged. Multiple at-signs are compressed 250: * via source-routing. Recursive groups are not allowed as per the 251: * standard. 252: * 253: * In terms of 733, " at " is recognized as equivalent to "@". 254: * 255: * In terms of both the parser will not complain about missing hosts. 256: * 257: * ----- 258: * 259: * We should not allow addresses like 260: * 261: * Marshall T. Rose <MRose@UCI> 262: * 263: * but should insist on 264: * 265: * "Marshall T. Rose" <MRose@UCI> 266: * 267: * Unfortunately, a lot of mailers stupidly let people get away with this. 268: * 269: * ----- 270: * 271: * We should not allow addresses like 272: * 273: * <MRose@UCI> 274: * 275: * but should insist on 276: * 277: * MRose@UCI 278: * 279: * Unfortunately, a lot of mailers stupidly let people's UAs get away with 280: * this. 281: * 282: * ----- 283: * 284: * We should not allow addresses like 285: * 286: * @UCI:MRose@UCI-750a 287: * 288: * but should insist on 289: * 290: * Marshall Rose <@UCI:MRose@UCI-750a> 291: * 292: * Unfortunately, a lot of mailers stupidly do this. 293: * 294: */ 295: 296: /* */ 297: 298: #define QUOTE '\\' 299: 300: #define LX_END 0 301: #define LX_ERR 1 302: #define LX_ATOM 2 303: #define LX_QSTR 3 304: #define LX_DLIT 4 305: #define LX_SEMI 5 306: #define LX_COMA 6 307: #define LX_LBRK 7 308: #define LX_RBRK 8 309: #define LX_COLN 9 310: #define LX_DOT 10 311: #define LX_AT 11 312: 313: static struct { 314: char lx_chr; 315: int lx_val; 316: } special[] = { 317: ';', LX_SEMI, 318: ',', LX_COMA, 319: '<', LX_LBRK, 320: '>', LX_RBRK, 321: ':', LX_COLN, 322: '.', LX_DOT, 323: '@', LX_AT, 324: '(', LX_ERR, 325: ')', LX_ERR, 326: QUOTE, LX_ERR, 327: '"', LX_ERR, 328: '[', LX_ERR, 329: ']', LX_ERR, 330: NULL, NULL 331: }; 332: 333: /* */ 334: 335: static int glevel = 0; 336: static int ingrp = 0; 337: static int last_lex = LX_END; 338: 339: static char *dp = NULL; 340: static char *cp = NULL; 341: static char *ap = NULL; 342: static char *pers = NULL; 343: static char *mbox = NULL; 344: static char *host = NULL; 345: static char *path = NULL; 346: static char *grp = NULL; 347: static char *note = NULL; 348: static char err[BUFSIZ]; 349: static char adr[BUFSIZ]; 350: 351: static struct adrx adrxs2; 352: 353: /* */ 354: 355: struct adrx *getadrx (addrs) 356: register char *addrs; 357: { 358: register char *bp; 359: register struct adrx *adrxp = &adrxs2; 360: 361: if (pers) 362: free (pers); 363: if (mbox) 364: free (mbox); 365: if (host) 366: free (host); 367: if (path) 368: free (path); 369: if (grp) 370: free (grp); 371: if (note) 372: free (note); 373: pers = mbox = host = path = grp = note = NULL; 374: err[0] = NULL; 375: 376: if (dp == NULL) { 377: dp = cp = getcpy (addrs); 378: glevel = 0; 379: } 380: else 381: if (cp == NULL) { 382: free (dp); 383: dp = NULL; 384: return NULL; 385: } 386: 387: switch (parse_address ()) { 388: case DONE: 389: free (dp); 390: dp = cp = NULL; 391: return NULL; 392: 393: case OK: 394: switch (last_lex) { 395: case LX_COMA: 396: case LX_END: 397: break; 398: 399: default: /* catch trailing comments */ 400: bp = cp; 401: (void) my_lex (adr); 402: cp = bp; 403: break; 404: } 405: break; 406: 407: default: 408: break; 409: } 410: 411: if (err[0]) 412: for (;;) { 413: switch (last_lex) { 414: case LX_COMA: 415: case LX_END: 416: break; 417: 418: default: 419: (void) my_lex (adr); 420: continue; 421: } 422: break; 423: } 424: while (isspace (*ap)) 425: ap++; 426: if (cp) 427: (void) sprintf (adr, "%.*s", cp - ap, ap); 428: else 429: (void) strcpy (adr, ap); 430: bp = adr + strlen (adr) - 1; 431: if (*bp == ',' || *bp == ';' || *bp == '\n') 432: *bp = NULL; 433: 434: /* */ 435: 436: adrxp -> text = adr; 437: adrxp -> pers = pers; 438: adrxp -> mbox = mbox; 439: adrxp -> host = host; 440: adrxp -> path = path; 441: adrxp -> grp = grp; 442: adrxp -> ingrp = ingrp; 443: adrxp -> note = note; 444: adrxp -> err = err[0] ? err : NULL; 445: 446: return adrxp; 447: } 448: 449: /* */ 450: 451: static int parse_address () { 452: char buffer[BUFSIZ]; 453: 454: again: ; 455: ap = cp; 456: switch (my_lex (buffer)) { 457: case LX_ATOM: 458: case LX_QSTR: 459: pers = getcpy (buffer); 460: break; 461: 462: case LX_SEMI: 463: if (glevel-- <= 0) { 464: (void) strcpy (err, "extraneous semi-colon"); 465: return NOTOK; 466: } 467: case LX_COMA: 468: if (note) { 469: free (note); 470: note = NULL; 471: } 472: goto again; 473: 474: case LX_END: 475: return DONE; 476: 477: case LX_LBRK: /* sigh (2) */ 478: goto get_addr; 479: 480: case LX_AT: /* sigh (3) */ 481: cp = ap; 482: if (route_addr (buffer) == NOTOK) 483: return NOTOK; 484: return OK; /* why be choosy? */ 485: 486: default: 487: (void) sprintf (err, "illegal address construct (%s)", buffer); 488: return NOTOK; 489: } 490: 491: /* */ 492: 493: switch (my_lex (buffer)) { 494: case LX_ATOM: 495: case LX_QSTR: 496: pers = add (buffer, add (" ", pers)); 497: more_phrase: ; /* sigh (1) */ 498: if (phrase (buffer) == NOTOK) 499: return NOTOK; 500: 501: switch (last_lex) { 502: case LX_LBRK: 503: get_addr: ; 504: if (route_addr (buffer) == NOTOK) 505: return NOTOK; 506: if (last_lex == LX_RBRK) 507: return OK; 508: (void) sprintf (err, "missing right-bracket (%s)", buffer); 509: return NOTOK; 510: 511: case LX_COLN: 512: get_group: ; 513: if (glevel++ > 0) { 514: (void) sprintf (err, "nested groups not allowed (%s)", 515: pers); 516: return NOTOK; 517: } 518: grp = add (": ", pers); 519: pers = NULL; 520: { 521: char *pp = cp; 522: 523: for (;;) 524: switch (my_lex (buffer)) { 525: case LX_SEMI: 526: case LX_END: /* tsk, tsk */ 527: glevel--; 528: return OK; 529: 530: case LX_COMA: 531: continue; 532: 533: default: 534: cp = pp; 535: return parse_address (); 536: } 537: } 538: 539: case LX_DOT: /* sigh (1) */ 540: pers = add (".", pers); 541: goto more_phrase; 542: 543: default: 544: (void) sprintf (err, 545: "no mailbox in address, only a phrase (%s%s)", 546: pers, buffer); 547: return NOTOK; 548: } 549: 550: /* */ 551: 552: case LX_LBRK: 553: goto get_addr; 554: 555: case LX_COLN: 556: goto get_group; 557: 558: case LX_DOT: 559: mbox = add (buffer, pers); 560: pers = NULL; 561: if (route_addr (buffer) == NOTOK) 562: return NOTOK; 563: goto check_end; 564: 565: case LX_AT: 566: ingrp = glevel; 567: mbox = pers; 568: pers = NULL; 569: if (domain (buffer) == NOTOK) 570: return NOTOK; 571: check_end: ; 572: switch (last_lex) { 573: case LX_SEMI: 574: if (glevel-- <= 0) { 575: (void) strcpy (err, "extraneous semi-colon"); 576: return NOTOK; 577: } 578: case LX_COMA: 579: case LX_END: 580: return OK; 581: 582: default: 583: (void) sprintf (err, "junk after local@domain (%s)", 584: buffer); 585: return NOTOK; 586: } 587: 588: case LX_SEMI: /* no host */ 589: case LX_COMA: 590: case LX_END: 591: ingrp = glevel; 592: if (last_lex == LX_SEMI && glevel-- <= 0) { 593: (void) strcpy (err, "extraneous semi-colon"); 594: return NOTOK; 595: } 596: mbox = pers; 597: pers = NULL; 598: return OK; 599: 600: default: 601: (void) sprintf (err, "missing mailbox (%s)", buffer); 602: return NOTOK; 603: } 604: } 605: 606: /* */ 607: 608: static int phrase (buffer) 609: register char *buffer; 610: { 611: for (;;) 612: switch (my_lex (buffer)) { 613: case LX_ATOM: 614: case LX_QSTR: 615: pers = add (buffer, add (" ", pers)); 616: continue; 617: 618: default: 619: return OK; 620: } 621: } 622: 623: /* */ 624: 625: static int route_addr (buffer) 626: register char *buffer; 627: { 628: register char *pp = cp; 629: 630: if (my_lex (buffer) == LX_AT) { 631: if (route (buffer) == NOTOK) 632: return NOTOK; 633: } 634: else 635: cp = pp; 636: 637: if (local_part (buffer) == NOTOK) 638: return NOTOK; 639: 640: switch (last_lex) { 641: case LX_AT: 642: return domain (buffer); 643: 644: case LX_RBRK: /* no host */ 645: case LX_COMA: 646: case LX_END: 647: return OK; 648: 649: default: 650: (void) sprintf (err, "no at-sign after local-part (%s)", buffer); 651: return NOTOK; 652: } 653: } 654: 655: /* */ 656: 657: static int local_part (buffer) 658: register char *buffer; 659: { 660: ingrp = glevel; 661: 662: for (;;) { 663: switch (my_lex (buffer)) { 664: case LX_ATOM: 665: case LX_QSTR: 666: mbox = add (buffer, mbox); 667: break; 668: 669: default: 670: (void) sprintf (err, "no mailbox in local-part (%s)", buffer); 671: return NOTOK; 672: } 673: 674: switch (my_lex (buffer)) { 675: case LX_DOT: 676: mbox = add (buffer, mbox); 677: continue; 678: 679: default: 680: return OK; 681: } 682: } 683: } 684: 685: /* */ 686: 687: static int domain (buffer) 688: register char *buffer; 689: { 690: for (;;) { 691: switch (my_lex (buffer)) { 692: case LX_ATOM: 693: case LX_DLIT: 694: host = add (buffer, host); 695: break; 696: 697: default: 698: (void) sprintf (err, 699: "no sub-domain in domain-part of address (%s)", 700: buffer); 701: return NOTOK; 702: } 703: 704: switch (my_lex (buffer)) { 705: case LX_DOT: 706: host = add (buffer, host); 707: continue; 708: 709: case LX_AT: /* sigh (0) */ 710: mbox = add (host, add ("%", mbox)); 711: free (host); 712: host = NULL; 713: continue; 714: 715: default: 716: return OK; 717: } 718: } 719: } 720: 721: /* */ 722: 723: static int route (buffer) 724: register char *buffer; 725: { 726: path = getcpy ("@"); 727: 728: for (;;) { 729: switch (my_lex (buffer)) { 730: case LX_ATOM: 731: case LX_DLIT: 732: path = add (buffer, path); 733: break; 734: 735: default: 736: (void) sprintf (err, 737: "no sub-domain in domain-part of address (%s)", 738: buffer); 739: return NOTOK; 740: } 741: switch (my_lex (buffer)) { 742: case LX_COMA: 743: path = add (buffer, path); 744: for (;;) { 745: switch (my_lex (buffer)) { 746: case LX_COMA: 747: continue; 748: 749: case LX_AT: 750: path = add (buffer, path); 751: break; 752: 753: default: 754: (void) sprintf (err, 755: "no at-sign found for next domain in route (%s)", 756: buffer); 757: } 758: break; 759: } 760: continue; 761: 762: case LX_AT: /* XXX */ 763: case LX_DOT: 764: path = add (buffer, path); 765: continue; 766: 767: case LX_COLN: 768: path = add (buffer, path); 769: return OK; 770: 771: default: 772: (void) sprintf (err, 773: "no colon found to terminate route (%s)", buffer); 774: return NOTOK; 775: } 776: } 777: } 778: 779: /* */ 780: 781: static int my_lex (buffer) 782: register char *buffer; 783: { 784: int i; 785: register char c, 786: *bp; 787: 788: bp = buffer; 789: *bp = NULL; 790: if (!cp) 791: return (last_lex = LX_END); 792: 793: c = *cp++; 794: while (isspace (c)) 795: c = *cp++; 796: if (c == NULL) { 797: cp = NULL; 798: return (last_lex = LX_END); 799: } 800: 801: if (c == '(') 802: for (*bp++ = c, i = 0;;) 803: switch (c = *cp++) { 804: case NULL: 805: cp = NULL; 806: return (last_lex = LX_ERR); 807: case QUOTE: 808: *bp++ = c; 809: if ((c = *cp++) == NULL) { 810: cp = NULL; 811: return (last_lex = LX_ERR); 812: } 813: *bp++ = c; 814: continue; 815: case '(': 816: i++; 817: default: 818: *bp++ = c; 819: continue; 820: case ')': 821: *bp++ = c; 822: if (--i < 0) { 823: *bp = NULL; 824: note = note ? add (buffer, add (" ", note)) 825: : getcpy (buffer); 826: return my_lex (buffer); 827: } 828: } 829: 830: /* */ 831: 832: if (c == '"') 833: for (*bp++ = c;;) 834: switch (c = *cp++) { 835: case NULL: 836: cp = NULL; 837: return (last_lex = LX_ERR); 838: case QUOTE: 839: *bp++ = c; 840: if ((c = *cp++) == NULL) { 841: cp = NULL; 842: return (last_lex = LX_ERR); 843: } 844: default: 845: *bp++ = c; 846: continue; 847: case '"': 848: *bp++ = c; 849: *bp = NULL; 850: return (last_lex = LX_QSTR); 851: } 852: 853: if (c == '[') 854: for (*bp++ = c;;) 855: switch (c = *cp++) { 856: case NULL: 857: cp = NULL; 858: return (last_lex = LX_ERR); 859: case QUOTE: 860: *bp++ = c; 861: if ((c = *cp++) == NULL) { 862: cp = NULL; 863: return (last_lex = LX_ERR); 864: } 865: default: 866: *bp++ = c; 867: continue; 868: case ']': 869: *bp++ = c; 870: *bp = NULL; 871: return (last_lex = LX_DLIT); 872: } 873: 874: /* */ 875: 876: *bp++ = c; 877: *bp = NULL; 878: for (i = 0; special[i].lx_chr != NULL; i++) 879: if (c == special[i].lx_chr) 880: return (last_lex = special[i].lx_val); 881: 882: if (iscntrl (c)) 883: return (last_lex = LX_ERR); 884: 885: for (;;) { 886: if ((c = *cp++) == NULL) 887: break; 888: for (i = 0; special[i].lx_chr != NULL; i++) 889: if (c == special[i].lx_chr) 890: goto got_atom; 891: if (iscntrl (c) || isspace (c)) 892: break; 893: *bp++ = c; 894: } 895: got_atom: ; 896: if (c == NULL) 897: cp = NULL; 898: else 899: cp--; 900: *bp = NULL; 901: last_lex = !lequal (buffer, "at") || cp == NULL || index (cp, '<') != NULL 902: ? LX_ATOM : LX_AT; 903: return last_lex; 904: } 905: 906: /* */ 907: 908: char *legal_person (p) 909: register char *p; 910: { 911: int i; 912: register char *cp; 913: static char buffer[BUFSIZ]; 914: 915: if (*p == '"') 916: return p; 917: for (cp = p; *cp; cp++) 918: for (i = 0; special[i].lx_chr; i++) 919: if (*cp == special[i].lx_chr) { 920: (void) sprintf (buffer, "\"%s\"", p); 921: return buffer; 922: } 923: 924: return p; 925: } 926: 927: /* */ 928: 929: int mfgets (in, bp) 930: register FILE *in; 931: register char **bp; 932: { 933: int i; 934: register char *cp, 935: *dp, 936: *ep; 937: static int len = 0; 938: static char *pp = NULL; 939: 940: if (pp == NULL) 941: if ((pp = malloc ((unsigned) (len = BUFSIZ))) == NULL) 942: return NOTOK; 943: 944: for (ep = (cp = pp) + len - 2;;) { 945: switch (i = getc (in)) { 946: case EOF: 947: eol: ; 948: if (cp != pp) { 949: *cp = NULL; 950: *bp = pp; 951: return OK; 952: } 953: eoh: ; 954: *bp = NULL; 955: free (pp); 956: pp = NULL; 957: return DONE; 958: 959: case NULL: 960: continue; 961: 962: case '\n': 963: if (cp == pp) /* end of headers, gobble it */ 964: goto eoh; 965: switch (i = getc (in)) { 966: default: /* end of line */ 967: case '\n': /* end of headers, save for next call */ 968: (void) ungetc (i, in); 969: goto eol; 970: 971: case ' ': /* continue headers */ 972: case '\t': 973: *cp++ = '\n'; 974: break; 975: } /* fall into default case */ 976: 977: default: 978: *cp++ = i; 979: break; 980: } 981: if (cp >= ep) 982: if ((dp = realloc (pp, (unsigned) (len += BUFSIZ))) == NULL) { 983: free (pp); 984: pp = NULL; 985: return NOTOK; 986: } 987: else 988: cp += dp - pp, ep = (pp = cp) + len - 2; 989: } 990: }