1: #ifndef lint 2: static char *sccsid = "@(#)arpatxt.c 9.1 87/10/04"; 3: #endif 4: 5: /* 6: * convert hosts.txt into pathalias format. 7: * 8: * alias rule: "host.dom.ain,nickname.arpa,..." -> host = nickname, ... 9: */ 10: 11: /* remove the next line for standard or research unix */ 12: #define BSD 13: 14: #ifdef BSD 15: #define strchr index 16: #endif /* BSD */ 17: 18: #include <stdio.h> 19: #include <ctype.h> 20: 21: typedef struct node node; 22: 23: struct node { 24: node *child; /* subdomain or member host */ 25: node *parent; /* parent domain */ 26: node *next; /* sibling in domain tree or host list */ 27: char *name; 28: node *alias; 29: node *bucket; 30: node *gateway; 31: int flag; 32: }; 33: 34: node *Top; 35: int Atflag; 36: int Fflag, Iflag; 37: char *DotArpa = ".ARPA"; 38: char Fname[256], *Fstart; 39: 40: node *newnode(), *find(); 41: char *strsave(), *lowercase(); 42: void crcinit(); 43: long fold(); 44: FILE *mkfile(); 45: 46: extern char *malloc(), *strchr(), *calloc(), *gets(), *strcpy(), *fgets(); 47: extern FILE *fopen(); 48: 49: #define ISADOMAIN(n) ((n) && *((n)->name) == '.') 50: 51: /* for node.flag */ 52: #define COLLISION 1 53: 54: /* for formatprint() */ 55: #define PRIVATE 0 56: #define HOSTS 1 57: #define SUBDOMAINS 2 58: 59: /* for usage() */ 60: #define USAGE "usage: %s [-i@] [-g gateway] [-p privatefile] [-f | -d directory] [file ...]\n" 61: 62: main(argc, argv) 63: char **argv; 64: { int c; 65: char *progname; 66: extern char *optarg; 67: extern int optind; 68: 69: if ((progname = strchr(argv[0], '/')) != 0) 70: progname++; 71: else 72: progname = argv[0]; 73: crcinit(); 74: 75: Top = newnode(); /* needed for adding gateways */ 76: while ((c = getopt(argc, argv, "d:fg:ip:@")) != EOF) 77: switch(c) { 78: case 'd': 79: strcpy(Fname, optarg); 80: break; 81: case 'f': /* filter mode -- write on stdout */ 82: Fflag++; 83: break; 84: case 'g': 85: gateway(optarg); 86: break; 87: case 'i': 88: Iflag++; 89: break; 90: case 'p': 91: readprivates(optarg); 92: break; 93: case '@': 94: Atflag++; 95: break; 96: default: 97: usage(progname); 98: } 99: 100: if (Fflag && *Fname) 101: usage(progname); 102: if (Iflag) 103: (void) lowercase(DotArpa); 104: if (Top->gateway == 0) 105: fprintf(stderr, "%s: warning: no gateway to top level domains\n", progname); 106: 107: Fstart = Fname + strlen(Fname); 108: if (Fstart != Fname) { 109: *Fstart++ = '/'; 110: *Fstart = 0; 111: } 112: /* should do the mkdir here instead of the shell script ...*/ 113: 114: Top->name = "internet"; 115: if (optind == argc) 116: scan(); 117: else for ( ; optind < argc; optind++) { 118: if (freopen(argv[optind], "r", stdin) == 0) { 119: perror(argv[optind]); 120: continue; 121: } 122: scan(); 123: } 124: merge(); 125: dump(Top); 126: return 0; 127: } 128: 129: scan() 130: { static first; 131: char buf0[BUFSIZ], buf1[BUFSIZ], buf2[BUFSIZ]; 132: 133: if (first++ == 0) 134: (void) re_comp("^HOST.*SMTP"); 135: while (gets(buf0) != 0) { 136: if (re_exec(buf0) == 0) 137: continue; 138: if (sscanf(buf0, "HOST : %[^:] : %[^: ]", buf1, buf2) != 2) 139: continue; 140: if (Iflag) 141: (void) lowercase(buf2); 142: insert(buf2); 143: } 144: } 145: /* 146: * format of private file: 147: * one per line, optionally followed by white space and comments 148: * line starting with # is comment 149: */ 150: readprivates(pfile) 151: char *pfile; 152: { FILE *f; 153: node *n; 154: char buf[BUFSIZ], *bptr; 155: 156: if ((f = fopen(pfile, "r")) == 0) { 157: perror(pfile); 158: return; 159: } 160: while (fgets(buf, BUFSIZ, f) != 0) { 161: if (*buf == '#') 162: continue; 163: if ((bptr = strchr(buf, ' ')) != 0) 164: *bptr = 0; 165: if ((bptr = strchr(buf, '\t')) != 0) 166: *bptr = 0; 167: if (*buf == 0) 168: continue; 169: n = newnode(); 170: n->name = strsave(buf); 171: hash(n); 172: } 173: (void) fclose(f); 174: } 175: usage(progname) 176: char *progname; 177: { 178: fprintf(stderr, USAGE, progname); 179: exit(1); 180: } 181: dumpgateways(ndom, f) 182: node *ndom; 183: FILE *f; 184: { node *n; 185: 186: for (n = ndom->gateway; n; n = n->next) { 187: fprintf(f, "%s ", n->name); 188: if (Atflag) 189: putc('@', f); 190: fprintf(f, "%s(%s)\t# gateway\n", ndom->name, 191: ndom == Top ? "DEDICATED" : "LOCAL"); 192: } 193: } 194: 195: gateway(buf) 196: char *buf; 197: { node *n, *dom; 198: char *dot; 199: 200: dot = strchr(buf, '.'); 201: if (dot) { 202: dom = find(dot); 203: *dot = 0; 204: } else 205: dom = Top; 206: 207: n = newnode(); 208: n->name = strsave(buf); 209: n->next = dom->gateway; 210: dom->gateway = n; 211: } 212: 213: insert(buf) 214: char *buf; 215: { char host[BUFSIZ], *hptr, *dot; 216: node *n; 217: 218: for (hptr = host; *hptr = *buf++; hptr++) 219: if (*hptr == ',') 220: break; 221: 222: if (*hptr == ',') 223: *hptr = 0; 224: else 225: buf = 0; /* no aliases */ 226: 227: if ((dot = strchr(host, '.')) == 0) 228: abort(); /* can't happen */ 229: 230: if (strcmp(dot, DotArpa) == 0) 231: buf = 0; /* no aliases */ 232: 233: n = find(dot); 234: *dot = 0; 235: 236: addchild(n, host, buf); 237: } 238: 239: node * 240: find(domain) 241: char *domain; 242: { char *dot; 243: node *parent, *child; 244: 245: if (domain == 0) 246: return Top; 247: if ((dot = strchr(domain+1, '.')) != 0) { 248: parent = find(dot); 249: *dot = 0; 250: } else 251: parent = Top; 252: 253: for (child = parent->child; child; child = child->next) 254: if (strcmp(domain, child->name) == 0) 255: break; 256: if (child == 0) { 257: child = newnode(); 258: child->next = parent->child; 259: parent->child = child; 260: child->parent = parent; 261: child->name = strsave(domain); 262: } 263: return child; 264: } 265: 266: node * 267: newnode() 268: { 269: node *n; 270: 271: if ((n = (node *) calloc(1, sizeof(node))) == 0) 272: abort(); 273: return n; 274: } 275: 276: char * 277: strsave(buf) 278: char *buf; 279: { char *mstr; 280: 281: if ((mstr = malloc(strlen(buf)+1)) == 0) 282: abort(); 283: strcpy(mstr, buf); 284: return mstr; 285: } 286: 287: addchild(n, host, aliases) 288: node *n; 289: char *host, *aliases; 290: { node *child; 291: 292: /* check for dups? nah! */ 293: child = newnode(); 294: child->name = strsave(host); 295: child->parent = n; 296: child->next = n->child; 297: makealiases(child, aliases); 298: n->child = child; 299: } 300: 301: /* yer basic tree walk */ 302: dump(n) 303: node *n; 304: { node *child; 305: FILE *f; 306: int hadprivates = 0; 307: 308: if (n->child == 0) 309: return; 310: 311: f = mkfile(n); 312: 313: if (n != Top && ! ISADOMAIN(n)) 314: abort(); 315: 316: hadprivates = domprint(n, f); 317: dumpgateways(n, f); 318: if (hadprivates || n == Top) 319: fputs("private {}\n", f); /* end scope of privates */ 320: if (!Fflag) 321: (void) fclose(f); 322: else 323: putc('\n', f); 324: for (child = n->child; child; child = child->next) 325: dump(child); 326: } 327: 328: qcmp(a, b) 329: node **a, **b; 330: { 331: return strcmp((*a)->name, (*b)->name); 332: } 333: 334: domprint(n, f) 335: node *n; 336: FILE *f; 337: { node *table[10240], *child, *alias; 338: char *cost = 0; 339: int nelem, i, rval = 0; 340: 341: /* dump private definitions */ 342: /* sort hosts and aliases in table */ 343: i = 0; 344: for (child = n->child; child; child = child->next) { 345: table[i++] = child; 346: for (alias = child->alias; alias; alias = alias->next) 347: table[i++] = alias; 348: } 349: 350: qsort((char *) table, i, sizeof(table[0]), qcmp); 351: formatprint(f, table, i, PRIVATE, "private", cost); 352: 353: /* rval == 0 IFF no privates */ 354: while (i-- > 0) 355: if (table[i]->flag & COLLISION) { 356: rval = 1; 357: break; 358: } 359: 360: /* dump domains and aliases */ 361: /* sort hosts (only) in table */ 362: i = 0; 363: for (child = n->child; child; child = child->next) 364: table[i++] = child; 365: qsort((char *) table, i, sizeof(table[0]), qcmp); 366: 367: /* cost is DEDICATED for hosts in top-level domains, LOCAL o.w. */ 368: if (n->parent == Top && strchr(n->name + 1, '.') == 0) 369: cost = "DEDICATED"; 370: else 371: cost = "LOCAL"; 372: formatprint(f, table, i, HOSTS, n->name, cost); 373: 374: formatprint(f, table, i, SUBDOMAINS, n->name, "0"); 375: 376: /* dump aliases */ 377: nelem = i; 378: for (i = 0; i < nelem; i++) { 379: if ((alias = table[i]->alias) == 0) 380: continue; 381: fprintf(f, "%s = %s", table[i]->name, alias->name); 382: for (alias = alias->next; alias; alias = alias->next) 383: fprintf(f, ", %s", alias->name); 384: putc('\n', f); 385: } 386: 387: return rval; 388: } 389: 390: /* for debugging */ 391: dtable(comment, table, nelem) 392: char *comment; 393: node **table; 394: { int i; 395: 396: fprintf(stderr, "\n%s\n", comment); 397: for (i = 0; i < nelem; i++) 398: fprintf(stderr, "%3d\t%s\n", i, table[i]->name); 399: } 400: 401: formatprint(f, table, nelem, type, lhs, cost) 402: FILE *f; 403: node **table; 404: char *lhs, *cost; 405: { int i, didprint; 406: char buf[128], *bptr; 407: 408: sprintf(buf, "%s%s{" /*}*/, lhs, type == PRIVATE ? " " : " = "); 409: bptr = buf + strlen(buf); 410: 411: didprint = 0; 412: for (i = 0; i < nelem; i++) { 413: if (type == PRIVATE && ! (table[i]->flag & COLLISION)) 414: continue; 415: else if (type == HOSTS && ISADOMAIN(table[i]) ) 416: continue; 417: else if (type == SUBDOMAINS && ! ISADOMAIN(table[i]) ) 418: continue; 419: 420: if ((bptr - buf) + strlen(table[i]->name) > 69) { 421: *bptr = 0; 422: fprintf(f, "%s\n ", buf); /* continuation */ 423: bptr = buf; 424: } 425: sprintf(bptr, "%s, ", table[i]->name); 426: bptr += strlen(bptr); 427: didprint++; 428: } 429: *bptr = 0; 430: 431: if ( ! didprint ) 432: return; 433: 434: fprintf(f, /*{*/ "%s}", buf); 435: if (type != PRIVATE) 436: fprintf(f, "(%s)", cost); 437: putc('\n', f); 438: } 439: 440: FILE * 441: mkfile(n) 442: node *n; 443: { node *parent; 444: char *bptr; 445: FILE *f; 446: 447: /* build up the domain name in Fname[] */ 448: bptr = Fstart; 449: if (n == Top) 450: strcpy(bptr, n->name); 451: else { 452: strcpy(bptr, n->name + 1); /* skip leading dot */ 453: bptr = bptr + strlen(bptr); 454: parent = n->parent; 455: while (ISADOMAIN(parent)) { 456: strcpy(bptr, parent->name); 457: bptr += strlen(bptr); 458: parent = parent->parent; 459: } 460: *bptr = 0; 461: } 462: 463: /* get a stream descriptor */ 464: if (Fflag) { 465: printf("# %s\n", Fstart); 466: f = stdout; 467: } else { 468: #ifndef BSD 469: Fstart[14] = 0; 470: #endif 471: if ((f = fopen(Fname, "w")) == 0) { 472: perror(Fname); 473: exit(1); 474: } 475: } 476: if (n == Top) 477: fprintf(f, "private {%s}\ndead {%s}\n", Top->name, Top->name); 478: return f; 479: } 480: 481: /* map to lower case in place. return parameter for convenience */ 482: char * 483: lowercase(buf) 484: char *buf; 485: { char *str; 486: 487: for (str = buf ; *str; str++) 488: if (isupper(*str)) 489: *str -= 'A' - 'a'; 490: return buf; 491: } 492: 493: /* get the interesting aliases, attach to n->alias */ 494: makealiases(n, line) 495: node *n; 496: char *line; 497: { char *next, *dot; 498: node *a; 499: 500: if (line == 0 || *line == 0) 501: return; 502: 503: for ( ; line; line = next) { 504: next = strchr(line, ','); 505: if (next) 506: *next++ = 0; 507: if ((dot = strchr(line, '.')) == 0) 508: continue; 509: if (strcmp(dot, DotArpa) != 0) 510: continue; 511: *dot = 0; 512: 513: if (strcmp(n->name, line) == 0) 514: continue; 515: 516: a = newnode(); 517: a->name = strsave(line); 518: a->next = n->alias; 519: n->alias = a; 520: } 521: } 522: 523: #define NHASH 13309 524: node *htable[NHASH]; 525: 526: merge() 527: { node *n; 528: 529: setuniqflag(Top); 530: for (n = Top->child; n; n = n->next) { 531: if (n->flag & COLLISION) { 532: fprintf(stderr, "illegal subdomain: %s\n", n->name); 533: abort(); 534: } 535: promote(n); 536: } 537: } 538: 539: /* 540: * for domains like cc.umich.edu and cc.berkeley.edu, it's inaccurate 541: * to describe cc as a member of umich.edu or berkeley.edu. (i.e., the 542: * lousy scoping rules for privates don't permit a clean syntax.) so. 543: * 544: * to prevent confusion, tack on to any such domain name its parent domain 545: * and promote it in the tree. e.g., make cc.umich and cc.berkeley 546: * subdomains of edu. 547: */ 548: 549: promote(parent) 550: node *parent; 551: { node *prev, *child, *next; 552: char buf[BUFSIZ]; 553: 554: prev = 0; 555: for (child = parent->child; child; child = next) { 556: next = child->next; 557: if (!ISADOMAIN(child)) { 558: prev = child; 559: continue; 560: } 561: if ((child->flag & COLLISION) || child->gateway) { 562: /* 563: * dup domain name or gateway found. don't bump 564: * prev: this node is moving up the tree. 565: */ 566: 567: /* qualify child domain name */ 568: sprintf(buf, "%s%s", child->name, parent->name); 569: cfree(child->name); 570: child->name = strsave(buf); 571: 572: /* unlink child out of sibling chain */ 573: if (prev) 574: prev->next = child->next; 575: else 576: parent->child = child->next; 577: 578: /* link child in as peer of parent */ 579: child->next = parent->next; 580: parent->next = child; 581: child->parent = parent->parent; 582: 583: /* 584: * reset collision flag; may promote again on 585: * return to caller. 586: */ 587: child->flag &= ~COLLISION; 588: hash(child); 589: } else { 590: /* this domain is uninteresting (so bump prev) */ 591: promote(child); 592: prev = child; 593: } 594: } 595: 596: } 597: 598: setuniqflag(n) 599: node *n; 600: { node *child, *alias; 601: 602: /* mark this node in the hash table */ 603: hash(n); 604: /* mark the aliases of this node */ 605: for (alias = n->alias; alias; alias = alias->next) 606: hash(alias); 607: /* recursively mark this node's children */ 608: for (child = n->child; child; child = child->next) 609: setuniqflag(child); 610: } 611: 612: hash(n) 613: node *n; 614: { node **bucket, *b; 615: 616: bucket = &htable[fold(n->name) % NHASH]; 617: for (b = *bucket; b; b = b->bucket) { 618: if (strcmp(n->name, b->name) == 0) { 619: b->flag |= COLLISION; 620: n->flag |= COLLISION; 621: return; 622: } 623: } 624: n->bucket = *bucket; 625: *bucket = n; 626: } 627: 628: /* stolen from pathalias:addnode.c, q.v. */ 629: #define POLY 0x48000000 /* 31-bit polynomial */ 630: long CrcTable[128]; 631: 632: void 633: crcinit() 634: { register int i,j; 635: register long sum; 636: 637: for (i = 0; i < 128; i++) { 638: sum = 0; 639: for (j = 7-1; j >= 0; --j) 640: if (i & (1 << j)) 641: sum ^= POLY >> j; 642: CrcTable[i] = sum; 643: } 644: } 645: 646: long 647: fold(s) 648: register char *s; 649: { register long sum = 0; 650: register int c; 651: 652: while (c = *s++) 653: sum = (sum >> 7) ^ CrcTable[(sum ^ c) & 0x7f]; 654: return sum; 655: }