#ifndef lint
static char *sccsid = "@(#)inv1.c 4.2 (Berkeley) 3/4/86";
#endif

#include <stdio.h>
#include <assert.h>

/*
 * Make inverted file indexes.  Reads a stream from mkey which
 * gives record pointer items and keys.  Generates a set of files
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * These files are named xxx.ia, xxx.ib, xxx.ic, where xxx is taken
 * from the first non-option argument ("Index" if there is none).
 * If the files exist they are updated.
 *
 * Options (each must be a separate argument beginning with '-'):
 *	-hN	use N as the hash table size (default 256)
 *	-n	new index, don't append
 *	-a	append to old index (default)
 *	-v	verbose: print key/hash/doc counts at the end
 *	-d	keep keys on file xxx.id for check on searching
 *	-p	pipe keys into sort (saves space, costs time)
 *	-i f	read input from file f instead of stdin
 *	-iu f	as -i, and unlink f once it has been read
 * Unrecognised options are silently ignored.
 *
 * err(), recopy(), newkeys() and whash() are defined elsewhere in the
 * program.  NOTE(review): err() is presumably fatal (does not return);
 * confirm against its definition.
 */
main(argc, argv)
int argc;
char *argv[];
{
	FILE *fa, *fb, *fc, *fta, *ftb, *fd;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *base;		/* stem shared by all the index file names */
	char *rmfile = NULL;	/* input file to unlink after reading (-iu) */
	int chatty = 0, docs, hashes, fp[2], fr, fw, newfd;
	int pfork, pwait, status;
	long keys;
	int iflong = 0;
	char *sortdir;

	sortdir = (access("/crp/tmp", 06) == 0) ? "/crp/tmp" : "/usr/tmp";

	/* argc > 1 keeps us from dereferencing the NULL that ends argv. */
	while (argc > 1 && argv[1][0] == '-')
	{
		switch (argv[1][1])
		{
		case 'h':	/* size of hash table */
			nhash = atoi(argv[1] + 2);
			break;
		case 'n':	/* new, don't append */
			appflg = 0;
			break;
		case 'a':	/* append to old file */
			appflg = 1;
			break;
		case 'v':	/* verbose output */
			chatty = 1;
			break;
		case 'd':	/* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p':	/* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i':	/* input is on file, not stdin */
			if (argc < 3)
			{
				/* no operand: don't touch argv[2] or skip past the end */
				err("No input file named after %s", argv[1]);
				break;
			}
			/* closing fd 0 first makes open() hand back fd 0 on success */
			close(0);
			if (open(argv[2], 0) != 0)
				err("Can't read input %s", argv[2]);
			if (argv[1][2] == 'u')	/* unlink */
				rmfile = argv[2];
			argc--;		/* consume the file name operand */
			argv++;
			break;
		default:	/* unknown options are ignored */
			break;
		}
		argc--;
		argv++;
	}

	base = argc >= 2 ? argv[1] : "Index";
	/* each name buffer must hold the stem, a 3-character suffix and a NUL */
	if (strlen(base) > sizeof(nma) - 4)
	{
		err("Index name %s is too long", base);
		exit(1);	/* in case err() returns */
	}
	strcpy(nma, base);
	strcpy(nmb, base);
	strcpy(nmc, base);
	strcpy(nmd, base);
	strcat(nma, ".ia");
	strcat(nmb, ".ib");
	strcat(nmc, ".ic");
	strcat(nmd, ".id");

	sprintf(tmpa, "junk%di", getpid());
	if (pipein)
	{
		/*
		 * Calls with side effects are made outside _assert() so they
		 * still happen if the assertions are compiled out.
		 */
		status = pipe(fp);
		_assert(status == 0);
		fr = fp[0];
		fw = fp[1];
		if ((pfork = fork()) == 0)
		{
			/* child: make the read end of the pipe stdin, then become sort */
			close(fw);
			close(0);
			newfd = dup(fr);
			_assert(newfd == 0);
			close(fr);
			execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			_assert(0);
			_exit(1);	/* never fall through into the parent's code */
		}
		_assert(pfork != -1);
		close(fr);
		/* wrap the write end of the pipe in a stdio stream */
		fta = fdopen(fw, "w");
		_assert(fta != NULL);
	}
	else	/* use tmp file */
	{
		fta = fopen(tmpa, "w");
		_assert(fta != NULL);
	}

	fb = NULL;
	if (appflg)
	{
		fb = fopen(nmb, "r");
		if (fb != NULL)
		{
			/* an old index exists: recopy() it into scratch file tmpb */
			sprintf(tmpb, "junk%dj", getpid());
			ftb = fopen(tmpb, "w");
			if (ftb == NULL)
				err("Can't get scratch file %s", tmpb);
			nhash = recopy(ftb, fb, fopen(nma, "r"));
			fclose(ftb);
		}
		else
			appflg = 0;	/* nothing to append to; build from scratch */
	}

	fc = fopen(nmc, appflg ? "a" : "w");
	if (fc == NULL)
		err("Can't write %s", nmc);
	fd = keepkey ? fopen(nmd, "w") : 0;
	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (rmfile != NULL)
		unlink(rmfile);
	fclose(fta);

	if (pipein)
	{
		/* closing fta gave sort its EOF; wait for it to finish */
		pwait = wait(&status);
		printf("pfork %o pwait %o status %d\n", pfork, pwait, status);
		_assert(pwait == pfork);
		_assert(status == 0);
	}
	else
	{
		/* sort the temp file in place */
		sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
		system(com);
	}

	if (appflg)
	{
		/* merge the recopied old keys (tmpb) with the new ones into tmpa */
		sprintf(tmpc, "junk%dk", getpid());
		sprintf(com, "mv %s %s", tmpa, tmpc);
		system(com);
		sprintf(com, "sort -T %s -m %s %s -o %s", sortdir,
		    tmpb, tmpc, tmpa);
		system(com);
	}

	fta = fopen(tmpa, "r");
	if (fta == NULL)
		err("Can't read sorted keys %s", tmpa);
	fa = fopen(nma, "w");
	if (fa == NULL)
		err("Can't write %s", nma);
	fb = fopen(nmb, "w");
	if (fb == NULL)
		err("Can't write %s", nmb);
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
# ifndef D1
	unlink(tmpa);
# endif
	if (appflg)
	{
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf("%ld key occurrences, %d hashes, %d docs\n",
		    keys, hashes, docs);

	/* exit() flushes the remaining streams and gives a defined status */
	exit(0);
}