/* fdopen() is POSIX rather than ISO C; ask for it before any system header. */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/wait.h>

/*
 * Helpers defined outside this block.  The original code relied on implicit
 * declarations, i.e. exactly these unprototyped functions returning int.
 */
extern int err();
extern int recopy();
extern int newkeys();
extern int whash();

/*
 * Report a fatal problem through err() and guarantee that control never
 * returns to the caller, whether or not err() itself terminates the program.
 */
static void
fail(const char *fmt, const char *arg)
{
	err(fmt, arg);
	exit(1);
}

/* fopen() wrapper that never returns NULL: failure is reported with msg. */
static FILE *
open_or_fail(const char *name, const char *mode, const char *msg)
{
	FILE *stream = fopen(name, mode);

	if (stream == NULL)
		fail(msg, name);
	return stream;
}

/* Run a shell command; a non-zero result from system() is fatal. */
static void
run(const char *cmd)
{
	if (system(cmd) != 0)
		fail("Command failed: %s", cmd);
}

/*
 * Make inverted file indexes.  Reads a stream from mkey which gives record
 * pointer items and keys, and generates a set of files:
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 * The files are named xxx.ia, xxx.ib, xxx.ic, where xxx is taken from the
 * first non-option argument ("Index" when there is none).  If the files
 * exist they are updated.
 *
 * Options (each must precede the index name):
 *	-hN	size of hash table
 *	-n	new, don't append
 *	-a	append to old file (the default)
 *	-v	verbose output
 *	-d	keep keys on file xxx.id for check on searching
 *	-p	pipe into sort (saves space, costs time)
 *	-i F	input is on file F, not stdin; -iu also unlinks F afterwards
 * Unrecognised options are ignored.
 *
 * Returns 0 on success; every failure goes through err() and exits with 1.
 */
int
main(int argc, char *argv[])
{
	FILE *fa, *fb, *fc, *fd, *fta, *ftb, *olda;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], nmd[100], com[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *rmfile = NULL;	/* input file to unlink once it has been read */
	int chatty = 0, docs, hashes = 0, fp[2], fr, fw, pfork, pwait, status;
	long keys = 0;
	int iflong = 0;
	const char *sortdir;
	const char *base;

	sortdir = (access("/crp/tmp", 06) == 0) ? "/crp/tmp" : "/usr/tmp";

	/* argc is tested first: argv[1] is NULL when no arguments are given. */
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {
		case 'h':	/* size of hash table */
			nhash = atoi(argv[1] + 2);
			if (nhash <= 0)
				fail("Bad hash table size %s", argv[1] + 2);
			break;
		case 'n':	/* new, don't append */
			appflg = 0;
			break;
		case 'a':	/* append to old file */
			appflg = 1;
			break;
		case 'v':	/* verbose output */
			chatty = 1;
			break;
		case 'd':	/* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p':	/* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i':	/* input is on file, not stdin */
			if (argc < 3)
				fail("Option %s needs a file name", argv[1]);
			/* Closing fd 0 first makes open() hand back fd 0 again. */
			close(0);
			if (open(argv[2], O_RDONLY) != 0)
				fail("Can't read input %s", argv[2]);
			if (argv[1][2] == 'u')	/* unlink */
				rmfile = argv[2];
			argc--;		/* skip the file name operand */
			argv++;
			break;
		default:	/* unknown options are ignored */
			break;
		}
		argc--;
		argv++;
	}

	/* All four names share one buffer size, so one length check suffices. */
	base = argc >= 2 ? argv[1] : "Index";
	if (strlen(base) + sizeof ".ia" > sizeof nma)
		fail("Index name too long: %s", base);
	snprintf(nma, sizeof nma, "%s.ia", base);
	snprintf(nmb, sizeof nmb, "%s.ib", base);
	snprintf(nmc, sizeof nmc, "%s.ic", base);
	snprintf(nmd, sizeof nmd, "%s.id", base);

	snprintf(tmpa, sizeof tmpa, "junk%di", (int)getpid());
	if (pipein) {
		if (pipe(fp) != 0)
			fail("Can't create pipe to %s", "sort");
		fr = fp[0];
		fw = fp[1];
		pfork = fork();
		if (pfork == 0) {
			/* Child: the read end of the pipe becomes sort's stdin. */
			close(fw);
			close(0);
			if (dup(fr) != 0)
				fail("Can't redirect input of %s", "sort");
			close(fr);
			execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa,
			    (char *)NULL);
			execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa,
			    (char *)NULL);
			fail("Can't exec %s", "sort");
		}
		if (pfork == -1)
			fail("Can't fork %s", "sort");
		close(fr);
		/* Parent writes keys straight into the pipe. */
		fta = fdopen(fw, "w");
		if (fta == NULL)
			fail("Can't open pipe to %s", "sort");
	} else {
		/* use tmp file */
		fta = open_or_fail(tmpa, "w", "Can't get scratch file %s");
	}

	if (appflg) {
		fb = fopen(nmb, "r");
		if (fb != NULL) {
			snprintf(tmpb, sizeof tmpb, "junk%dj", (int)getpid());
			ftb = open_or_fail(tmpb, "w", "Can't get scratch file %s");
			olda = open_or_fail(nma, "r", "Can't read old index %s");
			/*
			 * NOTE(review): fb and olda are not closed here, as in
			 * the original; presumably recopy() closes them -
			 * confirm.
			 */
			nhash = recopy(ftb, fb, olda);
			fclose(ftb);
		} else {
			/* No old index to extend: build a fresh one. */
			appflg = 0;
		}
	}

	fc = open_or_fail(nmc, appflg ? "a" : "w", "Can't write %s");
	fd = keepkey ? open_or_fail(nmd, "w", "Can't write %s") : NULL;
	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (rmfile != NULL)
		unlink(rmfile);
	/* In pipe mode this close is what lets sort see end of input. */
	if (fclose(fta) != 0)
		fail("Error writing keys for %s", tmpa);

	if (pipein) {
		pwait = wait(&status);
		printf("pfork %o pwait %o status %d\n", pfork, pwait, status);
		if (pwait != pfork || status != 0)
			fail("%s failed", "sort");
	} else {
		snprintf(com, sizeof com, "sort -T %s %s -o %s",
		    sortdir, tmpa, tmpa);
		run(com);
	}

	if (appflg) {
		/* Merge the recopied old keys (tmpb) with the new ones. */
		snprintf(tmpc, sizeof tmpc, "junk%dk", (int)getpid());
		snprintf(com, sizeof com, "mv %s %s", tmpa, tmpc);
		run(com);
		snprintf(com, sizeof com, "sort -T %s -m %s %s -o %s",
		    sortdir, tmpb, tmpc, tmpa);
		run(com);
	}

	fta = open_or_fail(tmpa, "r", "Can't read sorted keys %s");
	fa = open_or_fail(nma, "w", "Can't write %s");
	fb = open_or_fail(nmb, "w", "Can't write %s");
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
#ifndef D1
	unlink(tmpa);
#endif
	if (appflg) {
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf("%ld key occurrences, %d hashes, %d docs\n",
		    keys, hashes, docs);
	return 0;
}