#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L	/* expose fdopen() under strict ISO modes */
#endif

#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Routines defined elsewhere in this program.  Their definitions are not
 * visible from this file, so they are declared without prototypes (which
 * matches the implicit declarations the code has always relied on).
 * NOTE(review): replace with real prototypes once the definitions are
 * confirmed.
 */
int err();
int recopy();
int newkeys();
int whash();

/*
 * Report an unrecoverable problem through err() and make sure the program
 * stops.  err() presumably exits by itself (TODO confirm); the explicit
 * exit() guards against it returning.
 */
static void
fatal(const char *fmt, const char *arg)
{
	err(fmt, arg);
	exit(1);
}

/* Run a shell command and treat a non-zero result as fatal. */
static void
run_command(const char *cmd)
{
	if (system(cmd) != 0)
		fatal("Command failed: %s", cmd);
}

/*
 * Start "sort -T sortdir -o outname" as a child process reading from a pipe
 * and return a stream that writes into that pipe.  The child's pid is
 * stored in *child.
 */
static FILE *
open_sort_pipe(const char *sortdir, const char *outname, int *child)
{
	int fp[2];
	FILE *out;

	if (pipe(fp) != 0)
		fatal("Can't create pipe to %s", "sort");

	*child = fork();
	if (*child == 0) {
		/* Child: the read end of the pipe becomes standard input. */
		close(fp[1]);
		if (fp[0] != 0) {
			if (dup2(fp[0], 0) != 0)
				_exit(127);
			close(fp[0]);
		}
		execl("/bin/sort", "sort", "-T", sortdir, "-o", outname,
		    (char *)NULL);
		execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", outname,
		    (char *)NULL);
		_exit(127);	/* neither sort could be executed */
	}
	if (*child == -1)
		fatal("Can't fork %s", "sort");

	close(fp[0]);
	out = fdopen(fp[1], "w");
	if (out == NULL)
		fatal("Can't open pipe to %s", "sort");
	return out;
}

/*
 * Make inverted file indexes.  Reads a stream from mkey which gives record
 * pointer items and keys.  Generates a set of files
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * These files are named xxx.ia, xxx.ib, xxx.ic, where xxx is taken from
 * the first non-option argument ("Index" when there is none).  With -d the
 * keys are also kept in xxx.id.  If the files exist they are updated.
 *
 * Options:
 *	-hN	size of hash table
 *	-n	new index, don't append
 *	-a	append to old files (default)
 *	-v	verbose output
 *	-d	keep keys on file .id for check on searching
 *	-p	pipe into sort (saves space, costs time)
 *	-i F	input is on file F, not stdin; -iu F also unlinks F afterwards
 * Unrecognised options are ignored.
 *
 * Returns 0 on completion; failures are reported through err().
 */
int
main(int argc, char *argv[])
{
	FILE *fa, *fb, *fc, *fta, *ftb, *fd;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], nmd[100], com[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *unlink_input = NULL;
	const char *base;
	const char *sortdir;
	int chatty = 0, docs, hashes = 0, pfork = -1, pwait, status;
	long keys = 0;
	int iflong = 0;
	int pid = (int)getpid();

	/* Prefer /crp/tmp for sort's scratch space when it is usable. */
	sortdir = (access("/crp/tmp", R_OK | W_OK) == 0) ? "/crp/tmp" : "/usr/tmp";

	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {
		case 'h':	/* size of hash table */
			nhash = atoi(argv[1] + 2);
			if (nhash <= 0)
				fatal("Bad hash table size %s", argv[1] + 2);
			break;
		case 'n':	/* new, don't append */
			appflg = 0;
			break;
		case 'a':	/* append to old file */
			appflg = 1;
			break;
		case 'v':	/* verbose output */
			chatty = 1;
			break;
		case 'd':	/* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p':	/* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i':	/* input is on file, not stdin */
			if (argc < 3) {
				fatal("Missing input file after %s", argv[1]);
				break;
			}
			/* Re-open descriptor 0 so stdin reads the named file. */
			close(0);
			if (open(argv[2], O_RDONLY) != 0)
				fatal("Can't read input %s", argv[2]);
			if (argv[1][2] == 'u')	/* unlink */
				unlink_input = argv[2];
			argc--;		/* the file name is consumed too */
			argv++;
			break;
		default:	/* unknown options are silently ignored */
			break;
		}
		argc--;
		argv++;
	}

	/* Build the four index file names from the base name. */
	base = argc >= 2 ? argv[1] : "Index";
	if (strlen(base) > sizeof nma - sizeof ".ia")
		fatal("Index name too long: %s", base);
	snprintf(nma, sizeof nma, "%s.ia", base);
	snprintf(nmb, sizeof nmb, "%s.ib", base);
	snprintf(nmc, sizeof nmc, "%s.ic", base);
	snprintf(nmd, sizeof nmd, "%s.id", base);

	/* New keys go either straight into a sort process or to a scratch file. */
	snprintf(tmpa, sizeof tmpa, "junk%di", pid);
	if (pipein) {
		fta = open_sort_pipe(sortdir, tmpa, &pfork);
	} else {
		fta = fopen(tmpa, "w");
		if (fta == NULL)
			fatal("Can't get scratch file %s", tmpa);
	}

	/* When appending, dump the existing index into a scratch file first. */
	if (appflg) {
		fb = fopen(nmb, "r");
		if (fb != NULL) {
			snprintf(tmpb, sizeof tmpb, "junk%dj", pid);
			ftb = fopen(tmpb, "w");
			if (ftb == NULL)
				fatal("Can't get scratch file %s", tmpb);
			/*
			 * NOTE(review): fb and the .ia stream are handed to
			 * recopy() and never touched again here; confirm that
			 * recopy() checks and closes them.
			 */
			nhash = recopy(ftb, fb, fopen(nma, "r"));
			fclose(ftb);
		} else {
			appflg = 0;	/* nothing to append to */
		}
	}

	fc = fopen(nmc, appflg ? "a" : "w");
	if (fc == NULL)
		fatal("Can't write %s", nmc);

	/* fd stays NULL unless the keys are to be kept. */
	fd = NULL;
	if (keepkey) {
		fd = fopen(nmd, "w");
		if (fd == NULL)
			fatal("Can't write %s", nmd);
	}

	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (unlink_input != NULL)
		unlink(unlink_input);
	/* Closing fta is what lets a piped sort see end of input. */
	if (fclose(fta) != 0)
		fatal("Error writing keys to %s", tmpa);

	/* Get the new keys sorted into tmpa. */
	if (pipein) {
		pwait = wait(&status);
		printf("pfork %o pwait %o status %d\n",
		    (unsigned)pfork, (unsigned)pwait, status);
		assert(pwait == pfork);	/* sort is the only child */
		if (status != 0)
			fatal("sort failed writing %s", tmpa);
	} else {
		snprintf(com, sizeof com, "sort -T %s %s -o %s",
		    sortdir, tmpa, tmpa);
		run_command(com);
	}

	/* Merge the old (already sorted) entries with the new ones. */
	if (appflg) {
		snprintf(tmpc, sizeof tmpc, "junk%dk", pid);
		if (rename(tmpa, tmpc) != 0)
			fatal("Can't rename scratch file %s", tmpa);
		snprintf(com, sizeof com, "sort -T %s -m %s %s -o %s",
		    sortdir, tmpb, tmpc, tmpa);
		run_command(com);
	}

	/* Write the hash table and record lists from the sorted keys. */
	fta = fopen(tmpa, "r");
	if (fta == NULL)
		fatal("Can't read sorted keys %s", tmpa);
	fa = fopen(nma, "w");
	if (fa == NULL)
		fatal("Can't write %s", nma);
	fb = fopen(nmb, "w");
	if (fb == NULL)
		fatal("Can't write %s", nmb);
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
#ifndef D1
	unlink(tmpa);
#endif
	if (appflg) {
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf("%ld key occurrences, %d hashes, %d docs\n",
		    keys, hashes, docs);
	/* fa, fb, fc and fd are flushed and closed by the normal exit path. */
	return 0;
}