1: /* @(#)spell.h 4.1 12/18/82 */ 2: 3: #include <sys/localopts.h> /* for computer type (NONSEPARATE?) */ 4: #include <stdio.h> 5: #include <ctype.h> 6: 7: #ifndef unix 8: #define SHIFT 5 9: #define TABSIZE (int)(400000/(1<<SHIFT)) 10: int *tab; /*honeywell loader deficiency*/ 11: #else 12: #define Tolower(c) (isupper(c)?tolower(c):c) /* ugh!!! */ 13: #define SHIFT 4 14: /* The following modifications cut hash table to 300,000 15: * bits if NONSEPARATE is defined. 16: * mjk 9/19/80 17: * Eight hashing functions are used. 18: * This will run on non-separate I/D machine. 19: */ 20: #ifdef NONSEPARATE 21: #define TABSIZE 18750 /*(int)(300000/(1<<SHIFT)) */ 22: #else 23: #define TABSIZE 25000 /*(int)(400000/(1<<SHIFT)) */ 24: #endif 25: short tab[TABSIZE]; 26: #endif 27: 28: #ifdef NONSEPARATE 29: long p[] = { 30: 299909, 31: 299933, 32: 299941, 33: 299951, 34: 299969, 35: 299977, 36: 299983, 37: 299993 38: }; 39: #else 40: long p[] = { 41: 399871, 42: 399887, 43: 399899, 44: 399911, 45: 399913, 46: 399937, 47: 399941, 48: 399953, 49: 399979, 50: 399983, 51: 399989, 52: }; 53: #endif 54: #define NP (sizeof(p)/sizeof(p[0])) 55: #define NW 30 56: 57: /* 58: * Hash table for spelling checker has n bits. 59: * Each word w is hashed by k different (modular) hash functions, hi. 60: * The bits hi(w), i=1..k, are set for words in the dictionary. 61: * Assuming independence, the probability that no word of a d-word 62: * dictionary sets a particular bit is given by the Poisson formula 63: * P = exp(-y)*y**0/0!, where y=d*k/n. 64: * The probability that a random string is recognized as a word is then 65: * (1-P)**k. For given n and d this is minimum when y=log(2), P=1/2, 66: * whence one finds, for example, that a 25000-word dictionary in a 67: * 400000-bit table works best with k=11. 68: * (For 300000-bit table use k=8; mjk.) 69: */ 70: 71: long pow2[NP][NW]; 72: 73: prime(argc, argv) register char **argv; 74: { 75: int i, j; 76: long h; 77: register long *lp; 78: 79: #ifndef unix 80: if ((tab = (int *)calloc(sizeof(*tab), TABSIZE)) == NULL) 81: return(0); 82: #endif 83: if (argc > 1) { 84: FILE *f; 85: if ((f = fopen(argv[1], "ri")) == NULL) 86: return(0); 87: if (fread((char *)tab, sizeof(*tab), TABSIZE, f) != TABSIZE) 88: return(0); 89: fclose(f); 90: } 91: for (i=0; i<NP; i++) { 92: h = *(lp = pow2[i]) = 1<<14; 93: for (j=1; j<NW; j++) 94: h = *++lp = (h<<7) % p[i]; 95: } 96: return(1); 97: } 98: 99: #define get(h) (tab[h>>SHIFT]&(1<<((int)h&((1<<SHIFT)-1)))) 100: #define set(h) tab[h>>SHIFT] |= 1<<((int)h&((1<<SHIFT)-1))