1: /* @(#)spell.h 4.1 12/18/82 */ 2: 3: #include <stdio.h> 4: #include <ctype.h> 5: 6: #ifndef unix 7: #define SHIFT 5 8: #define TABSIZE (int)(400000/(1<<SHIFT)) 9: int *tab; /*honeywell loader deficiency*/ 10: #else 11: #define Tolower(c) (isupper(c)?tolower(c):c) /* ugh!!! */ 12: #define SHIFT 4 13: #define TABSIZE 25000 /*(int)(400000/(1<<shift))--pdp11 compiler deficiency*/ 14: short tab[TABSIZE]; 15: #endif 16: long p[] = { 17: 399871, 18: 399887, 19: 399899, 20: 399911, 21: 399913, 22: 399937, 23: 399941, 24: 399953, 25: 399979, 26: 399983, 27: 399989, 28: }; 29: #define NP (sizeof(p)/sizeof(p[0])) 30: #define NW 30 31: 32: /* 33: * Hash table for spelling checker has n bits. 34: * Each word w is hashed by k different (modular) hash functions, hi. 35: * The bits hi(w), i=1..k, are set for words in the dictionary. 36: * Assuming independence, the probability that no word of a d-word 37: * dictionary sets a particular bit is given by the Poisson formula 38: * P = exp(-y)*y**0/0!, where y=d*k/n. 39: * The probability that a random string is recognized as a word is then 40: * (1-P)**k. For given n and d this is minimum when y=log(2), P=1/2, 41: * whence one finds, for example, that a 25000-word dictionary in a 42: * 400000-bit table works best with k=11. 43: */ 44: 45: long pow2[NP][NW]; 46: 47: prime(argc, argv) register char **argv; 48: { 49: int i, j; 50: long h; 51: register long *lp; 52: 53: #ifndef unix 54: if ((tab = (int *)calloc(sizeof(*tab), TABSIZE)) == NULL) 55: return(0); 56: #endif 57: if (argc > 1) { 58: FILE *f; 59: if ((f = fopen(argv[1], "ri")) == NULL) 60: return(0); 61: if (fread((char *)tab, sizeof(*tab), TABSIZE, f) != TABSIZE) 62: return(0); 63: fclose(f); 64: } 65: for (i=0; i<NP; i++) { 66: h = *(lp = pow2[i]) = 1<<14; 67: for (j=1; j<NW; j++) 68: h = *++lp = (h<<7) % p[i]; 69: } 70: return(1); 71: } 72: 73: #define get(h) (tab[h>>SHIFT]&(1<<((int)h&((1<<SHIFT)-1)))) 74: #define set(h) tab[h>>SHIFT] |= 1<<((int)h&((1<<SHIFT)-1))