1: #ifndef lint
2: static char *sccsid = "@(#)what4.c 4.1 (Berkeley) 5/6/83";
3: #endif
4:
5: #include "what..c"
/*
 * Sizing parameters for the word-frequency table.
 *
 * NW and ZIPF are only ever used as the product NW*ZIPF, which is the
 * maximum number of distinct words freqwd() will record.
 */
#define NW 5
#define ZIPF 10
#define HASHF 3		/* hash table has HASHF slots per recordable word */
#define WLEN 10		/* bytes of text storage budgeted per recordable word */
#define SAME 0		/* strcmp() result for equal strings */
/* Parenthesized so that TSIZE behaves as one operand in any expression. */
#define TSIZE (HASHF*ZIPF*NW)
#define NF 10		/* maximum number of input files opened by freqwd() */
13:
14: struct wst {
15: char *tx;
16: int ct;
17: }
18: ;
19: int HSIZE;
20: static struct wst word[TSIZE];
21: static char tbuf[NW*ZIPF*WLEN], *tp tbuf;
22:
23: freqwd ( fn, wd, nin )
24: char *fn[], *wd[];
25: {
26: FILE *fi[NF];
27: int nw 0, i, any, nf, j, wexch(), wcomp();
28: char tw[20];
29: for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--);
30: for(nf=0; fn[nf] && nf<NF; nf++)
31: fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL;
32: do {
33: any=0;
34: for(i=0; i<nf; i++)
35: {
36: if (fi[i]==NULL) continue;
37: if (gw(fi[i], tw)==0)
38: {
39: fclose(fi[i]);
40: fi[i]==NULL;
41: continue;
42: }
43: any=1;
44: if (common(tw)) continue;
45: if (strlen(tw)<3) continue;
46: j = lookup (tw);
47: if (j<0 && nw < ZIPF*NW)
48: {
49: j = -j;
50: strcpy (tp, tw);
51: word[j].tx = tp;
52: while (*tp++);
53: _assert (tp < tbuf+NW*ZIPF*WLEN);
54: word[j].ct = 1;
55: nw++;
56: }
57: else if (j>0)
58: word[j].ct++;
59: }
60: }
61: while (any>0);
62: shell ( TSIZE, wcomp, wexch );
63: for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++)
64: if (nw>=nin*2 && word[nw].ct != word[0].ct)
65: break;
66: for(i=0; i<nw; i++)
67: wd[i] = word[i].tx;
68: return(nw);
69: }
70:
71: lookup (wt)
72: char *wt;
73: {
74: int h;
75: h = hash(wt);
76: for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE)
77: {
78: if (h==0) continue;
79: if (strcmp(wt, word[h].tx) == SAME)
80: return (h);
81: }
82: return ( -h );
83: }
84:
/*
 * hash: fold a NUL-terminated string into a non-negative integer.
 *
 * Each byte is shifted left by (position mod 5) bits and XORed into the
 * result.  Bytes are read as unsigned char so that a byte with the high
 * bit set is never a negative shift operand; the result is then always
 * in the range 0..0xFF0 and needs no sign correction.
 */
int hash(s)
char *s;
{
	unsigned char *p = (unsigned char *)s;
	int k = 0, i = 0, c;

	while ((c = *p++) != 0) {
		k ^= c << (i % 5);
		i++;
	}
	return (k);
}
93:
/*
 * Longest word gw() will store, excluding the terminating NUL.  The
 * caller's buffer must hold GW_MAXWORD+1 bytes; the caller in this file
 * passes a char[20].
 */
#define GW_MAXWORD 19

/*
 * gw: read the next word from f into t, folded to lower case.
 *
 * Leading non-word characters are skipped.  A word is a run of characters
 * accepted by alphanum(); the character that ends the word is consumed.
 * Characters beyond GW_MAXWORD are read but not stored, so an over-long
 * word is truncated instead of overrunning t.  A word that is ended by
 * end-of-file is still returned.
 *
 * Returns 1 with a NUL-terminated word in t, or 0 if f is NULL or no
 * further word exists (t is then left untouched).
 */
int gw(f, t)
FILE *f;
char *t;
{
	int alphanum();
	int c, oldc = ' ', len = 0;

	if (f == NULL)
		return (0);
	while ((c = getc(f)) != EOF) {
		if (isupper(c))
			c = tolower(c);
		if (alphanum(c, oldc)) {
			if (len < GW_MAXWORD)
				t[len++] = c;
			oldc = c;
		} else if (len > 0)
			break;		/* first non-word character after a word */
		/* otherwise still skipping separators; oldc stays ' ' */
	}
	if (len == 0)
		return (0);
	t[len] = 0;
	return (1);
}

/*
 * alphanum: decide whether c may be part of a word.
 *
 * Letters and digits always qualify.  An apostrophe or a hyphen qualifies
 * only when the preceding character oldc is a letter.  Returns 1 or 0.
 */
int alphanum(c, oldc)
int c, oldc;
{
	if (isalpha(c) || isdigit(c))
		return (1);
	if (isalpha(oldc) && (c == '\'' || c == '-'))
		return (1);
	return (0);
}
128:
129: wcomp (n1, n2)
130: {
131: return (word[n1].ct >= word[n2].ct);
132: }
133:
134: wexch (n1, n2)
135: {
136: struct wst tt;
137: tt.tx = word[n1].tx;
138: tt.ct = word[n1].ct;
139: word[n1].tx = word[n2].tx;
140: word[n1].ct = word[n2].ct;
141: word[n2].tx = tt.tx;
142: word[n2].ct = tt.ct;
143: }
144:
/*
 * prime: return 1 if n is a prime number, 0 otherwise.
 * Values below 2 are not prime; 2 is the only even prime.
 */
int prime(n)
int n;
{
	/* only executed once- slow is ok */
	int i;

	if (n < 2)
		return (0);
	if (n % 2 == 0)
		return (n == 2);
	/* i <= n/i is i*i <= n without the risk of overflowing i*i. */
	for (i = 3; i <= n / i; i += 2)
		if (n % i == 0)
			return (0);
	return (1);
}
154:
/*
 * trimnl: remove one trailing newline from the string s, in place.
 * An empty string is left alone, so the byte before s is never touched.
 * Always returns 0.
 */
int trimnl(s)
char *s;
{
	char *end = s;

	if (*s == '\0')
		return (0);
	while (*end)
		end++;
	if (end[-1] == '\n')
		end[-1] = 0;
	return (0);
}
161:
/*
 * This is the test driver for what4.c as a standalone program.  It is
 * disabled; change "#if 0" to "#if 1" to build it.  Each argument is a
 * file name; the selected words are printed one per line.
 */
#if 0
int main(argc, argv)
int argc;
char *argv[];
{
	/* NF names plus the NULL terminator; ZIPF*NW is the most freqwd() can return. */
	char *ff[NF + 1], *wd[ZIPF*NW], **ffp = ff;
	int n, i, nfiles = 0;

	while (--argc > 0 && nfiles < NF) {
		*ffp++ = *++argv;
		nfiles++;
	}
	*ffp = 0;
	/*
	 * NOTE(review): the original call omitted the third argument (nin).
	 * Passing the number of files is a guess - confirm against the real
	 * caller of freqwd().
	 */
	n = freqwd(ff, wd, nfiles);
	for (i = 0; i < n; i++)
		printf("%s\n", wd[i]);
	printf("total of %d items\n", n);
	return (0);
}
#endif /* 0 */
Defined functions
gw
defined in line
94; used 1 times
hash
defined in line
85; used 1 times
main
defined in line
164;
never used
Defined variables
HSIZE
defined in line
19; used 5 times
sccsid
defined in line
2;
never used
tbuf
defined in line
21; used 1 times
tp
defined in line
21; used 4 times
word
defined in line
20; used 19 times
Defined struct's
wst
defined in line
14; used 4 times
Defined macros
HASHF
defined in line
8; used 1 times
NF
defined in line
12; used 2 times
NW
defined in line
6; used 4 times
SAME
defined in line
10; used 1 times
TSIZE
defined in line
11; used 4 times
WLEN
defined in line
9; used 2 times
ZIPF
defined in line
7; used 4 times