1: # include "what..c"
/* One table entry: pointer to a word's text and its occurrence count. */
struct wst { char *tx; int ct; } ;
# define NW 5
# define ZIPF 10
# define HASHF 3
# define WLEN 10
/* strcmp() result that means "the two strings are equal". */
# define SAME 0
/*
 * Number of slots in word[].  Parenthesized so that the product stays a
 * single operand in expressions such as "h % TSIZE".
 */
# define TSIZE (HASHF*ZIPF*NW)
/* Hash modulus; freqwd() sets it to the largest prime <= TSIZE. */
int HSIZE;
/* Open-addressed hash table, later sorted in place by freqwd(). */
static struct wst word[TSIZE];
/*
 * String pool for the stored words (NW*ZIPF entries, WLEN bytes each on
 * average); tp points at the next free byte.
 */
static char tbuf[NW*ZIPF*WLEN], *tp = tbuf;
/* Upper bound on the number of input files freqwd() will open. */
# define NF 10
13:
14: freqwd ( fn, wd, nin )
15: char *fn[], *wd[];
16: {
17: FILE *fi[NF];
18: int nw 0, i, any, nf, j, wexch(), wcomp();
19: char tw[20];
20: for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--);
21: for(nf=0; fn[nf] && nf<NF; nf++)
22: fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL;
23: do {
24: any=0;
25: for(i=0; i<nf; i++)
26: {
27: if (fi[i]==NULL) continue;
28: if (gw(fi[i], tw)==0)
29: {
30: fclose(fi[i]);
31: fi[i]==NULL;
32: continue;
33: }
34: any=1;
35: if (common(tw)) continue;
36: if (strlen(tw)<3) continue;
37: j = lookup (tw);
38: if (j<0 && nw < ZIPF*NW)
39: {
40: j = -j;
41: strcpy (tp, tw);
42: word[j].tx = tp;
43: while (*tp++);
44: _assert (tp < tbuf+NW*ZIPF*WLEN);
45: word[j].ct = 1;
46: nw++;
47: }
48: else if (j>0)
49: word[j].ct++;
50: }
51: } while (any>0);
52: shell ( TSIZE, wcomp, wexch );
53: for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++)
54: if (nw>=nin*2 && word[nw].ct != word[0].ct)
55: break;
56: for(i=0; i<nw; i++)
57: wd[i] = word[i].tx;
58: return(nw);
59: }
60:
61: lookup (wt)
62: char *wt;
63: {
64: int h;
65: h = hash(wt);
66: for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE)
67: {
68: if (h==0) continue;
69: if (strcmp(wt, word[h].tx) == SAME)
70: return (h);
71: }
72: return ( -h );
73: }
74:
/*
 * hash: fold the bytes of the NUL-terminated string s into a non-negative
 * integer.  Each byte is shifted left by its position modulo 5 and XORed
 * into the accumulator.
 *
 * Bytes are taken as unsigned char and accumulated in an unsigned value,
 * so no negative quantity is ever shifted and the result cannot be
 * negative.  For 7-bit ASCII input the value is the same as the signed
 * computation would give.
 */
int
hash(char *s)
{
	unsigned k = 0, c;
	int i = 0;

	while ((c = (unsigned char)*s++) != 0)
		k ^= c << (i++ % 5);
	/* Largest possible value is below 255<<5, so it fits in an int. */
	return ((int)k);
}
83:
/*
 * Longest word gw() stores, not counting the terminating NUL.
 * freqwd() hands gw() a 20-byte buffer.
 */
# define GWMAX 19

/*
 * gw: read the next word from stream f into t, folded to lower case.
 *
 * A word starts at the first letter or digit and continues while
 * alphanum() accepts each character given the one before it.
 * Returns 1 when a word was stored in t (NUL-terminated), 0 when f is
 * NULL or the input is exhausted without finding a word.
 *
 * Characters beyond GWMAX are consumed but not stored, so t never needs
 * more than GWMAX+1 bytes.  A word that is ended by end-of-file rather
 * than by a separator is still returned; the following call returns 0.
 */
int
gw(FILE *f, char *t)
{
	int alphanum(int, int);
	int inword = 0, oldc = ' ', c, n = 0;

	if (f == NULL)
		return (0);
	while ((c = getc(f)) != EOF) {
		if (isupper(c))
			c = tolower(c);
		if (!alphanum(c, oldc)) {
			if (!inword)
				continue;	/* still skipping; oldc stays ' ' */
			*t = 0;
			return (1);
		}
		inword = 1;
		if (n < GWMAX) {
			*t++ = c;
			n++;
		}
		oldc = c;
	}
	if (inword) {
		/* EOF arrived in the middle of a word: hand it back. */
		*t = 0;
		return (1);
	}
	return (0);
}
110:
/*
 * alphanum: decide whether character c can be part of a word.
 *
 *   c     the character being examined
 *   oldc  the character that preceded it
 *
 * Returns 1 if c is a letter or digit, or if c is an apostrophe or hyphen
 * immediately following a letter; otherwise 0.
 */
int
alphanum(int c, int oldc)
{
	if (isalpha(c) || isdigit(c))
		return (1);
	/* ' and - count only when they directly follow a letter. */
	if (isalpha(oldc) && (c == '\'' || c == '-'))
		return (1);
	return (0);
}
118:
119: wcomp (n1, n2)
120: {
121: return (word[n1].ct >= word[n2].ct);
122: }
123:
124: wexch (n1, n2)
125: {
126: struct wst tt;
127: tt.tx = word[n1].tx; tt.ct = word[n1].ct;
128: word[n1].tx = word[n2].tx; word[n1].ct = word[n2].ct;
129: word[n2].tx = tt.tx; word[n2].ct = tt.ct;
130: }
131:
/*
 * prime: return 1 if n is a prime number, 0 otherwise.
 * Trial division by odd numbers; only executed once, so slow is ok.
 */
int
prime(int n)
{
	int i;

	if (n < 2)
		return (0);		/* 0, 1 and negatives are not prime */
	if (n % 2 == 0)
		return (n == 2);	/* 2 is the only even prime */
	/* "i <= n/i" is "i*i <= n" without the risk of overflowing i*i. */
	for (i = 3; i <= n / i; i += 2)
		if (n % i == 0)
			return (0);
	return (1);
}
/*
 * trimnl: if the NUL-terminated string s ends in a newline, remove that
 * one newline in place.  An empty string is left untouched (nothing
 * before s[0] is ever examined).  Always returns 0.
 */
int
trimnl(char *s)
{
	char *end = s;

	while (*end)
		end++;
	if (end != s && end[-1] == '\n')
		end[-1] = 0;
	return (0);
}
147:
148:
149: /* this is the test for what4.c as a standalone prog ...
150: main (argc, argv)
151: char *argv[];
152: {
153: char *ff[10], *wd[20], **ffp ff;
154: int n, i;
155: while (--argc)
156: *ffp++ = *++argv;
157: *ffp=0;
158: n=freqwd(ff,wd);
159: for(i=0; i<n; i++)
160: printf("%s\n",wd[i]);
161: printf("total of %d items\n",n);
162: }
163: /* .... */
Defined functions
gw
defined in line
84; used 1 times
hash
defined in line
75; used 1 times
Defined variables
HSIZE
defined in line
9; used 5 times
tbuf
defined in line
11; used 1 times
tp
defined in line
11; used 4 times
word
defined in line
10; used 19 times
Defined struct's
wst
defined in line
2; used 4 times
Defined macros
HASHF
defined in line
5; used 1 times
NF
defined in line
12; used 2 times
NW
defined in line
3; used 4 times
SAME
defined in line
7; used 1 times
TSIZE
defined in line
8; used 4 times
WLEN
defined in line
6; used 2 times
ZIPF
defined in line
4; used 4 times