1: #ifndef lint
2: static char sccsid[] = "@(#)code.c 4.2 (Berkeley) 7/21/83";
3: #endif not lint
4:
5: /*
6: * PURPOSE: sorted list compressor (works with a modified 'find'
7: * to encode/decode a filename database)
8: *
9: * USAGE: bigram < list > bigrams
10: * process bigrams (see updatedb) > common_bigrams
11: * code common_bigrams < list > squozen_list
12: *
13: * METHOD: Uses 'front compression' (see ";login:", March 1983, p. 8 ).
14: * Output format is, per line, an offset differential count byte
15: * followed by a partially bigram-encoded ascii residue.
16: *
17: * The codes are:
18: *
19: * 0-28 likeliest differential counts + offset to make nonnegative
20: * 30 escape code for out-of-range count to follow in next word
21: * 128-255 bigram codes, (128 most common, as determined by 'updatedb')
22: * 32-127 single character (printable) ascii residue
23: *
24: * SEE ALSO: updatedb.csh, bigram.c, find.c
25: *
26: * AUTHOR: James A. Woods, Informatics General Corp.,
27: * NASA Ames Research Center, 10/82
28: */
29:
30: #include <stdio.h>
31:
32: #define MAXPATH 1024 /* maximum pathname length */
33: #define RESET 30 /* switch code */
34:
35: char path[MAXPATH];
36: char oldpath[MAXPATH] = " ";
37: char bigrams[257] = { 0 };
38:
39: main ( argc, argv )
40: int argc; char *argv[];
41: {
42: int count, oldcount, diffcount;
43: int j, code;
44: char bigram[3];
45: FILE *fp;
46:
47: oldcount = 0;
48: bigram[2] = NULL;
49:
50: if ((fp = fopen(argv[1], "r")) == NULL) {
51: printf("Usage: code common_bigrams < list > coded_list\n");
52: exit(1);
53: }
54: fgets ( bigrams, 257, fp );
55: fwrite ( bigrams, 1, 256, stdout );
56:
57: while ( gets ( path ) != NULL ) {
58: /*
59: squelch unprintable chars so as not to botch decoding
60: */
61: for ( j = 0; path[j] != NULL; j++ ) {
62: path[j] &= 0177;
63: if ( path[j] < 040 || path[j] == 0177 )
64: path[j] = '?';
65: }
66: count = prefix_length ( oldpath, path );
67: diffcount = count - oldcount;
68: if ( (diffcount < -14) || (diffcount > 14) ) {
69: putc ( RESET, stdout );
70: putw ( diffcount + 14, stdout );
71: }
72: else
73: putc ( diffcount + 14, stdout );
74:
75: for ( j = count; path[j] != NULL; j += 2 ) {
76: if ( path[j + 1] == NULL ) {
77: putchar ( path[j] );
78: break;
79: }
80: bigram[0] = path[j];
81: bigram[1] = path[j + 1];
82: /*
83: linear search for specific bigram in string table
84: */
85: if ( (code = strindex ( bigrams, bigram )) % 2 == 0 )
86: putchar ( (code / 2) | 0200 );
87: else
88: fputs ( bigram, stdout );
89: }
90: strcpy ( oldpath, path );
91: oldcount = count;
92: }
93: }
94:
95: strindex ( string, pattern ) /* return location of pattern in string or -1 */
96: char *string, *pattern;
97: {
98: register char *s, *p, *q;
99:
100: for ( s = string; *s != NULL; s++ )
101: if ( *s == *pattern ) { /* fast first char check */
102: for ( p = pattern + 1, q = s + 1; *p != NULL; p++, q++ )
103: if ( *q != *p )
104: break;
105: if ( *p == NULL )
106: return ( q - strlen ( pattern ) - string );
107: }
108: return ( -1 );
109: }
110:
111: prefix_length ( s1, s2 ) /* return length of longest common prefix */
112: char *s1, *s2; /* ... of strings s1 and s2 */
113: {
114: register char *start;
115:
116: for ( start = s1; *s1 == *s2; s1++, s2++ )
117: if ( *s1 == NULL )
118: break;
119: return ( s1 - start );
120: }
Defined functions
main
defined in line
39;
never used
Defined variables
path
defined in line
35; used 13 times
sccsid
defined in line
2;
never used
Defined macros
RESET
defined in line
33; used 1 times