/* SCCS identification string; compiled out when `lint' is defined. */
#ifndef lint
static char sccsid[] = "@(#)code.c	4.2	(Berkeley)	7/21/83";
#endif /* not lint */
   4: 
   5: /*
   6:  * PURPOSE:	sorted list compressor (works with a modified 'find'
   7:  *		to encode/decode a filename database)
   8:  *
   9:  * USAGE:	bigram < list > bigrams
  10:  *		process bigrams (see updatedb) > common_bigrams
  11:  *		code common_bigrams < list > squozen_list
  12:  *
  13:  * METHOD:	Uses 'front compression' (see ";login:", March 1983, p. 8 ).
  14:  *		Output format is, per line, an offset differential count byte
  15:  *		followed by a partially bigram-encoded ascii residue.
  16:  *
  17:  *  	The codes are:
  18:  *
  19:  *	0-28	likeliest differential counts + offset to make nonnegative
  20:  *	30	escape code for out-of-range count to follow in next word
  21:  *	128-255 bigram codes, (128 most common, as determined by 'updatedb')
  22:  *	32-127  single character (printable) ascii residue
  23:  *
  24:  * SEE ALSO:	updatedb.csh, bigram.c, find.c
  25:  *
  26:  * AUTHOR:	James A. Woods, Informatics General Corp.,
  27:  *		NASA Ames Research Center, 10/82
  28:  */
  29: 
#include <stdio.h>
#include <string.h>
  31: 
  32: #define MAXPATH 1024        /* maximum pathname length */
  33: #define RESET   30      /* switch code */
  34: 
  35: char path[MAXPATH];
  36: char oldpath[MAXPATH] = " ";
  37: char bigrams[257] = { 0 };
  38: 
  39: main ( argc, argv )
  40:     int argc; char *argv[];
  41: {
  42:     int count, oldcount, diffcount;
  43:     int j, code;
  44:     char bigram[3];
  45:     FILE *fp;
  46: 
  47:     oldcount = 0;
  48:     bigram[2] = NULL;
  49: 
  50:     if ((fp = fopen(argv[1], "r")) == NULL) {
  51:         printf("Usage: code common_bigrams < list > coded_list\n");
  52:         exit(1);
  53:     }
  54:     fgets ( bigrams, 257, fp );
  55:     fwrite ( bigrams, 1, 256, stdout );
  56: 
  57:         while ( gets ( path ) != NULL ) {
  58:         /*
  59: 		   squelch unprintable chars so as not to botch decoding
  60: 		*/
  61:         for ( j = 0; path[j] != NULL; j++ ) {
  62:             path[j] &= 0177;
  63:             if ( path[j] < 040 || path[j] == 0177 )
  64:                 path[j] = '?';
  65:         }
  66:         count = prefix_length ( oldpath, path );
  67:         diffcount = count - oldcount;
  68:         if ( (diffcount < -14) || (diffcount > 14) ) {
  69:             putc ( RESET, stdout );
  70:             putw ( diffcount + 14, stdout );
  71:         }
  72:         else
  73:             putc ( diffcount + 14, stdout );
  74: 
  75:         for ( j = count; path[j] != NULL; j += 2 ) {
  76:             if ( path[j + 1] == NULL ) {
  77:                 putchar ( path[j] );
  78:                 break;
  79:             }
  80:             bigram[0] = path[j];
  81:             bigram[1] = path[j + 1];
  82:             /*
  83: 			    linear search for specific bigram in string table
  84: 			*/
  85:             if ( (code = strindex ( bigrams, bigram )) % 2 == 0 )
  86:                 putchar ( (code / 2) | 0200 );
  87:             else
  88:                 fputs ( bigram, stdout );
  89:         }
  90:         strcpy ( oldpath, path );
  91:         oldcount = count;
  92:     }
  93: }
  94: 
  95: strindex ( string, pattern )    /* return location of pattern in string or -1 */
  96:     char *string, *pattern;
  97: {
  98:     register char *s, *p, *q;
  99: 
 100:     for ( s = string; *s != NULL; s++ )
 101:         if ( *s == *pattern ) {     /* fast first char check */
 102:             for ( p = pattern + 1, q = s + 1; *p != NULL; p++, q++ )
 103:                 if ( *q != *p )
 104:                     break;
 105:             if ( *p == NULL )
 106:                 return ( q - strlen ( pattern ) - string );
 107:         }
 108:     return ( -1 );
 109: }
 110: 
 111: prefix_length ( s1, s2 )    /* return length of longest common prefix */
 112:     char *s1, *s2;      /* ... of strings s1 and s2 */
 113: {
 114:     register char *start;
 115: 
 116:         for ( start = s1; *s1 == *s2; s1++, s2++ )
 117:         if ( *s1 == NULL )
 118:                 break;
 119:         return ( s1 - start );
 120: }

Defined functions

main defined in line 39; never used
prefix_length defined in line 111; used 1 times
  • in line 66
strindex defined in line 95; used 1 times
  • in line 85

Defined variables

bigrams defined in line 37; used 3 times
oldpath defined in line 36; used 2 times
path defined in line 35; used 13 times
sccsid defined in line 2; never used

Defined macros

MAXPATH defined in line 32; used 2 times
RESET defined in line 33; used 1 times
  • in line 69
Last modified: 1983-07-21
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 1181
Valid CSS Valid XHTML 1.0 Strict