1: /* decompr16: decompress 16bit compressed files on a 16bit Intel processor 2: * running minix. 3: * This kludge was hacked together by John N. White on 6/30/91 and 4: * is Public Domain. 5: * Use chmem =500 decompr16. 6: * decompr16 needs about 280k to run in pipe mode. (56k per copy) 7: * 8: * This program acts as a filter: 9: * decompr16 < compressed_file > decompressed_file 10: * 11: * The arguments -0 to -4 causes only the corresponding pass to be done. Thus: 12: * decompr16 -0 < compressed_file | decompr16 -1 | decompr16 -2 | 13: * decompr16 -3 | decompr16 -4 > decompressed_file 14: * will also work. 15: * If RAM is scarce these passes can be done sequentially using tmp files. 16: * 17: * Compress uses a modified LZW compression algorithm. A compressed file 18: * is a set of indices into a dictionary of strings. The number of bits 19: * used to store each index depends on the number of entries currently 20: * in the dictionary. If there are between 257 and 512 entries, 9 bits 21: * are used. With 513 entries, 10 bits are used, etc. The initial dictionary 22: * consists of 0-255 (which are the corresponding chars) and 256 (which 23: * is a special CLEAR code). As each index in the compressed file is read, 24: * a new entry is added to the dictionary consisting of the current string 25: * with the first char of the next string appended. When the dictionary 26: * is full, no further entries are added. If a CLEAR code is received, 27: * the dictionary will be completely reset. The first two bytes of the 28: * compressed file are a magic number, and the third byte indicates the 29: * maximum number of bits, and whether the CLEAR code is used (older versions 30: * of compress didn't have CLEAR). 31: * 32: * This program works by forking four more copies of itself. The five 33: * programs form a pipeline. Copy 0 reads from stdin and writes to 34: * copy 1, which writes to copy 2, then to copy 3, then to copy 4 (the 35: * original) which writes to stdout. If given a switch -#, where # is a 36: * digit from 0 to 4 (example: -2), the program will run as that copy, 37: * reading from stdin and writing to stdout. This allows decompressing 38: * with very limited RAM because only one of the five passes is in 39: * memory at a time. 40: * 41: * The compressed data is a series of string indices (and a header at 42: * the beginning and an occasional CLEAR code). As these indices flow 43: * through the pipes, each program decodes the ones it can. The result 44: * of each decoding will be indices that the following programs can handle. 45: * 46: * Each of the 65536 strings in the dictionary is an earlier string with 47: * some character added to the end (except for the the 256 predefined 48: * single char strings). When new entries are made to the dictionary, 49: * the string index part will just be the last index to pass through. 50: * But the char part is the first char of the next string, which isn't 51: * known yet. So the string can be stored as a pair of indices. When 52: * this string is specified, it is converted to this pair of indices, 53: * which are flagged so that the first will be decoded in full while 54: * the second will be decoded to its first char. The dictionary takes 55: * 256k to store (64k strings of 2 indices of 2 bytes each). This is 56: * too big for a 64k data segment, so it is divided into 5 equal parts. 57: * Copy 0 of the program maintains the high part and copy 4 holds the 58: * low part. 59: */ 60: 61: #define BUFSZ 256 /* size of i/o buffers */ 62: #define BUFSZ_2 (BUFSZ/2) /* # of unsigned shorts in i/o bufs */ 63: #define DICTSZ (unsigned)13056 /* # of local dictionary entries */ 64: #define EOF_INDEX (unsigned short)0xFFFF /* EOF flag for pipeline */ 65: 66: int pipein=0, pipeout=1; /* file nums for pipeline input and output */ 67: int fildes[2]; /* for use with pipe() */ 68: int ibufp, obufp, ibufe; /* pointers to bufs, (inp, output, end) */ 69: int ipbufp=BUFSZ_2, opbufp; /* pointers to pipe bufs */ 70: int pnum= -1; /* ID of this copy */ 71: unsigned short ipbuf[BUFSZ_2]; /* for buffering input */ 72: unsigned short opbuf[BUFSZ_2]; /* for buffering output */ 73: unsigned char *ibuf=(unsigned char*)ipbuf, *obuf=(unsigned char*)opbuf; 74: 75: unsigned short dindex[DICTSZ]; /* dictionary: index to substring */ 76: unsigned short dchar[DICTSZ]; /* dictionary: last char of string */ 77: unsigned iindex, tindex, tindex2;/* holds index being processed */ 78: unsigned base; /* where in global dict local dict starts */ 79: unsigned tbase; 80: unsigned locend; /* where in global dict local dict ends */ 81: unsigned curend=256; /* current end of global dict */ 82: unsigned maxend; /* max end of global dict */ 83: int dcharp; /* ptr to dchar that needs next index entry */ 84: int curbits; /* number of bits for getbits() to read */ 85: int maxbits; /* limit on number of bits */ 86: int clearflg; /* if set, allow CLEAR */ 87: int inmod; /* mod 8 for getbits() */ 88: 89: main(argc, argv) char **argv; { 90: int i; 91: 92: /* check for arguments */ 93: if(argc>=2){ 94: if(**++argv != '-' || (pnum = (*argv)[1]-'0') < 0 || pnum > 4) 95: die("bad args\n"); 96: } 97: 98: if(pnum<=0){ /* if this is pass 0 */ 99: /* check header of compressed file */ 100: if(mygetc() != 0x1F || mygetc() != 0x9D)/* check magic number */ 101: die("not a compressed file\n"); 102: iindex=mygetc(); /* get compression style */ 103: } else getpipe(); /* get compression style */ 104: maxbits = iindex&0x1F; 105: clearflg = (iindex&0x80) != 0; 106: if(maxbits<9 || maxbits>16) /* check for valid maxbits */ 107: die("can't decompress\n"); 108: if((pnum & ~3) == 0) putpipe(iindex, 0); /* pass style to next copy */ 109: 110: if(pnum<0){ /* if decompr should set up pipeline */ 111: /* fork copies and setup pipeline */ 112: for(pnum=0; pnum<4; pnum++){ /* do four forks */ 113: if(pipe(fildes)) die("pipe() error\n"); 114: if((i=fork()) == -1) die("fork() error\n"); 115: if(i){ /* if this is the copy */ 116: pipeout = fildes[1]; 117: break; 118: } 119: /* if this is the original */ 120: pipein = fildes[0]; 121: close(fildes[1]); /* don't accumulate these */ 122: } 123: } 124: 125: /* Preliminary inits. Note: end/maxend/curend are highest, not highest+1 */ 126: base = DICTSZ*(4-pnum) + 256, locend = base+DICTSZ-1; 127: maxend = (1<<maxbits)-1; 128: if(maxend>locend) maxend=locend; 129: 130: for(;;){ 131: curend = 255+clearflg; /* init dictionary */ 132: dcharp = DICTSZ; /* flag for none needed */ 133: curbits = 9; /* init curbits (for copy 0) */ 134: for(;;){ /* for each index in input */ 135: if(pnum) getpipe(); /* get next index */ 136: else{ /* get index using getbits() */ 137: if(curbits<maxbits && (1<<curbits)<=curend){ 138: /* curbits needs to be increased */ 139: /* due to uglyness in compress, these indices 140: * in the compressed file are wasted */ 141: while(inmod) getbits(); 142: curbits++; 143: } 144: getbits(); 145: } 146: if(iindex==256 && clearflg){ 147: if(pnum<4) putpipe(iindex, 0); 148: /* due to uglyness in compress, these indices 149: * in the compressed file are wasted */ 150: while(inmod) getbits(); 151: break; 152: } 153: tindex=iindex; 154: /* convert the index part, ignoring spawned chars */ 155: while(tindex>=base) tindex = dindex[tindex-base]; 156: putpipe(tindex, 0); /* pass on the index */ 157: /* save the char of the last added entry, if any */ 158: if(dcharp<DICTSZ) dchar[dcharp++] = tindex; 159: if(curend<maxend) /* if still building dictionary */ 160: if(++curend >= base) 161: dindex[dcharp=(curend-base)] = iindex; 162: /* Do spawned chars. They are naturally produced in the wrong 163: * order. To get them in the right order without using memory, 164: * a series of passes, progressively less deep, are used */ 165: tbase = base; 166: while((tindex=iindex) >= tbase){/* for each char to spawn */ 167: while((tindex2=dindex[tindex-base]) >= tbase) 168: tindex=tindex2; /* scan to desired char */ 169: putpipe(dchar[tindex-base], 1);/* put it to the pipe */ 170: tbase=tindex+1; 171: } 172: } 173: } 174: } 175: 176: /* If s, write to stderr. Flush buffers if needed. Then exit. */ 177: die(s) char *s; { 178: static int recurseflag=0; 179: if(recurseflag++) exit(1); 180: if(s) while(*s) write(2, s++, 1); 181: if(obufp) write(1, obuf, obufp); /* flush stdout buf if needed */ 182: do putpipe(EOF_INDEX, 0); while(opbufp); /* flush pipe if needed */ 183: exit(s ? 1 : 0); 184: } 185: 186: /* Put a char to stdout. */ 187: myputc(c){ 188: obuf[obufp++] = c; 189: if(obufp >= BUFSZ){ /* if stdout buf full */ 190: write(1, obuf, BUFSZ); /* flush to stdout */ 191: obufp=0; 192: } 193: } 194: 195: /* Get a char from stdin. If EOF, then die() and exit. */ 196: mygetc(){ 197: unsigned u; 198: if(ibufp >= ibufe){ /* if stdin buf empty */ 199: if((ibufe = read(0, ibuf, BUFSZ)) <= 0) 200: die(0); /* if EOF, do normal exit */ 201: ibufp=0; 202: } 203: return(ibuf[ibufp++]); 204: } 205: 206: /* Put curbits bits into index from stdin. Note: only copy 0 uses this. 207: * The bits within a byte are in the correct order. But when the bits 208: * cross a byte boundry, the lowest bits will be in the higher part of 209: * the current byte, and the higher bits will be in the lower part of 210: * the next byte. */ 211: getbits(){ 212: int have; 213: static unsigned curbyte; /* byte having bits extracted from it */ 214: static int left; /* how many bits are left in curbyte */ 215: 216: inmod = (inmod+1) & 7; /* count input mod 8 */ 217: iindex=curbyte, have=left; 218: if(curbits-have > 8) iindex |= (unsigned)mygetc() << have, have+=8; 219: iindex |= ((curbyte=mygetc()) << have) & ~((unsigned)0xFFFF << curbits); 220: curbyte >>= curbits-have, left = 8 - (curbits-have); 221: } 222: 223: /* Get an index from the pipeline. If flagged firstonly, handle it here. */ 224: getpipe(){ 225: static short flags; 226: static int n=0; /* number of flags in flags */ 227: 228: for(;;){ /* while index with firstonly flag set */ 229: if(n<=0){ 230: if(ipbufp >= BUFSZ_2){ /* if pipe input buf empty */ 231: if(read(pipein, (char*)ipbuf, BUFSZ) != BUFSZ) 232: die("bad pipe read\n"); 233: ipbufp=0; 234: } 235: flags = ipbuf[ipbufp++]; 236: n = 15; 237: } 238: iindex = ipbuf[ipbufp++]; 239: if(iindex>curend) 240: die(iindex == EOF_INDEX ? 0 : "invalid data\n"); 241: flags<<=1, n--; 242: /* assume flags<0 if highest remaining flag is set */ 243: if(flags>=0) /* if firstonly flag for index is not set */ 244: return; /* return with valid non-firstonly index */ 245: /* handle firstonly case here */ 246: while(iindex>=base) iindex = dindex[iindex-base]; 247: putpipe(iindex, 1); 248: } 249: } 250: 251: /* put an index to the pipeline */ 252: putpipe(u, flag) unsigned u; { 253: static unsigned short flags, *flagp; 254: static int n=0; /* number of flags in flags */ 255: 256: if(pnum==4){ /* if we should write to stdout */ 257: myputc(u); /* index will BE the char value */ 258: return; 259: } 260: 261: if(n==0) /* if we need to reserve a flag entry */ 262: flags=0, flagp = opbuf + opbufp++; 263: opbuf[opbufp++] = u; /* add index to buffer */ 264: flags = (flags<<1) | flag; /* add firstonly flag */ 265: if(++n >= 15){ /* if block of 15 indicies */ 266: n=0, *flagp=flags; /* insert flags entry */ 267: if(opbufp >= BUFSZ_2){ /* if pipe out buf full */ 268: opbufp=0; 269: if(write(pipeout, (char*)opbuf, BUFSZ) != BUFSZ) 270: die("bad pipe write\n"); 271: } 272: } 273: }