1: #define L 512 2: #define N 7 3: #define C 20 4: #define MEM (16*2048) 5: #define NF 10 6: 7: int ibuf[259]; 8: int obuf[259]; 9: char *file; 10: char *filep; 11: int nfiles; 12: int nlines; 13: int ntext; 14: int *lspace; 15: char *tspace; 16: int cmp(); 17: int term(); 18: int mflg; 19: char *outfil; 20: char tabchar; 21: int eargc; 22: char **eargv; 23: 24: char fold[128] { 25: 0000,0001,0002,0003,0004,0005,0006,0007, 26: 0010,0011,0012,0013,0014,0015,0016,0017, 27: 0020,0021,0022,0023,0024,0025,0026,0027, 28: 0030,0031,0032,0033,0034,0035,0036,0037, 29: 0040,0041,0042,0043,0044,0045,0046,0047, 30: 0050,0051,0052,0053,0054,0055,0056,0057, 31: 0060,0061,0062,0063,0064,0065,0066,0067, 32: 0070,0071,0072,0073,0074,0075,0076,0077, 33: 0100,0101,0102,0103,0104,0105,0106,0107, 34: 0110,0111,0112,0113,0114,0115,0116,0117, 35: 0120,0121,0122,0123,0124,0125,0126,0127, 36: 0130,0131,0132,0133,0134,0134,0136,0137, 37: 0140,0101,0102,0103,0104,0105,0106,0107, 38: 0110,0111,0112,0113,0114,0115,0116,0117, 39: 0120,0121,0122,0123,0124,0125,0126,0127, 40: 0130,0131,0132,0173,0174,0175,0176,0177 41: }; 42: char nofold[128]; 43: char dict[128] { 44: 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1, 45: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 46: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 47: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, 48: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 49: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, 50: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 51: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1 52: }; 53: char nodict[128] { 1 }; 54: 55: struct field { 56: char *code; 57: char *ignore; 58: int nflg; 59: int rflg; 60: int bflg; 61: char *m[2]; 62: char *n[2]; 63: } fields[NF]; 64: int proto[9] { 65: &fold, 66: &nodict, 67: 0, 68: 1, 69: 0, 70: 0,-1, 71: 0,0 72: }; 73: int nfields; 74: int error 1; 75: 76: main(argc, argv) 77: char **argv; 78: { 79: register a, i; 80: char *arg; 81: register int *p; 82: int *q; 83: 84: for(a=0; a<128; a++) 85: nofold[a] = a; 86: nodict[127] = 1; 87: copyproto(); 88: eargv = argv; 89: while (--argc > 0) { 90: if(**++argv == '-') for(arg = *argv;;) { 91: switch(*++arg) { 92: case '\0': 93: if(arg[-1] == '-') 94: eargv[eargc++] = "-"; 95: break; 96: 97: case 'm': 98: mflg++; 99: continue; 100: 101: case 'o': 102: if(--argc > 0) 103: outfil = *++argv; 104: continue; 105: 106: default: 107: field(++*argv,1); 108: break; 109: } 110: break; 111: } else if (**argv == '+') { 112: if(++nfields>=NF) { 113: mess("Too many keys\n"); 114: exit(1); 115: } 116: copyproto(); 117: field(++*argv,0); 118: } else 119: eargv[eargc++] = *argv; 120: } 121: q = &fields[0]; 122: for(a=1; a<=nfields; a++) { 123: p = &fields[a]; 124: for(i=0; i<5; i++) /*sensitive to sizeof(proto)*/ 125: if(p[i] != proto[i]) 126: goto next; 127: for(i=0; i<5; i++) 128: p[i] = q[i]; 129: next: ; 130: } 131: if(eargc == 0) 132: eargv[eargc++] = "-"; 133: 134: a = MEM; 135: i = lspace = sbrk(0); 136: while(brk(a) == -1) 137: a =- 512; 138: brk(a =- 512); /* for recursion */ 139: a =- i; 140: nlines = ((a-L)>>1) & 077777; 141: nlines =/ 5; 142: ntext = nlines*8; 143: tspace = lspace+nlines; 144: file = "/usr/tmp/stmXaa"; 145: loop: 146: filep = file; 147: while(*filep != 'X') 148: filep++; 149: for(*filep = 'a';;(*filep)++) { 150: if(stat(file, lspace) < 0) { 151: a = creat(file, 0600); 152: if(a >= 0) 153: break; 154: } 155: if(*filep == 'z') { 156: if(file[1] != 't') { 157: file = "/tmp/stmXaa"; 158: goto loop; 159: } 160: mess("Cannot locate temp\n"); 161: exit(1); 162: } 163: } 164: close(a); 165: filep++; 166: if ((signal(2, 1) & 01) == 0) 167: signal(2, term); 168: nfiles = eargc; 169: if(!mflg) { 170: ibuf[0] = -1; 171: sort(); 172: close(0); 173: } 174: for(a = mflg?0:eargc; a+N < nfiles; a=+N) { 175: newfile(); 176: merge(a, a+N); 177: } 178: if(a != nfiles) { 179: oldfile(); 180: merge(a, nfiles); 181: } 182: error = 0; 183: term(); 184: } 185: 186: sort() 187: { 188: register char *cp; 189: register *lp, c; 190: int done; 191: int i; 192: int f; 193: 194: done = 0; 195: i = 0; 196: do { 197: cp = tspace; 198: lp = lspace; 199: while(lp < lspace+nlines && cp < tspace+ntext) { 200: *lp++ = cp; 201: while((*cp++ = c = getc(ibuf)) != '\n') { 202: if(c >= 0) continue; 203: cp--; 204: close(ibuf[0]); 205: if(i < eargc) { 206: if((f = setfil(i++)) == 0) 207: ibuf[0] = 0; 208: else if(fopen(f, ibuf) < 0) 209: cant(f); 210: } else 211: break; 212: } 213: if(c < 0) { 214: done++; 215: lp--; 216: break; 217: } 218: } 219: qsort(lspace, lp-lspace, 2, cmp); 220: if(done == 0 || nfiles != eargc) 221: newfile(); else 222: oldfile(); 223: while(lp > lspace) { 224: cp = *--lp; 225: if(*cp) 226: do 227: putc(*cp, obuf); 228: while(*cp++ != '\n'); 229: } 230: fflush(obuf); 231: close(obuf[0]); 232: } while(done == 0); 233: } 234: 235: struct merg 236: { 237: char l[L]; 238: int b[259]; 239: }; 240: 241: merge(a, b) 242: { 243: register struct merg *p; 244: register char *cp; 245: register i; 246: struct { int *ip;}; 247: int f; 248: int j; 249: int k,l,c; 250: 251: p = lspace; 252: j = 0; 253: for(i=a; i<b; i++) { 254: f = setfil(i); 255: if(f == 0) 256: p->b[0] = dup(0); 257: else if(fopen(f, p->b) < 0) 258: cant(f); 259: ibuf[j] = p; 260: if(!rline(p)) j++; 261: p++; 262: } 263: 264: do { 265: i = j; 266: qsort(ibuf, i, 2, cmp); 267: l = 0; 268: while(i--) { 269: cp = ibuf[i]; 270: if(*cp == '\0') { 271: l = 1; 272: if(rline(ibuf[i])) { 273: k = i; 274: while(++k < j) 275: ibuf[k-1] = ibuf[k]; 276: j--; 277: } 278: } 279: } 280: } while(l); 281: 282: i = j; 283: if(i > 0) for(;;) { 284: cp = ibuf[i-1]; 285: if(i == 1 || cmp(&ibuf[i-1], &ibuf[i-2])) 286: do 287: putc(*cp, obuf); 288: while(*cp++ != '\n'); 289: if(rline(ibuf[i-1])) { 290: i--; 291: if(i == 0) 292: break; 293: } 294: cp = &ibuf[i]; 295: while (--cp.ip > ibuf && cmp(cp.ip, cp.ip-1) < 0) { 296: p = *cp.ip; 297: *cp.ip = *(cp.ip-1); 298: *(cp.ip-1) = p; 299: } 300: } 301: p = lspace; 302: for(i=a; i<b; i++) { 303: close(p->b[0]); 304: p++; 305: if(i >= eargc) 306: close(creat(setfil(i))); 307: } 308: fflush(obuf); 309: close(obuf[0]); 310: } 311: 312: rline(mp) 313: struct merg *mp; 314: { 315: register char *cp; 316: register *bp, c; 317: 318: bp = mp->b; 319: cp = mp->l; 320: do { 321: c = getc(bp); 322: if(c < 0) 323: return(1); 324: *cp++ = c; 325: } while(c != '\n'); 326: *cp = '\0'; 327: return(0); 328: } 329: 330: newfile() 331: { 332: 333: if(fcreat(setfil(nfiles), obuf) < 0) { 334: mess("Can't create temp\n"); 335: term(); 336: } 337: nfiles++; 338: } 339: 340: char * 341: setfil(i) 342: { 343: 344: if(i < eargc) 345: if(eargv[i][0] == '-' && eargv[i][1] == '\0') 346: return(0); 347: else 348: return(eargv[i]); 349: i =- eargc; 350: filep[0] = i/26 + 'a'; 351: filep[1] = i%26 + 'a'; 352: return(file); 353: } 354: 355: oldfile() 356: { 357: 358: if(outfil) { 359: if(fcreat(outfil, obuf) < 0) { 360: mess("Can't create output\n"); 361: term(); 362: } 363: } else 364: obuf[0] = 1; 365: } 366: 367: cant(f) 368: { 369: mess("Can't open "); 370: mess(f); 371: mess("\n"); 372: term(); 373: } 374: 375: term() 376: { 377: register i; 378: 379: if(nfiles == eargc) 380: nfiles++; 381: for(i=eargc; i<nfiles; i++) { 382: unlink(setfil(i)); 383: } 384: exit(error); 385: } 386: 387: cmp(a, b) 388: int *a,*b; 389: { 390: register char *ra, *rb; 391: 392: ra = *a - 1; 393: rb = *b - 1; 394: 395: while(*++ra == *++rb) 396: if(*ra == '\n') 397: return(0); 398: 399: return(*rb - *ra); 400: } 401: 402: 403: skip(pp, fp, j) 404: struct field *fp; 405: char *pp; 406: { 407: register i; 408: register char *p; 409: 410: p = pp; 411: if( (i=fp->m[j]) < 0) 412: return(-1); 413: while(i-- > 0) { 414: if(tabchar != 0) { 415: while(*p != tabchar) 416: if(*p != '\n') 417: p++; 418: else goto ret; 419: p++; 420: } else { 421: while(blank(*p)) 422: p++; 423: while(!blank(*p)) 424: if(*p != '\n') 425: p++; 426: else goto ret; 427: } 428: } 429: if(fp->bflg) 430: while(blank(*p)) 431: p++; 432: i = fp->n[j]; 433: while(i-- > 0) { 434: if(*p != '\n') 435: p++; 436: else goto ret; 437: } 438: ret: 439: return(p); 440: } 441: 442: digit(c) 443: { 444: 445: return(c <= '9' && c >= '0'); 446: } 447: 448: mess(s) 449: char *s; 450: { 451: while(*s) 452: write(2, s++, 1); 453: } 454: 455: copyproto() 456: { 457: register int i, *p, *q; 458: 459: p = proto; 460: q = &fields[nfields]; 461: for(i=0; i<sizeof(proto)/2; i++) 462: *q++ = *p++; 463: } 464: 465: field(s,k) 466: char *s; 467: { 468: register struct field *p; 469: p = &fields[nfields]; 470: for(; *s!=0; s++) { 471: switch(*s) { 472: case '\0': 473: return; 474: 475: case 'a': 476: p->code = nofold; 477: break; 478: 479: case 'b': 480: p->bflg++; 481: break; 482: 483: case 'd': 484: p->ignore = dict; 485: break; 486: 487: case 'n': 488: p->nflg++; 489: break; 490: case 't': 491: tabchar = *++s; 492: if(tabchar == 0) s--; 493: break; 494: 495: case 'r': 496: p->rflg = -1; 497: break; 498: 499: default: 500: p->m[k] = number(&s); 501: if(*s == '.') 502: s++; 503: p->n[k] = number(&s); 504: s--; 505: } 506: } 507: } 508: 509: number(ppa) 510: char **ppa; 511: { 512: int n; 513: register char *pa; 514: pa = *ppa; 515: n = 0; 516: while(digit(*pa)) 517: n = n*10 + *pa++ - '0'; 518: *ppa = pa; 519: return(n); 520: } 521: 522: blank(c) 523: { 524: if(c==' ' || c=='\t') 525: return(1); 526: return(0); 527: } 528: 529: int (*qscmp)(); 530: int qses; 531: 532: qsort(a, n, es, fc) 533: char *a; 534: int n, es; 535: int (*fc)(); 536: { 537: qscmp = fc; 538: qses = es; 539: qs1(a, a+n*es); 540: } 541: 542: qs1(a, l) 543: char *a, *l; 544: { 545: register char *i, *j, *es; 546: char **k; 547: char *lp, *hp; 548: int n, c; 549: 550: 551: es = qses; 552: 553: start: 554: if((n=l-a) <= es) 555: return; 556: 557: 558: n = ((n/(2*es))*es) & 077777; 559: hp = lp = a+n; 560: i = a; 561: j = l-es; 562: 563: 564: for(;;) { 565: if(i < lp) { 566: if((c = (*qscmp)(i, lp)) == 0) { 567: qsexc(i, lp =- es); 568: continue; 569: } 570: if(c < 0) { 571: i =+ es; 572: continue; 573: } 574: } 575: 576: loop: 577: if(j > hp) { 578: if((c = (*qscmp)(hp, j)) == 0) { 579: qsexc(hp =+ es, j); 580: goto loop; 581: } 582: if(c > 0) { 583: if(i == lp) { 584: qstexc(i, hp =+ es, j); 585: i = lp =+ es; 586: goto loop; 587: } 588: qsexc(i, j); 589: j =- es; 590: i =+ es; 591: continue; 592: } 593: j =- es; 594: goto loop; 595: } 596: 597: 598: if(i == lp) { 599: for(k=lp+2; k<=hp;) *(*k++)='\0'; 600: if(lp-a >= l-hp) { 601: qs1(hp+es, l); 602: l = lp; 603: } else { 604: qs1(a, lp); 605: a = hp+es; 606: } 607: goto start; 608: } 609: 610: 611: qstexc(j, lp =- es, i); 612: j = hp =- es; 613: } 614: } 615: 616: qsexc(i, j) 617: char *i, *j; 618: { 619: register char *ri, *rj, c; 620: int n; 621: 622: n = qses; 623: ri = i; 624: rj = j; 625: do { 626: c = *ri; 627: *ri++ = *rj; 628: *rj++ = c; 629: } while(--n); 630: } 631: 632: qstexc(i, j, k) 633: char *i, *j, *k; 634: { 635: register char *ri, *rj, *rk; 636: char c; 637: int n; 638: 639: n = qses; 640: ri = i; 641: rj = j; 642: rk = k; 643: do { 644: c = *ri; 645: *ri++ = *rk; 646: *rk++ = *rj; 647: *rj++ = c; 648: } while(--n); 649: }