1: #define L 512 2: #define N 7 3: #define C 20 4: #define MEM (16*2048) 5: #define NF 10 6: 7: int ibuf[259]; 8: int obuf[259]; 9: char *file; 10: char *filep; 11: int nfiles; 12: int nlines; 13: int ntext; 14: int *lspace; 15: char *tspace; 16: int cmp(); 17: int term(); 18: int mflg; 19: char *outfil; 20: char tabchar; 21: int eargc; 22: char **eargv; 23: 24: char fold[128] { 25: 0000,0001,0002,0003,0004,0005,0006,0007, 26: 0010,0011,0012,0013,0014,0015,0016,0017, 27: 0020,0021,0022,0023,0024,0025,0026,0027, 28: 0030,0031,0032,0033,0034,0035,0036,0037, 29: 0040,0041,0042,0043,0044,0045,0046,0047, 30: 0050,0051,0052,0053,0054,0055,0056,0057, 31: 0060,0061,0062,0063,0064,0065,0066,0067, 32: 0070,0071,0072,0073,0074,0075,0076,0077, 33: 0100,0101,0102,0103,0104,0105,0106,0107, 34: 0110,0111,0112,0113,0114,0115,0116,0117, 35: 0120,0121,0122,0123,0124,0125,0126,0127, 36: 0130,0131,0132,0133,0134,0134,0136,0137, 37: 0140,0101,0102,0103,0104,0105,0106,0107, 38: 0110,0111,0112,0113,0114,0115,0116,0117, 39: 0120,0121,0122,0123,0124,0125,0126,0127, 40: 0130,0131,0132,0173,0174,0175,0176,0177 41: }; 42: char nofold[128]; 43: char dict[128] { 44: 1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1, 45: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 46: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 47: 0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, 48: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 49: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, 50: 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 51: 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1 52: }; 53: char nodict[128] { 1 }; 54: 55: struct field { 56: char *code; 57: char *ignore; 58: int nflg; 59: int rflg; 60: int bflg; 61: char *m[2]; 62: char *n[2]; 63: } fields[NF]; 64: int proto[9] { 65: &fold, 66: &nodict, 67: 0, 68: 1, 69: 0, 70: 0,-1, 71: 0,0 72: }; 73: int nfields; 74: int error 1; 75: 76: main(argc, argv) 77: char **argv; 78: { 79: register a, i; 80: char *arg; 81: register int *p; 82: int *q; 83: 84: for(a=0; a<128; a++) 85: nofold[a] = a; 86: nodict[127] = 1; 87: copyproto(); 88: eargv = argv; 89: while (--argc > 0) { 90: if(**++argv == '-') for(arg = *argv;;) { 91: switch(*++arg) { 92: case '\0': 93: if(arg[-1] == '-') 94: eargv[eargc++] = "-"; 95: break; 96: 97: case 'm': 98: mflg++; 99: continue; 100: 101: case 'o': 102: if(--argc > 0) 103: outfil = *++argv; 104: continue; 105: 106: default: 107: field(++*argv,1); 108: break; 109: } 110: break; 111: } else if (**argv == '+') { 112: if(++nfields>=NF) { 113: mess("Too many keys\n"); 114: exit(1); 115: } 116: copyproto(); 117: field(++*argv,0); 118: } else 119: eargv[eargc++] = *argv; 120: } 121: q = &fields[0]; 122: for(a=1; a<=nfields; a++) { 123: p = &fields[a]; 124: for(i=0; i<5; i++) /*sensitive to sizeof(proto)*/ 125: if(p[i] != proto[i]) 126: goto next; 127: for(i=0; i<5; i++) 128: p[i] = q[i]; 129: next: ; 130: } 131: if(eargc == 0) 132: eargv[eargc++] = "-"; 133: 134: a = MEM; 135: i = lspace = sbrk(0); 136: while(brk(a) == -1) 137: a =- 512; 138: brk(a =- 512); /* for recursion */ 139: a =- i; 140: nlines = ((a-L)>>1) & 077777; 141: nlines =/ 5; 142: ntext = nlines*8; 143: tspace = lspace+nlines; 144: file = "/usr/tmp/stmXaa"; 145: loop: 146: filep = file; 147: while(*filep != 'X') 148: filep++; 149: for(*filep = 'a';;(*filep)++) { 150: if(stat(file, lspace) < 0) { 151: a = creat(file, 0600); 152: if(a >= 0) 153: break; 154: } 155: if(*filep == 'z') { 156: if(file[1] != 't') { 157: file = "/tmp/stmXaa"; 158: goto loop; 159: } 160: mess("Cannot locate temp\n"); 161: exit(1); 162: } 163: } 164: close(a); 165: filep++; 166: if ((signal(2, 1) & 01) == 0) 167: signal(2, term); 168: nfiles = eargc; 169: if(!mflg) { 170: ibuf[0] = -1; 171: sort(); 172: close(0); 173: } 174: for(a = mflg?0:eargc; a+N < nfiles; a=+N) { 175: newfile(); 176: merge(a, a+N); 177: } 178: if(a != nfiles) { 179: oldfile(); 180: merge(a, nfiles); 181: } 182: error = 0; 183: term(); 184: } 185: 186: sort() 187: { 188: register char *cp; 189: register *lp, c; 190: int done; 191: int i; 192: int f; 193: 194: done = 0; 195: i = 0; 196: do { 197: cp = tspace; 198: lp = lspace; 199: while(lp < lspace+nlines && cp < tspace+ntext) { 200: *lp++ = cp; 201: while((*cp++ = c = getc(ibuf)) != '\n') { 202: if(c >= 0) continue; 203: cp--; 204: close(ibuf[0]); 205: if(i < eargc) { 206: if((f = setfil(i++)) == 0) 207: ibuf[0] = 0; 208: else if(fopen(f, ibuf) < 0) 209: cant(f); 210: } else 211: break; 212: } 213: if(c < 0) { 214: done++; 215: lp--; 216: break; 217: } 218: } 219: qsort(lspace, lp-lspace, 2, cmp); 220: if(done == 0 || nfiles != eargc) 221: newfile(); else 222: oldfile(); 223: while(lp > lspace) { 224: cp = *--lp; 225: do 226: putc(*cp, obuf); 227: while(*cp++ != '\n'); 228: } 229: fflush(obuf); 230: close(obuf[0]); 231: } while(done == 0); 232: } 233: 234: struct merg 235: { 236: char l[L]; 237: int b[259]; 238: }; 239: 240: merge(a, b) 241: { 242: register struct merg *p; 243: register char *cp; 244: register i; 245: struct { int *ip;}; 246: int f; 247: int j; 248: 249: p = lspace; 250: j = 0; 251: for(i=a; i<b; i++) { 252: f = setfil(i); 253: if(f == 0) 254: p->b[0] = dup(0); 255: else if(fopen(f, p->b) < 0) 256: cant(f); 257: ibuf[j] = p; 258: if(!rline(p)) j++; 259: p++; 260: } 261: i = j; 262: qsort(ibuf, i, 2, cmp); 263: if(i > 0) for(;;) { 264: cp = ibuf[i-1]; 265: do 266: putc(*cp, obuf); 267: while(*cp++ != '\n'); 268: if(rline(ibuf[i-1])) { 269: i--; 270: if(i == 0) 271: break; 272: } 273: cp = &ibuf[i]; 274: while (--cp.ip > ibuf && cmp(cp.ip, cp.ip-1) < 0) { 275: p = *cp.ip; 276: *cp.ip = *(cp.ip-1); 277: *(cp.ip-1) = p; 278: } 279: } 280: p = lspace; 281: for(i=a; i<b; i++) { 282: close(p->b[0]); 283: p++; 284: if(i >= eargc) 285: close(creat(setfil(i))); 286: } 287: fflush(obuf); 288: close(obuf[0]); 289: } 290: 291: rline(mp) 292: struct merg *mp; 293: { 294: register char *cp; 295: register *bp, c; 296: 297: bp = mp->b; 298: cp = mp->l; 299: do { 300: c = getc(bp); 301: if(c < 0) 302: return(1); 303: *cp++ = c; 304: } while(c != '\n'); 305: return(0); 306: } 307: 308: newfile() 309: { 310: 311: if(fcreat(setfil(nfiles), obuf) < 0) { 312: mess("Can't create temp\n"); 313: term(); 314: } 315: nfiles++; 316: } 317: 318: char * 319: setfil(i) 320: { 321: 322: if(i < eargc) 323: if(eargv[i][0] == '-' && eargv[i][1] == '\0') 324: return(0); 325: else 326: return(eargv[i]); 327: i =- eargc; 328: filep[0] = i/26 + 'a'; 329: filep[1] = i%26 + 'a'; 330: return(file); 331: } 332: 333: oldfile() 334: { 335: 336: if(outfil) { 337: if(fcreat(outfil, obuf) < 0) { 338: mess("Can't create output\n"); 339: term(); 340: } 341: } else 342: obuf[0] = 1; 343: } 344: 345: cant(f) 346: { 347: mess("Can't open "); 348: mess(f); 349: mess("\n"); 350: term(); 351: } 352: 353: term() 354: { 355: register i; 356: 357: if(nfiles == eargc) 358: nfiles++; 359: for(i=eargc; i<nfiles; i++) 360: unlink(setfil(i)); 361: exit(error); 362: } 363: 364: cmp(i, j) 365: int *i, *j; 366: { 367: register char *pa, *pb; 368: char *code, *ignore; 369: int a, b; 370: int k; 371: char *la, *lb; 372: register int sa; 373: int sb; 374: char *ipa, *ipb, *jpa, *jpb; 375: struct field *fp; 376: 377: for(k = nfields>0; k<=nfields; k++) { 378: fp = &fields[k]; 379: pa = *i; 380: pb = *j; 381: if(k) { 382: la = skip(pa, fp, 1); 383: pa = skip(pa, fp, 0); 384: lb = skip(pb, fp, 1); 385: pb = skip(pb, fp, 0); 386: } else { 387: la = -1; 388: lb = -1; 389: } 390: if(fp->nflg) { 391: while(blank(*pa)) 392: pa++; 393: while(blank(*pb)) 394: pb++; 395: sa = sb = fp->rflg; 396: if(*pa == '-') { 397: pa++; 398: sa = -sa; 399: } 400: if(*pb == '-') { 401: pb++; 402: sb = -sb; 403: } 404: if(sa != sb) 405: sa = 0; 406: for(ipa = pa; ipa<la&&digit(*ipa); ipa++); 407: for(ipb = pb; ipb<lb&&digit(*ipb); ipb++); 408: jpa = ipa; 409: jpb = ipb; 410: a = 0; 411: if(sa) while(ipa > pa && ipb > pb) 412: if(b = *--ipb - *--ipa) 413: a = b; 414: while(ipa > pa) 415: if(*--ipa != '0') 416: return(sa ? -sa : sb); 417: while(ipb > pb) 418: if(*--ipb != '0') 419: return(sa ? sa : sb); 420: if(a) return(a*sa); 421: if(*(pa=jpa) == '.') 422: pa++; 423: if(*(pb=jpb) == '.') 424: pb++; 425: while(pa<la && digit(*pa)) 426: if(pb<lb &&digit(*pb)) { 427: if(a = *pb++ - *pa++) 428: return(sa ? a*sa : sb); 429: } else if(*pa++ != '0') 430: return(sa ? -sa : sb); 431: while(pb<lb && digit(*pb)) 432: if(*pb++ != '0') 433: return(sa ? sa : sb); 434: continue; 435: } 436: code = fp->code; 437: ignore = fp->ignore; 438: loop: 439: while(*pa<0 || ignore[*pa]) 440: pa++; 441: while(*pb<0 || ignore[*pb]) 442: pb++; 443: if(pa>=la || *pa=='\n') 444: if(pb<lb && *pb!='\n') 445: return(fp->rflg); 446: else continue; 447: if(pb>=lb || *pb=='\n') 448: return(-fp->rflg); 449: if((sa = code[*pb++]-code[*pa++]) == 0) 450: goto loop; 451: return(sa*fp->rflg); 452: } 453: pa = *i; 454: pb = *j; 455: while(*pa != '\n') { 456: if(*pa == *pb) { 457: pa++; 458: pb++; 459: continue; 460: } 461: if(*pb == '\n') 462: return(-1); 463: return(*pb - *pa); 464: } 465: return(*pb != '\n'); 466: } 467: 468: skip(pp, fp, j) 469: struct field *fp; 470: char *pp; 471: { 472: register i; 473: register char *p; 474: 475: p = pp; 476: if( (i=fp->m[j]) < 0) 477: return(-1); 478: while(i-- > 0) { 479: if(tabchar != 0) { 480: while(*p != tabchar) 481: if(*p != '\n') 482: p++; 483: else goto ret; 484: p++; 485: } else { 486: while(blank(*p)) 487: p++; 488: while(!blank(*p)) 489: if(*p != '\n') 490: p++; 491: else goto ret; 492: } 493: } 494: if(fp->bflg) 495: while(blank(*p)) 496: p++; 497: i = fp->n[j]; 498: while(i-- > 0) { 499: if(*p != '\n') 500: p++; 501: else goto ret; 502: } 503: ret: 504: return(p); 505: } 506: 507: digit(c) 508: { 509: 510: return(c <= '9' && c >= '0'); 511: } 512: 513: mess(s) 514: char *s; 515: { 516: while(*s) 517: write(2, s++, 1); 518: } 519: 520: copyproto() 521: { 522: register int i, *p, *q; 523: 524: p = proto; 525: q = &fields[nfields]; 526: for(i=0; i<sizeof(proto)/2; i++) 527: *q++ = *p++; 528: } 529: 530: field(s,k) 531: char *s; 532: { 533: register struct field *p; 534: p = &fields[nfields]; 535: for(; *s!=0; s++) { 536: switch(*s) { 537: case '\0': 538: return; 539: 540: case 'a': 541: p->code = nofold; 542: break; 543: 544: case 'b': 545: p->bflg++; 546: break; 547: 548: case 'd': 549: p->ignore = dict; 550: break; 551: 552: case 'n': 553: p->nflg++; 554: break; 555: case 't': 556: tabchar = *++s; 557: if(tabchar == 0) s--; 558: break; 559: 560: case 'r': 561: p->rflg = -1; 562: break; 563: 564: default: 565: p->m[k] = number(&s); 566: if(*s == '.') 567: s++; 568: p->n[k] = number(&s); 569: s--; 570: } 571: } 572: } 573: 574: number(ppa) 575: char **ppa; 576: { 577: int n; 578: register char *pa; 579: pa = *ppa; 580: n = 0; 581: while(digit(*pa)) 582: n = n*10 + *pa++ - '0'; 583: *ppa = pa; 584: return(n); 585: } 586: 587: blank(c) 588: { 589: if(c==' ' || c=='\t') 590: return(1); 591: return(0); 592: }