1: # include <stdio.h> 2: 3: # include "../ingres.h" 4: # include "../aux.h" 5: # include "../symbol.h" 6: # include "../access.h" 7: 8: # define N 7 9: # define MEM (32768 - 2) 10: # define BUCKETSIZE 4 11: # define ENDKEY MAXDOM + 1 12: 13: /* 14: ** Trace info comes from trace flag Z23 passed as the 15: ** second parameter. The bits used are: 16: ** 17: ** 0001 main trace info 18: ** 0002 secondary trace info 19: ** 0004 terciary trace info 20: ** 0010 don't truncate temps 21: ** 0020 don't unlink temps 22: ** 0040 print am page refs 23: ** 0100 print am tuple gets 24: */ 25: 26: char *Infile; 27: char *Outfile; 28: struct descriptor Desc; 29: char Descsort[MAXDOM+1]; 30: FILE *Oiop; 31: int Trace; 32: int Tupsize; 33: int Bucket; 34: char File[15]; 35: char *Fileset; 36: char *Filep; 37: int Nfiles = 1; 38: int Nlines; 39: long Ccount; 40: char **Lspace; 41: char *Tspace; 42: extern term(); 43: extern int cmpa(); 44: long Tupsout; 45: 46: main(argc, argv) 47: char **argv; 48: { 49: extern char *Proc_name; 50: extern (*Exitfn)(); 51: register int i; 52: register int j; 53: register char *mem; 54: char *start; 55: int maxkey, rev; 56: char *sbrk(); 57: 58: Proc_name = "KSORT"; 59: Exitfn = &term; 60: 61: Fileset = *++argv; 62: atoi(*++argv, &Trace); 63: if ((i = open(*++argv, 0)) < 0) 64: cant(*argv); 65: if (read(i, &Desc, sizeof Desc) < sizeof Desc) 66: syserr("read(Desc)"); 67: close(i); 68: 69: /* set up Descsort to indicate the sort order for tuple */ 70: /* if domain zero is given prepare to generate "hash bucket" 71: ** value for tuple */ 72: 73: maxkey = 0; 74: for (i = 0; i <= Desc.relatts; i++) 75: if (j = Desc.relgiven[i]) 76: { 77: if ((rev = j) < 0) 78: j = -j; 79: if (maxkey < j) 80: maxkey = j; 81: Descsort[--j] = rev < 0 ? -i : i; 82: } 83: 84: Descsort[maxkey] = ENDKEY; /* mark end of list */ 85: 86: Tupsize = Desc.relwid; 87: 88: if (Bucket = (Descsort[0] == 0)) 89: { 90: /* we will be generating hash bucket */ 91: Tupsize += BUCKETSIZE; 92: Desc.relfrml[0] = BUCKETSIZE; 93: Desc.relfrmt[0] = INT; 94: Desc.reloff[0] = Desc.relwid; 95: } 96: 97: if (Trace & 01) 98: { 99: printf("Bucket is %d,Sort is:\n", Bucket); 100: for (i = 0; (j = Descsort[i]) != ENDKEY; i++) 101: printf("Descsort[%d]=%d\n", i, j); 102: } 103: if (i = (maxkey - Bucket - Desc.relatts)) 104: syserr("%d domains missing\n", -i); 105: Infile = *++argv; 106: Outfile = *++argv; 107: 108: /* get up to 2**15 - 1 bytes of memory for buffers */ 109: /* note that mem must end up positive so that Nlines computation is right */ 110: start = sbrk(0); Lspace = (char **) start; 111: mem = start + MEM; /* take at most 2**15 - 1 bytes */ 112: while (brk(mem) == -1) 113: mem -= 512; 114: mem -= start; 115: 116: /* compute pointers and sizes into buffer memory */ 117: Nlines = (unsigned int) mem / (Tupsize + 2); 118: Tspace = (char *) (Lspace + Nlines); 119: if (Trace & 01) 120: printf("Tspace=%l,Lspace=%l,Nlines=%l,mem=%l,start=%l\n", 121: Tspace, Lspace, Nlines, mem, start); 122: 123: /* set up temp files */ 124: concat(ztack("_SYSS", Fileset), "Xaa", File); 125: Filep = File; 126: while (*Filep != 'X') 127: Filep++; 128: Filep++; 129: if ((signal(2, 1) & 01) == 0) 130: signal(2, term); 131: 132: /* sort stage -- create a bunch of temporaries */ 133: Ccount = 0; 134: if (Trace & 01) 135: printf("sorting\n"); 136: sort(); 137: close(0); 138: if (Trace & 01) 139: { 140: printf("done sorting\n%s tuples written to %d files\n", locv(Tupsout), Nfiles - 1); 141: printf("sort required %s compares\n", locv(Ccount)); 142: } 143: 144: /* merge stage -- merge up to N temps into a new temp */ 145: Ccount = 0; 146: for (i = 1; i + N < Nfiles; i += N) 147: { 148: newfile(); 149: merge(i, i + N); 150: } 151: 152: /* merge last set of temps into target file */ 153: if (i != Nfiles) 154: { 155: oldfile(); 156: merge(i, Nfiles); 157: } 158: if (Trace & 01) 159: { 160: printf("%s tuples in out file\n", locv(Tupsout)); 161: printf("merge required %s compares\n", locv(Ccount)); 162: } 163: term(0); 164: } 165: 166: sort() 167: { 168: register char *cp; 169: register char **lp; 170: register int i; 171: int done; 172: long ntups; 173: struct tup_id tid, ltid; 174: char *xp; 175: long pageid; 176: long rhash(); 177: 178: done = 0; 179: ntups = 0; 180: Tupsout = 0; 181: if ((Desc.relfp = open(Infile, 0)) < 0) 182: cant(Infile); 183: Desc.relopn = (Desc.relfp + 1) * 5; 184: 185: /* initialize tids for full scan */ 186: pageid = 0; 187: tid.line_id = -1; 188: stuff_page(&tid, &pageid); 189: pageid = -1; 190: ltid.line_id = -1; 191: stuff_page(<id, &pageid); 192: 193: do 194: { 195: cp = Tspace; 196: lp = Lspace; 197: while (lp < Lspace + Nlines) 198: { 199: if ((i = get(&Desc, &tid, <id, cp, TRUE)) != 0) 200: { 201: if (i < 0) 202: syserr("get %d", i); 203: close(Desc.relfp); 204: Desc.relopn = 0; 205: done++; 206: break; 207: } 208: if (Bucket) 209: { 210: /* compute hash bucket and insert at end */ 211: pageid = rhash(&Desc, cp); 212: bmove(&pageid, cp + Desc.relwid, BUCKETSIZE); 213: } 214: *lp++ = cp; 215: cp += Tupsize; 216: ntups++; 217: } 218: qsort(Lspace, lp - Lspace, 2, &cmpa); 219: if (done == 0 || Nfiles != 1) 220: newfile(); 221: else 222: oldfile(); 223: while (lp > Lspace) 224: { 225: cp = *--lp; 226: xp = cp; 227: if ((lp == Lspace) || (cmpa(&xp, &lp[-1]) != 0)) 228: { 229: if ((i = fwrite(cp, 1, Tupsize, Oiop)) != Tupsize) 230: syserr("cant write outfile %d (%d)", i, Nfiles); 231: Tupsout++; 232: } 233: } 234: fclose(Oiop); 235: } while (done == 0); 236: if (Trace & 01) 237: printf("%s tuples in\n", locv(ntups)); 238: } 239: 240: struct merg 241: { 242: char tup[MAXTUP+BUCKETSIZE]; 243: int file_num; 244: FILE *fiop; 245: }; 246: 247: merge(a, b) 248: int a; 249: int b; 250: { 251: register struct merg *merg; 252: register int i, j; 253: char *f, *yesno; 254: struct merg *mbuf[N + 1]; 255: char *setfil(); 256: 257: if (Trace & 02) 258: printf("merge %d to %d\n", a, b); 259: merg = (struct merg *) Lspace; 260: j = 0; 261: for (i = a; i < b; i++) 262: { 263: f = setfil(i); 264: mbuf[j] = merg; 265: merg->file_num = i; 266: if ((merg->fiop = fopen(f, "r")) == NULL) 267: cant(f); 268: if (!rline(merg)) 269: j++; 270: merg++; 271: } 272: 273: i = j - 1; 274: if (Trace & 04) 275: printf("start merg with %d\n", i); 276: while (i >= 0) 277: { 278: if (Trace & 04) 279: printf("mintup %d\n", i); 280: if (mintup(mbuf, i, &cmpa)) 281: { 282: if (fwrite(mbuf[i]->tup, 1, Tupsize, Oiop) != Tupsize) 283: syserr("cant write merge output"); 284: Tupsout++; 285: } 286: merg = mbuf[i]; 287: if (rline(merg)) 288: { 289: yesno = "not "; 290: if (!(Trace & 010)) 291: { 292: /* truncate temporary files to zero length */ 293: yesno = ""; 294: close(creat(setfil(merg->file_num), 0600)); 295: } 296: if (Trace & 02 || Trace & 010) 297: printf("dropping and %struncating %s\n", yesno, setfil(merg->file_num)); 298: i--; 299: } 300: } 301: 302: fclose(Oiop); 303: } 304: 305: 306: mintup(mbuf, cnt, cmpfunc) 307: struct merg *mbuf[]; 308: int cnt; 309: int (*cmpfunc)(); 310: 311: /* 312: ** Mintup puts the smallest tuple in mbuf[cnt-1]. 313: ** If the tuple is a duplicate of another then 314: ** mintup returns 0, else 1. 315: ** 316: ** Cnt is the number of compares to make; i.e. 317: ** mbuf[cnt] is the last element. 318: */ 319: 320: { 321: register struct merg **next, **last; 322: struct merg *temp; 323: register int nodup; 324: int j; 325: 326: nodup = TRUE; 327: next = mbuf; 328: last = &next[cnt]; 329: 330: while (cnt--) 331: { 332: if (j = (*cmpfunc)(last, next)) 333: { 334: /* tuples not equal. keep smallest */ 335: if (j < 0) 336: { 337: /* exchange */ 338: temp = *last; 339: *last = *next; 340: *next = temp; 341: nodup = TRUE; 342: } 343: } 344: else 345: nodup = FALSE; 346: 347: next++; 348: } 349: return (nodup); 350: } 351: 352: 353: rline(mp) 354: struct merg *mp; 355: { 356: register struct merg *merg; 357: register int i; 358: 359: merg = mp; 360: if ((i = fread(merg->tup, 1, Tupsize, merg->fiop)) != Tupsize) 361: { 362: if (i == 0) 363: { 364: fclose(merg->fiop); 365: return (1); 366: } 367: syserr("rd err %d on %s", i, setfil(merg->file_num)); 368: } 369: return (0); 370: } 371: 372: newfile() 373: { 374: char *setfil(); 375: 376: makfile(setfil(Nfiles)); 377: Nfiles++; 378: } 379: 380: char *setfil(i) 381: int i; 382: 383: /* 384: ** Convert the number i to a char 385: ** sequence aa, ab, ..., az, ba, etc. 386: */ 387: 388: { 389: register int j; 390: 391: j = i; 392: j--; 393: Filep[0] = j/26 + 'a'; 394: Filep[1] = j%26 + 'a'; 395: return (File); 396: } 397: 398: oldfile() 399: { 400: makfile(Outfile); 401: Tupsout = 0; 402: } 403: 404: makfile(name) 405: char *name; 406: 407: /* 408: ** Create a file by the name "name" 409: ** and place its fio pointer in Oiop 410: */ 411: 412: { 413: if ((Oiop = fopen(name, "w")) == NULL) 414: cant(name); 415: } 416: 417: cant(f) 418: char *f; 419: { 420: syserr("open %s", f); 421: } 422: 423: term(error) 424: int error; 425: { 426: register int i; 427: 428: if (Nfiles == 1) 429: Nfiles++; 430: if (Trace & 020) 431: printf("temp files not removed\n"); 432: else 433: for (i = 1; i < Nfiles; i++) 434: { 435: unlink(setfil(i)); 436: } 437: exit(error); 438: } 439: 440: cmpa(a, b) 441: char **a; 442: char **b; 443: { 444: int af[4]; 445: int bf[4]; 446: char *pa, *pb; 447: register char *tupa, *tupb; 448: int dom; 449: register int frml; 450: int frmt; 451: int off; 452: int temp; 453: int rt; 454: char *dp; 455: 456: pa = *a; 457: pb = *b; 458: Ccount++; 459: dp = Descsort; 460: while ((temp = *dp++) != ENDKEY) 461: { 462: if ((dom = temp) < 0) 463: dom = -temp; 464: frml = Desc.relfrml[dom]; 465: frmt = Desc.relfrmt[dom]; 466: off = Desc.reloff[dom]; 467: tupa = &pa[off]; 468: tupb = &pb[off]; 469: if (temp < 0) 470: { 471: tupb = tupa; 472: tupa = &pb[off]; 473: } 474: if (frmt == CHAR) 475: { 476: frml &= 0377; 477: if (rt = scompare(tupb, frml, tupa, frml)) 478: return (rt); 479: continue; 480: } 481: 482: /* domain is a numeric type */ 483: if (bequal(tupa, tupb, frml)) 484: continue; 485: /* copy to even word boundary */ 486: bmove(tupa, af, frml); 487: bmove(tupb, bf, frml); 488: tupa = (char *) &af[0]; 489: tupb = (char *) &bf[0]; 490: 491: switch (frmt) 492: { 493: 494: case INT: 495: switch (frml) 496: { 497: 498: case 1: 499: return (i1deref(tupa) > i1deref(tupb) ? -1 : 1); 500: 501: case 2: 502: return (i2deref(tupa) > i2deref(tupb) ? -1 : 1); 503: 504: case 4: 505: return (i4deref(tupa) > i4deref(tupb) ? -1 : 1); 506: } 507: 508: case FLOAT: 509: switch (frml) 510: { 511: 512: case 4: 513: return (f4deref(tupa) > f4deref(tupb) ? -1 : 1); 514: 515: case 8: 516: return (f8deref(tupa) > f8deref(tupb) ? -1 : 1); 517: } 518: } 519: } 520: return (0); 521: } 522: 523: 524: 525: /* 526: ** Replacement for access method routine get_page(); 527: ** and associated globals and routines. 528: */ 529: 530: struct accbuf *Acc_head, Accbuf; 531: long Accuread, Accuwrite; 532: 533: get_page(d1, tid) 534: struct descriptor *d1; 535: struct tup_id *tid; 536: { 537: register int i; 538: register struct descriptor *d; 539: long pageid; 540: register struct accbuf *b; 541: 542: d = d1; 543: if (Trace & 0100) 544: { 545: printf("get_page: %.14s,", d->relid); 546: dumptid(tid); 547: } 548: b = Acc_head; 549: if (b == 0) 550: { 551: /* initialize buffer */ 552: Acc_head = &Accbuf; 553: b = &Accbuf; 554: b->thispage = -1; 555: } 556: pluck_page(tid, &pageid); 557: i = 0; 558: if (b->thispage != pageid) 559: { 560: if (Trace & 040) 561: printf("get_page: rdg pg %s\n", locv(pageid)); 562: b->thispage = pageid; 563: if ((lseek(d->relfp, pageid * PGSIZE, 0) < 0) || 564: ((read(d->relfp, b, PGSIZE)) != PGSIZE)) 565: { 566: i = AMREAD_ERR; 567: } 568: Accuread++; 569: } 570: return (i); 571: } 572: 573: resetacc(buf) 574: struct accbuf *buf; 575: { 576: return (0); 577: } 578: 579: acc_err(err) 580: int err; 581: { 582: return (err); 583: }