1: # include "../ingres.h" 2: # include "../catalog.h" 3: # include "../tree.h" 4: # include "../symbol.h" 5: # include "decomp.h" 6: # include "../access.h" 7: # include "../aux.h" 8: 9: /* 10: ** REFORMAT.C -- "reformat" module of DECOMPOSITION 11: ** 12: ** reformat() examines each relation which will be 13: ** involved in a one variable subquery to see if it 14: ** is cost effective to modify the relation to a more 15: ** usefull structure. Included in this file are all the 16: ** routines associated with reformat: 17: ** 18: ** reformat -- reformats each relation if cost effective 19: ** 20: ** findlinks -- determines what one variable clauses (if any) 21: ** are associated with the current variable 22: ** and the substitution variable. 23: ** 24: ** rangrf -- decides whether to actually reformat and does it. 25: ** 26: ** primrf -- performs a projection on a user relation 27: ** 28: ** ckpkey -- determines if relation is already structured usefully. 29: ** 30: ** findwid -- determines width of target list. 31: ** 32: ** rel_tup -- returns how many tuples fit on one page 33: */ 34: 35: 36: 37: reformat(var, sqlist, locrang, buf, tree) 38: int var; 39: struct querytree *sqlist[]; 40: int locrang[]; 41: char buf[]; 42: struct querytree *tree; 43: 44: /* 45: ** Reformat -- Examine each variable to see if it should be reformated. 46: ** 47: ** The algorithm is: 48: ** (1) Find all variables for which, after tuple substitution, 49: ** will have one variable equality clauses on them. 50: ** (2) Estimate how much it will cost to execute them. 51: ** (3) Ignore those relations which are already keyed usefully. 52: ** (4) If it is a user relation, determine if it will be less costly 53: ** to first project (and possibly) modify the relation. 54: ** (5) If it is a _SYS relation, determine if it will be less costly 55: ** to modify the relation to hash on the linking domains. 56: */ 57: 58: { 59: register struct querytree *sq; 60: register char *lmap; 61: register int mainvar; 62: int i, j; 63: char linkmap[MAXDOM]; 64: 65: # ifdef xDTR1 66: if (tTf(13, -1)) 67: printf("REFORMAT: subvar=%d\n", var); 68: # endif 69: 70: /* if the main tree is single var then put it in sq list */ 71: mainvar = -1; 72: if (((struct qt_root *)tree)->tvarc == 1) 73: { 74: mainvar = bitpos(((struct qt_root *)tree)->lvarm | ((struct qt_root *)tree)->rvarm); 75: if (sqlist[mainvar] != 0) 76: syserr("help reformat"); 77: sqlist[mainvar] = tree; 78: # ifdef xDTR1 79: if (tTf(13, 0)) 80: printf("including var %d\n", mainvar); 81: # endif 82: } 83: for(i = 0; i < MAXRANGE; i++) 84: if (sq = sqlist[i]) 85: { 86: # ifdef xDTR1 87: if (tTf(13, 0)) 88: printf("Sqlist[%d]:\n", i); 89: # endif 90: lmap = linkmap; 91: for (j = 0; j < MAXDOM; j++) 92: *lmap++ = 0; 93: if (findlinks(sq->right, var, i, linkmap)) 94: { 95: # ifdef xDTR1 96: if (tTf(13, 1)) 97: prlinks("Findlinks found", linkmap); 98: # endif 99: rangrf(i, var, sq, buf, linkmap, tree, locrang); 100: } 101: } 102: if (mainvar >= 0) 103: sqlist[mainvar] = 0; 104: } 105: 106: 107: 108: findlinks(node, selv, linkv, linkmap) 109: struct querytree *node; 110: int selv, linkv; 111: char linkmap[]; 112: 113: /* 114: ** Findlinks -- Determine whether there are any equalify clauses 115: ** between the "linkv" and the variable selected for tuple 116: ** substitution "selv". 117: ** 118: ** Return: 119: ** TRUE if there is a linking variable 120: ** FALSE if not 121: ** 122: ** Side Effects: 123: ** The linkmap is set to 1 for each linking domains. 124: ** ie. if domains 3 is a linking domains then 125: ** linkmap[3] = 1. 126: */ 127: 128: { 129: register struct querytree *b, *a; 130: register int s; 131: struct querytree *temp; 132: extern struct querytree *ckvar(); 133: 134: a = node; 135: # ifdef xDTR1 136: if (tTf(13, 2)) 137: { 138: printf("FINDLINKS:"); 139: writenod(a); 140: } 141: # endif 142: if (a->sym.type == QLEND) 143: return (0); 144: s = findlinks(a->right, selv, linkv, linkmap); 145: 146: /* 147: ** This mess is looking for a clause of the form: 148: ** EQ 149: ** Var Var 150: ** linkv selv 151: ** Where the VARS can be in either order 152: */ 153: if ((b = a->left)->sym.type != BOP || *b->sym.value!= opEQ || 154: b->left->sym.type!=VAR || b->right->sym.type!=VAR) 155: return (s); 156: 157: a = ckvar(b->left); 158: b = ckvar(b->right); 159: if (((struct qt_var *)b)->varno == linkv) 160: { 161: temp = a; 162: a = b; 163: b = temp; 164: } 165: if (((struct qt_var *)a)->varno != linkv || (selv >= 0 && ((struct qt_var *)b)->varno != selv)) 166: return (s); 167: 168: linkmap[((struct qt_var *)a)->attno] = 1; 169: # ifdef xDTR1 170: if (tTf(13, 3)) 171: printf("found:attno=%d\n", ((struct qt_var *)a)->attno); 172: # endif 173: return (1); 174: } 175: 176: 177: 178: rangrf(var, substvar, sq, buf, linkmap, tree, locrang) 179: int var, substvar; 180: struct querytree *sq; 181: char buf[]; 182: char linkmap[]; 183: struct querytree *tree; 184: int locrang[]; 185: 186: /* 187: ** Rangrf -- reformat the variable "var" if it is cost effective. 188: ** 189: ** Rangrf does the actual work of reformat. If the relation is 190: ** already keyed usefully then rangrf does nothing. 191: ** Otherwise rangrf is split into two decisions: 192: ** A user relation must first be projected and then modified; 193: ** a _SYS relation can be modified directly. 194: ** 195: ** It may be cost effective to just project a user relation. 196: */ 197: 198: { 199: register struct rang_tab *r, *rs; 200: register int j; 201: char nums[2]; 202: int i, newwid; 203: struct querytree *pq; 204: long npages, projpages, newpages; 205: long origcost, modcost, projcost; 206: char *rangename(); 207: long rel_pages(), hashcost(); 208: extern struct querytree *makroot(); 209: 210: 211: r = &Rangev[var]; 212: rs = &Rangev[substvar]; 213: npages = rel_pages(r->rtcnt, r->rtwid); 214: 215: /* calculate original cost of substitution */ 216: 217: origcost = rs->rtcnt * npages; 218: 219: # ifdef xDTR1 220: if (tTf(13, 5)) 221: printf("eval of mod for var %d. orig cost=%s\n", var, locv(origcost)); 222: # endif 223: 224: /* if relation is already keyed usefully, just return */ 225: if (i = ckpkey(linkmap, var)) 226: { 227: # ifdef xDTR1 228: if (tTf(13, 4)) 229: printf("already keyed ok %d\n", i); 230: # endif 231: return; 232: } 233: 234: /* if this is a primary relation, a projection must be done 235: ** before any modify can be performed */ 236: 237: if (!rnum_temp(r->relnum)) 238: { 239: /* evaluation for primary, user relation */ 240: 241: /* to save some time, don't evaluate if origcost is very small */ 242: if (origcost < OVHDP + 1 + npages) 243: return; 244: 245: j = markbuf(buf); 246: 247: /* build a projection tree but don't actually create the projection */ 248: pq = makroot(buf); 249: dfind(sq, buf, mksqlist(pq, var)); 250: 251: newwid = findwid(pq); 252: newpages = rel_pages(r->rtcnt, newwid); 253: 254: /* 255: ** Calculate cost of projection + new cost of substitution 256: ** projcost = readoldpages + writenewpages + runquery + overhead 257: */ 258: 259: projcost = npages + newpages + rs->rtcnt * newpages + OVHDP; 260: 261: 262: /* CALCULATE COST OF MODIFY = COST OF PROJECTION + COST OF MODIFY 263: * AND NEW COST OF SUBSTITUTION AFTER MODIFY */ 264: 265: modcost = (npages + newpages) + 266: hashcost(newpages) + 267: rs->rtcnt + 268: OVHDP + OVHDM; 269: 270: # ifdef xDTR1 271: if (tTf(13, 5)) 272: { 273: printf("primary rel: proj cost=%s\t", locv(projcost)); 274: printf("mod cost=%s\n", locv(modcost)); 275: } 276: # endif 277: 278: if (origcost <= modcost) 279: if (origcost <= projcost) 280: { 281: freebuf(buf, j); 282: return; 283: } 284: 285: # ifdef xDTR1 286: if (tTf(13, 5)) 287: printf("doing projection\n"); 288: # endif 289: 290: /* i will be TRUE if the proj was aborted */ 291: i = primrf(var, pq, locrang); 292: freebuf(buf, j); 293: if ((projcost <= modcost) || i) 294: return; 295: } 296: 297: /* IF THIS IS A TEMPORARY RELATION, A MODIFY CAN BE DONE DIRECTLY */ 298: 299: else 300: { 301: 302: /* CALCULATE MODIFY COST (which does not include a projection) 303: * AND NEW COST OF SUBSTITUTION */ 304: 305: modcost = hashcost(npages) 306: + rs->rtcnt + OVHDM; 307: 308: # ifdef xDTR1 309: if (tTf(13, 5)) 310: printf("temp rel: mod cost=%s\n", locv(modcost)); 311: # endif 312: 313: if (origcost <= modcost) 314: return; 315: } 316: 317: # ifdef xDTR1 318: if (tTf(13, 5)) 319: printf("doing modify\n"); 320: # endif 321: 322: initp(); 323: setp(rangename(var)); 324: setp("hash"); /* modify to hash */ 325: setp("num"); /* passing domain numbers - not names */ 326: 327: nums[1] = '\0'; 328: for (j = 0; j < MAXDOM; j++) 329: if (linkmap[j]) 330: { 331: *nums = j; 332: setp(nums); 333: } 334: 335: /* fill relation completely */ 336: setp(""); 337: setp("fillfactor"); 338: setp("100"); 339: setp("minpages"); 340: setp("1"); 341: closer1(var); 342: call_dbu(mdMODIFY, FALSE); 343: 344: /* re-open modified range to get accurate descriptor */ 345: openr1(var); 346: } 347: 348: 349: 350: 351: 352: primrf(var, pq, locrang) 353: int var; 354: struct querytree *pq; 355: int locrang[]; 356: 357: /* 358: ** Primrf -- Replace a user relation with a projection of the needed domains. 359: ** 360: ** Primrf retrieves into a temporary relation, the domains 361: ** specified by the "pq" tree. The range table is updated to 362: ** reflect the new range. 363: ** 364: ** In fact a projection is not an accurate way to describe what 365: ** happens. In order to avoid changing any attribute numbers in 366: ** the query, the needed domains are projected and the domains 367: ** inbetween are created as type "c0". This way they occupy 368: ** no space and the attribute numbering is unchanged. Of course, 369: ** any attributes beyond the last one needed are simply discarded. 370: ** 371: ** In previous versions attempts were made to project only the needed 372: ** domains and to renumber the query tree. This proved to be very 373: ** expensive and difficult and was not always as optimal as this 374: ** method. 375: ** 376: ** The routines newquery() and endquery() will undo the effects 377: ** of primrf and restore the range table back to its original state. 378: */ 379: 380: 381: { 382: register struct querytree *q, **np; 383: register int i; 384: int maxresno, rnum; 385: struct querytree *node1[MAXDOM+1], *node2[MAXDOM+1]; 386: char czero[10]; /* a dummy resdom */ 387: 388: # ifdef xDTR1 389: if (tTf(13, 6)) 390: printf("PRIMRF:\n"); 391: # endif 392: 393: /* renumber RESDOMs to match their VARs */ 394: for (q = pq->left; q->sym.type != TREE; q = q->left) 395: ((struct qt_res *)q)->resno = ((struct qt_var *)q->right)->attno; 396: 397: /* form list of RESDOMs from outermost inward */ 398: node1[lnode(pq->left, node1, 0)] = 0; 399: 400: /* form a dummy RESDOM with type CHAR and length 0 */ 401: q = (struct querytree *) czero; 402: ((struct qt_var *)q)->frmt = CHAR; 403: ((struct qt_var *)q)->frml = 0; 404: 405: /* zero node2 */ 406: for (np = node2, i = 0; i < MAXDOM + 1; i++) 407: *np++ = 0; 408: 409: /* sort RESDOMs into node2 */ 410: maxresno = 0; 411: for (np = node1; q = *np++; ) 412: { 413: if ((i = ((struct qt_res *)q)->resno) == 0) 414: return (1); /* abort. Tid is referenced */ 415: if (i > maxresno) 416: maxresno = i; 417: node2[i-1] = q; 418: } 419: 420: /* fill missing RESDOMs with czero */ 421: for (np = node2, i = 0; i < maxresno; i++, np++) 422: if (*np == 0) 423: *np = (struct querytree *) czero; 424: 425: 426: /* set up params for the create */ 427: initp(); 428: setp("0"); /* initial relstat field */ 429: rnum = rnum_alloc(); 430: setp(rnum_convert(rnum)); 431: domnam(node2, "f"); 432: call_dbu(mdCREATE, FALSE); 433: 434: /* now run projection */ 435: i = var; 436: execsq1(pq, i, rnum); 437: new_range(i, rnum); 438: /* save the name of the new relation */ 439: savrang(locrang, i); 440: openr1(i); 441: return (0); 442: } 443: 444: 445: 446: ckpkey(linkmap, var) 447: char linkmap[]; 448: int var; 449: 450: /* 451: ** Ckpkey -- determine if a relation is already keyed usefully. 452: ** 453: ** Ckpkey gets the key structure from paramd() and compares 454: ** it to the linkmap. If the relation is ISAM and the first keyed 455: ** domain is in linkmap, or if it is HASH and all keyed domains 456: ** are in the linkmap, then ckpkey() returns >0, else 457: ** ckpkey looks for secondary indexes which are usable. 458: ** if none, then ckpkey() returns 0. 459: ** 460: ** The value returned is an estimate of the number of 461: ** pages which must be read to satisfy a query given 462: ** equality clauses on the linkmap domains and the 463: ** structure of the relation. The (crude) estimates are: 464: ** hash 1 page 465: ** isam 2 pages 466: ** hash sec index 2 pages 467: ** isam sec index 3 pages 468: */ 469: { 470: register struct descriptor *d; 471: register int i; 472: struct index itup; 473: struct accessparam ap; 474: struct tup_id lotid, hitid; 475: extern struct descriptor *readopen(), *openr1(), Inddes; 476: 477: 478: # ifdef xDTR1 479: if (tTf(13, 11)) 480: printf("CKPKEY:%s\n", rangename(var)); 481: # endif 482: 483: /* if relation is an unindexed heap, then it cannot be keyed usefully */ 484: d = openr1(var); 485: if (d->relspec != M_HEAP || d->relindxd > 0) 486: { 487: d = readopen(var); /* open relation if it hasn't been already */ 488: paramd(d, &ap); 489: if (ckpkey1(linkmap, &ap) == 0) 490: return (ap.mode == EXACTKEY ? 1 : 2); /* success */ 491: 492: if (d->relindxd > 0) 493: { 494: opencatalog("indexes", 0); 495: setkey(&Inddes, &itup, d->relid, IRELIDP); 496: setkey(&Inddes, &itup, d->relowner, IOWNERP); 497: if (i = find(&Inddes, EXACTKEY, &lotid, &hitid, &itup)) 498: syserr("ckpkey:find %d", i); 499: 500: while ((i = get(&Inddes, &lotid, &hitid, &itup, TRUE)) == 0) 501: { 502: if (!bequal(&itup, d->relid, MAXNAME + 2)) 503: continue; 504: 505: parami(&itup, &ap); 506: if (ckpkey1(linkmap, &ap) == 0) 507: return (ap.mode == EXACTKEY ? 2 : 3); /* success */ 508: } 509: } 510: } 511: return (0); /* failure. no usefull structure */ 512: } 513: 514: 515: 516: ckpkey1(linkmap, ap) 517: char linkmap[]; 518: struct accessparam *ap; 519: { 520: register int i, k, anykey; 521: 522: if (ap->mode == NOKEY) 523: return (1); 524: anykey = 0; 525: for (i = 0; k = ap->keydno[i]; i++) 526: { 527: if (linkmap[k] == 0) 528: { 529: if (ap->mode == EXACTKEY) 530: return (1); 531: else 532: break; 533: } 534: anykey++; 535: } 536: return (!anykey); 537: } 538: 539: 540: 541: 542: 543: findwid(tree) 544: struct querytree *tree; 545: 546: /* 547: ** Findwid -- scan the target list of the projection tree 548: ** to determine the resultant tuple size. 549: ** 550: ** Return: 551: ** Size of projected tuple. 552: */ 553: 554: { 555: register struct querytree *nod, *t; 556: register int wid; 557: 558: 559: wid = 0; 560: t = tree; 561: 562: for (nod = t->left; nod && nod->sym.type != TREE; nod = nod->left) 563: { 564: wid += ((struct qt_var *)nod)->frml & 0377; 565: } 566: 567: return (wid); 568: } 569: 570: 571: /* 572: ** HASHCOST -- determine cost to modify to hash 573: ** 574: ** Estimates the cost to modify the relation to hash. 575: ** The estimate is crude since it assumes that there 576: ** are no duplicates and that a "unix" page will be 577: ** the same size as an "ingres" page. 578: ** 579: ** The cost is based on the parameters which signify 580: ** the size of the in-core sort buffer and the n-way 581: ** merge sort plus the cost to read the sorted file 582: ** and write the new relation twice (that's the was it works!). 583: ** 584: ** Parameters: 585: ** npages - the number of pages (estimate) which the 586: ** relation currently occupies. 587: ** 588: ** Returns: 589: ** the cost to hash the relation. 590: ** 591: ** Side Effects: 592: ** none 593: ** 594: ** Called By: 595: ** rangrf 596: */ 597: 598: # define COREBUFSIZE 32767 / PGSIZE 599: # define MERGESIZE 7 600: 601: long hashcost(npages) 602: long npages; 603: { 604: long sortpages, total; 605: register int nfiles; 606: 607: nfiles = npages / COREBUFSIZE; 608: sortpages = 2 * npages; 609: 610: while (nfiles > 1) 611: { 612: nfiles = (nfiles + MERGESIZE - 1) / MERGESIZE; 613: sortpages += 2 * npages; 614: } 615: 616: total = sortpages + npages + npages + npages; 617: # ifdef xDTR1 618: if (tTf(13, 5)) 619: printf("hashcost is %s\n", locv(total)); 620: # endif 621: 622: return (total); 623: }