1: # include <ingres.h> 2: # include <catalog.h> 3: # include <aux.h> 4: # include <tree.h> 5: # include <symbol.h> 6: # include <pv.h> 7: # include "globs.h" 8: # include <access.h> 9: # include <sccs.h> 10: 11: SCCSID(@(#)reformat.c 8.3 1/16/85) 12: 13: /* 14: ** REFORMAT.C -- "reformat" module of DECOMPOSITION 15: ** 16: ** reformat() examines each relation which will be 17: ** involved in a one variable subquery to see if it 18: ** is cost effective to modify the relation to a more 19: ** usefull structure. Included in this file are all the 20: ** routines associated with reformat: 21: ** 22: ** reformat -- reformats each relation if cost effective 23: ** 24: ** findlinks -- determines what one variable clauses (if any) 25: ** are associated with the current variable 26: ** and the substitution variable. 27: ** 28: ** rangrf -- decides whether to actually reformat and does it. 29: ** 30: ** primrf -- performs a projection on a user relation 31: ** 32: ** ckpkey -- determines if relation is already structured usefully. 33: ** 34: ** findwid -- determines width of target list. 35: ** 36: ** rel_tup -- returns how many tuples fit on one page 37: */ 38: /* 39: ** Reformat -- Examine each variable to see if it should be reformated. 40: ** 41: ** The algorithm is: 42: ** (1) Find all variables for which, after tuple substitution, 43: ** will have one variable equality clauses on them. 44: ** (2) Estimate how much it will cost to execute them. 45: ** (3) Ignore those relations which are already keyed usefully. 46: ** (4) If it is a user relation, determine if it will be less costly 47: ** to first project (and possibly) modify the relation. 48: ** (5) If it is a _SYS relation, determine if it will be less costly 49: ** to modify the relation to hash on the linking domains. 50: */ 51: 52: reformat(var, sqlist, locrang, buf, tree) 53: int var; 54: QTREE *sqlist[]; 55: int locrang[]; 56: char buf[]; 57: QTREE *tree; 58: { 59: register QTREE *sq; 60: register char *lmap; 61: register int mainvar; 62: int i, j; 63: char linkmap[MAXDOM]; 64: 65: # ifdef xDTR1 66: if (tTf(39, -1)) 67: printf("REFORMAT: subvar=%d\n", var); 68: # endif 69: 70: /* if the main tree is single var then put it in sq list */ 71: mainvar = -1; 72: if (tree->sym.value.sym_root.tvarc == 1) 73: { 74: mainvar = bitpos(tree->sym.value.sym_root.lvarm | tree->sym.value.sym_root.rvarm); 75: if (sqlist[mainvar] != 0) 76: syserr("help reformat"); 77: sqlist[mainvar] = tree; 78: # ifdef xDTR1 79: if (tTf(39, 0)) 80: printf("including var %d\n", mainvar); 81: # endif 82: } 83: for(i = 0; i < MAXRANGE; i++) 84: if (sq = sqlist[i]) 85: { 86: # ifdef xDTR1 87: if (tTf(39, 0)) 88: printf("Sqlist[%d]:\n", i); 89: # endif 90: lmap = linkmap; 91: for (j = 0; j < MAXDOM; j++) 92: *lmap++ = 0; 93: if (findlinks(sq->right, var, i, linkmap)) 94: { 95: # ifdef xDTR1 96: if (tTf(39, 1)) 97: prlinks("Findlinks found", linkmap); 98: # endif 99: rangrf(i, var, sq, buf, linkmap, tree, locrang); 100: } 101: } 102: if (mainvar >= 0) 103: sqlist[mainvar] = 0; 104: } 105: /* 106: ** Findlinks -- Determine whether there are any equalify clauses 107: ** between the "linkv" and the variable selected for tuple 108: ** substitution "selv". 109: ** 110: ** Return: 111: ** TRUE if there is a linking variable 112: ** FALSE if not 113: ** 114: ** Side Effects: 115: ** The linkmap is set to 1 for each linking domains. 116: ** ie. if domains 3 is a linking domains then 117: ** linkmap[3] = 1. 118: */ 119: 120: findlinks(node, selv, linkv, linkmap) 121: QTREE *node; 122: int selv, linkv; 123: char linkmap[]; 124: { 125: register QTREE *b, *a; 126: register int s; 127: QTREE *temp; 128: extern QTREE *ckvar(); 129: 130: a = node; 131: # ifdef xDTR1 132: if (tTf(39, 2)) 133: { 134: printf("FINDLINKS:"); 135: nodepr(a); 136: } 137: # endif 138: if (a->sym.type == QLEND) 139: return (0); 140: s = findlinks(a->right, selv, linkv, linkmap); 141: 142: /* 143: ** This mess is looking for a clause of the form: 144: ** EQ 145: ** Var Var 146: ** linkv selv 147: ** Where the VARS can be in either order 148: */ 149: if ((b = a->left)->sym.type != BOP || b->sym.value.sym_op.opno!= opEQ || 150: b->left->sym.type!=VAR || b->right->sym.type!=VAR) 151: return (s); 152: 153: a = ckvar(b->left); 154: b = ckvar(b->right); 155: if (b->sym.value.sym_var.varno == linkv) 156: { 157: temp = a; 158: a = b; 159: b = temp; 160: } 161: if (a->sym.value.sym_var.varno != linkv || (selv >= 0 && b->sym.value.sym_var.varno != selv)) 162: return (s); 163: 164: linkmap[a->sym.value.sym_var.attno] = 1; 165: # ifdef xDTR1 166: if (tTf(39, 3)) 167: printf("found:attno=%d\n", a->sym.value.sym_var.attno); 168: # endif 169: return (1); 170: } 171: /* 172: ** Rangrf -- reformat the variable "var" if it is cost effective. 173: ** 174: ** Rangrf does the actual work of reformat. If the relation is 175: ** already keyed usefully then rangrf does nothing. 176: ** Otherwise rangrf is split into two decisions: 177: ** A user relation must first be projected and then modified; 178: ** a _SYS relation can be modified directly. 179: ** 180: ** It may be cost effective to just project a user relation. 181: */ 182: 183: /*ARGSUSED*/ 184: rangrf(var, substvar, sq, buf, linkmap, tree, locrang) 185: int var, substvar; 186: QTREE *sq; 187: char buf[]; 188: char linkmap[]; 189: QTREE *tree; 190: int locrang[]; 191: { 192: register struct rang_tab *r, *rs; 193: register int j; 194: char nums[2]; 195: int i, newwid; 196: QTREE *pq; 197: long npages, newpages; 198: long origcost, modcost, projcost; 199: extern char *rangename(); 200: extern long rel_pages(), hashcost(); 201: extern QTREE *makroot(); 202: extern DESC *openr1(); 203: extern QTREE **mksqlist(); 204: 205: r = &De.de_rangev[var]; 206: rs = &De.de_rangev[substvar]; 207: npages = rel_pages(r->rtcnt, r->rtwid); 208: 209: /* calculate original cost of substitution */ 210: 211: origcost = rs->rtcnt * npages; 212: 213: # ifdef xDTR1 214: if (tTf(39, 5)) 215: printf("eval of mod for var %d. orig cost=%ld\n", var, origcost); 216: # endif 217: 218: /* if relation is already keyed usefully, just return */ 219: if (i = ckpkey(linkmap, var)) 220: { 221: # ifdef xDTR1 222: if (tTf(39, 4)) 223: printf("already keyed ok %d\n", i); 224: # endif 225: return; 226: } 227: 228: /* if this is a primary relation, a projection must be done 229: ** before any modify can be performed */ 230: 231: if (!rnum_temp(r->relnum)) 232: { 233: /* evaluation for primary, user relation */ 234: 235: /* to save some time, don't evaluate if origcost is very small */ 236: if (origcost < OVHDP + 1 + npages) 237: return; 238: 239: j = markbuf(buf); 240: 241: /* build a projection tree but don't actually create the projection */ 242: pq = makroot(buf); 243: dfind(sq, buf, mksqlist(pq, var)); 244: 245: newwid = findwid(pq); 246: newpages = rel_pages(r->rtcnt, newwid); 247: 248: /* 249: ** Calculate cost of projection + new cost of substitution 250: ** projcost = readoldpages + writenewpages + runquery + overhead 251: */ 252: 253: projcost = npages + newpages + rs->rtcnt * newpages + OVHDP; 254: 255: 256: /* CALCULATE COST OF MODIFY = COST OF PROJECTION + COST OF MODIFY 257: * AND NEW COST OF SUBSTITUTION AFTER MODIFY */ 258: 259: modcost = (npages + newpages) + 260: hashcost(newpages) + 261: rs->rtcnt + 262: OVHDP + OVHDM; 263: 264: # ifdef xDTR1 265: if (tTf(39, 5)) 266: { 267: printf("primary rel: proj cost=%ld\t", projcost); 268: printf("mod cost=%ld\n", modcost); 269: } 270: # endif 271: 272: if (origcost <= modcost) 273: if (origcost <= projcost) 274: { 275: freebuf(buf, j); 276: return; 277: } 278: 279: # ifdef xDTR1 280: if (tTf(39, 5)) 281: printf("doing projection\n"); 282: # endif 283: 284: /* i will be TRUE if the proj was aborted */ 285: i = primrf(var, pq, locrang); 286: freebuf(buf, j); 287: if ((projcost <= modcost) || i) 288: return; 289: } 290: 291: /* IF THIS IS A TEMPORARY RELATION, A MODIFY CAN BE DONE DIRECTLY */ 292: 293: else 294: { 295: 296: /* CALCULATE MODIFY COST (which does not include a projection) 297: * AND NEW COST OF SUBSTITUTION */ 298: 299: modcost = hashcost(npages) 300: + rs->rtcnt + OVHDM; 301: 302: # ifdef xDTR1 303: if (tTf(39, 5)) 304: printf("temp rel: mod cost=%ld\n", modcost); 305: # endif 306: 307: if (origcost <= modcost) 308: return; 309: } 310: 311: # ifdef xDTR1 312: if (tTf(39, 5)) 313: printf("doing modify\n"); 314: # endif 315: 316: initp(); 317: setp(PV_STR, rangename(var)); 318: setp(PV_STR, "hash"); /* modify to hash */ 319: setp(PV_STR, "num"); /* passing domain numbers - not names */ 320: 321: nums[1] = '\0'; 322: for (j = 0; j < MAXDOM; j++) 323: if (linkmap[j]) 324: { 325: *nums = j; 326: setp(PV_STR, nums); 327: } 328: 329: /* fill relation completely */ 330: setp(PV_STR, ""); 331: setp(PV_STR, "fillfactor"); 332: setp(PV_STR, "100"); 333: setp(PV_STR, "minpages"); 334: setp(PV_STR, "1"); 335: closer1(var); 336: call_dbu(mdMODIFY, FALSE); 337: 338: /* re-open modified range to get accurate descriptor */ 339: openr1(var); 340: } 341: /* 342: ** Primrf -- Replace a user relation with a projection of the needed domains. 343: ** 344: ** Primrf retrieves into a temporary relation, the domains 345: ** specified by the "pq" tree. The range table is updated to 346: ** reflect the new range. 347: ** 348: ** In fact a projection is not an accurate way to describe what 349: ** happens. In order to avoid changing any attribute numbers in 350: ** the query, the needed domains are projected and the domains 351: ** inbetween are created as type "c0". This way they occupy 352: ** no space and the attribute numbering is unchanged. Of course, 353: ** any attributes beyond the last one needed are simply discarded. 354: ** 355: ** In previous versions attempts were made to project only the needed 356: ** domains and to renumber the query tree. This proved to be very 357: ** expensive and difficult and was not always as optimal as this 358: ** method. 359: ** 360: ** The routines newquery() and endquery() will undo the effects 361: ** of primrf and restore the range table back to its original state. 362: */ 363: 364: primrf(var, pq, locrang) 365: int var; 366: QTREE *pq; 367: int locrang[]; 368: { 369: register QTREE *q, **np; 370: register int i; 371: int maxresno, rnum; 372: QTREE *node1[MAXDOM+1], *node2[MAXDOM+1]; 373: static char czero[QT_HDR_SIZ + sizeof (struct resdomnode)]; /* a dummy resdom */ 374: extern DESC *openr1(); 375: extern char *rnum_convert(); 376: 377: # ifdef xDTR1 378: if (tTf(39, 6)) 379: printf("PRIMRF:\n"); 380: # endif 381: 382: /* renumber RESDOMs to match their VARs */ 383: for (q = pq->left; q->sym.type != TREE; q = q->left) 384: q->sym.value.sym_resdom.resno = q->right->sym.value.sym_var.attno; 385: 386: /* form list of RESDOMs from outermost inward */ 387: node1[lnode(pq->left, node1, 0)] = 0; 388: 389: /* form a dummy RESDOM with type CHAR and length 0 */ 390: q = (QTREE *) czero; 391: q->sym.value.sym_resdom.resfrmt = CHAR; 392: q->sym.value.sym_resdom.resfrml = 0; 393: 394: /* zero node2 */ 395: for (np = node2, i = 0; i < MAXDOM + 1; i++) 396: *np++ = 0; 397: 398: /* sort RESDOMs into node2 */ 399: maxresno = 0; 400: for (np = node1; q = *np++; ) 401: { 402: if ((i = q->sym.value.sym_resdom.resno) == 0) 403: return (1); /* abort. Tid is referenced */ 404: if (i > maxresno) 405: maxresno = i; 406: node2[i-1] = q; 407: } 408: 409: /* fill missing RESDOMs with czero */ 410: for (np = node2, i = 0; i < maxresno; i++, np++) 411: if (*np == 0) 412: *np = (QTREE *) czero; 413: 414: 415: /* set up params for the create */ 416: initp(); 417: setp(PV_STR, "0"); /* initial relstat field */ 418: rnum = rnum_alloc(); 419: setp(PV_STR, rnum_convert(rnum)); 420: domnam(node2, "f"); 421: call_dbu(mdCREATE, FALSE); 422: 423: /* now run projection */ 424: i = var; 425: execsq1(pq, i, rnum); 426: new_range(i, rnum); 427: /* save the name of the new relation */ 428: savrang(locrang, i); 429: openr1(i); 430: return (0); 431: } 432: /* 433: ** Ckpkey -- determine if a relation is already keyed usefully. 434: ** 435: ** Ckpkey gets the key structure from paramd() and compares 436: ** it to the linkmap. If the relation is ISAM and the first keyed 437: ** domain is in linkmap, or if it is HASH and all keyed domains 438: ** are in the linkmap, then ckpkey() returns >0, else 439: ** ckpkey looks for secondary indexes which are usable. 440: ** if none, then ckpkey() returns 0. 441: ** 442: ** The value returned is an estimate of the number of 443: ** pages which must be read to satisfy a query given 444: ** equality clauses on the linkmap domains and the 445: ** structure of the relation. The (crude) estimates are: 446: ** hash 1 page 447: ** isam 2 pages 448: ** hash sec index 2 pages 449: ** isam sec index 3 pages 450: */ 451: 452: ckpkey(linkmap, var) 453: char linkmap[]; 454: int var; 455: { 456: register DESC *d; 457: register int i; 458: struct index itup; 459: struct accessparam ap; 460: TID lotid, hitid; 461: extern DESC *readopen(), *openr1(), Inddes; 462: 463: 464: # ifdef xDTR1 465: if (tTf(39, 11)) 466: printf("CKPKEY:%s\n", rangename(var)); 467: # endif 468: 469: /* if relation is an unindexed heap, then it cannot be keyed usefully */ 470: d = openr1(var); 471: if (d->reldum.relspec != M_HEAP || d->reldum.relindxd > 0) 472: { 473: d = readopen(var); /* open relation if it hasn't been already */ 474: paramd(d, &ap); 475: if (ckpkey1(linkmap, &ap) == 0) 476: return (ap.mode == EXACTKEY ? 1 : 2); /* success */ 477: 478: if (d->reldum.relindxd > 0) 479: { 480: opencatalog("indexes", OR_READ); 481: setkey(&Inddes, (char *)&itup, d->reldum.relid, IRELIDP); 482: setkey(&Inddes, (char *)&itup, d->reldum.relowner, IOWNERP); 483: if (i = find(&Inddes, EXACTKEY, &lotid, &hitid, (char *)&itup)) 484: syserr("ckpkey:find %d", i); 485: 486: while ((i = get(&Inddes, &lotid, &hitid, (char *)&itup, TRUE)) == 0) 487: { 488: if (!bequal(itup.irelidp, d->reldum.relid, MAXNAME + 2)) 489: continue; 490: 491: parami(&itup, &ap); 492: if (ckpkey1(linkmap, &ap) == 0) 493: return (ap.mode == EXACTKEY ? 2 : 3); /* success */ 494: } 495: } 496: } 497: return (0); /* failure. no useful structure */ 498: } 499: 500: 501: 502: ckpkey1(linkmap, ap) 503: char linkmap[]; 504: struct accessparam *ap; 505: { 506: register int i, k, anykey; 507: 508: if (ap->mode == NOKEY) 509: return (1); 510: anykey = 0; 511: for (i = 0; k = ap->keydno[i]; i++) 512: { 513: if (linkmap[k] == 0) 514: { 515: if (ap->mode == EXACTKEY) 516: return (1); 517: else 518: break; 519: } 520: anykey++; 521: } 522: return (!anykey); 523: } 524: /* 525: ** Findwid -- scan the target list of the projection tree 526: ** to determine the resultant tuple size. 527: ** 528: ** Return: 529: ** Size of projected tuple. 530: */ 531: 532: findwid(tree) 533: QTREE *tree; 534: { 535: register QTREE *nod, *t; 536: register int wid; 537: 538: wid = 0; 539: t = tree; 540: 541: for (nod = t->left; nod && nod->sym.type != TREE; nod = nod->left) 542: { 543: wid += nod->sym.value.sym_var.varfrml & I1MASK; 544: } 545: 546: return (wid); 547: } 548: /* 549: ** HASHCOST -- determine cost to modify to hash 550: ** 551: ** Estimates the cost to modify the relation to hash. 552: ** The estimate is crude since it assumes that there 553: ** are no duplicates and that a "unix" page will be 554: ** the same size as an "ingres" page. 555: ** 556: ** The cost is based on the parameters which signify 557: ** the size of the in-core sort buffer and the n-way 558: ** merge sort plus the cost to read the sorted file 559: ** and write the new relation twice (that's the was it works!). 560: ** 561: ** Parameters: 562: ** npages - the number of pages (estimate) which the 563: ** relation currently occupies. 564: ** 565: ** Returns: 566: ** the cost to hash the relation. 567: ** 568: ** Side Effects: 569: ** none 570: ** 571: ** Called By: 572: ** rangrf 573: */ 574: 575: # define COREBUFSIZE 32767 / PGSIZE 576: # define MERGESIZE 7 577: 578: long 579: hashcost(npages) 580: long npages; 581: { 582: long sortpages, total; 583: register int nfiles; 584: 585: nfiles = npages / COREBUFSIZE; 586: sortpages = 2 * npages; 587: 588: while (nfiles > 1) 589: { 590: nfiles = (nfiles + MERGESIZE - 1) / MERGESIZE; 591: sortpages += 2 * npages; 592: } 593: 594: total = sortpages + npages + npages + npages; 595: # ifdef xDTR1 596: if (tTf(39, 5)) 597: printf("hashcost is %ld\n", total); 598: # endif 599: 600: return (total); 601: }