1: # include   <ingres.h>
   2: # include   <catalog.h>
   3: # include   <aux.h>
   4: # include   <tree.h>
   5: # include   <symbol.h>
   6: # include   <pv.h>
   7: # include   "globs.h"
   8: # include   <access.h>
   9: # include   <sccs.h>
  10: 
  11: SCCSID(@(#)reformat.c	8.3	1/16/85)
  12: 
  13: /*
  14: **	REFORMAT.C -- "reformat" module of DECOMPOSITION
  15: **
  16: **	reformat() examines each relation which will be
  17: **	involved in a one variable subquery to see if it
  18: **	is cost effective to modify the relation to a more
  19: **	usefull structure. Included in this file are all the
  20: **	routines associated with reformat:
  21: **
  22: **		reformat -- reformats each relation if cost effective
  23: **
  24: **		findlinks -- determines what one variable clauses (if any)
  25: **				are associated with the current variable
  26: **				and the substitution variable.
  27: **
  28: **		rangrf    -- decides whether to actually reformat and does it.
  29: **
  30: **		primrf    -- performs a projection on a user relation
  31: **
  32: **		ckpkey    -- determines if relation is already structured usefully.
  33: **
  34: **		findwid   -- determines width of target list.
  35: **
  36: **		rel_tup   -- returns how many tuples fit on one page
  37: */
  38: /*
  39: ** Reformat -- Examine each variable to see if it should be reformated.
  40: **
  41: **	The algorithm is:
  42: **	(1) Find all variables for which, after tuple substitution,
  43: **	    will have one variable equality clauses on them.
  44: **	(2) Estimate how much it will cost to execute them.
  45: **	(3) Ignore those relations which are already keyed usefully.
  46: **	(4) If it is a user relation, determine if it will be less costly
  47: **	    to first project (and possibly) modify the relation.
  48: **	(5) If it is a _SYS relation, determine if it will be less costly
  49: **	    to modify the relation to hash on the linking domains.
  50: */
  51: 
  52: reformat(var, sqlist, locrang, buf, tree)
  53: int var;
  54: QTREE   *sqlist[];
  55: int locrang[];
  56: char    buf[];
  57: QTREE   *tree;
  58: {
  59:     register QTREE  *sq;
  60:     register char   *lmap;
  61:     register int    mainvar;
  62:     int     i, j;
  63:     char        linkmap[MAXDOM];
  64: 
  65: #	ifdef xDTR1
  66:     if (tTf(39, -1))
  67:         printf("REFORMAT: subvar=%d\n", var);
  68: #	endif
  69: 
  70:     /* if the main tree is single var then put it in sq list */
  71:     mainvar = -1;
  72:     if (tree->sym.value.sym_root.tvarc == 1)
  73:     {
  74:         mainvar = bitpos(tree->sym.value.sym_root.lvarm | tree->sym.value.sym_root.rvarm);
  75:         if (sqlist[mainvar] != 0)
  76:             syserr("help reformat");
  77:         sqlist[mainvar] = tree;
  78: #		ifdef xDTR1
  79:         if (tTf(39, 0))
  80:             printf("including var %d\n", mainvar);
  81: #		endif
  82:     }
  83:     for(i = 0; i < MAXRANGE; i++)
  84:         if (sq = sqlist[i])
  85:         {
  86: #			ifdef xDTR1
  87:             if (tTf(39, 0))
  88:                 printf("Sqlist[%d]:\n", i);
  89: #			endif
  90:             lmap = linkmap;
  91:             for (j = 0; j < MAXDOM; j++)
  92:                 *lmap++ = 0;
  93:             if (findlinks(sq->right, var, i, linkmap))
  94:             {
  95: #				ifdef xDTR1
  96:                 if (tTf(39, 1))
  97:                     prlinks("Findlinks found", linkmap);
  98: #				endif
  99:                 rangrf(i, var, sq, buf, linkmap, tree, locrang);
 100:             }
 101:         }
 102:     if (mainvar >= 0)
 103:         sqlist[mainvar] = 0;
 104: }
 105: /*
 106: ** Findlinks -- Determine whether there are any equalify clauses
 107: **	between the "linkv" and the variable selected for tuple
 108: **	substitution "selv".
 109: **
 110: **	Return:
 111: **		TRUE if there is a linking variable
 112: **		FALSE if not
 113: **
 114: **	Side Effects:
 115: **		The linkmap is set to 1 for each linking domains.
 116: **		ie. if domains 3 is a linking domains then
 117: **		linkmap[3] = 1.
 118: */
 119: 
 120: findlinks(node, selv, linkv, linkmap)
 121: QTREE   *node;
 122: int selv, linkv;
 123: char    linkmap[];
 124: {
 125:     register QTREE  *b, *a;
 126:     register int    s;
 127:     QTREE       *temp;
 128:     extern QTREE    *ckvar();
 129: 
 130:     a = node;
 131: #	ifdef xDTR1
 132:     if (tTf(39, 2))
 133:     {
 134:         printf("FINDLINKS:");
 135:         nodepr(a);
 136:     }
 137: #	endif
 138:     if (a->sym.type == QLEND)
 139:         return (0);
 140:     s = findlinks(a->right, selv, linkv, linkmap);
 141: 
 142:     /*
 143: 	** This mess is looking for a clause of the form:
 144: 	**		EQ
 145: 	**	Var		Var
 146: 	**	linkv		selv
 147: 	** Where the VARS can be in either order
 148: 	*/
 149:     if ((b = a->left)->sym.type != BOP || b->sym.value.sym_op.opno!= opEQ ||
 150:         b->left->sym.type!=VAR || b->right->sym.type!=VAR)
 151:             return (s);
 152: 
 153:     a = ckvar(b->left);
 154:     b = ckvar(b->right);
 155:     if (b->sym.value.sym_var.varno == linkv)
 156:     {
 157:         temp = a;
 158:         a = b;
 159:         b = temp;
 160:     }
 161:     if (a->sym.value.sym_var.varno != linkv || (selv >= 0 && b->sym.value.sym_var.varno != selv))
 162:         return (s);
 163: 
 164:     linkmap[a->sym.value.sym_var.attno] = 1;
 165: #	ifdef xDTR1
 166:     if (tTf(39, 3))
 167:         printf("found:attno=%d\n", a->sym.value.sym_var.attno);
 168: #	endif
 169:     return (1);
 170: }
 171: /*
 172: ** Rangrf -- reformat the variable "var" if it is cost effective.
 173: **
 174: **	Rangrf does the actual work of reformat. If the relation is
 175: **	already keyed usefully then rangrf does nothing.
 176: **	Otherwise rangrf is split into two decisions:
 177: **	A user relation must first be projected and then modified;
 178: **	a _SYS relation can be modified directly.
 179: **
 180: **	It may be cost effective to just project a user relation.
 181: */
 182: 
 183: /*ARGSUSED*/
 184: rangrf(var, substvar, sq, buf, linkmap, tree, locrang)
 185: int var, substvar;
 186: QTREE   *sq;
 187: char    buf[];
 188: char    linkmap[];
 189: QTREE   *tree;
 190: int locrang[];
 191: {
 192:     register struct rang_tab    *r, *rs;
 193:     register int            j;
 194:     char                nums[2];
 195:     int             i, newwid;
 196:     QTREE               *pq;
 197:     long                npages, newpages;
 198:     long                origcost, modcost, projcost;
 199:     extern char         *rangename();
 200:     extern long         rel_pages(), hashcost();
 201:     extern QTREE            *makroot();
 202:     extern DESC         *openr1();
 203:     extern QTREE            **mksqlist();
 204: 
 205:     r = &De.de_rangev[var];
 206:     rs = &De.de_rangev[substvar];
 207:     npages = rel_pages(r->rtcnt, r->rtwid);
 208: 
 209:     /* calculate original cost of substitution */
 210: 
 211:     origcost = rs->rtcnt * npages;
 212: 
 213: #	ifdef xDTR1
 214:     if (tTf(39, 5))
 215:         printf("eval of mod for var %d. orig cost=%ld\n", var, origcost);
 216: #	endif
 217: 
 218:     /* if relation is already keyed usefully, just return */
 219:     if (i = ckpkey(linkmap, var))
 220:     {
 221: #		ifdef xDTR1
 222:         if (tTf(39, 4))
 223:             printf("already keyed ok %d\n", i);
 224: #		endif
 225:         return;
 226:     }
 227: 
 228:     /* if this is a primary relation, a projection must be done
 229: 	** before any modify can be performed */
 230: 
 231:     if (!rnum_temp(r->relnum))
 232:     {
 233:         /* evaluation for primary, user relation */
 234: 
 235:         /* to save some time, don't evaluate if origcost is very small */
 236:         if (origcost < OVHDP + 1 + npages)
 237:             return;
 238: 
 239:         j = markbuf(buf);
 240: 
 241:         /* build a projection tree but don't actually create the projection */
 242:         pq = makroot(buf);
 243:         dfind(sq, buf, mksqlist(pq, var));
 244: 
 245:         newwid = findwid(pq);
 246:         newpages = rel_pages(r->rtcnt, newwid);
 247: 
 248:         /*
 249: 		** Calculate cost of projection + new cost of substitution
 250: 		** projcost = readoldpages + writenewpages + runquery + overhead
 251: 		*/
 252: 
 253:         projcost = npages + newpages + rs->rtcnt * newpages + OVHDP;
 254: 
 255: 
 256:         /*  CALCULATE COST OF MODIFY = COST OF PROJECTION + COST OF MODIFY
 257: 		 *  	AND NEW COST OF SUBSTITUTION AFTER MODIFY    */
 258: 
 259:         modcost = (npages + newpages) +
 260:                 hashcost(newpages) +
 261:                 rs->rtcnt +
 262:                 OVHDP + OVHDM;
 263: 
 264: #		ifdef xDTR1
 265:         if (tTf(39, 5))
 266:         {
 267:             printf("primary rel: proj cost=%ld\t", projcost);
 268:             printf("mod cost=%ld\n", modcost);
 269:         }
 270: #		endif
 271: 
 272:         if (origcost <= modcost)
 273:             if (origcost <= projcost)
 274:             {
 275:                 freebuf(buf, j);
 276:                 return;
 277:             }
 278: 
 279: #		ifdef xDTR1
 280:         if (tTf(39, 5))
 281:             printf("doing projection\n");
 282: #		endif
 283: 
 284:         /* i will be TRUE if the proj was aborted */
 285:         i = primrf(var, pq, locrang);
 286:         freebuf(buf, j);
 287:         if ((projcost <= modcost) || i)
 288:             return;
 289:     }
 290: 
 291:     /*  IF THIS IS A TEMPORARY RELATION, A MODIFY CAN BE DONE DIRECTLY  */
 292: 
 293:     else
 294:     {
 295: 
 296:         /*  CALCULATE MODIFY COST (which does not include a projection)
 297: 		 *  AND NEW COST OF SUBSTITUTION				  */
 298: 
 299:         modcost = hashcost(npages)
 300:                   + rs->rtcnt + OVHDM;
 301: 
 302: #		ifdef xDTR1
 303:         if (tTf(39, 5))
 304:             printf("temp rel: mod cost=%ld\n", modcost);
 305: #		endif
 306: 
 307:         if (origcost <= modcost)
 308:             return;
 309:     }
 310: 
 311: #	ifdef xDTR1
 312:     if (tTf(39, 5))
 313:         printf("doing modify\n");
 314: #	endif
 315: 
 316:     initp();
 317:     setp(PV_STR, rangename(var));
 318:     setp(PV_STR, "hash");   /* modify to hash */
 319:     setp(PV_STR, "num");    /* passing domain numbers - not names */
 320: 
 321:     nums[1] = '\0';
 322:     for (j = 0; j < MAXDOM; j++)
 323:         if (linkmap[j])
 324:         {
 325:             *nums = j;
 326:             setp(PV_STR, nums);
 327:         }
 328: 
 329:     /* fill relation completely */
 330:     setp(PV_STR, "");
 331:     setp(PV_STR, "fillfactor");
 332:     setp(PV_STR, "100");
 333:     setp(PV_STR, "minpages");
 334:     setp(PV_STR, "1");
 335:     closer1(var);
 336:     call_dbu(mdMODIFY, FALSE);
 337: 
 338:     /* re-open modified range to get accurate descriptor */
 339:     openr1(var);
 340: }
 341: /*
 342: ** Primrf -- Replace a user relation with a projection of the needed domains.
 343: **
 344: **	Primrf retrieves into a temporary relation, the domains
 345: **	specified by the "pq" tree. The range table is updated to
 346: **	reflect the new range.
 347: **
 348: **	In fact a projection is not an accurate way to describe what
 349: **	happens. In order to avoid changing any attribute numbers in
 350: **	the query, the needed domains are projected and the domains
 351: **	inbetween are created as type "c0". This way they occupy
 352: **	no space and the attribute numbering is unchanged. Of course,
 353: **	any attributes beyond the last one needed are simply discarded.
 354: **
 355: **	In previous versions attempts were made to project only the needed
 356: **	domains and to renumber the query tree. This proved to be very
 357: **	expensive and difficult and was not always as optimal as this
 358: **	method.
 359: **
 360: **	The routines newquery() and endquery() will undo the effects
 361: **	of primrf and restore the range table back to its original state.
 362: */
 363: 
 364: primrf(var, pq, locrang)
 365: int var;
 366: QTREE   *pq;
 367: int locrang[];
 368: {
 369:     register QTREE  *q, **np;
 370:     register int    i;
 371:     int     maxresno, rnum;
 372:     QTREE       *node1[MAXDOM+1], *node2[MAXDOM+1];
 373:     static char czero[QT_HDR_SIZ + sizeof (struct resdomnode)]; /* a dummy resdom */
 374:     extern DESC *openr1();
 375:     extern char *rnum_convert();
 376: 
 377: #	ifdef xDTR1
 378:     if (tTf(39, 6))
 379:         printf("PRIMRF:\n");
 380: #	endif
 381: 
 382:     /* renumber RESDOMs to match their VARs */
 383:     for (q = pq->left; q->sym.type != TREE; q = q->left)
 384:         q->sym.value.sym_resdom.resno = q->right->sym.value.sym_var.attno;
 385: 
 386:     /* form list of RESDOMs from outermost inward */
 387:     node1[lnode(pq->left, node1, 0)] = 0;
 388: 
 389:     /* form a dummy RESDOM with type CHAR and length 0 */
 390:     q = (QTREE *) czero;
 391:     q->sym.value.sym_resdom.resfrmt = CHAR;
 392:     q->sym.value.sym_resdom.resfrml = 0;
 393: 
 394:     /* zero node2 */
 395:     for (np = node2, i = 0; i < MAXDOM + 1; i++)
 396:         *np++ = 0;
 397: 
 398:     /* sort RESDOMs into node2 */
 399:     maxresno = 0;
 400:     for (np = node1; q = *np++; )
 401:     {
 402:         if ((i = q->sym.value.sym_resdom.resno) == 0)
 403:             return (1); /* abort. Tid is referenced */
 404:         if (i > maxresno)
 405:             maxresno = i;
 406:         node2[i-1] = q;
 407:     }
 408: 
 409:     /* fill missing RESDOMs with czero */
 410:     for (np = node2, i = 0; i < maxresno; i++, np++)
 411:         if (*np == 0)
 412:             *np = (QTREE *) czero;
 413: 
 414: 
 415:     /* set up params for the create */
 416:     initp();
 417:     setp(PV_STR, "0");  /* initial relstat field */
 418:     rnum = rnum_alloc();
 419:     setp(PV_STR, rnum_convert(rnum));
 420:     domnam(node2, "f");
 421:     call_dbu(mdCREATE, FALSE);
 422: 
 423:     /* now run projection */
 424:     i = var;
 425:     execsq1(pq, i, rnum);
 426:     new_range(i, rnum);
 427:     /* save the name of the new relation */
 428:     savrang(locrang, i);
 429:     openr1(i);
 430:     return (0);
 431: }
 432: /*
 433: ** Ckpkey -- determine if a relation is already keyed usefully.
 434: **
 435: **	Ckpkey gets the key structure from paramd() and compares
 436: **	it to the linkmap. If the relation is ISAM and the first keyed
 437: **	domain is in linkmap, or if it is HASH and all keyed domains
 438: **	are in the linkmap, then ckpkey() returns >0, else
 439: **	ckpkey looks for secondary indexes which are usable.
 440: **	if none, then ckpkey() returns 0.
 441: **
 442: **	The value returned is an estimate of the number of
 443: **	pages which must be read to satisfy a query given
 444: **	equality clauses on the linkmap domains and the
 445: **	structure of the relation. The (crude) estimates are:
 446: **		hash	1 page
 447: **		isam	2 pages
 448: **		hash sec index	2 pages
 449: **		isam sec index	3 pages
 450: */
 451: 
 452: ckpkey(linkmap, var)
 453: char    linkmap[];
 454: int var;
 455: {
 456:     register DESC       *d;
 457:     register int        i;
 458:     struct index        itup;
 459:     struct accessparam  ap;
 460:     TID         lotid, hitid;
 461:     extern DESC     *readopen(), *openr1(), Inddes;
 462: 
 463: 
 464: #	ifdef  xDTR1
 465:     if (tTf(39, 11))
 466:         printf("CKPKEY:%s\n", rangename(var));
 467: #	endif
 468: 
 469:     /* if relation is an unindexed heap, then it cannot be keyed usefully */
 470:     d = openr1(var);
 471:     if (d->reldum.relspec != M_HEAP || d->reldum.relindxd > 0)
 472:     {
 473:         d = readopen(var);  /* open relation if it hasn't been already */
 474:         paramd(d, &ap);
 475:         if (ckpkey1(linkmap, &ap) == 0)
 476:             return (ap.mode == EXACTKEY ? 1 : 2);   /* success */
 477: 
 478:         if (d->reldum.relindxd > 0)
 479:         {
 480:             opencatalog("indexes", OR_READ);
 481:             setkey(&Inddes, (char *)&itup, d->reldum.relid, IRELIDP);
 482:             setkey(&Inddes, (char *)&itup, d->reldum.relowner, IOWNERP);
 483:             if (i = find(&Inddes, EXACTKEY, &lotid, &hitid, (char *)&itup))
 484:                 syserr("ckpkey:find %d", i);
 485: 
 486:             while ((i = get(&Inddes, &lotid, &hitid, (char *)&itup, TRUE)) == 0)
 487:             {
 488:                 if (!bequal(itup.irelidp, d->reldum.relid, MAXNAME + 2))
 489:                     continue;
 490: 
 491:                 parami(&itup, &ap);
 492:                 if (ckpkey1(linkmap, &ap) == 0)
 493:                     return (ap.mode == EXACTKEY ? 2 : 3);   /* success */
 494:             }
 495:         }
 496:     }
 497:     return (0); /* failure. no useful structure */
 498: }
 499: 
 500: 
 501: 
 502: ckpkey1(linkmap, ap)
 503: char            linkmap[];
 504: struct accessparam  *ap;
 505: {
 506:     register int    i, k, anykey;
 507: 
 508:     if (ap->mode == NOKEY)
 509:         return (1);
 510:     anykey = 0;
 511:     for (i = 0; k = ap->keydno[i]; i++)
 512:     {
 513:         if (linkmap[k] == 0)
 514:         {
 515:             if (ap->mode == EXACTKEY)
 516:                 return (1);
 517:             else
 518:                 break;
 519:         }
 520:         anykey++;
 521:     }
 522:     return (!anykey);
 523: }
 524: /*
 525: ** Findwid -- scan the target list of the projection tree
 526: **	to determine the resultant tuple size.
 527: **
 528: **	Return:
 529: **		Size of projected tuple.
 530: */
 531: 
 532: findwid(tree)
 533: QTREE   *tree;
 534: {
 535:     register QTREE  *nod, *t;
 536:     register int    wid;
 537: 
 538:     wid = 0;
 539:     t = tree;
 540: 
 541:     for (nod = t->left; nod && nod->sym.type != TREE; nod = nod->left)
 542:     {
 543:         wid += nod->sym.value.sym_var.varfrml & I1MASK;
 544:     }
 545: 
 546:     return (wid);
 547: }
 548: /*
 549: **  HASHCOST -- determine cost to modify to hash
 550: **
 551: **	Estimates the cost to modify the relation to hash.
 552: **	The estimate is crude since it assumes that there
 553: **	are no duplicates and that a "unix" page will be
 554: **	the same size as an "ingres" page.
 555: **
 556: **	The cost is based on the parameters which signify
 557: **	the size of the in-core sort buffer and the n-way
 558: **	merge sort plus the cost to read the sorted file
 559: **	and write the new relation twice (that's the was it works!).
 560: **
 561: **	Parameters:
 562: **		npages - the number of pages (estimate) which the
 563: **				relation currently occupies.
 564: **
 565: **	Returns:
 566: **		the cost to hash the relation.
 567: **
 568: **	Side Effects:
 569: **		none
 570: **
 571: **	Called By:
 572: **		rangrf
 573: */
 574: 
 575: # define    COREBUFSIZE 32767 / PGSIZE
 576: # define    MERGESIZE   7
 577: 
 578: long
 579: hashcost(npages)
 580: long    npages;
 581: {
 582:     long        sortpages, total;
 583:     register int    nfiles;
 584: 
 585:     nfiles = npages / COREBUFSIZE;
 586:     sortpages = 2 * npages;
 587: 
 588:     while (nfiles > 1)
 589:     {
 590:         nfiles = (nfiles + MERGESIZE - 1) / MERGESIZE;
 591:         sortpages += 2 * npages;
 592:     }
 593: 
 594:     total = sortpages + npages + npages + npages;
 595: #	ifdef xDTR1
 596:     if (tTf(39, 5))
 597:         printf("hashcost is %ld\n", total);
 598: #	endif
 599: 
 600:     return (total);
 601: }

Defined functions

ckpkey defined in line 452; used 2 times
ckpkey1 defined in line 502; used 2 times
findlinks defined in line 120; used 3 times
hashcost defined in line 578; used 3 times
primrf defined in line 364; used 1 times
rangrf defined in line 184; used 1 times
  • in line 99
reformat defined in line 11; used 1 times

Defined macros

COREBUFSIZE defined in line 575; used 1 times
MERGESIZE defined in line 576; used 2 times
  • in line 590(2)
Last modified: 1986-04-17
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 1718
Valid CSS Valid XHTML 1.0 Strict