SQLite

Check-in [eefeda32d5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a couple of out-of-date comments in where.c.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | experimental-costs
Files: files | file ages | folders
SHA1: eefeda32d54efbbdf7d20b719299eda48b891fae
User & Date: dan 2014-04-30 14:47:01.628
Context
2014-04-30
14:53
Update a couple of test cases to account for the fact that this branch prefers an index scan and partial sort over a full-table scan and full external sort. (check-in: 9b975bf33c user: dan tags: experimental-costs)
14:47
Fix a couple of out-of-date comments in where.c. (check-in: eefeda32d5 user: dan tags: experimental-costs)
14:22
Improved rendering of LogEst values corresponding to real values near 0.0 in the tool/logest.c utility program. (check-in: 32910c8c59 user: drh tags: experimental-costs)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/where.c.
4018
4019
4020
4021
4022
4023
4024
4025
4026





4027
4028
4029
4030
4031
4032
4033
    if( j<0 ){
      pLoop->nOut += (pTerm->truthProb<=0 ? pTerm->truthProb : -1);
    }
  }
}

/*
** We have so far matched pBuilder->pNew->u.btree.nEq terms of the index pIndex.
** Try to match one more.





**
** If pProbe->tnum==0, that means pIndex is a fake index used for the
** INTEGER PRIMARY KEY.
*/
static int whereLoopAddBtreeIndex(
  WhereLoopBuilder *pBuilder,     /* The WhereLoop factory */
  struct SrcList_item *pSrc,      /* FROM clause term being analyzed */







|
|
>
>
>
>
>







4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
    if( j<0 ){
      pLoop->nOut += (pTerm->truthProb<=0 ? pTerm->truthProb : -1);
    }
  }
}

/*
** We have so far matched pBuilder->pNew->u.btree.nEq terms of the 
** index pIndex. Try to match one more.
**
** When this function is called, pBuilder->pNew->nOut contains the 
** number of rows expected to be visited by filtering using the nEq 
** terms only. If it is modified, this value is restored before this 
** function returns.
**
** If pProbe->tnum==0, that means pIndex is a fake index used for the
** INTEGER PRIMARY KEY.
*/
static int whereLoopAddBtreeIndex(
  WhereLoopBuilder *pBuilder,     /* The WhereLoop factory */
  struct SrcList_item *pSrc,      /* FROM clause term being analyzed */
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091






4092
4093
4094
4095
4096
4097
4098
  saved_prereq = pNew->prereq;
  saved_nOut = pNew->nOut;
  pNew->rSetup = 0;
  rLogSize = estLog(pProbe->aiRowLogEst[0]);

  /* Consider using a skip-scan if there are no WHERE clause constraints
  ** available for the left-most terms of the index, and if the average
  ** number of repeats in the left-most terms is at least 18.  The magic
  ** number 18 was found by experimentation to be the payoff point where
  ** skip-scan become faster than a full-scan.
  */






  assert( 42==sqlite3LogEst(18) );
  if( pTerm==0
   && saved_nEq==saved_nSkip
   && saved_nEq+1<pProbe->nKeyCol
   && pProbe->aiRowLogEst[saved_nEq+1]>=42  /* TUNING: Minimum for skip-scan */
   && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
  ){







|
<
<
|
>
>
>
>
>
>







4086
4087
4088
4089
4090
4091
4092
4093


4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
  saved_prereq = pNew->prereq;
  saved_nOut = pNew->nOut;
  pNew->rSetup = 0;
  rLogSize = estLog(pProbe->aiRowLogEst[0]);

  /* Consider using a skip-scan if there are no WHERE clause constraints
  ** available for the left-most terms of the index, and if the average
  ** number of repeats in the left-most terms is at least 18. 


  **
  ** The magic number 18 is selected on the basis that scanning 17 rows
  ** is almost always quicker than an index seek (even though if the index
  ** contains fewer than 2^17 rows we assume otherwise in other parts of
  ** the code). And, even if it is not, it should not be too much slower. 
  ** On the other hand, the extra seeks could end up being significantly
  ** more expensive.  */
  assert( 42==sqlite3LogEst(18) );
  if( pTerm==0
   && saved_nEq==saved_nSkip
   && saved_nEq+1<pProbe->nKeyCol
   && pProbe->aiRowLogEst[saved_nEq+1]>=42  /* TUNING: Minimum for skip-scan */
   && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
  ){
5222
5223
5224
5225
5226
5227
5228
5229




5230
5231
5232
5233
5234
5235
5236
5237


5238
5239
5240
5241
5242
5243
5244
5245
5246
5247
5248
5249
5250
5251
5252
        nOut = pFrom->nRow + pWLoop->nOut;
        maskNew = pFrom->maskLoop | pWLoop->maskSelf;
        if( isOrdered<0 ){
          isOrdered = wherePathSatisfiesOrderBy(pWInfo,
                       pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags,
                       iLoop, pWLoop, &revMask);
          if( isOrdered>=0 && isOrdered<nOrderBy ){
            /* TUNING: Estimated cost of sorting is N*log(N).




            ** If the order-by clause has X terms but only the last Y terms
            ** are out of order, then block-sorting will reduce the sorting
            ** cost to N*log(N)*log(Y/X).  The log(Y/X) term is computed
            ** by rScale.
            ** TODO: Should the sorting cost get a small multiplier to help
            ** discourage the use of sorting and encourage the use of index
            ** scans instead?
            */


            LogEst rScale, rSortCost;
            assert( nOrderBy>0 && 66==sqlite3LogEst(100) );
            rScale = sqlite3LogEst((nOrderBy-isOrdered)*100/nOrderBy) - 66;
            rSortCost = nRowEst + estLog(nRowEst) + rScale + 16;

            /* TUNING: The cost of implementing DISTINCT using a B-TREE is
            ** also N*log(N) but it has a larger constant of proportionality.
            ** Multiply by 3.0. */
            if( pWInfo->wctrlFlags & WHERE_WANT_DISTINCT ){
              rSortCost += 16;
            }
            WHERETRACE(0x002,
               ("---- sort cost=%-3d (%d/%d) increases cost %3d to %-3d\n",
                rSortCost, (nOrderBy-isOrdered), nOrderBy, rCost,
                sqlite3LogEstAdd(rCost,rSortCost)));







|
>
>
>
>
|
|
|
|
<
<
|
|
>
>






|
|







5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244
5245
5246


5247
5248
5249
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265
        nOut = pFrom->nRow + pWLoop->nOut;
        maskNew = pFrom->maskLoop | pWLoop->maskSelf;
        if( isOrdered<0 ){
          isOrdered = wherePathSatisfiesOrderBy(pWInfo,
                       pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags,
                       iLoop, pWLoop, &revMask);
          if( isOrdered>=0 && isOrdered<nOrderBy ){
            /* TUNING: Estimated cost of a full external sort, where N is 
            ** the number of rows to sort is:
            **
            **   cost = (3.0 * N * log(N)).
            ** 
            ** Or, if the order-by clause has X terms but only the last Y 
            ** terms are out of order, then block-sorting will reduce the 
            ** sorting cost to:
            **


            **   cost = (3.0 * N * log(N)) * (Y/X)
            **
            ** The (Y/X) term is implemented using stack variable rScale
            ** below.  */
            LogEst rScale, rSortCost;
            assert( nOrderBy>0 && 66==sqlite3LogEst(100) );
            rScale = sqlite3LogEst((nOrderBy-isOrdered)*100/nOrderBy) - 66;
            rSortCost = nRowEst + estLog(nRowEst) + rScale + 16;

            /* TUNING: The cost of implementing DISTINCT using a B-TREE is
            ** similar but with a larger constant of proportionality. 
            ** Multiply by an additional factor of 3.0.  */
            if( pWInfo->wctrlFlags & WHERE_WANT_DISTINCT ){
              rSortCost += 16;
            }
            WHERETRACE(0x002,
               ("---- sort cost=%-3d (%d/%d) increases cost %3d to %-3d\n",
                rSortCost, (nOrderBy-isOrdered), nOrderBy, rCost,
                sqlite3LogEstAdd(rCost,rSortCost)));