SQLite

Check-in [221659945c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Deploy heuristics (well-commented) to better estimate how much unindexed terms in the WHERE clause filter the number of output rows from a single table.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 221659945c3f78d3b6789bfe8fdeb8d3ee1fa038
User & Date: drh 2014-11-22 18:50:44.269
Context
2014-11-22
19:52
Fix an error in the comments from the previous check-in. (check-in: 9660ce5418 user: drh tags: trunk)
18:50
Deploy heuristics (well-commented) to better estimate how much unindexed terms in the WHERE clause filter the number of output rows from a single table. (check-in: 221659945c user: drh tags: trunk)
12:22
Remove a redundant test case (probably a copy/paste error). Add an assert() to where.c to ensure that automatic indexes do not have there output row counts adjusted downward by supplementary constraints. (check-in: eea4793349 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/where.c.
4287
4288
4289
4290
4291
4292
4293





4294
4295







4296



4297





4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321


4322
4323


4324
4325








4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
  return SQLITE_OK;
}

/*
** Adjust the WhereLoop.nOut value downward to account for terms of the
** WHERE clause that reference the loop but which are not used by an
** index.





**
** In the current implementation, the first extra WHERE clause term reduces







** the number of output rows by a factor of 10 and each additional term



** reduces the number of output rows by sqrt(2).





*/
static void whereLoopOutputAdjust(
  WhereClause *pWC,      /* The WHERE clause */
  WhereLoop *pLoop,      /* The loop to adjust downward */
  LogEst nRow            /* Number of rows in the entire table */
){
  WhereTerm *pTerm, *pX;
  Bitmask notAllowed = ~(pLoop->prereq|pLoop->maskSelf);
  int i, j;
  int nEq = 0;    /* Number of = constraints not within likely()/unlikely() */

  assert( (pLoop->wsFlags & WHERE_AUTO_INDEX)==0 );
  for(i=pWC->nTerm, pTerm=pWC->a; i>0; i--, pTerm++){
    if( (pTerm->wtFlags & TERM_VIRTUAL)!=0 ) break;
    if( (pTerm->prereqAll & pLoop->maskSelf)==0 ) continue;
    if( (pTerm->prereqAll & notAllowed)!=0 ) continue;
    for(j=pLoop->nLTerm-1; j>=0; j--){
      pX = pLoop->aLTerm[j];
      if( pX==0 ) continue;
      if( pX==pTerm ) break;
      if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break;
    }
    if( j<0 ){
      if( pTerm->truthProb<=0 ){


        pLoop->nOut += pTerm->truthProb;
      }else{


        pLoop->nOut--;
        if( pTerm->eOperator&WO_EQ ) nEq++;








      }
    }
  }
  /* TUNING:  If there is at least one equality constraint in the WHERE
  ** clause that does not have a likelihood() explicitly assigned to it
  ** then do not let the estimated number of output rows exceed half 
  ** the number of rows in the table. */
  if( nEq && pLoop->nOut>nRow-10 ){
    pLoop->nOut = nRow - 10;
  }
}

/*
** Adjust the cost C by the costMult facter T.  This only occurs if
** compiled with -DSQLITE_ENABLE_COSTMULT
*/
#ifdef SQLITE_ENABLE_COSTMULT







>
>
>
>
>

|
>
>
>
>
>
>
>
|
>
>
>
|
>
>
>
>
>








|
|














>
>


>
>

|
>
>
>
>
>
>
>
>



<
<
<
<
<
|
<







4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360





4361

4362
4363
4364
4365
4366
4367
4368
  return SQLITE_OK;
}

/*
** Adjust the WhereLoop.nOut value downward to account for terms of the
** WHERE clause that reference the loop but which are not used by an
** index.
*
** For every WHERE clause term that is not used by the index
** and which has a truth probability assigned by one of the likelihood(),
** likely(), or unlikely() SQL functions, reduce the estimated number
** of output rows by the probability specified.
**
** TUNING:  For every WHERE clause term that is not used by the index
** and which does not have an assigned truth probability, heuristics
** described below are used to try to estimate the truth probability.
** TODO --> Perhaps this is something that could be improved by better
** table statistics.
**
** Heuristic 1:  Estimate the truth probability as 6.25%.  The 6.25%
** value corresponds to 1 in LogEst notation, so this means decrement
** the WhereLoop.nOut field for every such WHERE clause term.
**
** Heuristic 2:  If there exists one or more WHERE clause terms of the
** form "x==EXPR" and EXPR is not a constant 0 or 1, then make sure the
** final output row estimate is no greater than 1/4 of the total number
** of rows in the table.  In other words, assume that x==EXPR will filter
** out at least 3 out of 4 rows.  If EXPR is -1 or 0 or 1, then maybe the
** "x" column is boolean or else -1 or 0 or 1 is a common default value
** on the "x" column and so in that case only cap the output row estimate
** at 1/2 instead of 1/4.
*/
static void whereLoopOutputAdjust(
  WhereClause *pWC,      /* The WHERE clause */
  WhereLoop *pLoop,      /* The loop to adjust downward */
  LogEst nRow            /* Number of rows in the entire table */
){
  WhereTerm *pTerm, *pX;
  Bitmask notAllowed = ~(pLoop->prereq|pLoop->maskSelf);
  int i, j, k;
  LogEst iReduce = 0;    /* pLoop->nOut should not exceed nRow-iReduce */

  assert( (pLoop->wsFlags & WHERE_AUTO_INDEX)==0 );
  for(i=pWC->nTerm, pTerm=pWC->a; i>0; i--, pTerm++){
    if( (pTerm->wtFlags & TERM_VIRTUAL)!=0 ) break;
    if( (pTerm->prereqAll & pLoop->maskSelf)==0 ) continue;
    if( (pTerm->prereqAll & notAllowed)!=0 ) continue;
    for(j=pLoop->nLTerm-1; j>=0; j--){
      pX = pLoop->aLTerm[j];
      if( pX==0 ) continue;
      if( pX==pTerm ) break;
      if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break;
    }
    if( j<0 ){
      if( pTerm->truthProb<=0 ){
        /* If a truth probability is specified using the likelihood() hints,
        ** then use the probability provided by the application. */
        pLoop->nOut += pTerm->truthProb;
      }else{
        /* In the absence of explicit truth probabilities, use heuristics to
        ** guess a reasonable truth probability. */
        pLoop->nOut--;
        if( pTerm->eOperator&WO_EQ ){
          Expr *pRight = pTerm->pExpr->pRight;
          if( sqlite3ExprIsInteger(pRight, &k) && k>=(-1) && k<=1 ){
            k = 10;
          }else{
            k = 20;
          }
          if( iReduce<k ) iReduce = k;
        }
      }
    }
  }





  if( pLoop->nOut > nRow-iReduce )  pLoop->nOut = nRow - iReduce;

}

/*
** Adjust the cost C by the costMult facter T.  This only occurs if
** compiled with -DSQLITE_ENABLE_COSTMULT
*/
#ifdef SQLITE_ENABLE_COSTMULT
Changes to test/scanstatus.test.
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
  PRAGMA foreign_keys=on;
}
do_execsql_test    4.2.1 { DELETE FROM p1 WHERE x=4 }
do_scanstatus_test 4.2.2 { 
  nLoop 1 nVisit 1 nEst 1.0 zName sqlite_autoindex_p1_1 
  zExplain {SEARCH TABLE p1 USING INDEX sqlite_autoindex_p1_1 (x=?)}

  nLoop 1 nVisit 3 nEst 524288.0 zName c1 zExplain {SCAN TABLE c1}
}

#-------------------------------------------------------------------------
# Further tests of different scan types.
#
reset_db
proc tochar {i} {







|







264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
  PRAGMA foreign_keys=on;
}
do_execsql_test    4.2.1 { DELETE FROM p1 WHERE x=4 }
do_scanstatus_test 4.2.2 { 
  nLoop 1 nVisit 1 nEst 1.0 zName sqlite_autoindex_p1_1 
  zExplain {SEARCH TABLE p1 USING INDEX sqlite_autoindex_p1_1 (x=?)}

  nLoop 1 nVisit 3 nEst 262144.0 zName c1 zExplain {SCAN TABLE c1}
}

#-------------------------------------------------------------------------
# Further tests of different scan types.
#
reset_db
proc tochar {i} {