Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | If the user provides likelihood() data for a WHERE clause term used as part of an index key, have the planner use it when calculating the expected number of rows visited by the loop. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | experimental-costs |
Files: | files | file ages | folders |
SHA1: |
c51efaa5d29ee0a91b9e6a83a8dd8253 |
User & Date: | dan 2014-04-25 20:22:45.291 |
Context
2014-04-26
| ||
20:21 | Fix an sqlite3_stmt_status() problem caused by recent changs on this branch. (check-in: dee2040924 user: dan tags: experimental-costs) | |
2014-04-25
| ||
20:22 | If the user provides likelihood() data for a WHERE clause term used as part of an index key, have the planner use it when calculating the expected number of rows visited by the loop. (check-in: c51efaa5d2 user: dan tags: experimental-costs) | |
15:01 | Store values loaded from the stat1 table as logarithmic values in memory. (check-in: 1bd74c49dd user: dan tags: experimental-costs) | |
Changes
Changes to src/sqliteInt.h.
︙ | ︙ | |||
1676 1677 1678 1679 1680 1681 1682 | ** and the value of Index.onError indicate the which conflict resolution ** algorithm to employ whenever an attempt is made to insert a non-unique ** element. */ struct Index { char *zName; /* Name of this index */ i16 *aiColumn; /* Which columns are used by this index. 1st is 0 */ | < < < | 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 | ** and the value of Index.onError indicate the which conflict resolution ** algorithm to employ whenever an attempt is made to insert a non-unique ** element. */ struct Index { char *zName; /* Name of this index */ i16 *aiColumn; /* Which columns are used by this index. 1st is 0 */ LogEst *aiRowLogEst; /* From ANALYZE: Est. rows selected by each column */ Table *pTable; /* The SQL table being indexed */ char *zColAff; /* String defining the affinity of each column */ Index *pNext; /* The next index associated with the same table */ Schema *pSchema; /* Schema containing this index */ u8 *aSortOrder; /* for each column: True==DESC, False==ASC */ char **azColl; /* Array of collation sequence names for index */ |
︙ | ︙ |
Changes to src/where.c.
︙ | ︙ | |||
4036 4037 4038 4039 4040 4041 4042 | u16 saved_nLTerm; /* Original value of pNew->nLTerm */ u16 saved_nEq; /* Original value of pNew->u.btree.nEq */ u16 saved_nSkip; /* Original value of pNew->u.btree.nSkip */ u32 saved_wsFlags; /* Original value of pNew->wsFlags */ LogEst saved_nOut; /* Original value of pNew->nOut */ int iCol; /* Index of the column in the table */ int rc = SQLITE_OK; /* Return code */ | < < < | 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 | u16 saved_nLTerm; /* Original value of pNew->nLTerm */ u16 saved_nEq; /* Original value of pNew->u.btree.nEq */ u16 saved_nSkip; /* Original value of pNew->u.btree.nSkip */ u32 saved_wsFlags; /* Original value of pNew->wsFlags */ LogEst saved_nOut; /* Original value of pNew->nOut */ int iCol; /* Index of the column in the table */ int rc = SQLITE_OK; /* Return code */ LogEst rLogSize; /* Logarithm of table size */ WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */ pNew = pBuilder->pNew; if( db->mallocFailed ) return SQLITE_NOMEM; assert( (pNew->wsFlags & WHERE_VIRTUALTABLE)==0 ); assert( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 ); if( pNew->wsFlags & WHERE_BTM_LIMIT ){ opMask = WO_LT|WO_LE; }else if( pProbe->tnum<=0 || (pSrc->jointype & JT_LEFT)!=0 ){ opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE; }else{ opMask = WO_EQ|WO_IN|WO_ISNULL|WO_GT|WO_GE|WO_LT|WO_LE; } if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE); assert( pNew->u.btree.nEq<=pProbe->nKeyCol ); if( pNew->u.btree.nEq < pProbe->nKeyCol ){ iCol = pProbe->aiColumn[pNew->u.btree.nEq]; }else{ iCol = -1; } pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, iCol, opMask, pProbe); saved_nEq = pNew->u.btree.nEq; saved_nSkip = pNew->u.btree.nSkip; saved_nLTerm = pNew->nLTerm; saved_wsFlags = pNew->wsFlags; |
︙ | ︙ | |||
4091 4092 4093 4094 4095 4096 4097 | && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK ){ LogEst nIter; pNew->u.btree.nEq++; pNew->u.btree.nSkip++; pNew->aLTerm[pNew->nLTerm++] = 0; pNew->wsFlags |= WHERE_SKIPSCAN; | | < | | > > | < < | > > > > > > | | < < | < < < < | < | < < < < < < < | | | | | | | > > > > > > > < > > > | > > > > > > > > | | | | > > | | < | | | | | < | | | > > | | | > | | > > > > > > > > > > > > > | > > > > > | 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 | && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK ){ LogEst nIter; pNew->u.btree.nEq++; pNew->u.btree.nSkip++; pNew->aLTerm[pNew->nLTerm++] = 0; pNew->wsFlags |= WHERE_SKIPSCAN; nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1]; pNew->nOut -= nIter; whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul); pNew->nOut = saved_nOut; } for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){ u16 eOp = pTerm->eOperator; /* Shorthand for pTerm->eOperator */ LogEst rCostIdx; LogEst nOutUnadjusted; /* nOut before IN() and WHERE adjustments */ int nIn = 0; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 int nRecValid = pBuilder->nRecValid; #endif if( (eOp==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0) && (iCol<0 || pSrc->pTab->aCol[iCol].notNull) ){ continue; /* ignore IS [NOT] NULL constraints on NOT NULL columns */ } if( pTerm->prereqRight & pNew->maskSelf ) continue; pNew->wsFlags = saved_wsFlags; pNew->u.btree.nEq = saved_nEq; pNew->nLTerm = saved_nLTerm; if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */ pNew->aLTerm[pNew->nLTerm++] = pTerm; pNew->prereq = (saved_prereq | pTerm->prereqRight) & ~pNew->maskSelf; assert( nInMul==0 || (pNew->wsFlags & WHERE_COLUMN_NULL)!=0 || (pNew->wsFlags & WHERE_COLUMN_IN)!=0 || (pNew->wsFlags & WHERE_SKIPSCAN)!=0 ); if( eOp & WO_IN ){ Expr *pExpr = pTerm->pExpr; pNew->wsFlags |= WHERE_COLUMN_IN; if( ExprHasProperty(pExpr, EP_xIsSelect) ){ /* "x IN (SELECT ...)": TUNING: the SELECT returns 25 rows */ nIn = 46; assert( 46==sqlite3LogEst(25) ); }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){ /* "x IN (value, value, ...)" */ nIn = sqlite3LogEst(pExpr->x.pList->nExpr); } assert( nIn>0 ); /* RHS always has 2 or more terms... The parser ** changes "x IN (?)" into "x=?". */ }else if( eOp & (WO_EQ) ){ pNew->wsFlags |= WHERE_COLUMN_EQ; if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1) ){ if( iCol>=0 && pProbe->onError==OE_None ){ pNew->wsFlags |= WHERE_UNQ_WANTED; }else{ pNew->wsFlags |= WHERE_ONEROW; } } }else if( eOp & (WO_GT|WO_GE) ){ testcase( eOp & WO_GT ); testcase( eOp & WO_GE ); pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_BTM_LIMIT; pBtm = pTerm; pTop = 0; }else if( (eOp & WO_ISNULL)==0 ){ assert( eOp & (WO_LT|WO_LE) ); testcase( eOp & WO_LT ); testcase( eOp & WO_LE ); pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_TOP_LIMIT; pTop = pTerm; pBtm = (pNew->wsFlags & WHERE_BTM_LIMIT)!=0 ? pNew->aLTerm[pNew->nLTerm-2] : 0; } /* At this point pNew->nOut is set to the number of rows expected to ** be visited by the index scan before considering term pTerm, or the ** values of nIn and nInMul. In other words, assuming that all ** "x IN(...)" terms are replaced with "x = ?". This block updates ** the value of pNew->nOut to account for pTerm (but not nIn/nInMul). */ assert( pNew->nOut==saved_nOut ); if( pNew->wsFlags & WHERE_COLUMN_RANGE ){ /* Adjust nOut using stat3/stat4 data. Or, if there is no stat3/stat4 ** data, using some other estimate. */ whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew); }else{ int nEq = ++pNew->u.btree.nEq; assert( eOp & (WO_ISNULL|WO_EQ|WO_IN) ); assert( pNew->nOut==saved_nOut ); if( pTerm->truthProb<=0 ){ assert( (eOp & WO_IN) || nIn==0 ); pNew->nOut += pTerm->truthProb; pNew->nOut -= nIn; pNew->wsFlags |= WHERE_LIKELIHOOD; }else{ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 tRowcnt nOut = 0; if( nInMul==0 && pProbe->nSample && pNew->u.btree.nEq<=pProbe->nSampleCol && OptimizationEnabled(db, SQLITE_Stat3) && ((eOp & WO_IN)==0 || !ExprHasProperty(pTerm->pExpr, EP_xIsSelect)) && (pNew->wsFlags & WHERE_LIKELIHOOD)==0 ){ Expr *pExpr = pTerm->pExpr; if( (eOp & (WO_EQ|WO_ISNULL))!=0 ){ testcase( eOp & WO_EQ ); testcase( eOp & WO_ISNULL ); rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut); }else{ rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut); } assert( rc!=SQLITE_OK || nOut>0 ); if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; if( rc!=SQLITE_OK ) break; /* Jump out of the pTerm loop */ if( nOut ){ pNew->nOut = sqlite3LogEst(nOut); if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut; pNew->nOut -= nIn; } } if( nOut==0 ) #endif { pNew->nOut += (pProbe->aiRowLogEst[nEq] - pProbe->aiRowLogEst[nEq-1]); if( eOp & WO_ISNULL ){ /* TUNING: If there is no likelihood() value, assume that a ** "col IS NULL" expression matches twice as many rows ** as (col=?). */ pNew->nOut += 10; } } } } /* Set rCostIdx to the cost of visiting selected rows in index. Add ** it to pNew->rRun, which is currently set to the cost of the index ** seek only. Then, if this is a non-covering index, add the cost of ** visiting the rows in the main table. */ rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow; pNew->rRun = sqlite3LogEstAdd(rLogSize, rCostIdx); if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){ pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16); } nOutUnadjusted = pNew->nOut; pNew->rRun += nInMul + nIn; pNew->nOut += nInMul + nIn; whereLoopOutputAdjust(pBuilder->pWC, pNew); rc = whereLoopInsert(pBuilder, pNew); pNew->nOut = nOutUnadjusted; if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 && pNew->u.btree.nEq<(pProbe->nKeyCol + (pProbe->zName!=0)) ){ whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nInMul+nIn); } pNew->nOut = saved_nOut; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 |
︙ | ︙ |
Changes to src/whereInt.h.
︙ | ︙ | |||
454 455 456 457 458 459 460 | #define WHERE_VIRTUALTABLE 0x00000400 /* WhereLoop.u.vtab is valid */ #define WHERE_IN_ABLE 0x00000800 /* Able to support an IN operator */ #define WHERE_ONEROW 0x00001000 /* Selects no more than one row */ #define WHERE_MULTI_OR 0x00002000 /* OR using multiple indices */ #define WHERE_AUTO_INDEX 0x00004000 /* Uses an ephemeral index */ #define WHERE_SKIPSCAN 0x00008000 /* Uses the skip-scan algorithm */ #define WHERE_UNQ_WANTED 0x00010000 /* WHERE_ONEROW would have been helpful*/ | > | 454 455 456 457 458 459 460 461 | #define WHERE_VIRTUALTABLE 0x00000400 /* WhereLoop.u.vtab is valid */ #define WHERE_IN_ABLE 0x00000800 /* Able to support an IN operator */ #define WHERE_ONEROW 0x00001000 /* Selects no more than one row */ #define WHERE_MULTI_OR 0x00002000 /* OR using multiple indices */ #define WHERE_AUTO_INDEX 0x00004000 /* Uses an ephemeral index */ #define WHERE_SKIPSCAN 0x00008000 /* Uses the skip-scan algorithm */ #define WHERE_UNQ_WANTED 0x00010000 /* WHERE_ONEROW would have been helpful*/ #define WHERE_LIKELIHOOD 0x00020000 /* A likelihood() is affecting nOut */ |
Changes to test/whereG.test.
︙ | ︙ | |||
177 178 179 180 181 182 183 | SELECT DISTINCT x FROM (SELECT x FROM t4 GROUP BY x) WHERE x='right' ORDER BY x; } {right} #------------------------------------------------------------------------- | > > | > > | > < < > > > > > > > | 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 | SELECT DISTINCT x FROM (SELECT x FROM t4 GROUP BY x) WHERE x='right' ORDER BY x; } {right} #------------------------------------------------------------------------- # Test that likelihood() specifications on indexed terms are taken into # account by various forms of loops. # # 5.1.*: open ended range scans # 5.2.*: skip-scans # reset_db do_execsql_test 5.1 { CREATE TABLE t1(a, b, c); CREATE INDEX i1 ON t1(a, b); } do_eqp_test 5.1.2 { SELECT * FROM t1 WHERE a>? } {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?)}} do_eqp_test 5.1.3 { SELECT * FROM t1 WHERE likelihood(a>?, 0.9) } {0 0 0 {SCAN TABLE t1}} do_test 5.2 { for {set i 0} {$i < 100} {incr i} { execsql { INSERT INTO t1 VALUES('abc', $i, $i); } } execsql { INSERT INTO t1 SELECT 'def', b, c FROM t1; } execsql { ANALYZE } } {} do_eqp_test 5.2.2 { SELECT * FROM t1 WHERE likelihood(b>?, 0.01) } {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (ANY(a) AND b>?)}} do_eqp_test 5.2.3 { SELECT * FROM t1 WHERE likelihood(b>?, 0.9) } {0 0 0 {SCAN TABLE t1}} do_eqp_test 5.3.1 { SELECT * FROM t1 WHERE a=? } {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}} do_eqp_test 5.3.2 { SELECT * FROM t1 WHERE likelihood(a=?, 0.9) } {0 0 0 {SCAN TABLE t1}} finish_test |