Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Attempt to use sqlite_stat4 data to estimate the number of rows visited by a range query that uses a skip-scan. This code is largely untested. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | stat4-skipscan |
Files: | files | file ages | folders |
SHA1: |
01dc8102592427b71a18c2cb82301d22 |
User & Date: | dan 2014-06-26 20:21:46.005 |
Context
2014-06-26
| ||
21:32 | Fix compilation issue when STAT4 is not enabled. (check-in: 74a5454a71 user: mistachkin tags: stat4-skipscan) | |
20:21 | Attempt to use sqlite_stat4 data to estimate the number of rows visited by a range query that uses a skip-scan. This code is largely untested. (check-in: 01dc810259 user: dan tags: stat4-skipscan) | |
2014-06-24
| ||
20:19 | Fix showstat4.c so that it decodes typecodes 8 and 9 correctly. (check-in: 9ca737c0b4 user: drh tags: trunk) | |
Changes
Changes to src/sqliteInt.h.
︙ | ︙ | |||
3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 | void sqlite3BackupRestart(sqlite3_backup *); void sqlite3BackupUpdate(sqlite3_backup *, Pgno, const u8 *); #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 void sqlite3AnalyzeFunctions(void); int sqlite3Stat4ProbeSetValue(Parse*,Index*,UnpackedRecord**,Expr*,u8,int,int*); void sqlite3Stat4ProbeFree(UnpackedRecord*); #endif /* ** The interface to the LEMON-generated parser */ void *sqlite3ParserAlloc(void*(*)(size_t)); void sqlite3ParserFree(void*, void(*)(void*)); | > > | 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 | void sqlite3BackupRestart(sqlite3_backup *); void sqlite3BackupUpdate(sqlite3_backup *, Pgno, const u8 *); #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 void sqlite3AnalyzeFunctions(void); int sqlite3Stat4ProbeSetValue(Parse*,Index*,UnpackedRecord**,Expr*,u8,int,int*); int sqlite3Stat4ValueFromExpr(Parse*, Expr*, u8, sqlite3_value**); void sqlite3Stat4ProbeFree(UnpackedRecord*); int sqlite3Stat4Column(sqlite3*, const void*, int, int, sqlite3_value**); #endif /* ** The interface to the LEMON-generated parser */ void *sqlite3ParserAlloc(void*(*)(size_t)); void sqlite3ParserFree(void*, void(*)(void*)); |
︙ | ︙ |
Changes to src/vdbemem.c.
︙ | ︙ | |||
1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 | int i; FuncDefHash *pHash = &GLOBAL(FuncDefHash, sqlite3GlobalFunctions); FuncDef *aFunc = (FuncDef*)&GLOBAL(FuncDef, aAnalyzeTableFuncs); for(i=0; i<ArraySize(aAnalyzeTableFuncs); i++){ sqlite3FuncDefInsert(pHash, &aFunc[i]); } } /* ** This function is used to allocate and populate UnpackedRecord ** structures intended to be compared against sample index keys stored ** in the sqlite_stat4 table. ** ** A single call to this function attempts to populates field iVal (leftmost | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 | int i; FuncDefHash *pHash = &GLOBAL(FuncDefHash, sqlite3GlobalFunctions); FuncDef *aFunc = (FuncDef*)&GLOBAL(FuncDef, aAnalyzeTableFuncs); for(i=0; i<ArraySize(aAnalyzeTableFuncs); i++){ sqlite3FuncDefInsert(pHash, &aFunc[i]); } } static int stat4ValueFromExpr( Parse *pParse, /* Parse context */ Expr *pExpr, /* The expression to extract a value from */ u8 affinity, /* Affinity to use */ struct ValueNewStat4Ctx *pAlloc,/* How to allocate space */ sqlite3_value **ppVal /* OUT: New value object (or NULL) */ ){ int rc = SQLITE_OK; sqlite3_value *pVal = 0; sqlite3 *db = pParse->db; /* Skip over any TK_COLLATE nodes */ pExpr = sqlite3ExprSkipCollate(pExpr); if( !pExpr ){ pVal = valueNew(db, pAlloc); if( pVal ){ sqlite3VdbeMemSetNull((Mem*)pVal); } }else if( pExpr->op==TK_VARIABLE || NEVER(pExpr->op==TK_REGISTER && pExpr->op2==TK_VARIABLE) ){ Vdbe *v; int iBindVar = pExpr->iColumn; sqlite3VdbeSetVarmask(pParse->pVdbe, iBindVar); if( (v = pParse->pReprepare)!=0 ){ pVal = valueNew(db, pAlloc); if( pVal ){ rc = sqlite3VdbeMemCopy((Mem*)pVal, &v->aVar[iBindVar-1]); if( rc==SQLITE_OK ){ sqlite3ValueApplyAffinity(pVal, affinity, ENC(db)); } pVal->db = pParse->db; } } }else{ rc = valueFromExpr(db, pExpr, ENC(db), affinity, &pVal, pAlloc); } assert( pVal==0 || pVal->db==db ); *ppVal = pVal; return rc; } /* ** This function is used to allocate and populate UnpackedRecord ** structures intended to be compared against sample index keys stored ** in the sqlite_stat4 table. ** ** A single call to this function attempts to populates field iVal (leftmost |
︙ | ︙ | |||
1187 1188 1189 1190 1191 1192 1193 | Index *pIdx, /* Index being probed */ UnpackedRecord **ppRec, /* IN/OUT: Probe record */ Expr *pExpr, /* The expression to extract a value from */ u8 affinity, /* Affinity to use */ int iVal, /* Array element to populate */ int *pbOk /* OUT: True if value was extracted */ ){ | | < | < < < | < < < < < < < < < < < < < < < < < < | < < < < < > | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 | Index *pIdx, /* Index being probed */ UnpackedRecord **ppRec, /* IN/OUT: Probe record */ Expr *pExpr, /* The expression to extract a value from */ u8 affinity, /* Affinity to use */ int iVal, /* Array element to populate */ int *pbOk /* OUT: True if value was extracted */ ){ int rc; sqlite3_value *pVal = 0; struct ValueNewStat4Ctx alloc; alloc.pParse = pParse; alloc.pIdx = pIdx; alloc.ppRec = ppRec; alloc.iVal = iVal; rc = stat4ValueFromExpr(pParse, pExpr, affinity, &alloc, &pVal); assert( pVal==0 || pVal->db==pParse->db ); *pbOk = (pVal!=0); return rc; } /* ** Attempt to extract a value from expression pExpr using the methods ** as described for sqlite3Stat4ProbeSetValue() above. ** ** If successful, set *ppVal to point to a new value object and return ** SQLITE_OK. If no value can be extracted, but no other error occurs ** (e.g. OOM), return SQLITE_OK and set *ppVal to NULL. Or, if an error ** does occur, return an SQLite error code. The final value of *ppVal ** is undefined in this case. */ int sqlite3Stat4ValueFromExpr( Parse *pParse, /* Parse context */ Expr *pExpr, /* The expression to extract a value from */ u8 affinity, /* Affinity to use */ sqlite3_value **ppVal /* OUT: New value object (or NULL) */ ){ return stat4ValueFromExpr(pParse, pExpr, affinity, 0, ppVal); } int sqlite3Stat4Column( sqlite3 *db, /* Database handle */ const void *pRec, /* Pointer to buffer containing record */ int nRec, /* Size of buffer pRec in bytes */ int iCol, /* Column to extract */ sqlite3_value **ppVal /* OUT: Extracted value */ ){ int rc = SQLITE_OK; Mem *pMem = *ppVal; if( pMem==0 ){ pMem = (Mem*)sqlite3ValueNew(db); if( pMem==0 ){ rc = SQLITE_NOMEM; } } if( rc==SQLITE_OK ){ u32 t; int nHdr; int iHdr; int iField; int i; u8 *a = (u8*)pRec; iHdr = getVarint32(a, nHdr); iField = nHdr; for(i=0; i<iCol; i++){ iHdr = getVarint32(&a[iHdr], t); iField += sqlite3VdbeSerialTypeLen(t); } iHdr = getVarint32(&a[iHdr], t); sqlite3VdbeSerialGet(&a[iField], t, pMem); } *ppVal = pMem; return rc; } /* ** Unless it is NULL, the argument must be an UnpackedRecord object returned ** by an earlier call to sqlite3Stat4ProbeSetValue(). This call deletes ** the object. |
︙ | ︙ |
Changes to src/where.c.
︙ | ︙ | |||
14 15 16 17 18 19 20 21 22 23 24 25 26 27 | ** generating the code that loops through a table looking for applicable ** rows. Indices are selected and used to speed the search when doing ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". */ #include "sqliteInt.h" #include "whereInt.h" /* ** Return the estimated number of output rows from a WHERE clause */ u64 sqlite3WhereOutputRowCount(WhereInfo *pWInfo){ return sqlite3LogEstToInt(pWInfo->nRowOut); } | > | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | ** generating the code that loops through a table looking for applicable ** rows. Indices are selected and used to speed the search when doing ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". */ #include "sqliteInt.h" #include "whereInt.h" #include "vdbeInt.h" /* ** Return the estimated number of output rows from a WHERE clause */ u64 sqlite3WhereOutputRowCount(WhereInfo *pWInfo){ return sqlite3LogEstToInt(pWInfo->nRowOut); } |
︙ | ︙ | |||
1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 | nRet += pTerm->truthProb; }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){ nRet -= 20; assert( 20==sqlite3LogEst(4) ); } } return nRet; } /* ** This function is used to estimate the number of rows that will be visited ** by scanning an index for a range of values. The range may have an upper ** bound, a lower bound, or both. The WHERE clause terms that set the upper ** and lower bounds are represented by pLower and pUpper respectively. For ** example, assuming that index p is on t1(a): | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 | nRet += pTerm->truthProb; }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){ nRet -= 20; assert( 20==sqlite3LogEst(4) ); } } return nRet; } /* ** This function is called to estimate the number of rows visited by a ** range-scan on a skip-scan index. For example: ** ** CREATE INDEX i1 ON t1(a, b, c); ** SELECT * FROM t1 WHERE a=? AND c BETWEEN ? AND ?; ** ** Value pLoop->nOut is currently set to the estimated number of rows ** visited for scanning (a=? AND b=?). This function reduces that estimate ** by some factor to account for the (c BETWEEN ? AND ?) expression based ** on the stat4 data for the index. this scan will be peformed multiple ** times (once for each (a,b) combination that matches a=?) is dealt with ** by the caller. ** ** It does this by scanning through all stat4 samples, comparing values ** extracted from pLower and pUpper with the corresponding column in each ** sample. If L and U are the number of samples found to be less than or ** equal to the values extracted from pLower and pUpper respectively, and ** N is the total number of samples, the pLoop->nOut value is adjusted ** as follows: ** ** nOut = nOut * ( min(U - L, 1) / N ) ** ** If pLower is NULL, or a value cannot be extracted from the term, L is ** set to zero. If pUpper is NULL, or a value cannot be extracted from it, ** U is set to N. ** ** Normally, this function sets *pbDone to 1 before returning. However, ** if no value can be extracted from either pLower or pUpper (and so the ** estimate of the number of rows delivered remains unchanged), *pbDone ** is left as is. ** ** If an error occurs, an SQLite error code is returned. Otherwise, ** SQLITE_OK. */ static int whereRangeSkipScanEst( Parse *pParse, /* Parsing & code generating context */ WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ WhereLoop *pLoop, /* Update the .nOut value of this loop */ int *pbDone /* Set to true if at least one expr. value extracted */ ){ Index *p = pLoop->u.btree.pIndex; int nEq = pLoop->u.btree.nEq; sqlite3 *db = pParse->db; int nLower = 0; int nUpper = 0; int rc = SQLITE_OK; u8 aff = p->pTable->aCol[ p->aiColumn[nEq] ].affinity; CollSeq *pColl; sqlite3_value *p1 = 0; /* Value extracted from pLower */ sqlite3_value *p2 = 0; /* Value extracted from pUpper */ sqlite3_value *pVal = 0; /* Value extracted from record */ pColl = sqlite3LocateCollSeq(pParse, p->azColl[nEq]); if( pLower ){ rc = sqlite3Stat4ValueFromExpr(pParse, pLower->pExpr->pRight, aff, &p1); } if( pUpper && rc==SQLITE_OK ){ rc = sqlite3Stat4ValueFromExpr(pParse, pUpper->pExpr->pRight, aff, &p2); } if( p1 || p2 ){ int i; int nDiff; for(i=0; rc==SQLITE_OK && i<p->nSample; i++){ rc = sqlite3Stat4Column(db, p->aSample[i].p, p->aSample[i].n, nEq, &pVal); if( rc==SQLITE_OK && p1 ){ int res = sqlite3MemCompare(p1, pVal, pColl); if( res<=0 ) nLower++; } if( rc==SQLITE_OK && p2 ){ int res = sqlite3MemCompare(p2, pVal, pColl); if( res<=0 ) nUpper++; } } if( p2==0 ) nUpper = p->nSample; nDiff = (nUpper - nLower); if( nDiff<=0 ) nDiff = 1; pLoop->nOut -= (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff)); *pbDone = 1; }else{ assert( *pbDone==0 ); } sqlite3ValueFree(p1); sqlite3ValueFree(p2); sqlite3ValueFree(pVal); return rc; } /* ** This function is used to estimate the number of rows that will be visited ** by scanning an index for a range of values. The range may have an upper ** bound, a lower bound, or both. The WHERE clause terms that set the upper ** and lower bounds are represented by pLower and pUpper respectively. For ** example, assuming that index p is on t1(a): |
︙ | ︙ | |||
2050 2051 2052 2053 2054 2055 2056 | LogEst nNew; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 Index *p = pLoop->u.btree.pIndex; int nEq = pLoop->u.btree.nEq; if( p->nSample>0 | < > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | > > > > > | 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 | LogEst nNew; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 Index *p = pLoop->u.btree.pIndex; int nEq = pLoop->u.btree.nEq; if( p->nSample>0 && nEq<p->nSampleCol && OptimizationEnabled(pParse->db, SQLITE_Stat3) ){ if( nEq==pBuilder->nRecValid ){ UnpackedRecord *pRec = pBuilder->pRec; tRowcnt a[2]; u8 aff; /* Variable iLower will be set to the estimate of the number of rows in ** the index that are less than the lower bound of the range query. The ** lower bound being the concatenation of $P and $L, where $P is the ** key-prefix formed by the nEq values matched against the nEq left-most ** columns of the index, and $L is the value in pLower. ** ** Or, if pLower is NULL or $L cannot be extracted from it (because it ** is not a simple variable or literal value), the lower bound of the ** range is $P. Due to a quirk in the way whereKeyStats() works, even ** if $L is available, whereKeyStats() is called for both ($P) and ** ($P:$L) and the larger of the two returned values used. ** ** Similarly, iUpper is to be set to the estimate of the number of rows ** less than the upper bound of the range query. Where the upper bound ** is either ($P) or ($P:$U). Again, even if $U is available, both values ** of iUpper are requested of whereKeyStats() and the smaller used. */ tRowcnt iLower; tRowcnt iUpper; if( nEq==p->nKeyCol ){ aff = SQLITE_AFF_INTEGER; }else{ aff = p->pTable->aCol[p->aiColumn[nEq]].affinity; } /* Determine iLower and iUpper using ($P) only. */ if( nEq==0 ){ iLower = 0; iUpper = sqlite3LogEstToInt(p->aiRowLogEst[0]); }else{ /* Note: this call could be optimized away - since the same values must ** have been requested when testing key $P in whereEqualScanEst(). */ whereKeyStats(pParse, p, pRec, 0, a); iLower = a[0]; iUpper = a[0] + a[1]; } /* If possible, improve on the iLower estimate using ($P:$L). */ if( pLower ){ int bOk; /* True if value is extracted from pExpr */ Expr *pExpr = pLower->pExpr->pRight; assert( (pLower->eOperator & (WO_GT|WO_GE))!=0 ); rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); if( rc==SQLITE_OK && bOk ){ tRowcnt iNew; whereKeyStats(pParse, p, pRec, 0, a); iNew = a[0] + ((pLower->eOperator & WO_GT) ? a[1] : 0); if( iNew>iLower ) iLower = iNew; nOut--; } } /* If possible, improve on the iUpper estimate using ($P:$U). */ if( pUpper ){ int bOk; /* True if value is extracted from pExpr */ Expr *pExpr = pUpper->pExpr->pRight; assert( (pUpper->eOperator & (WO_LT|WO_LE))!=0 ); rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); if( rc==SQLITE_OK && bOk ){ tRowcnt iNew; whereKeyStats(pParse, p, pRec, 1, a); iNew = a[0] + ((pUpper->eOperator & WO_LE) ? a[1] : 0); if( iNew<iUpper ) iUpper = iNew; nOut--; } } pBuilder->pRec = pRec; if( rc==SQLITE_OK ){ if( iUpper>iLower ){ nNew = sqlite3LogEst(iUpper - iLower); }else{ nNew = 10; assert( 10==sqlite3LogEst(2) ); } if( nNew<nOut ){ nOut = nNew; } pLoop->nOut = (LogEst)nOut; WHERETRACE(0x10, ("range scan regions: %u..%u est=%d\n", (u32)iLower, (u32)iUpper, nOut)); return SQLITE_OK; } }else{ int bDone = 0; rc = whereRangeSkipScanEst(pParse, pLower, pUpper, pLoop, &bDone); if( bDone ) return rc; } } #else UNUSED_PARAMETER(pParse); UNUSED_PARAMETER(pBuilder); #endif assert( pLower || pUpper ); |
︙ | ︙ |