Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -17,44 +17,10 @@ ** indices, you might also think of this module as the "query optimizer". */ #include "sqliteInt.h" -/* -** The following parameter define the relative cost of various -** search operations. These parameters are used to estimate query -** plan costs and to select the query plan with the lowest estimated -** cost. -** -** Let the cost of moving from one row in a table or index to the next -** or previous row be SEEK_COST. -** -** Let the base-10 logarithm of the number of rows in a table or -** index be L. The estLog() function below will estimate this -** numbmer given the number of rows in the table. -** -** The cost of doing a lookup of an index will be IDX_LKUP_COST*L. -** -** The cost of doing a lookup on a table is TBL_LKUP_COST*L. -** -** The cost of sorting a result set of N rows is assumed to be -** N*log10(N)*SORT_COST. -*/ -#if defined(SEEK_COST) - /* Assume that IDX_LKUP_COST, TBL_LKUP_COST, and SORT_COST are also defined */ -#elif !defined(SQLITE_OMIT_FLOATING_POINT) -# define SEEK_COST 1.0 -# define IDX_LKUP_COST 1.0 -# define TBL_LKUP_COST 0.1 -# define SORT_COST 3.0 -#else -# define SEEK_COST 10 -# define IDX_LKUP_COST 10 -# define TBL_LKUP_COST 1 -# define SORT_COST 30 -#endif - /* ** Trace output macros */ #if defined(SQLITE_TEST) || defined(SQLITE_DEBUG) int sqlite3WhereTrace = 0; @@ -2769,12 +2735,11 @@ */ for(; pProbe; pIdx=pProbe=pProbe->pNext){ const unsigned int * const aiRowEst = pProbe->aiRowEst; double cost; /* Cost of using pProbe */ double nRow; /* Estimated number of rows in result set */ - double nTabSrch; /* Est number of table searches */ - double nIdxSrch; /* Est number of index searches */ + double log10N; /* base-10 logarithm of nRow (inexact) */ int rev; /* True to scan in reverse order */ int wsFlags = 0; Bitmask used = 0; /* The following variables are populated based on the properties of @@ -2965,60 +2930,75 @@ whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow); } } #endif /* SQLITE_ENABLE_STAT2 */ - /* Adjust the number of rows and the cost downward to reflect rows + /* Adjust the number of output rows and downward to reflect rows ** that are excluded by range constraints. */ nRow = (nRow * (double)estBound) / (double)100; if( nRow<1 ) nRow = 1; - /* Assume constant cost to advance from one row to the next and - ** logarithmic cost to do a binary search. Hence, the initial cost - ** is the number of output rows plus log2(table-size) times the - ** number of binary searches. - ** - ** Because fan-out on tables is so much higher than the fan-out on - ** indices (because table btrees contain only integer keys in non-leaf - ** nodes) we weight the cost of a table binary search as 1/10th the - ** cost of an index binary search. - */ - if( pIdx ){ - if( bLookup ){ - /* For an index lookup followed by a table lookup: - ** nInMul index searches to find the start of each index range - ** + nRow steps through the index - ** + nRow table searches to lookup the table entry using the rowid - */ - nIdxSrch = nInMul; - nTabSrch = nRow; - }else{ - /* For a covering index: - ** nInMul index searches to find the initial entry - ** + nRow steps through the index - */ - nIdxSrch = nInMul; - nTabSrch = 0; - } - }else{ - /* For a rowid primary key lookup: - ** nInMult table searches to find the initial entry for each range - ** + nRow steps through the table - */ - nIdxSrch = 0; - nTabSrch = nInMul; - } - cost = nRow + (nIdxSrch*IDX_LKUP_COST + nTabSrch*TBL_LKUP_COST) - *estLog(aiRowEst[0])/SEEK_COST; - - /* Add in the estimated cost of sorting the result. This cost is expanded - ** by a fudge factor of 3.0 to account for the fact that a sorting step - ** involves a write and is thus more expensive than a lookup step. + /* Experiments run on real SQLite databases show that the time needed + ** to do a binary search to locate a row in a table or index is roughly + ** log10(N) times the time to move from one row to the next row within + ** a table or index. The actual times can vary, with the size of + ** records being an important factor. Both moves and searches are + ** slower with larger records, presumably because fewer records fit + ** on one page and hence more pages have to be fetched. + ** + ** The ANALYZE command and the sqlite_stat1 and sqlite_stat2 tables do + ** not give us data on the relative sizes of table and index records. + ** So this computation assumes table records are about twice as big + ** as index records + */ + if( (wsFlags & WHERE_NOT_FULLSCAN)==0 ){ + /* The cost of a full table scan is a number of move operations equal + ** to the number of rows in the table. + ** + ** We add an additional 4x penalty to full table scans. This causes + ** the cost function to err on the side of choosing an index over + ** choosing a full scan. This 4x full-scan penalty is an arguable + ** decision and one which we expect to revisit in the future. But + ** it seems to be working well enough at the moment. + */ + cost = aiRowEst[0]*4; + }else{ + log10N = estLog(aiRowEst[0]); + cost = nRow; + if( pIdx ){ + if( bLookup ){ + /* For an index lookup followed by a table lookup: + ** nInMul index searches to find the start of each index range + ** + nRow steps through the index + ** + nRow table searches to lookup the table entry using the rowid + */ + cost += (nInMul + nRow)*log10N; + }else{ + /* For a covering index: + ** nInMul index searches to find the initial entry + ** + nRow steps through the index + */ + cost += nInMul*log10N; + } + }else{ + /* For a rowid primary key lookup: + ** nInMult table searches to find the initial entry for each range + ** + nRow steps through the table + */ + cost += nInMul*log10N; + } + } + + /* Add in the estimated cost of sorting the result. Actual experimental + ** measurements of sorting performance in SQLite show that sorting time + ** adds C*N*log10(N) to the cost, where N is the number of rows to be + ** sorted and C is a factor between 1.95 and 4.3. We will split the + ** difference and select C of 3.0. */ if( bSort ){ - cost += nRow*estLog(nRow)*SORT_COST/SEEK_COST; + cost += nRow*estLog(nRow)*3; } /**** Cost of using this index has now been computed ****/ /* If there are additional constraints on this table that cannot @@ -3080,15 +3060,14 @@ } WHERETRACE(( "%s(%s): nEq=%d nInMul=%d estBound=%d bSort=%d bLookup=%d wsFlags=0x%x\n" - " notReady=0x%llx nTSrch=%.1f nISrch=%.1f nRow=%.1f\n" - " estLog=%.1f cost=%.1f used=0x%llx\n", + " notReady=0x%llx log10N=%.1f nRow=%.1f cost=%.1f used=0x%llx\n", pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"), nEq, nInMul, estBound, bSort, bLookup, wsFlags, - notReady, nTabSrch, nIdxSrch, nRow, estLog(aiRowEst[0]), cost, used + notReady, log10N, nRow, cost, used )); /* If this index is the best we have seen so far, then record this ** index and its cost in the pCost structure. */ Index: test/analyze5.test ================================================================== --- test/analyze5.test +++ test/analyze5.test @@ -100,12 +100,12 @@ 16 {z>=-100 AND z<=-1} t1z 50 17 {z>=-100 AND z<=0} t1z 400 18 {z>=-100 AND z<0} t1z 50 19 {z>=-100 AND z<=1} t1z 700 20 {z>=-100 AND z<2} t1z 700 - 21 {z>=-100 AND z<=2} {} 111 - 22 {z>=-100 AND z<3} {} 111 + 21 {z>=-100 AND z<=2} t1z 900 + 22 {z>=-100 AND z<3} t1z 900 31 {z>=0.0 AND z<=0.0} t1z 400 32 {z>=1.0 AND z<=1.0} t1z 300 33 {z>=2.0 AND z<=2.0} t1z 200 34 {z>=3.0 AND z<=3.0} t1z 100 @@ -123,12 +123,12 @@ 46 {z>=-100 AND z<=-1.0} t1z 50 47 {z>=-100 AND z<=0.0} t1z 400 48 {z>=-100 AND z<0.0} t1z 50 49 {z>=-100 AND z<=1.0} t1z 700 50 {z>=-100 AND z<2.0} t1z 700 - 51 {z>=-100 AND z<=2.0} {} 111 - 52 {z>=-100 AND z<3.0} {} 111 + 51 {z>=-100 AND z<=2.0} t1z 900 + 52 {z>=-100 AND z<3.0} t1z 900 101 {z=-1} t1z 50 102 {z=0} t1z 400 103 {z=1} t1z 300 104 {z=2} t1z 200 @@ -149,11 +149,11 @@ 204 {z IN (2)} t1z 200 205 {z IN (3)} t1z 100 206 {z IN (4)} t1z 50 207 {z IN (0.5)} t1z 50 208 {z IN (0,1)} t1z 700 - 209 {z IN (0,1,2)} {} 100 + 209 {z IN (0,1,2)} t1z 900 210 {z IN (0,1,2,3)} {} 100 211 {z IN (0,1,2,3,4,5)} {} 100 212 {z IN (1,2)} t1z 500 213 {z IN (2,3)} t1z 300 214 {z=3 OR z=2} t1z 300