Index: src/analyze.c ================================================================== --- src/analyze.c +++ src/analyze.c @@ -1446,16 +1446,16 @@ z++; } #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 if( aOut ){ aOut[i] = v; - }else + } #else assert( aOut==0 ); UNUSED_PARAMETER(aOut); #endif - { + if( aLog ){ aLog[i] = sqlite3LogEst(v); } if( *z==' ' ) z++; } #ifndef SQLITE_ENABLE_STAT3_OR_STAT4 @@ -1514,12 +1514,21 @@ pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase); } z = argv[2]; if( pIndex ){ + int nCol = pIndex->nKeyCol+1; +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 + tRowcnt * const aiRowEst = pIndex->aiRowEst = (tRowcnt*)sqlite3MallocZero( + sizeof(tRowcnt) * nCol + ); + if( aiRowEst==0 ) pInfo->db->mallocFailed = 1; +#else + tRowcnt * const aiRowEst = 0; +#endif pIndex->bUnordered = 0; - decodeIntArray((char*)z, pIndex->nKeyCol+1, 0, pIndex->aiRowLogEst, pIndex); + decodeIntArray((char*)z, nCol, aiRowEst, pIndex->aiRowLogEst, pIndex); if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0]; }else{ Index fakeIdx; fakeIdx.szIdxRow = pTable->szTabRow; #ifdef SQLITE_ENABLE_COSTMULT @@ -1574,29 +1583,42 @@ ** unique. */ nCol = pIdx->nSampleCol-1; pIdx->aAvgEq[nCol] = 1; } for(iCol=0; iColnSample; int i; /* Used to iterate through samples */ tRowcnt sumEq = 0; /* Sum of the nEq values */ - tRowcnt nSum = 0; /* Number of terms contributing to sumEq */ tRowcnt avgEq = 0; - tRowcnt nDLt = pFinal->anDLt[iCol]; + tRowcnt nRow; /* Number of rows in index */ + i64 nSum100 = 0; /* Number of terms contributing to sumEq */ + i64 nDist100; /* Number of distinct values in index */ + + if( pIdx->aiRowEst==0 || pIdx->aiRowEst[iCol+1]==0 ){ + nRow = pFinal->anLt[iCol]; + nDist100 = (i64)100 * pFinal->anDLt[iCol]; + nSample--; + }else{ + nRow = pIdx->aiRowEst[0]; + nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1]; + } /* Set nSum to the number of distinct (iCol+1) field prefixes that - ** occur in the stat4 table for this index before pFinal. 
Set - ** sumEq to the sum of the nEq values for column iCol for the same - ** set (adding the value only once where there exist duplicate - ** prefixes). */ - for(i=0; i<(pIdx->nSample-1); i++){ - if( aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] ){ + ** occur in the stat4 table for this index. Set sumEq to the sum of + ** the nEq values for column iCol for the same set (adding the value + ** only once where there exist duplicate prefixes). */ + for(i=0; inSample-1) + || aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] + ){ sumEq += aSample[i].anEq[iCol]; - nSum++; + nSum100 += 100; } } - if( nDLt>nSum ){ - avgEq = (pFinal->anLt[iCol] - sumEq)/(nDLt - nSum); + + if( nDist100>nSum100 ){ + avgEq = ((i64)100 * (nRow - sumEq))/(nDist100 - nSum100); } if( avgEq==0 ) avgEq = 1; pIdx->aAvgEq[iCol] = avgEq; } } @@ -1843,10 +1865,15 @@ if( rc==SQLITE_OK ){ int lookasideEnabled = db->lookaside.bEnabled; db->lookaside.bEnabled = 0; rc = loadStat4(db, sInfo.zDatabase); db->lookaside.bEnabled = lookasideEnabled; + } + for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){ + Index *pIdx = sqliteHashData(i); + sqlite3_free(pIdx->aiRowEst); + pIdx->aiRowEst = 0; } #endif if( rc==SQLITE_NOMEM ){ db->mallocFailed = 1; Index: src/build.c ================================================================== --- src/build.c +++ src/build.c @@ -433,10 +433,13 @@ #endif if( db==0 || db->pnBytesFreed==0 ) sqlite3KeyInfoUnref(p->pKeyInfo); sqlite3ExprDelete(db, p->pPartIdxWhere); sqlite3DbFree(db, p->zColAff); if( p->isResized ) sqlite3DbFree(db, p->azColl); +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 + sqlite3_free(p->aiRowEst); +#endif sqlite3DbFree(db, p); } /* ** For the index called zIdxName which is found in the database iDb, Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -1799,10 +1799,11 @@ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 int nSample; /* Number of elements in aSample[] */ int 
nSampleCol; /* Size of IndexSample.anEq[] and so on */ tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */ IndexSample *aSample; /* Samples of the left-most key */ + tRowcnt *aiRowEst; /* Non-logarithmic stat1 data for this table */ #endif }; /* ** Allowed values for Index.idxType ADDED test/analyzeD.test Index: test/analyzeD.test ================================================================== --- /dev/null +++ test/analyzeD.test @@ -0,0 +1,117 @@ +# 2005 July 22 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. +# This file implements tests for the ANALYZE command. +# +# $Id: analyze.test,v 1.9 2008/08/11 18:44:58 drh Exp $ + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set ::testprefix analyzeD + +ifcapable {!stat4} { + finish_test + return +} + + +# Set up a table with the following properties: +# +# * Contains 1000 rows. +# * Column a contains even integers between 0 and 18, inclusive (so that +# a=? for any such integer matches 100 rows). +# * Column b contains integers between 0 and 9, inclusive. +# * Column c contains integers between 0 and 199, inclusive (so that +# for any such integer, c=? matches 5 rows). +# * Then add 7 rows with a new value for "a" - 3001. The stat4 table will +# not contain any samples with a=3001. 
+# +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c); +} +do_test 1.1 { + for {set i 1} {$i < 1000} {incr i} { + set c [expr $i % 200] + execsql { INSERT INTO t1(a, b, c) VALUES( 2*($i/100), $i%10, $c ) } + } + + execsql { + INSERT INTO t1 VALUES(3001, 3001, 3001); + INSERT INTO t1 VALUES(3001, 3001, 3002); + INSERT INTO t1 VALUES(3001, 3001, 3003); + INSERT INTO t1 VALUES(3001, 3001, 3004); + INSERT INTO t1 VALUES(3001, 3001, 3005); + INSERT INTO t1 VALUES(3001, 3001, 3006); + INSERT INTO t1 VALUES(3001, 3001, 3007); + + CREATE INDEX t1_ab ON t1(a, b); + CREATE INDEX t1_c ON t1(c); + + ANALYZE; + } +} {} + +# With full ANALYZE data, SQLite sees that c=150 (5 rows) is better than +# a=3001 (7 rows). +# +do_eqp_test 1.2 { + SELECT * FROM t1 WHERE a=3001 AND c=150; +} { + 0 0 0 {SEARCH TABLE t1 USING INDEX t1_c (c=?)} +} + +do_test 1.3 { + execsql { DELETE FROM sqlite_stat1 } + db close + sqlite3 db test.db +} {} + +# Without stat1, because 3001 is larger than all samples in the stat4 +# table, SQLite thinks that a=3001 matches just 1 row. So it (incorrectly) +# chooses it over the c=150 index (5 rows). Even with stat1 data, things +# worked this way before commit [e6f7f97dbc]. +# +do_eqp_test 1.4 { + SELECT * FROM t1 WHERE a=3001 AND c=150; +} { + 0 0 0 {SEARCH TABLE t1 USING INDEX t1_ab (a=?)} +} + +do_test 1.5 { + execsql { + UPDATE t1 SET a=13 WHERE a = 3001; + ANALYZE; + } +} {} + +do_eqp_test 1.6 { + SELECT * FROM t1 WHERE a=13 AND c=150; +} { + 0 0 0 {SEARCH TABLE t1 USING INDEX t1_c (c=?)} +} + +do_test 1.7 { + execsql { DELETE FROM sqlite_stat1 } + db close + sqlite3 db test.db +} {} + +# Same test as 1.4, except this time the 7 rows that match the a=? condition +# do not feature larger values than all rows in the stat4 table. So SQLite +# gets this right, even without stat1 data. +do_eqp_test 1.8 { + SELECT * FROM t1 WHERE a=13 AND c=150; +} { + 0 0 0 {SEARCH TABLE t1 USING INDEX t1_c (c=?)} +} + +finish_test +