Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Change the estimated row counts in stat1 to be one-third worst-case and two-threads average case. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | analyze-worst-case |
Files: | files | file ages | folders |
SHA1: |
21bfd47c4245846b12aeeb7cf0212529 |
User & Date: | drh 2016-03-01 14:31:59.247 |
Context
2016-03-04
| ||
18:45 | Merge changes from trunk. (check-in: 5294c977d9 user: drh tags: analyze-worst-case) | |
2016-03-01
| ||
14:31 | Change the estimated row counts in stat1 to be one-third worst-case and two-threads average case. (check-in: 21bfd47c42 user: drh tags: analyze-worst-case) | |
12:45 | Fix test cases to align with the improved stats computation. (check-in: 810967bff6 user: drh tags: analyze-worst-case) | |
Changes
Changes to src/analyze.c.
︙ | ︙ | |||
824 825 826 827 828 829 830 | || eCall==STAT_GET_NDLT ); if( eCall==STAT_GET_STAT1 ) #else assert( argc==1 ); #endif { | | | | | | | > | < | | > | | > | > > > > > > | > > > > > > | < > > | | > | > > > | < < | 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 | || eCall==STAT_GET_NDLT ); if( eCall==STAT_GET_STAT1 ) #else assert( argc==1 ); #endif { /* Return a value for the "stat" column of the sqlite_stat1 ** table for this index. ** ** The value is a string composed of a list of integers describing ** the index. The first integer is the (estimated) total number ** entries in the index. There is one additional integer for each ** column in the index. The first added integer is an estimate of ** the number of rows that match a single key in the first column of ** the index. The second added integer is an estimate on of the number ** of rows that match a single key consisting of the first two columns ** of the index. And so forth. ** ** For example, for an index on columns (a,b), if the sqlite_stat1.stat ** values is "100 10 2", that means there are about 100 rows in the ** index, and that a query against a=$key1 will match about 10 rows ** and a query against "a=$key1 AND b=$key2" will match about 2 rows. ** ** Let V be the average number of rows that match a key, and let M ** be the most number of rows that match the key for any possible value ** of that key. The estimate is computed as: ** ** E = (2*V + M)/3 ** ** Consider two indexes. Index X has with 100 values of exactly 0 and ** 100 singleton values between 1 and 100. Index Y has 200 values ** evenly distributed between 1 and 20. If only the average (V) is ** used in the estimate, X would have "200 2" and Y would have "200 10" ** and so the planner would think X is the more selective index. And ** X often would be more selective. But when searching for 0, index X ** would perform badly. To avoid this problem, the M is added into the ** estimate so that the stat for X is "200 34" and Y is still "200 10". ** In this way, Y is the preferred index (all else being equal) and ** the pathological case is avoided. ** ** For deciding whether or not to do a skip-scan, we want to know the ** average number of rows (V) with the same key, not the mixed estimate ** E shown above. Usually E will be close enough. However, if E is ** large but V is small, that could trick the query planner into thinking ** that a skip-scan might work well on this index. To avoid that, the ** "noskipscan" flag is added in cases where the divergence between E ** and V might mislead the query planner. */ char *z; int i; int noSkipScan = 0; char *zRet = sqlite3MallocZero( (p->nKeyCol+2)*25 ); if( zRet==0 ){ sqlite3_result_error_nomem(context); return; } sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow); z = zRet + sqlite3Strlen30(zRet); for(i=0; i<p->nKeyCol; i++){ u64 nDistinct = p->current.anDLt[i]; u64 iMx = p->current.amxEq[i]; /* M: Most rows per key */ u64 iAvg = (p->nRow+nDistinct)/(nDistinct+1); /* V: Average per key */ u64 iVal = (iMx+iAvg*2)/3; /* E: The estimate */ sqlite3_snprintf(24, z, " %llu", iVal); z += sqlite3Strlen30(z); assert( p->current.anEq[i] ); if( iVal>=WHERE_SKIPSCAN_ONSET && iAvg<(WHERE_SKIPSCAN_ONSET*2/3) ){ noSkipScan = 1; } } if( noSkipScan ) sqlite3_snprintf(14, z, " noskipscan"); sqlite3_result_text(context, zRet, -1, sqlite3_free); } #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 |
︙ | ︙ |
Changes to test/analyze.test.
︙ | ︙ | |||
154 155 156 157 158 159 160 | } {t1i1 {4 4} t1i2 {4 1} t1i3 {4 4 1}} do_test analyze-3.3 { execsql { INSERT INTO t1 VALUES(2,5); ANALYZE main; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } | | | | | | | | | | | | 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 | } {t1i1 {4 4} t1i2 {4 1} t1i3 {4 4 1}} do_test analyze-3.3 { execsql { INSERT INTO t1 VALUES(2,5); ANALYZE main; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1}} do_test analyze-3.4 { execsql { CREATE TABLE t2 AS SELECT * FROM t1; CREATE INDEX t2i1 ON t2(a); CREATE INDEX t2i2 ON t2(b); CREATE INDEX t2i3 ON t2(a,b); ANALYZE; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3} t2i2 {5 2} t2i3 {5 3 1}} do_test analyze-3.5 { execsql { DROP INDEX t2i3; ANALYZE t1; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3} t2i2 {5 2}} do_test analyze-3.6 { execsql { ANALYZE t2; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3} t2i2 {5 2}} do_test analyze-3.7 { execsql { DROP INDEX t2i2; ANALYZE t2; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3}} do_test analyze-3.8 { execsql { CREATE TABLE t3 AS SELECT a, b, rowid AS c, 'hi' AS d FROM t1; CREATE INDEX t3i1 ON t3(a); CREATE INDEX t3i2 ON t3(a,b,c,d); CREATE INDEX t3i3 ON t3(d,b,c,a); DROP TABLE t1; DROP TABLE t2; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {} do_test analyze-3.9 { execsql { ANALYZE; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}} do_test analyze-3.10 { execsql { CREATE TABLE [silly " name](a, b, c); CREATE INDEX 'foolish '' name' ON [silly " name](a, b); CREATE INDEX 'another foolish '' name' ON [silly " name](c); INSERT INTO [silly " name] VALUES(1, 2, 3); INSERT INTO [silly " name] VALUES(4, 5, 6); ANALYZE; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {{another foolish ' name} {2 1} {foolish ' name} {2 1 1} t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}} do_test analyze-3.11 { execsql { DROP INDEX "foolish ' name"; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {{another foolish ' name} {2 1} t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}} do_test analyze-3.11 { execsql { DROP TABLE "silly "" name"; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}} # Try corrupting the sqlite_stat1 table and make sure the # database is still able to function. # do_test analyze-4.0 { sqlite3 db2 test.db db2 eval { CREATE TABLE t4(x,y,z); CREATE INDEX t4i1 ON t4(x); CREATE INDEX t4i2 ON t4(y); INSERT INTO t4 SELECT a,b,c FROM t3; } db2 close db close sqlite3 db test.db execsql { ANALYZE; SELECT idx, stat FROM sqlite_stat1 ORDER BY idx; } } {t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1} t4i1 {5 3} t4i2 {5 2}} do_test analyze-4.1 { execsql { PRAGMA writable_schema=on; INSERT INTO sqlite_stat1 VALUES(null,null,null); PRAGMA writable_schema=off; } db close |
︙ | ︙ |