Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Improvements to the logic for adding the "noskipscan" flag to stat1 entries. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | analyze-worst-case |
Files: | files | file ages | folders |
SHA1: |
421b5b544af734b97e3da47699fa0f82 |
User & Date: | drh 2016-02-29 23:02:50.488 |
Context
2016-03-01
| ||
12:45 | Fix test cases to align with the improved stats computation. (check-in: 810967bff6 user: drh tags: analyze-worst-case) | |
2016-02-29
| ||
23:02 | Improvements to the logic for adding the "noskipscan" flag to stat1 entries. (check-in: 421b5b544a user: drh tags: analyze-worst-case) | |
21:27 | The ANALYZE command automatically appends "noskipscan" to sqlite_stat1 entries that have large worst-case repeat estimates but small average repeat estimates. (check-in: 6326ba5891 user: drh tags: analyze-worst-case) | |
Changes
Changes to src/analyze.c.
︙ | ︙ | |||
840 841 842 843 844 845 846 | ** if the index is on columns (a,b) and the sqlite_stat1 value is ** "100 10 2", then SQLite estimates that: ** ** * the index contains 100 rows, ** * "WHERE a=?" matches 10 rows, and ** * "WHERE a=? AND b=?" matches 2 rows. ** | | | | > | | < > > | | | | | | 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 | ** if the index is on columns (a,b) and the sqlite_stat1 value is ** "100 10 2", then SQLite estimates that: ** ** * the index contains 100 rows, ** * "WHERE a=?" matches 10 rows, and ** * "WHERE a=? AND b=?" matches 2 rows. ** ** A worst-case estimate is used: the maximum number of rows that ** could be select for any set of query parameters. The worst case ** is the estimate we want for choosing indexes. ** ** For deciding whether or not to do a skip-scan, we want to know the ** average number of rows with the same key. We can approximate this ** using the (worst case) most number of rows with the same key. But ** sometimes that approximation can be badly off. In those cases, ** mark the index as "noskipscan" to avoid suboptimal skip-scan plans. */ char *z; int i; int noSkipScan = 0; char *zRet = sqlite3MallocZero( (p->nKeyCol+2)*25 ); if( zRet==0 ){ sqlite3_result_error_nomem(context); return; } sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow); z = zRet + sqlite3Strlen30(zRet); for(i=0; i<p->nKeyCol; i++){ u64 iVal = p->current.amxEq[i]; sqlite3_snprintf(24, z, " %llu", iVal); z += sqlite3Strlen30(z); assert( p->current.anEq[i] ); if( iVal>=WHERE_SKIPSCAN_ONSET && p->current.anDLt[i] > p->nRow/(WHERE_SKIPSCAN_ONSET*2/3) ){ noSkipScan = 1; } } if( noSkipScan ) sqlite3_snprintf(14, z, " noskipscan"); sqlite3_result_text(context, zRet, -1, sqlite3_free); } #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 else if( eCall==STAT_GET_ROWID ){ |
︙ | ︙ |
Changes to test/skipscan2.test.
︙ | ︙ | |||
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | INSERT INTO people VALUES('Nathan','student',163); INSERT INTO people VALUES('Olivia','student',161); INSERT INTO people VALUES('Patrick','teacher',180); INSERT INTO people VALUES('Quiana','student',182); INSERT INTO people VALUES('Robert','student',159); INSERT INTO people VALUES('Sally','student',166); INSERT INTO people VALUES('Tom','student',171); INSERT INTO people VALUES('Ursula','student',170); INSERT INTO people VALUES('Vance','student',179); INSERT INTO people VALUES('Willma','student',175); INSERT INTO people VALUES('Xavier','teacher',185); INSERT INTO people VALUES('Yvonne','student',149); INSERT INTO people VALUES('Zach','student',170); } # Without ANALYZE, a skip-scan is not used # do_execsql_test skipscan2-1.3 { | > > | | | | | | | | | | | | | | | < < < < < < < | | < > | < < < < < < < < < < < < < < < < < < < | | | | | | | | | | | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | INSERT INTO people VALUES('Nathan','student',163); INSERT INTO people VALUES('Olivia','student',161); INSERT INTO people VALUES('Patrick','teacher',180); INSERT INTO people VALUES('Quiana','student',182); INSERT INTO people VALUES('Robert','student',159); INSERT INTO people VALUES('Sally','student',166); INSERT INTO people VALUES('Tom','student',171); /* INSERT INTO people VALUES('Ursula','student',170); INSERT INTO people VALUES('Vance','student',179); INSERT INTO people VALUES('Willma','student',175); INSERT INTO people VALUES('Xavier','teacher',185); INSERT INTO people VALUES('Yvonne','student',149); INSERT INTO people VALUES('Zach','student',170); */ } # Without ANALYZE, a skip-scan is not used # do_execsql_test skipscan2-1.3 { SELECT name FROM people WHERE height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-1.3eqp { EXPLAIN QUERY PLAN SELECT name FROM people WHERE height<=161 ORDER BY +name; } {~/*INDEX people_idx1 */} # Now do an ANALYZE. A skip-scan can be used after ANALYZE. # do_execsql_test skipscan2-1.4 { ANALYZE; -- We do not have enough people above to actually force the use -- of a skip-scan. So make a manual adjustment to the stat1 table -- to make it seem like there are many more. UPDATE sqlite_stat1 SET stat='10000 5000 20' WHERE idx='people_idx1'; UPDATE sqlite_stat1 SET stat='10000 1' WHERE idx='sqlite_autoindex_people_1'; ANALYZE sqlite_master; } db cache flush do_execsql_test skipscan2-1.5 { SELECT name FROM people WHERE height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-1.5eqp { EXPLAIN QUERY PLAN SELECT name FROM people WHERE height<=161 ORDER BY +name; } {/*INDEX people_idx1 */} # Same answer with other formulations of the same query # do_execsql_test skipscan2-1.6 { SELECT name FROM people WHERE role IN (SELECT DISTINCT role FROM people) AND height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-1.7 { SELECT name FROM people WHERE role='teacher' AND height<=161 UNION ALL SELECT name FROM people WHERE role='student' AND height<=161 ORDER BY 1; } {Alice Bob Cindy Emily Megan Olivia Robert} # Add more people so that the estimated number of rows per key is # equal to the skip-scan threshold of 18. Then run an (unfudged) # ANALYZE. skip-scan should work. # do_execsql_test skipscan2-1.8 { INSERT INTO people VALUES('Ursula','student',170); ANALYZE; SELECT stat FROM sqlite_stat1 WHERE idx='people_idx1'; } {{21 18 3}} db cache flush do_execsql_test skipscan2-1.9 { SELECT name FROM people WHERE height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-1.9eqp { EXPLAIN QUERY PLAN SELECT name FROM people WHERE height<=161 ORDER BY +name; } {~/*INDEX people_idx1 */} # Repeat using a WITHOUT ROWID table. # do_execsql_test skipscan2-2.1 { CREATE TABLE peoplew( name TEXT PRIMARY KEY, role TEXT NOT NULL, height INT NOT NULL, -- in cm CHECK( role IN ('student','teacher') ) ) WITHOUT ROWID; CREATE INDEX peoplew_idx1 ON peoplew(role, height); INSERT INTO peoplew(name,role,height) SELECT name, role, height FROM people; ALTER TABLE people RENAME TO old_people; SELECT name FROM peoplew WHERE height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-2.2 { SELECT name FROM peoplew WHERE role IN (SELECT DISTINCT role FROM peoplew) AND height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-2.2 { SELECT name FROM peoplew WHERE role='teacher' AND height<=161 UNION ALL SELECT name FROM peoplew WHERE role='student' AND height<=161 ORDER BY 1; } {Alice Bob Cindy Emily Megan Olivia Robert} # Now do an ANALYZE. A skip-scan can be used after ANALYZE. # do_execsql_test skipscan2-2.4 { ANALYZE; } db cache flush do_execsql_test skipscan2-2.5 { SELECT name FROM peoplew WHERE height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-2.5eqp { EXPLAIN QUERY PLAN SELECT name FROM peoplew WHERE height<=161 ORDER BY +name; } {/*INDEX peoplew_idx1 */} # A skip-scan on a PK index of a WITHOUT ROWID table. # do_execsql_test skipscan2-3.1 { CREATE TABLE t3(a, b, c, PRIMARY KEY(a, b)) WITHOUT ROWID; } |
︙ | ︙ |