/ Check-in [421b5b54]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements to the logic for adding the "noskipscan" flag to stat1 entries.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | analyze-worst-case
Files: files | file ages | folders
SHA1:421b5b544af734b97e3da47699fa0f82b21617d3
User & Date: drh 2016-02-29 23:02:50
Context
2016-03-01
12:45
Fix test cases to align with the improved stats computation. check-in: 810967bf user: drh tags: analyze-worst-case
2016-02-29
23:02
Improvements to the logic for adding the "noskipscan" flag to stat1 entries. check-in: 421b5b54 user: drh tags: analyze-worst-case
21:27
The ANALYZE command automatically appends "noskipscan" to sqlite_stat1 entries that have large worst-case repeat estimates but small average repeat estimates. check-in: 6326ba58 user: drh tags: analyze-worst-case
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

840
841
842
843
844
845
846
847
848
849
850
851
852
853


854
855
856
857
858
859
860
...
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
    ** if the index is on columns (a,b) and the sqlite_stat1 value is 
    ** "100 10 2", then SQLite estimates that:
    **
    **   * the index contains 100 rows,
    **   * "WHERE a=?" matches 10 rows, and
    **   * "WHERE a=? AND b=?" matches 2 rows.
    **
    ** Use the worst-case estimate: the maximum number of repeated entries
    ** in the index.  The worst-case estimate is best for picking indexes.
    ** But for skip-scan, we want an average case estimate.  The worst-case
    ** estimate might be too high.  To avoid undesirable skip-scans, if the
    ** worst-case estimate is above the WHERE_SKIPSCAN_ONSET but the average
    ** estimate is below, simply disable skipscans on this index by adding
    ** the "noskipscan" modifier onto the end of the stat line.


    */
    char *z;
    int i;
    int noSkipScan = 0;

    char *zRet = sqlite3MallocZero( (p->nKeyCol+2)*25 );
    if( zRet==0 ){
................................................................................
    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
    z = zRet + sqlite3Strlen30(zRet);
    for(i=0; i<p->nKeyCol; i++){
      u64 iVal = p->current.amxEq[i];
      sqlite3_snprintf(24, z, " %llu", iVal);
      z += sqlite3Strlen30(z);
      assert( p->current.anEq[i] );
      if( i>0 && iVal>=WHERE_SKIPSCAN_ONSET+5 ){
        iVal = p->current.anDLt[i];
        iVal = (p->nRow + iVal)/(iVal + 1);
        if( iVal<WHERE_SKIPSCAN_ONSET-5 ) noSkipScan = 1;
      }
    }
    if( noSkipScan ) sqlite3_snprintf(14, z, " noskipscan");
    sqlite3_result_text(context, zRet, -1, sqlite3_free);
  }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  else if( eCall==STAT_GET_ROWID ){







|
|
|
|
|
|
|
>
>







 







|
|
|
|







840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
...
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
    ** if the index is on columns (a,b) and the sqlite_stat1 value is 
    ** "100 10 2", then SQLite estimates that:
    **
    **   * the index contains 100 rows,
    **   * "WHERE a=?" matches 10 rows, and
    **   * "WHERE a=? AND b=?" matches 2 rows.
    **
    ** A worst-case estimate is used:  the maximum number of rows that
    ** could be select for any set of query parameters.  The worst case
    ** is the estimate we want for choosing indexes.
    **
    ** For deciding whether or not to do a skip-scan, we want to know the
    ** average number of rows with the same key.  We can approximate this
    ** using the (worst case) most number of rows with the same key.  But
    ** sometimes that approximation can be badly off.  In those cases,
    ** mark the index as "noskipscan" to avoid suboptimal skip-scan plans.
    */
    char *z;
    int i;
    int noSkipScan = 0;

    char *zRet = sqlite3MallocZero( (p->nKeyCol+2)*25 );
    if( zRet==0 ){
................................................................................
    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
    z = zRet + sqlite3Strlen30(zRet);
    for(i=0; i<p->nKeyCol; i++){
      u64 iVal = p->current.amxEq[i];
      sqlite3_snprintf(24, z, " %llu", iVal);
      z += sqlite3Strlen30(z);
      assert( p->current.anEq[i] );
      if( iVal>=WHERE_SKIPSCAN_ONSET
       && p->current.anDLt[i] > p->nRow/(WHERE_SKIPSCAN_ONSET*2/3)
      ){
        noSkipScan = 1;
      }
    }
    if( noSkipScan ) sqlite3_snprintf(14, z, " noskipscan");
    sqlite3_result_text(context, zRet, -1, sqlite3_free);
  }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  else if( eCall==STAT_GET_ROWID ){

Changes to test/skipscan2.test.

44
45
46
47
48
49
50

51
52
53
54
55
56

57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
..
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

119
120
121
122

123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
...
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
  INSERT INTO people VALUES('Nathan','student',163);
  INSERT INTO people VALUES('Olivia','student',161);
  INSERT INTO people VALUES('Patrick','teacher',180);
  INSERT INTO people VALUES('Quiana','student',182);
  INSERT INTO people VALUES('Robert','student',159);
  INSERT INTO people VALUES('Sally','student',166);
  INSERT INTO people VALUES('Tom','student',171);

  INSERT INTO people VALUES('Ursula','student',170);
  INSERT INTO people VALUES('Vance','student',179);
  INSERT INTO people VALUES('Willma','student',175);
  INSERT INTO people VALUES('Xavier','teacher',185);
  INSERT INTO people VALUES('Yvonne','student',149);
  INSERT INTO people VALUES('Zach','student',170);

}

# Without ANALYZE, a skip-scan is not used
#
do_execsql_test skipscan2-1.3 {
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-1.3eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {~/*INDEX people_idx1 */}

# Now do an ANALYZE.  A skip-scan can be used after ANALYZE.
#
do_execsql_test skipscan2-1.4 {
  ANALYZE;
  -- We do not have enough people above to actually force the use
................................................................................
  -- to make it seem like there are many more.
  UPDATE sqlite_stat1 SET stat='10000 5000 20' WHERE idx='people_idx1';
  UPDATE sqlite_stat1 SET stat='10000 1' WHERE idx='sqlite_autoindex_people_1';
  ANALYZE sqlite_master;
}
db cache flush
do_execsql_test skipscan2-1.5 {
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-1.5eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {/*INDEX people_idx1 */}

# Same answer with other formulations of the same query
#
do_execsql_test skipscan2-1.6 {
  SELECT name FROM people
   WHERE role IN (SELECT DISTINCT role FROM people)
     AND height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-1.7 {
  SELECT name FROM people WHERE role='teacher' AND height>=180
  UNION ALL
  SELECT name FROM people WHERE role='student' AND height>=180
  ORDER BY 1;
} {David Jack Patrick Quiana Xavier}

# Add 8 more people, bringing the total to 34.  Then the number of
# duplicates in the left-column of the index will be 17 and 
# skip-scan should not be used after an (unfudged) ANALYZE.
#
do_execsql_test skipscan2-1.8 {
  INSERT INTO people VALUES('Angie','student',166);
  INSERT INTO people VALUES('Brad','student',176);
  INSERT INTO people VALUES('Claire','student',168);
  INSERT INTO people VALUES('Donald','student',162);
  INSERT INTO people VALUES('Elaine','student',177);
  INSERT INTO people VALUES('Frazier','student',159);
  INSERT INTO people VALUES('Grace','student',179);
  INSERT INTO people VALUES('Horace','student',166);
  ANALYZE;
  SELECT stat FROM sqlite_stat1 WHERE idx='people_idx1';
} {{34 17 2}}

db cache flush
do_execsql_test skipscan2-1.9 {
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}

do_execsql_test skipscan2-1.9eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {~/*INDEX people_idx1 */}

# Add 2 more people, bringing the total to 36.  Then the number of
# duplicates in the left-column of the index will be 18 and 
# skip-scan will be used after an (unfudged) ANALYZE.
#
do_execsql_test skipscan2-1.10 {
  INSERT INTO people VALUES('Ingrad','student',155);
  INSERT INTO people VALUES('Jacob','student',179);
  ANALYZE;
  SELECT stat FROM sqlite_stat1 WHERE idx='people_idx1';
} {{36 18 2}}
db cache flush
do_execsql_test skipscan2-1.11 {
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-1.11eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {/*INDEX people_idx1 */}


# Repeat using a WITHOUT ROWID table.
#
do_execsql_test skipscan2-2.1 {
  CREATE TABLE peoplew(
    name TEXT PRIMARY KEY,
    role TEXT NOT NULL,
................................................................................
    height INT NOT NULL, -- in cm
    CHECK( role IN ('student','teacher') )
  ) WITHOUT ROWID;
  CREATE INDEX peoplew_idx1 ON peoplew(role, height);
  INSERT INTO peoplew(name,role,height)
     SELECT name, role, height FROM  people;
  ALTER TABLE people RENAME TO old_people;
  SELECT name FROM peoplew WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-2.2 {
  SELECT name FROM peoplew
   WHERE role IN (SELECT DISTINCT role FROM peoplew)
     AND height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-2.2 {
  SELECT name FROM peoplew WHERE role='teacher' AND height>=180
  UNION ALL
  SELECT name FROM peoplew WHERE role='student' AND height>=180
  ORDER BY 1;
} {David Jack Patrick Quiana Xavier}

# Now do an ANALYZE.  A skip-scan can be used after ANALYZE.
#
do_execsql_test skipscan2-2.4 {
  ANALYZE;
}
db cache flush
do_execsql_test skipscan2-2.5 {
  SELECT name FROM peoplew WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-2.5eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM peoplew WHERE height>=180 ORDER BY +name;
} {/*INDEX peoplew_idx1 */}

# A skip-scan on a PK index of a WITHOUT ROWID table.
#
do_execsql_test skipscan2-3.1 {
  CREATE TABLE t3(a, b, c, PRIMARY KEY(a, b)) WITHOUT ROWID;
}







>






>





|
|


|







 







|
|


|







|
|

|

|

|

|
|
|


|
<
<
<
<
<
<
<


<
>


|
<
>


|


<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
|



|
|

|

|

|








|
|


|







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
..
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110







111
112

113
114
115
116

117
118
119
120
121
122



















123
124
125
126
127
128
129
...
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
  INSERT INTO people VALUES('Nathan','student',163);
  INSERT INTO people VALUES('Olivia','student',161);
  INSERT INTO people VALUES('Patrick','teacher',180);
  INSERT INTO people VALUES('Quiana','student',182);
  INSERT INTO people VALUES('Robert','student',159);
  INSERT INTO people VALUES('Sally','student',166);
  INSERT INTO people VALUES('Tom','student',171);
/*
  INSERT INTO people VALUES('Ursula','student',170);
  INSERT INTO people VALUES('Vance','student',179);
  INSERT INTO people VALUES('Willma','student',175);
  INSERT INTO people VALUES('Xavier','teacher',185);
  INSERT INTO people VALUES('Yvonne','student',149);
  INSERT INTO people VALUES('Zach','student',170);
*/
}

# Without ANALYZE, a skip-scan is not used
#
do_execsql_test skipscan2-1.3 {
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-1.3eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {~/*INDEX people_idx1 */}

# Now do an ANALYZE.  A skip-scan can be used after ANALYZE.
#
do_execsql_test skipscan2-1.4 {
  ANALYZE;
  -- We do not have enough people above to actually force the use
................................................................................
  -- to make it seem like there are many more.
  UPDATE sqlite_stat1 SET stat='10000 5000 20' WHERE idx='people_idx1';
  UPDATE sqlite_stat1 SET stat='10000 1' WHERE idx='sqlite_autoindex_people_1';
  ANALYZE sqlite_master;
}
db cache flush
do_execsql_test skipscan2-1.5 {
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-1.5eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {/*INDEX people_idx1 */}

# Same answer with other formulations of the same query
#
do_execsql_test skipscan2-1.6 {
  SELECT name FROM people
   WHERE role IN (SELECT DISTINCT role FROM people)
     AND height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-1.7 {
  SELECT name FROM people WHERE role='teacher' AND height<=161
  UNION ALL
  SELECT name FROM people WHERE role='student' AND height<=161
  ORDER BY 1;
} {Alice Bob Cindy Emily Megan Olivia Robert}

# Add more people so that the estimated number of rows per key is
# equal to the skip-scan threshold of 18.  Then run an (unfudged)
# ANALYZE.  skip-scan should work.
#
do_execsql_test skipscan2-1.8 {
  INSERT INTO people VALUES('Ursula','student',170);







  ANALYZE;
  SELECT stat FROM sqlite_stat1 WHERE idx='people_idx1';

} {{21 18 3}}
db cache flush
do_execsql_test skipscan2-1.9 {
  SELECT name FROM people WHERE height<=161 ORDER BY +name;

} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-1.9eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {~/*INDEX people_idx1 */}





















# Repeat using a WITHOUT ROWID table.
#
do_execsql_test skipscan2-2.1 {
  CREATE TABLE peoplew(
    name TEXT PRIMARY KEY,
    role TEXT NOT NULL,
................................................................................
    height INT NOT NULL, -- in cm
    CHECK( role IN ('student','teacher') )
  ) WITHOUT ROWID;
  CREATE INDEX peoplew_idx1 ON peoplew(role, height);
  INSERT INTO peoplew(name,role,height)
     SELECT name, role, height FROM  people;
  ALTER TABLE people RENAME TO old_people;
  SELECT name FROM peoplew WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-2.2 {
  SELECT name FROM peoplew
   WHERE role IN (SELECT DISTINCT role FROM peoplew)
     AND height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-2.2 {
  SELECT name FROM peoplew WHERE role='teacher' AND height<=161
  UNION ALL
  SELECT name FROM peoplew WHERE role='student' AND height<=161
  ORDER BY 1;
} {Alice Bob Cindy Emily Megan Olivia Robert}

# Now do an ANALYZE.  A skip-scan can be used after ANALYZE.
#
do_execsql_test skipscan2-2.4 {
  ANALYZE;
}
db cache flush
do_execsql_test skipscan2-2.5 {
  SELECT name FROM peoplew WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-2.5eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM peoplew WHERE height<=161 ORDER BY +name;
} {/*INDEX peoplew_idx1 */}

# A skip-scan on a PK index of a WITHOUT ROWID table.
#
do_execsql_test skipscan2-3.1 {
  CREATE TABLE t3(a, b, c, PRIMARY KEY(a, b)) WITHOUT ROWID;
}