SQLite

Check-in [21bfd47c42]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the estimated row counts in stat1 to be one-third worst-case and two-threads average case.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | analyze-worst-case
Files: files | file ages | folders
SHA1: 21bfd47c4245846b12aeeb7cf0212529e300b878
User & Date: drh 2016-03-01 14:31:59.247
Context
2016-03-04
18:45
Merge changes from trunk. (check-in: 5294c977d9 user: drh tags: analyze-worst-case)
2016-03-01
14:31
Change the estimated row counts in stat1 to be one-third worst-case and two-threads average case. (check-in: 21bfd47c42 user: drh tags: analyze-worst-case)
12:45
Fix test cases to align with the improved stats computation. (check-in: 810967bff6 user: drh tags: analyze-worst-case)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/analyze.c.
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839

840
841
842
843
844

845
846
847

848






849






850
851
852
853


854
855

856
857
858
859
860
861
862
863
864
865
866
867
868
869
870



871
872
873
874
875
876
877
878
879
880
881
882
883
       || eCall==STAT_GET_NDLT 
  );
  if( eCall==STAT_GET_STAT1 )
#else
  assert( argc==1 );
#endif
  {
    /* Return the value to store in the "stat" column of the sqlite_stat1
    ** table for this index.
    **
    ** The value is a string composed of a list of integers describing 
    ** the index. The first integer in the list is the total number of 
    ** entries in the index. There is one additional integer in the list 
    ** for each indexed column. This additional integer is an estimate of
    ** the number of rows matched by a query on the index using
    ** a key with the corresponding number of fields. In other words,

    ** if the index is on columns (a,b) and the sqlite_stat1 value is 
    ** "100 10 2", then SQLite estimates that:
    **
    **   * the index contains 100 rows,
    **   * "WHERE a=?" matches 10 rows, and

    **   * "WHERE a=? AND b=?" matches 2 rows.
    **
    ** A worst-case estimate is used:  the maximum number of rows that

    ** could be select for any set of query parameters.  The worst case






    ** is the estimate we want for choosing indexes.






    **
    ** For deciding whether or not to do a skip-scan, we want to know the
    ** average number of rows with the same key.  We can approximate this
    ** using the (worst case) most number of rows with the same key.  But


    ** sometimes that approximation can be badly off.  In those cases,
    ** mark the index as "noskipscan" to avoid suboptimal skip-scan plans.

    */
    char *z;
    int i;
    int noSkipScan = 0;

    char *zRet = sqlite3MallocZero( (p->nKeyCol+2)*25 );
    if( zRet==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }

    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
    z = zRet + sqlite3Strlen30(zRet);
    for(i=0; i<p->nKeyCol; i++){
      u64 iVal = p->current.amxEq[i];



      sqlite3_snprintf(24, z, " %llu", iVal);
      z += sqlite3Strlen30(z);
      assert( p->current.anEq[i] );
      if( iVal>=WHERE_SKIPSCAN_ONSET
       && p->current.anDLt[i] > p->nRow/(WHERE_SKIPSCAN_ONSET*2/3)
      ){
        noSkipScan = 1;
      }
    }
    if( noSkipScan ) sqlite3_snprintf(14, z, " noskipscan");
    sqlite3_result_text(context, zRet, -1, sqlite3_free);
  }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4







|



|
|
|
|
|
>
|
<

|
|
>
|

|
>
|
>
>
>
>
>
>
|
>
>
>
>
>
>


|
<
>
>
|
|
>














|
>
>
>



|
<
<







824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841

842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866

867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893


894
895
896
897
898
899
900
       || eCall==STAT_GET_NDLT 
  );
  if( eCall==STAT_GET_STAT1 )
#else
  assert( argc==1 );
#endif
  {
    /* Return a value for the "stat" column of the sqlite_stat1
    ** table for this index.
    **
    ** The value is a string composed of a list of integers describing 
    ** the index. The first integer is the (estimated) total number
    ** entries in the index. There is one additional integer for each
    ** column in the index.  The first added integer is an estimate of
    ** the number of rows that match a single key in the first column of
    ** the index.  The second added integer is an estimate on of the number
    ** of rows that match a single key consisting of the first two columns
    ** of the index.  And so forth.

    **
    ** For example, for an index on columns (a,b), if the sqlite_stat1.stat
    ** values is "100 10 2", that means there are about 100 rows in the
    ** index, and that a query against a=$key1 will match about 10 rows
    ** and a query against "a=$key1 AND b=$key2" will match about 2 rows.
    **
    ** Let V be the average number of rows that match a key, and let M
    ** be the most number of rows that match the key for any possible value
    ** of that key.  The estimate is computed as:
    **
    **     E = (2*V + M)/3
    **
    ** Consider two indexes.  Index X has with 100 values of exactly 0 and 
    ** 100 singleton values between 1 and 100.  Index Y has 200 values
    ** evenly distributed between 1 and 20.  If only the average (V) is
    ** used in the estimate, X would have "200 2" and Y would have "200 10"
    ** and so the planner would think X is the more selective index.  And
    ** X often would be more selective.  But when searching for 0, index X
    ** would perform badly.  To avoid this problem, the M is added into the
    ** estimate so that the stat for X is "200 34" and Y is still "200 10".
    ** In this way, Y is the preferred index (all else being equal) and
    ** the pathological case is avoided.
    **
    ** For deciding whether or not to do a skip-scan, we want to know the
    ** average number of rows (V) with the same key, not the mixed estimate

    ** E shown above.  Usually E will be close enough.  However, if E is
    ** large but V is small, that could trick the query planner into thinking
    ** that a skip-scan might work well on this index.  To avoid that, the
    ** "noskipscan" flag is added in cases where the divergence between E
    ** and V might mislead the query planner.
    */
    char *z;
    int i;
    int noSkipScan = 0;

    char *zRet = sqlite3MallocZero( (p->nKeyCol+2)*25 );
    if( zRet==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }

    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
    z = zRet + sqlite3Strlen30(zRet);
    for(i=0; i<p->nKeyCol; i++){
      u64 nDistinct = p->current.anDLt[i];
      u64 iMx = p->current.amxEq[i];                /* M: Most rows per key */
      u64 iAvg = (p->nRow+nDistinct)/(nDistinct+1); /* V: Average per key */
      u64 iVal = (iMx+iAvg*2)/3;                    /* E: The estimate */
      sqlite3_snprintf(24, z, " %llu", iVal);
      z += sqlite3Strlen30(z);
      assert( p->current.anEq[i] );
      if( iVal>=WHERE_SKIPSCAN_ONSET && iAvg<(WHERE_SKIPSCAN_ONSET*2/3) ){


        noSkipScan = 1;
      }
    }
    if( noSkipScan ) sqlite3_snprintf(14, z, " noskipscan");
    sqlite3_result_text(context, zRet, -1, sqlite3_free);
  }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
Changes to test/analyze.test.
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
} {t1i1 {4 4} t1i2 {4 1} t1i3 {4 4 1}}
do_test analyze-3.3 {
  execsql {
    INSERT INTO t1 VALUES(2,5);
    ANALYZE main;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t1i1 {5 4} t1i2 {5 2} t1i3 {5 4 1}}
do_test analyze-3.4 {
  execsql {
    CREATE TABLE t2 AS SELECT * FROM t1;
    CREATE INDEX t2i1 ON t2(a);
    CREATE INDEX t2i2 ON t2(b);
    CREATE INDEX t2i3 ON t2(a,b);
    ANALYZE;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t1i1 {5 4} t1i2 {5 2} t1i3 {5 4 1} t2i1 {5 4} t2i2 {5 2} t2i3 {5 4 1}}
do_test analyze-3.5 {
  execsql {
    DROP INDEX t2i3;
    ANALYZE t1;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t1i1 {5 4} t1i2 {5 2} t1i3 {5 4 1} t2i1 {5 4} t2i2 {5 2}}
do_test analyze-3.6 {
  execsql {
    ANALYZE t2;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t1i1 {5 4} t1i2 {5 2} t1i3 {5 4 1} t2i1 {5 4} t2i2 {5 2}}
do_test analyze-3.7 {
  execsql {
    DROP INDEX t2i2;
    ANALYZE t2;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t1i1 {5 4} t1i2 {5 2} t1i3 {5 4 1} t2i1 {5 4}}
do_test analyze-3.8 {
  execsql {
    CREATE TABLE t3 AS SELECT a, b, rowid AS c, 'hi' AS d FROM t1;
    CREATE INDEX t3i1 ON t3(a);
    CREATE INDEX t3i2 ON t3(a,b,c,d);
    CREATE INDEX t3i3 ON t3(d,b,c,a);
    DROP TABLE t1;
    DROP TABLE t2;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {}
do_test analyze-3.9 {
  execsql {
    ANALYZE;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t3i1 {5 4} t3i2 {5 4 1 1 1} t3i3 {5 5 2 1 1}}

do_test analyze-3.10 {
  execsql {
    CREATE TABLE [silly " name](a, b, c);
    CREATE INDEX 'foolish '' name' ON [silly " name](a, b);
    CREATE INDEX 'another foolish '' name' ON [silly " name](c);
    INSERT INTO [silly " name] VALUES(1, 2, 3);
    INSERT INTO [silly " name] VALUES(4, 5, 6);
    ANALYZE;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {{another foolish ' name} {2 1} {foolish ' name} {2 1 1} t3i1 {5 4} t3i2 {5 4 1 1 1} t3i3 {5 5 2 1 1}}
do_test analyze-3.11 {
  execsql {
    DROP INDEX "foolish ' name";
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {{another foolish ' name} {2 1} t3i1 {5 4} t3i2 {5 4 1 1 1} t3i3 {5 5 2 1 1}}
do_test analyze-3.11 {
  execsql {
    DROP TABLE "silly "" name";
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t3i1 {5 4} t3i2 {5 4 1 1 1} t3i3 {5 5 2 1 1}}

# Try corrupting the sqlite_stat1 table and make sure the
# database is still able to function.
#
do_test analyze-4.0 {
  sqlite3 db2 test.db
  db2 eval {
    CREATE TABLE t4(x,y,z);
    CREATE INDEX t4i1 ON t4(x);
    CREATE INDEX t4i2 ON t4(y);
    INSERT INTO t4 SELECT a,b,c FROM t3;
  }
  db2 close
  db close
  sqlite3 db test.db
  execsql {
    ANALYZE;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t3i1 {5 4} t3i2 {5 4 1 1 1} t3i3 {5 5 2 1 1} t4i1 {5 4} t4i2 {5 2}}
do_test analyze-4.1 {
  execsql {
    PRAGMA writable_schema=on;
    INSERT INTO sqlite_stat1 VALUES(null,null,null);
    PRAGMA writable_schema=off;
  }
  db close







|









|






|





|






|
















|











|





|





|



















|







154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
} {t1i1 {4 4} t1i2 {4 1} t1i3 {4 4 1}}
do_test analyze-3.3 {
  execsql {
    INSERT INTO t1 VALUES(2,5);
    ANALYZE main;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1}}
do_test analyze-3.4 {
  execsql {
    CREATE TABLE t2 AS SELECT * FROM t1;
    CREATE INDEX t2i1 ON t2(a);
    CREATE INDEX t2i2 ON t2(b);
    CREATE INDEX t2i3 ON t2(a,b);
    ANALYZE;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3} t2i2 {5 2} t2i3 {5 3 1}}
do_test analyze-3.5 {
  execsql {
    DROP INDEX t2i3;
    ANALYZE t1;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3} t2i2 {5 2}}
do_test analyze-3.6 {
  execsql {
    ANALYZE t2;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3} t2i2 {5 2}}
do_test analyze-3.7 {
  execsql {
    DROP INDEX t2i2;
    ANALYZE t2;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t1i1 {5 3} t1i2 {5 2} t1i3 {5 3 1} t2i1 {5 3}}
do_test analyze-3.8 {
  execsql {
    CREATE TABLE t3 AS SELECT a, b, rowid AS c, 'hi' AS d FROM t1;
    CREATE INDEX t3i1 ON t3(a);
    CREATE INDEX t3i2 ON t3(a,b,c,d);
    CREATE INDEX t3i3 ON t3(d,b,c,a);
    DROP TABLE t1;
    DROP TABLE t2;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {}
do_test analyze-3.9 {
  execsql {
    ANALYZE;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}}

do_test analyze-3.10 {
  execsql {
    CREATE TABLE [silly " name](a, b, c);
    CREATE INDEX 'foolish '' name' ON [silly " name](a, b);
    CREATE INDEX 'another foolish '' name' ON [silly " name](c);
    INSERT INTO [silly " name] VALUES(1, 2, 3);
    INSERT INTO [silly " name] VALUES(4, 5, 6);
    ANALYZE;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {{another foolish ' name} {2 1} {foolish ' name} {2 1 1} t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}}
do_test analyze-3.11 {
  execsql {
    DROP INDEX "foolish ' name";
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {{another foolish ' name} {2 1} t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}}
do_test analyze-3.11 {
  execsql {
    DROP TABLE "silly "" name";
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1}}

# Try corrupting the sqlite_stat1 table and make sure the
# database is still able to function.
#
do_test analyze-4.0 {
  sqlite3 db2 test.db
  db2 eval {
    CREATE TABLE t4(x,y,z);
    CREATE INDEX t4i1 ON t4(x);
    CREATE INDEX t4i2 ON t4(y);
    INSERT INTO t4 SELECT a,b,c FROM t3;
  }
  db2 close
  db close
  sqlite3 db test.db
  execsql {
    ANALYZE;
    SELECT idx, stat FROM sqlite_stat1 ORDER BY idx;
  }
} {t3i1 {5 3} t3i2 {5 3 1 1 1} t3i3 {5 5 2 1 1} t4i1 {5 3} t4i2 {5 2}}
do_test analyze-4.1 {
  execsql {
    PRAGMA writable_schema=on;
    INSERT INTO sqlite_stat1 VALUES(null,null,null);
    PRAGMA writable_schema=off;
  }
  db close