SQLite

Check-in [f2207a0691]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Another test case for the planner change on this branch.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | stat4-change
Files: files | file ages | folders
SHA1: f2207a0691ed361061719f4dacf021a677a9d892
User & Date: dan 2015-03-16 09:21:30.738
Context
2015-03-16
16:28
When estimating the number of rows visited by a range scan for which the keys consist of more than one field, consider prefixes of stat4 samples as well as the full samples. This generates more accurate estimates. (check-in: 3e0590dee0 user: dan tags: trunk)
09:21
Another test case for the planner change on this branch. (Closed-Leaf check-in: f2207a0691 user: dan tags: stat4-change)
2015-03-14
18:59
When estimating the number of rows visited by a range scan for which the keys consist of more than one field, consider prefixes of stat4 samples as well as the full samples. (check-in: e1caf93c9a user: dan tags: stat4-change)
Changes
Unified Diff Ignore Whitespace Patch
Changes to test/analyze9.test.
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
}

#-------------------------------------------------------------------------
# Check that a problem in they way stat4 data is used has been 
# resolved (see below).
#
reset_db
do_test 26.1 {
  db transaction {
    execsql { 
      CREATE TABLE t1(x, y, z);
      CREATE INDEX t1xy ON t1(x, y);
      CREATE INDEX t1z ON t1(z);
    }
    for {set i 0} {$i < 10000} {incr i} {







|







1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
}

#-------------------------------------------------------------------------
# Check that a problem in they way stat4 data is used has been 
# resolved (see below).
#
reset_db
do_test 26.1.1 {
  db transaction {
    execsql { 
      CREATE TABLE t1(x, y, z);
      CREATE INDEX t1xy ON t1(x, y);
      CREATE INDEX t1z ON t1(z);
    }
    for {set i 0} {$i < 10000} {incr i} {
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194


1195

1196














































1197
1198
1199
1200
    execsql {
      UPDATE t1 SET z = rowid / 20;
      ANALYZE;
    }
  }
} {}

do_execsql_test 26.2 {
  SELECT count(*) FROM t1 WHERE x = 10000 AND y < 50;
} {49}
do_execsql_test 26.3 {
  SELECT count(*) FROM t1 WHERE z = 444;
} {20}

# The analyzer knows that any (z=?) expression matches 20 rows. So it
# will use index "t1z" if the estimate of hits for (x=10000 AND y<50)
# is greater than 20 rows.
#
# And it should be. The analyzer has a stat4 sample as follows:
#
#   sample=(x=10000, y=100) nLt=(10000 10099)
#
# There should be no other samples that start with (x=10000). So it knows 
# that (x=10000 AND y<50) must match somewhere between 0 and 99 rows, but
# know more than that. Guessing less than 20 is therefore unreasonable.
#
# At one point though, due to a problem in whereKeyStats(), the planner was
# estimating that (x=10000 AND y<50) would match only 2 rows.
#
do_eqp_test 26.4 {
  SELECT * FROM t1 WHERE x = 10000 AND y < 50 AND z = 444;
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?)}
}





















































finish_test










|


|


















|






>
>
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
    execsql {
      UPDATE t1 SET z = rowid / 20;
      ANALYZE;
    }
  }
} {}

do_execsql_test 26.1.2 {
  SELECT count(*) FROM t1 WHERE x = 10000 AND y < 50;
} {49}
do_execsql_test 26.1.3 {
  SELECT count(*) FROM t1 WHERE z = 444;
} {20}

# The analyzer knows that any (z=?) expression matches 20 rows. So it
# will use index "t1z" if the estimate of hits for (x=10000 AND y<50)
# is greater than 20 rows.
#
# And it should be. The analyzer has a stat4 sample as follows:
#
#   sample=(x=10000, y=100) nLt=(10000 10099)
#
# There should be no other samples that start with (x=10000). So it knows 
# that (x=10000 AND y<50) must match somewhere between 0 and 99 rows, but
# know more than that. Guessing less than 20 is therefore unreasonable.
#
# At one point though, due to a problem in whereKeyStats(), the planner was
# estimating that (x=10000 AND y<50) would match only 2 rows.
#
do_eqp_test 26.1.4 {
  SELECT * FROM t1 WHERE x = 10000 AND y < 50 AND z = 444;
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?)}
}


# This test - 26.2.* - tests that another manifestation of the same problem
# is no longer present in the library. Assuming:
# 
#   CREATE INDEX t1xy ON t1(x, y)
#
# and that have samples for index t1xy as follows:
#
#
#   sample=('A', 70)        nEq=(100, 2)        nLt=(900, 970)
#   sample=('B', 70)        nEq=(100, 2)        nLt=(1000, 1070)    
#
# the planner should estimate that (x = 'B' AND y > 25) matches 76 rows
# (70 * 2/3 + 30). Before, due to the problem, the planner was estimating 
# that this matched 100 rows.
# 
reset_db
do_execsql_test 26.2.1 {
  BEGIN;
    CREATE TABLE t1(x, y, z);
    CREATE INDEX i1 ON t1(x, y);
    CREATE INDEX i2 ON t1(z);
  
    WITH 
    cnt(y) AS (SELECT 0 UNION ALL SELECT y+1 FROM cnt WHERE y<99),
    letters(x) AS (
      SELECT 'A' UNION SELECT 'B' UNION SELECT 'C' UNION SELECT 'D'
    )
    INSERT INTO t1(x, y) SELECT x, y FROM letters, cnt;
  
    WITH
    letters(x) AS (
      SELECT 'A' UNION SELECT 'B' UNION SELECT 'C' UNION SELECT 'D'
    )
    INSERT INTO t1(x, y) SELECT x, 70 FROM letters;
  
    WITH
    cnt(i) AS (SELECT 0 UNION ALL SELECT i+1 FROM cnt WHERE i<9999)
    INSERT INTO t1(x, y) SELECT i, i FROM cnt;
  
    UPDATE t1 SET z = (rowid / 95);
    ANALYZE;
  COMMIT;
}

do_eqp_test 26.2.2 {
  SELECT * FROM t1 WHERE x='B' AND y>25 AND z=?;
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x=? AND y>?)}
}


finish_test