SQLite

Check-in [31fcc7067b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Restructuring and generalizing analyze5.test. The whole script is currently disabled and will need to be reenabled prior to merging with trunk.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | stat2-enhancement
Files: files | file ages | folders
SHA1: 31fcc7067bd76da4bf19232811b90cf8b76eed74
User & Date: drh 2011-01-24 17:46:35.872
Context
2011-01-28
01:57
Change the weighting of binary searches on tables to 1/10th the cost of a search on an index. Change the assumed reduction in search space from a indexed range constraint from 1/3rd to 1/4th. Do not let the estimated number of rows drop below 1. (check-in: 4847c6cb71 user: drh tags: stat2-enhancement)
2011-01-24
17:46
Restructuring and generalizing analyze5.test. The whole script is currently disabled and will need to be reenabled prior to merging with trunk. (check-in: 31fcc7067b user: drh tags: stat2-enhancement)
15:11
Change the cost estimator in the query planner to take into account the logN rowid lookup cost when going from an index to a table. (check-in: b442525b0b user: drh tags: stat2-enhancement)
Changes
Unified Diff Ignore Whitespace Patch
Changes to test/analyze5.test.
9
10
11
12
13
14
15


16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

31
32
33
34
35










36
37
38





39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58

59
60
61
62

63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167


168
169



170

171
172
173
174
175
176
177

178
179
180
181
182





183
184
185


186
187
188
189
190
191
192
#
#***********************************************************************
#
# This file implements tests for SQLite library.  The focus of the tests
# in this file is the use of the sqlite_stat2 histogram data on tables
# with many repeated values and only a few distinct values.
#



set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !stat2 {
  finish_test
  return
}

set testprefix analyze5

proc eqp {sql {db db}} {
  uplevel execsql [list "EXPLAIN QUERY PLAN $sql"] $db
}


do_test analyze5-1.0 {
  execsql { CREATE TABLE t1(x INTEGER PRIMARY KEY, y, z) }
  for {set i 0} {$i < 1000} {incr i} {
    set j [expr {$i>=25 && $i<=50}]
    set k [expr {($i>=400) + ($i>=700) + ($i>=875)}]










    execsql { INSERT INTO t1 VALUES($i,$j,$k) }
  }
  execsql { 





    CREATE INDEX t1y ON t1(y);
    CREATE INDEX t1z ON t1(z);
    ANALYZE;
    SELECT * FROM sqlite_stat2 ORDER BY 1, 2, 3;
  }
} [list t1 t1y 0 0 \
        t1 t1y 1 0 \
        t1 t1y 2 0 \
        t1 t1y 3 0 \
        t1 t1y 4 0 \
        t1 t1y 5 0 \
        t1 t1y 6 0 \
        t1 t1y 7 0 \
        t1 t1y 8 0 \
        t1 t1y 9 0 \
        t1 t1z 0 0 \
        t1 t1z 1 0 \
        t1 t1z 2 0 \
        t1 t1z 3 0 \
        t1 t1z 4 1 \

        t1 t1z 5 1 \
        t1 t1z 6 1 \
        t1 t1z 7 2 \
        t1 t1z 8 2 \

        t1 t1z 9 3]

# Verify that range queries generate the correct row count estimates
#
foreach {testid where rows} {
  1  {z>=0 AND z<=0}     400
  2  {z>=1 AND z<=1}     300
  3  {z>=2 AND z<=2}     200
  4  {z>=3 AND z<=3}     100
  5  {z>=4 AND z<=4}      50
  6  {z>=-1 AND z<=-1}    50
  7  {z>1 AND z<3}       200
  8  {z>0 AND z<100}     600
  9  {z>=1 AND z<100}    600
 10  {z>1 AND z<100}     300
 11  {z>=2 AND z<100}    300
 12  {z>2 AND z<100}     100
 13  {z>=3 AND z<100}    100
 14  {z>3 AND z<100}      50
 15  {z>=4 AND z<100}     50
 16  {z>=-100 AND z<=-1}  50
 17  {z>=-100 AND z<=0}  400
 18  {z>=-100 AND z<0}    50
 19  {z>=-100 AND z<=1}  700
 20  {z>=-100 AND z<2}   700
 21  {z>=-100 AND z<=2}  900
 22  {z>=-100 AND z<3}   900

 31  {z>=0.0 AND z<=0.0}   400
 32  {z>=1.0 AND z<=1.0}   300
 33  {z>=2.0 AND z<=2.0}   200
 34  {z>=3.0 AND z<=3.0}   100
 35  {z>=4.0 AND z<=4.0}    50
 36  {z>=-1.0 AND z<=-1.0}  50
 37  {z>1.5 AND z<3.0}     200
 38  {z>0.5 AND z<100}     600
 39  {z>=1.0 AND z<100}    600
 40  {z>1.5 AND z<100}     300
 41  {z>=2.0 AND z<100}    300
 42  {z>2.1 AND z<100}     100
 43  {z>=3.0 AND z<100}    100
 44  {z>3.2 AND z<100}      50
 45  {z>=4.0 AND z<100}     50
 46  {z>=-100 AND z<=-1.0}  50
 47  {z>=-100 AND z<=0.0}  400
 48  {z>=-100 AND z<0.0}    50
 49  {z>=-100 AND z<=1.0}  700
 50  {z>=-100 AND z<2.0}   700
 51  {z>=-100 AND z<=2.0}  900
 52  {z>=-100 AND z<3.0}   900

} {
  do_test analyze5-1.$testid {
    eqp "SELECT * FROM t1 WHERE $where"
  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? AND z<?) (~%d rows)}} \
       $rows]
}
foreach {testid where rows} {
  101  {z=-1}            50
  102  {z=0}            400
  103  {z=1}            300
  104  {z=2}            200
  105  {z=3}            100
  106  {z=4}             50
  107  {z=-10.0}         50
  108  {z=0.0}          400
  109  {z=1.0}          300
  110  {z=2.0}          200
  111  {z=3.0}          100
  112  {z=4.0}           50
  113  {z=1.5}           50
  114  {z=2.5}           50
} {
  do_test analyze5-1.$testid {
    eqp "SELECT * FROM t1 WHERE $where"
  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)}} $rows]
}

# for the next sequence of tests a value of rows<=0 means a full-table scan
# is used.
#
#set sqlite_where_trace 1
foreach {testid where rows} {
  201  {z IN (-1)}            50
  202  {z IN (0)}            400
  203  {z IN (1)}            300
  204  {z IN (2)}            200
  205  {z IN (3)}            100
  206  {z IN (4)}             50
  207  {z IN (0.5)}           50
  208  {z IN (0,1)}          700
  209  {z IN (0,1,2)}        900
  210  {z IN (0,1,2,3)}        0
  211  {z IN (0,1,2,3,4,5)}    0
  212  {z IN (1,2)}          500
  213  {z IN (2,3)}          300
  214  {z=3 OR z=2}          300
  215  {z IN (-1,3)}         150
  216  {z=-1 OR z=3}         150
} {
  if {$rows<=0} {
    set ans {SCAN TABLE t1 (~100 rows)}
  } else {
    set ans [format {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)} $rows]
  }


  do_test analyze5-1.$testid {
    lindex [eqp "SELECT * FROM t1 WHERE $where"] 3



  } $ans

}

# For the t1.y column, most entries are known to be zero.  So do a 
# full table scan for y=0 but use the index for any other constraint on
# y.
#
do_test analyze5-201 {

  eqp {SELECT * FROM t1 WHERE y=0}
} {0 0 0 {SCAN TABLE t1 (~100 rows)}}
do_test analyze5-202 {
  eqp {SELECT * FROM t1 WHERE y=1}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y=?) (~50 rows)}}





do_test analyze5-203 {
  eqp {SELECT * FROM t1 WHERE y=0.1}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y=?) (~50 rows)}}



# Change the table values from integer to floating point and then
# repeat the same sequence of tests.  We should get the same results.
#
do_test analyze5-2.0 {
  db eval {
    UPDATE t1 SET z=z+0.0;







>
>















>

|

|
|
>
>
>
>
>
>
>
>
>
>
|

|
>
>
>
>
>
|
|

|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
|
<
|
>
|



|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
<
<
<
<
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
<
<
<
|
<
<
<
<
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
|
>
>
|
|
>
>
>
|
>
|
|
<
|
<
<
|
>
|
|
<
|
<
>
>
>
>
>
|
|
<
>
>







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132







133
134
135
136
137
138
139
140
141
142
143
144
145
146




147






148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167

168
169
170
171
172
173
174
175
176
177
178
179

180


181
182
183
184

185

186
187
188
189
190
191
192

193
194
195
196
197
198
199
200
201
#
#***********************************************************************
#
# This file implements tests for SQLite library.  The focus of the tests
# in this file is the use of the sqlite_stat2 histogram data on tables
# with many repeated values and only a few distinct values.
#

return

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !stat2 {
  finish_test
  return
}

set testprefix analyze5

proc eqp {sql {db db}} {
  uplevel execsql [list "EXPLAIN QUERY PLAN $sql"] $db
}

unset -nocomplain i t u v w x y z
do_test analyze5-1.0 {
  db eval {CREATE TABLE t1(t,u,v TEXT COLLATE nocase,w,x,y,z)}
  for {set i 0} {$i < 1000} {incr i} {
    set y [expr {$i>=25 && $i<=50}]
    set z [expr {($i>=400) + ($i>=700) + ($i>=875)}]
    set x $z
    set w $z
    set t [expr {$z+0.5}]
    switch $z {
      0 {set u "alpha"; unset x}
      1 {set u "bravo"}
      2 {set u "charlie"}
      3 {set u "delta"; unset w}
    }
    if {$i%2} {set v $u} {set v [string toupper $u]}
    db eval {INSERT INTO t1 VALUES($t,$u,$v,$w,$x,$y,$z)}
  }
  db eval { 
    CREATE INDEX t1t ON t1(t);  -- 0.5, 1.5, 2.5, and 3.5
    CREATE INDEX t1u ON t1(u);  -- text
    CREATE INDEX t1v ON t1(v);  -- mixed case text
    CREATE INDEX t1w ON t1(w);  -- integers 0, 1, 2 and a few NULLs
    CREATE INDEX t1x ON t1(x);  -- integers 1, 2, 3 and many NULLs
    CREATE INDEX t1y ON t1(y);  -- integers 0 and very few 1s
    CREATE INDEX t1z ON t1(z);  -- integers 0, 1, 2, and 3
    ANALYZE;
    SELECT sample FROM sqlite_stat2 WHERE idx='t1u' ORDER BY sampleno;
  }
} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}
do_test analyze5-1.1 {
  string tolower \
   [db eval {SELECT sample from sqlite_stat2 WHERE idx='t1v' ORDER BY sampleno}]
} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}
do_test analyze5-1.2 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1w' ORDER BY sampleno}
} {{} 0 0 0 0 1 1 1 2 2}
do_test analyze5-1.3 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1x' ORDER BY sampleno}
} {{} {} {} {} 1 1 1 2 2 3}
do_test analyze5-1.4 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1y' ORDER BY sampleno}
} {0 0 0 0 0 0 0 0 0 0}
do_test analyze5-1.5 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1z' ORDER BY sampleno}
} {0 0 0 0 1 1 1 2 2 3}
do_test analyze5-1.6 {

  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1t' ORDER BY sampleno}
} {0.5 0.5 0.5 0.5 1.5 1.5 1.5 2.5 2.5 3.5}


# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
    1  {z>=0 AND z<=0}       t1z  400
    2  {z>=1 AND z<=1}       t1z  300
    3  {z>=2 AND z<=2}       t1z  200
    4  {z>=3 AND z<=3}       t1z  100
    5  {z>=4 AND z<=4}       t1z   50
    6  {z>=-1 AND z<=-1}     t1z   50
    7  {z>1 AND z<3}         t1z  200
    8  {z>0 AND z<100}       t1z  600
    9  {z>=1 AND z<100}      t1z  600
   10  {z>1 AND z<100}       t1z  300
   11  {z>=2 AND z<100}      t1z  300
   12  {z>2 AND z<100}       t1z  100
   13  {z>=3 AND z<100}      t1z  100
   14  {z>3 AND z<100}       t1z   50
   15  {z>=4 AND z<100}      t1z   50
   16  {z>=-100 AND z<=-1}   t1z   50
   17  {z>=-100 AND z<=0}    t1z  400
   18  {z>=-100 AND z<0}     t1z   50
   19  {z>=-100 AND z<=1}    t1z  700
   20  {z>=-100 AND z<2}     t1z  700
   21  {z>=-100 AND z<=2}    t1z  900
   22  {z>=-100 AND z<3}     t1z  900
  
   31  {z>=0.0 AND z<=0.0}   t1z  400
   32  {z>=1.0 AND z<=1.0}   t1z  300
   33  {z>=2.0 AND z<=2.0}   t1z  200
   34  {z>=3.0 AND z<=3.0}   t1z  100
   35  {z>=4.0 AND z<=4.0}   t1z   50
   36  {z>=-1.0 AND z<=-1.0} t1z   50
   37  {z>1.5 AND z<3.0}     t1z  200
   38  {z>0.5 AND z<100}     t1z  600
   39  {z>=1.0 AND z<100}    t1z  600
   40  {z>1.5 AND z<100}     t1z  300
   41  {z>=2.0 AND z<100}    t1z  300
   42  {z>2.1 AND z<100}     t1z  100
   43  {z>=3.0 AND z<100}    t1z  100
   44  {z>3.2 AND z<100}     t1z   50
   45  {z>=4.0 AND z<100}    t1z   50
   46  {z>=-100 AND z<=-1.0} t1z   50
   47  {z>=-100 AND z<=0.0}  t1z  400
   48  {z>=-100 AND z<0.0}   t1z   50
   49  {z>=-100 AND z<=1.0}  t1z  700
   50  {z>=-100 AND z<2.0}   t1z  700
   51  {z>=-100 AND z<=2.0}  t1z  900
   52  {z>=-100 AND z<3.0}   t1z  900
  







  101  {z=-1}                t1z   50
  102  {z=0}                 t1z  400
  103  {z=1}                 t1z  300
  104  {z=2}                 t1z  200
  105  {z=3}                 t1z  100
  106  {z=4}                 t1z   50
  107  {z=-10.0}             t1z   50
  108  {z=0.0}               t1z  400
  109  {z=1.0}               t1z  300
  110  {z=2.0}               t1z  200
  111  {z=3.0}               t1z  100
  112  {z=4.0}               t1z   50
  113  {z=1.5}               t1z   50
  114  {z=2.5}               t1z   50




  






  201  {z IN (-1)}           t1z   50
  202  {z IN (0)}            t1z  400
  203  {z IN (1)}            t1z  300
  204  {z IN (2)}            t1z  200
  205  {z IN (3)}            t1z  100
  206  {z IN (4)}            t1z   50
  207  {z IN (0.5)}          t1z   50
  208  {z IN (0,1)}          t1z  700
  209  {z IN (0,1,2)}        t1z  900
  210  {z IN (0,1,2,3)}      {}   100
  211  {z IN (0,1,2,3,4,5)}  {}   100
  212  {z IN (1,2)}          t1z  500
  213  {z IN (2,3)}          t1z  300
  214  {z=3 OR z=2}          t1z  300
  215  {z IN (-1,3)}         t1z  150
  216  {z=-1 OR z=3}         t1z  150

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   50
  302  {y=0.1}               t1y   50


} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow
    list $idx $nrow
  } [list $index $rows]

  # Verify that the same result is achieved regardless of whether or not

  # the index is used


  do_test analyze5-1.${testid}b {
    set w2 [string map {y +y z +z} $where]
    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
                     ORDER BY +rowid"]

    set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]

    if {$a1==$a2} {
      set res ok
    } else {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}

}
exit

# Change the table values from integer to floating point and then
# repeat the same sequence of tests.  We should get the same results.
#
do_test analyze5-2.0 {
  db eval {
    UPDATE t1 SET z=z+0.0;