SQLite

Check-in [2353176811]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhances to the query planner such that "x IS NULL" constraints take the STAT2 statistics into account, just like "x=VALUE" constraints.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 2353176811f752a16c1f2351a3d3431919b062a9
User & Date: drh 2011-03-17 01:34:26.570
References
2011-03-17
01:53
Backport the "x IS NULL" query planner enhancement of [2353176811f] to the 3.7.2 branch. (check-in: 68daf20d01 user: drh tags: branch-3.7.2)
Context
2011-03-17
01:58
Comment enhancement to better explain the logic in the "x IS NULL" optimization. (check-in: 869f894798 user: drh tags: trunk)
01:34
Enhances to the query planner such that "x IS NULL" constraints take the STAT2 statistics into account, just like "x=VALUE" constraints. (check-in: 2353176811 user: drh tags: trunk)
2011-03-16
18:54
Additional interpretation of flags and constants in the VFS trace output. (check-in: 3e984195f1 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
install-sh became executable.
Changes to src/where.c.
2541
2542
2543
2544
2545
2546
2547

2548
2549



2550
2551
2552
2553
2554
2555
2556
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc;                   /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */

  assert( p->aSample!=0 );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;

  rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
  if( rc ) goto whereEqualScanEst_cancel;



  if( pRhs==0 ) return SQLITE_NOTFOUND;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqualScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqualScanEst_cancel;
  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  if( iLower>=iUpper ){







>
|
|
>
>
>







2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc;                   /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */

  assert( p->aSample!=0 );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  if( pExpr ){
    rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
    if( rc ) goto whereEqualScanEst_cancel;
  }else{
    pRhs = sqlite3ValueNew(pParse->db);
  }
  if( pRhs==0 ) return SQLITE_NOTFOUND;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqualScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqualScanEst_cancel;
  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  if( iLower>=iUpper ){
2931
2932
2933
2934
2935
2936
2937
2938


2939
2940
2941
2942
2943
2944
2945
#ifdef SQLITE_ENABLE_STAT2
    /* If the constraint is of the form x=VALUE and histogram
    ** data is available for column x, then it might be possible
    ** to get a better estimate on the number of rows based on
    ** VALUE and how common that value is according to the histogram.
    */
    if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){
      if( pFirstTerm->eOperator==WO_EQ ){


        whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow);
      }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){
        whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow);
      }
    }
#endif /* SQLITE_ENABLE_STAT2 */








|
>
>







2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
#ifdef SQLITE_ENABLE_STAT2
    /* If the constraint is of the form x=VALUE and histogram
    ** data is available for column x, then it might be possible
    ** to get a better estimate on the number of rows based on
    ** VALUE and how common that value is according to the histogram.
    */
    if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){
      if( pFirstTerm->eOperator & (WO_EQ|WO_ISNULL) ){
        testcase( pFirstTerm->eOperator==WO_EQ );
        testcase( pFirstTerm->pOperator==WO_ISNULL );
        whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow);
      }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){
        whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow);
      }
    }
#endif /* SQLITE_ENABLE_STAT2 */

Changes to test/analyze5.test.
160
161
162
163
164
165
166














































167
168
169
170
171
172
173
  215  {z IN (-1,3)}         t1z  150
  216  {z=-1 OR z=3}         t1z  150

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   50
  302  {y=0.1}               t1y   50















































} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
  215  {z IN (-1,3)}         t1z  150
  216  {z=-1 OR z=3}         t1z  150

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   50
  302  {y=0.1}               t1y   50

  400  {x IS NULL}           t1x  400

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow
    list $idx $nrow
  } [list $index $rows]

  # Verify that the same result is achieved regardless of whether or not
  # the index is used
  do_test analyze5-1.${testid}b {
    set w2 [string map {y +y z +z} $where]
    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
                     ORDER BY +rowid"]
    set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]
    if {$a1==$a2} {
      set res ok
    } else {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}
}

# Increase the number of NULLs in column x
#
db eval {
   UPDATE t1 SET x=NULL;
   UPDATE t1 SET x=rowid
    WHERE rowid IN (SELECT rowid FROM t1 ORDER BY random() LIMIT 5);
   ANALYZE;
}

# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}         t1u  20
  501  {x=1 AND u='charlie'}               t1x   5
  502  {x IS NULL}                          {} 100
  503  {x=1}                               t1x  50
  504  {x IS NOT NULL}                     t1x  25

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow
185
186
187
188
189
190
191
192
193
194
      set res ok
    } else {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}
}


finish_test








<

231
232
233
234
235
236
237
238

239
      set res ok
    } else {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}
}


finish_test
test/progress.test became a regular file.
tool/mkopts.tcl became a regular file.