/ Check-in [23531768]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhances to the query planner such that "x IS NULL" constraints take the STAT2 statistics into account, just like "x=VALUE" constraints.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:2353176811f752a16c1f2351a3d3431919b062a9
User & Date: drh 2011-03-17 01:34:26
References
2011-03-17
01:53
Backport the "x IS NULL" query planner enhancement of [2353176811f] to the 3.7.2 branch. check-in: 68daf20d user: drh tags: branch-3.7.2
Context
2011-03-17
01:58
Comment enhancement to better explain the logic in the "x IS NULL" optimization. check-in: 869f8947 user: drh tags: trunk
01:34
Enhances to the query planner such that "x IS NULL" constraints take the STAT2 statistics into account, just like "x=VALUE" constraints. check-in: 23531768 user: drh tags: trunk
2011-03-16
18:54
Additional interpretation of flags and constants in the VFS trace output. check-in: 3e984195 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

install-sh became executable.

Changes to src/where.c.

2541
2542
2543
2544
2545
2546
2547

2548
2549



2550
2551
2552
2553
2554
2555
2556
....
2931
2932
2933
2934
2935
2936
2937

2938

2939
2940
2941
2942
2943
2944
2945
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc;                   /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */

  assert( p->aSample!=0 );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;

  rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
  if( rc ) goto whereEqualScanEst_cancel;



  if( pRhs==0 ) return SQLITE_NOTFOUND;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqualScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqualScanEst_cancel;
  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  if( iLower>=iUpper ){
................................................................................
#ifdef SQLITE_ENABLE_STAT2
    /* If the constraint is of the form x=VALUE and histogram
    ** data is available for column x, then it might be possible
    ** to get a better estimate on the number of rows based on
    ** VALUE and how common that value is according to the histogram.
    */
    if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){

      if( pFirstTerm->eOperator==WO_EQ ){

        whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow);
      }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){
        whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow);
      }
    }
#endif /* SQLITE_ENABLE_STAT2 */








>
|
|
>
>
>







 







>
|
>







2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
....
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc;                   /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */

  assert( p->aSample!=0 );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  if( pExpr ){
    rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
    if( rc ) goto whereEqualScanEst_cancel;
  }else{
    pRhs = sqlite3ValueNew(pParse->db);
  }
  if( pRhs==0 ) return SQLITE_NOTFOUND;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqualScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqualScanEst_cancel;
  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  if( iLower>=iUpper ){
................................................................................
#ifdef SQLITE_ENABLE_STAT2
    /* If the constraint is of the form x=VALUE and histogram
    ** data is available for column x, then it might be possible
    ** to get a better estimate on the number of rows based on
    ** VALUE and how common that value is according to the histogram.
    */
    if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){
      if( pFirstTerm->eOperator & (WO_EQ|WO_ISNULL) ){
        testcase( pFirstTerm->eOperator==WO_EQ );
        testcase( pFirstTerm->pOperator==WO_ISNULL );
        whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow);
      }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){
        whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow);
      }
    }
#endif /* SQLITE_ENABLE_STAT2 */

Changes to test/analyze5.test.

160
161
162
163
164
165
166














































167
168
169
170
171
172
173
...
185
186
187
188
189
190
191
192
193
194
  215  {z IN (-1,3)}         t1z  150
  216  {z=-1 OR z=3}         t1z  150

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   50
  302  {y=0.1}               t1y   50















































} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow
................................................................................
      set res ok
    } else {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}
}


finish_test







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 








<

160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
...
231
232
233
234
235
236
237
238

239
  215  {z IN (-1,3)}         t1z  150
  216  {z=-1 OR z=3}         t1z  150

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   50
  302  {y=0.1}               t1y   50

  400  {x IS NULL}           t1x  400

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow
    list $idx $nrow
  } [list $index $rows]

  # Verify that the same result is achieved regardless of whether or not
  # the index is used
  do_test analyze5-1.${testid}b {
    set w2 [string map {y +y z +z} $where]
    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
                     ORDER BY +rowid"]
    set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]
    if {$a1==$a2} {
      set res ok
    } else {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}
}

# Increase the number of NULLs in column x
#
db eval {
   UPDATE t1 SET x=NULL;
   UPDATE t1 SET x=rowid
    WHERE rowid IN (SELECT rowid FROM t1 ORDER BY random() LIMIT 5);
   ANALYZE;
}

# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}         t1u  20
  501  {x=1 AND u='charlie'}               t1x   5
  502  {x IS NULL}                          {} 100
  503  {x=1}                               t1x  50
  504  {x IS NOT NULL}                     t1x  25

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow
................................................................................
      set res ok
    } else {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}
}


finish_test

test/progress.test became a regular file.

tool/mkopts.tcl became a regular file.