/ Check-in [824c8dd3]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The ANALYZE command now counts at all rows of an index, even those containing NULL values. A valid sqlite_stat1 entry is created even if the index contains nothing but NULLs.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:824c8dd3015bbd5c8a1dd661cfe09fe5bf7a80d3
User & Date: drh 2011-01-04 17:57:54
Context
2011-01-04
19:01
Fix the ANALYZE command so that it takes collating sequences into account when gathering index statistics. check-in: a5867cfc user: drh tags: trunk
17:57
The ANALYZE command now counts at all rows of an index, even those containing NULL values. A valid sqlite_stat1 entry is created even if the index contains nothing but NULLs. check-in: 824c8dd3 user: drh tags: trunk
2010-12-29
18:24
Have testfixture invoke C routine Zipvfs_Init() when creating a new interpreter if SQLITE_ENABLE_ZIPVFS is defined. check-in: 430635da user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

230
231
232
233
234
235
236
237
238

239
240
241
242
243
244
245
...
260
261
262
263
264
265
266
267
268
269



270
271
272
273
274
275
276
277
278
279
280
281
282
283




284
285
286
287
288
289
290
    endOfLoop = sqlite3VdbeMakeLabel(v);
    sqlite3VdbeAddOp2(v, OP_Rewind, iIdxCur, endOfLoop);
    topOfLoop = sqlite3VdbeCurrentAddr(v);
    sqlite3VdbeAddOp2(v, OP_AddImm, iMem, 1);

    for(i=0; i<nCol; i++){
      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regCol);
#ifdef SQLITE_ENABLE_STAT2
      if( i==0 ){

        /* Check if the record that cursor iIdxCur points to contains a
        ** value that should be stored in the sqlite_stat2 table. If so,
        ** store it.  */
        int ne = sqlite3VdbeAddOp3(v, OP_Ne, regRecno, 0, regSamplerecno);
        assert( regTabname+1==regIdxname 
             && regTabname+2==regSampleno
             && regTabname+3==regCol
................................................................................
        sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_INDEX_SAMPLES, regTemp2);
        sqlite3VdbeAddOp3(v, OP_Subtract, regSampleno, regTemp2, regTemp2);
        sqlite3VdbeAddOp3(v, OP_Divide, regTemp2, regTemp, regTemp);
        sqlite3VdbeAddOp3(v, OP_Add, regSamplerecno, regTemp, regSamplerecno);

        sqlite3VdbeJumpHere(v, ne);
        sqlite3VdbeAddOp2(v, OP_AddImm, regRecno, 1);
      }
#endif




      sqlite3VdbeAddOp3(v, OP_Ne, regCol, 0, iMem+nCol+i+1);
      /**** TODO:  add collating sequence *****/
      sqlite3VdbeChangeP5(v, SQLITE_JUMPIFNULL);
    }
    if( db->mallocFailed ){
      /* If a malloc failure has occurred, then the result of the expression 
      ** passed as the second argument to the call to sqlite3VdbeJumpHere() 
      ** below may be negative. Which causes an assert() to fail (or an
      ** out-of-bounds write if SQLITE_DEBUG is not defined).  */
      return;
    }
    sqlite3VdbeAddOp2(v, OP_Goto, 0, endOfLoop);
    for(i=0; i<nCol; i++){
      sqlite3VdbeJumpHere(v, sqlite3VdbeCurrentAddr(v)-(nCol*2));




      sqlite3VdbeAddOp2(v, OP_AddImm, iMem+i+1, 1);
      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, iMem+nCol+i+1);
    }

    /* End of the analysis loop. */
    sqlite3VdbeResolveLabel(v, endOfLoop);
    sqlite3VdbeAddOp2(v, OP_Next, iIdxCur, topOfLoop);







<

>







 







<


>
>
>


|










|
>
>
>
>







230
231
232
233
234
235
236

237
238
239
240
241
242
243
244
245
...
260
261
262
263
264
265
266

267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
    endOfLoop = sqlite3VdbeMakeLabel(v);
    sqlite3VdbeAddOp2(v, OP_Rewind, iIdxCur, endOfLoop);
    topOfLoop = sqlite3VdbeCurrentAddr(v);
    sqlite3VdbeAddOp2(v, OP_AddImm, iMem, 1);

    for(i=0; i<nCol; i++){
      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regCol);

      if( i==0 ){
#ifdef SQLITE_ENABLE_STAT2
        /* Check if the record that cursor iIdxCur points to contains a
        ** value that should be stored in the sqlite_stat2 table. If so,
        ** store it.  */
        int ne = sqlite3VdbeAddOp3(v, OP_Ne, regRecno, 0, regSamplerecno);
        assert( regTabname+1==regIdxname 
             && regTabname+2==regSampleno
             && regTabname+3==regCol
................................................................................
        sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_INDEX_SAMPLES, regTemp2);
        sqlite3VdbeAddOp3(v, OP_Subtract, regSampleno, regTemp2, regTemp2);
        sqlite3VdbeAddOp3(v, OP_Divide, regTemp2, regTemp, regTemp);
        sqlite3VdbeAddOp3(v, OP_Add, regSamplerecno, regTemp, regSamplerecno);

        sqlite3VdbeJumpHere(v, ne);
        sqlite3VdbeAddOp2(v, OP_AddImm, regRecno, 1);

#endif

        /* Always record the very first row */
        sqlite3VdbeAddOp1(v, OP_IfNot, iMem+1);
      }
      sqlite3VdbeAddOp3(v, OP_Ne, regCol, 0, iMem+nCol+i+1);
      /**** TODO:  add collating sequence *****/
      sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
    }
    if( db->mallocFailed ){
      /* If a malloc failure has occurred, then the result of the expression 
      ** passed as the second argument to the call to sqlite3VdbeJumpHere() 
      ** below may be negative. Which causes an assert() to fail (or an
      ** out-of-bounds write if SQLITE_DEBUG is not defined).  */
      return;
    }
    sqlite3VdbeAddOp2(v, OP_Goto, 0, endOfLoop);
    for(i=0; i<nCol; i++){
      int addr = sqlite3VdbeCurrentAddr(v) - (nCol*2);
      if( i==0 ){
        sqlite3VdbeJumpHere(v, addr-1);  /* Set jump dest for the OP_IfNot */
      }
      sqlite3VdbeJumpHere(v, addr);      /* Set jump dest for the OP_Ne */
      sqlite3VdbeAddOp2(v, OP_AddImm, iMem+i+1, 1);
      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, iMem+nCol+i+1);
    }

    /* End of the analysis loop. */
    sqlite3VdbeResolveLabel(v, endOfLoop);
    sqlite3VdbeAddOp2(v, OP_Next, iIdxCur, topOfLoop);

Added test/analyze4.test.

































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# 2011 January 04
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements regression tests for SQLite library. This file 
# implements tests for ANALYZE to verify that multiple rows containing
# a NULL value count as distinct rows for the purposes of analyze 
# statistics.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

do_test analyze4-1.0 {
  db eval {
    CREATE TABLE t1(a,b);
    CREATE INDEX t1a ON t1(a);
    CREATE INDEX t1b ON t1(b);
    INSERT INTO t1 VALUES(1,NULL);
    INSERT INTO t1 SELECT a+1, b FROM t1;
    INSERT INTO t1 SELECT a+2, b FROM t1;
    INSERT INTO t1 SELECT a+4, b FROM t1;
    INSERT INTO t1 SELECT a+8, b FROM t1;
    INSERT INTO t1 SELECT a+16, b FROM t1;
    INSERT INTO t1 SELECT a+32, b FROM t1;
    INSERT INTO t1 SELECT a+64, b FROM t1;
    ANALYZE;
  }

  # Should choose the t1a index since it is more specific than t1b.
  db eval {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=5 AND b IS NULL}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}

# Verify that the t1b index shows that it does not narrow down the
# search any at all.
#
do_test analyze4-1.1 {
  db eval {
    SELECT idx, stat FROM sqlite_stat1 WHERE tbl='t1' ORDER BY idx;
  }
} {t1a {128 1} t1b {128 128}}

# Change half of the b values from NULL to a constant.  Verify
# that the number of rows selected in stat1 is half the total 
# number of rows.
#
do_test analyze4-1.2 {
  db eval {
    UPDATE t1 SET b='x' WHERE a%2;
    ANALYZE;
    SELECT idx, stat FROM sqlite_stat1 WHERE tbl='t1' ORDER BY idx;
  }
} {t1a {128 1} t1b {128 64}}

# Change the t1.b values all back to NULL.  Add columns t1.c and t1.d.
# Create a multi-column indices using t1.b and verify that ANALYZE 
# processes them correctly.
#
do_test analyze4-1.3 {
  db eval {
    UPDATE t1 SET b=NULL;
    ALTER TABLE t1 ADD COLUMN c;
    ALTER TABLE t1 ADD COLUMN d;
    UPDATE t1 SET c=a/4, d=a/2;
    CREATE INDEX t1bcd ON t1(b,c,d);
    CREATE INDEX t1cdb ON t1(c,d,b);
    CREATE INDEX t1cbd ON t1(c,b,d);
    ANALYZE;
    SELECT idx, stat FROM sqlite_stat1 WHERE tbl='t1' ORDER BY idx;
  }
} {t1a {128 1} t1b {128 128} t1bcd {128 128 4 2} t1cbd {128 4 4 2} t1cdb {128 4 2 2}}

finish_test