/ Check-in [fb2b8ae8]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Set the TF_StatsUsed flag on tables when the query planner outcome is affected by the sqlite_stat1 data. Also, change the column names of the "PRAGMA stats" command so that they are not keywords.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | auto-analyze
Files: files | file ages | folders
SHA1: fb2b8ae8310e4ea4b42354bbf36c3084a9d5c6d7
User & Date: drh 2017-02-17 15:26:36
Context
2017-02-17
16:26
Add the "PRAGMA analyze_as_needed" command. check-in: e93db237 user: drh tags: auto-analyze
15:26
Set the TF_StatsUsed flag on tables when the query planner outcome is affected by the sqlite_stat1 data. Also, change the column names of the "PRAGMA stats" command so that they are not keywords. check-in: fb2b8ae8 user: drh tags: auto-analyze
13:38
Enhance the Index and Table objects so that they remember if their stats come from the sqlite_stat1 table. Make the "PRAGMA stats" an SQLITE_DEBUG only pragma. Add the flags column to "PRAGMA stats". These are all preliminary steps toward a "PRAGMA analyze_ifneeded;" feature. check-in: 85026c8e user: drh tags: auto-analyze
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/pragma.h.

67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
  /*   0 */ "cache_size",  /* Used by: default_cache_size */
  /*   1 */ "cid",         /* Used by: table_info */
  /*   2 */ "name",       
  /*   3 */ "type",       
  /*   4 */ "notnull",    
  /*   5 */ "dflt_value", 
  /*   6 */ "pk",         
  /*   7 */ "table",       /* Used by: stats */
  /*   8 */ "index",      
  /*   9 */ "width",      
  /*  10 */ "height",     
  /*  11 */ "flags",      
  /*  12 */ "seqno",       /* Used by: index_info */
  /*  13 */ "cid",        
  /*  14 */ "name",       
  /*  15 */ "seqno",       /* Used by: index_xinfo */
  /*  16 */ "cid",        
  /*  17 */ "name",       
  /*  18 */ "desc",       







|
|
|
|
|







67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
  /*   0 */ "cache_size",  /* Used by: default_cache_size */
  /*   1 */ "cid",         /* Used by: table_info */
  /*   2 */ "name",       
  /*   3 */ "type",       
  /*   4 */ "notnull",    
  /*   5 */ "dflt_value", 
  /*   6 */ "pk",         
  /*   7 */ "tbl",         /* Used by: stats */
  /*   8 */ "idx",        
  /*   9 */ "wdth",       
  /*  10 */ "hght",       
  /*  11 */ "flgs",       
  /*  12 */ "seqno",       /* Used by: index_info */
  /*  13 */ "cid",        
  /*  14 */ "name",       
  /*  15 */ "seqno",       /* Used by: index_xinfo */
  /*  16 */ "cid",        
  /*  17 */ "name",       
  /*  18 */ "desc",       

Changes to src/sqliteInt.h.

1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889



1890
1891
1892
1893
1894
1895
1896
....
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
** the TF_OOOHidden attribute would apply in this case.  Such tables require
** special handling during INSERT processing.
*/
#define TF_Readonly        0x0001    /* Read-only system table */
#define TF_Ephemeral       0x0002    /* An ephemeral table */
#define TF_HasPrimaryKey   0x0004    /* Table has a primary key */
#define TF_Autoincrement   0x0008    /* Integer primary key is autoincrement */
#define TF_HasStat1        0x0010    /* Has STAT1 data */
#define TF_WithoutRowid    0x0020    /* No rowid.  PRIMARY KEY is the key */
#define TF_NoVisibleRowid  0x0040    /* No user-visible "rowid" column */
#define TF_OOOHidden       0x0080    /* Out-of-Order hidden columns */




/*
** Test to see whether or not a table is a virtual table.  This is
** done as a macro so that it will be optimized out when virtual
** table support is omitted from the build.
*/
#ifndef SQLITE_OMIT_VIRTUALTABLE
................................................................................
  u8 onError;              /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
  unsigned idxType:2;      /* 1==UNIQUE, 2==PRIMARY KEY, 0==CREATE INDEX */
  unsigned bUnordered:1;   /* Use this index for == or IN queries only */
  unsigned uniqNotNull:1;  /* True if UNIQUE and NOT NULL for all columns */
  unsigned isResized:1;    /* True if resizeIndexObject() has been called */
  unsigned isCovering:1;   /* True if this is a covering index */
  unsigned noSkipScan:1;   /* Do not try to use skip-scan if true */
  unsigned hasStat1:1;     /* True if stat1 data has been loaded */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  int nSample;             /* Number of elements in aSample[] */
  int nSampleCol;          /* Size of IndexSample.anEq[] and so on */
  tRowcnt *aAvgEq;         /* Average nEq values for keys not in aSample */
  IndexSample *aSample;    /* Samples of the left-most key */
  tRowcnt *aiRowEst;       /* Non-logarithmic stat1 data for this index */
  tRowcnt nRowEst0;        /* Non-logarithmic number of rows in the index */







|



>
>
>







 







|







1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
....
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
** the TF_OOOHidden attribute would apply in this case.  Such tables require
** special handling during INSERT processing.
*/
#define TF_Readonly        0x0001    /* Read-only system table */
#define TF_Ephemeral       0x0002    /* An ephemeral table */
#define TF_HasPrimaryKey   0x0004    /* Table has a primary key */
#define TF_Autoincrement   0x0008    /* Integer primary key is autoincrement */
#define TF_HasStat1        0x0010    /* nRowLogEst set from sqlite_stat1 */
#define TF_WithoutRowid    0x0020    /* No rowid.  PRIMARY KEY is the key */
#define TF_NoVisibleRowid  0x0040    /* No user-visible "rowid" column */
#define TF_OOOHidden       0x0080    /* Out-of-Order hidden columns */
#define TF_SizeChng        0x0100    /* nRowLogEst might be inaccurate */
#define TF_StatsUsed       0x0200    /* Query planner decisions affected by
                                     ** Index.aiRowLogEst[] values */

/*
** Test to see whether or not a table is a virtual table.  This is
** done as a macro so that it will be optimized out when virtual
** table support is omitted from the build.
*/
#ifndef SQLITE_OMIT_VIRTUALTABLE
................................................................................
  u8 onError;              /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
  unsigned idxType:2;      /* 1==UNIQUE, 2==PRIMARY KEY, 0==CREATE INDEX */
  unsigned bUnordered:1;   /* Use this index for == or IN queries only */
  unsigned uniqNotNull:1;  /* True if UNIQUE and NOT NULL for all columns */
  unsigned isResized:1;    /* True if resizeIndexObject() has been called */
  unsigned isCovering:1;   /* True if this is a covering index */
  unsigned noSkipScan:1;   /* Do not try to use skip-scan if true */
  unsigned hasStat1:1;     /* aiRowLogEst values come from sqlite_stat1 */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  int nSample;             /* Number of elements in aSample[] */
  int nSampleCol;          /* Size of IndexSample.anEq[] and so on */
  tRowcnt *aAvgEq;         /* Average nEq values for keys not in aSample */
  IndexSample *aSample;    /* Samples of the left-most key */
  tRowcnt *aiRowEst;       /* Non-logarithmic stat1 data for this index */
  tRowcnt nRowEst0;        /* Non-logarithmic number of rows in the index */

Changes to src/where.c.

2383
2384
2385
2386
2387
2388
2389





2390
2391
2392
2393
2394
2395
2396
....
2930
2931
2932
2933
2934
2935
2936

2937







2938
2939
2940
2941
2942
2943
2944
     && (eOp & (WO_IS|WO_ISNULL))!=0
    ){
      testcase( eOp & WO_IS );
      testcase( eOp & WO_ISNULL );
      continue;
    }






    pNew->wsFlags = saved_wsFlags;
    pNew->u.btree.nEq = saved_nEq;
    pNew->u.btree.nBtm = saved_nBtm;
    pNew->u.btree.nTop = saved_nTop;
    pNew->nLTerm = saved_nLTerm;
    if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */
    pNew->aLTerm[pNew->nLTerm++] = pTerm;
................................................................................
        whereLoopOutputAdjust(pWC, pNew, rSize);
        rc = whereLoopInsert(pBuilder, pNew);
        pNew->nOut = rSize;
        if( rc ) break;
      }
    }


    rc = whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, 0);







#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    sqlite3Stat4ProbeFree(pBuilder->pRec);
    pBuilder->nRecValid = 0;
    pBuilder->pRec = 0;
#endif

    /* If there was an INDEXED BY clause, then only that one index is







>
>
>
>
>







 







>

>
>
>
>
>
>
>







2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
....
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
     && (eOp & (WO_IS|WO_ISNULL))!=0
    ){
      testcase( eOp & WO_IS );
      testcase( eOp & WO_ISNULL );
      continue;
    }

    if( IsUniqueIndex(pProbe) && saved_nEq==pProbe->nKeyCol-1 ){
      pBuilder->bldFlags |= SQLITE_BLDF_UNIQUE;
    }else{
      pBuilder->bldFlags |= SQLITE_BLDF_INDEXED;
    }
    pNew->wsFlags = saved_wsFlags;
    pNew->u.btree.nEq = saved_nEq;
    pNew->u.btree.nBtm = saved_nBtm;
    pNew->u.btree.nTop = saved_nTop;
    pNew->nLTerm = saved_nLTerm;
    if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */
    pNew->aLTerm[pNew->nLTerm++] = pTerm;
................................................................................
        whereLoopOutputAdjust(pWC, pNew, rSize);
        rc = whereLoopInsert(pBuilder, pNew);
        pNew->nOut = rSize;
        if( rc ) break;
      }
    }

    pBuilder->bldFlags = 0;
    rc = whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, 0);
    if( pBuilder->bldFlags==SQLITE_BLDF_INDEXED ){
      /* If a non-unique index is used, or if a prefix of the key for
      ** unique index is used (making the index functionally non-unique)
      ** then the sqlite_stat1 data becomes important for scoring the
      ** plan */
      pTab->tabFlags |= TF_StatsUsed;
    }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    sqlite3Stat4ProbeFree(pBuilder->pRec);
    pBuilder->nRecValid = 0;
    pBuilder->pRec = 0;
#endif

    /* If there was an INDEXED BY clause, then only that one index is

Changes to src/whereInt.h.

391
392
393
394
395
396
397

398
399




400
401
402
403
404
405
406
  ExprList *pOrderBy;       /* ORDER BY clause */
  WhereLoop *pNew;          /* Template WhereLoop */
  WhereOrSet *pOrSet;       /* Record best loops here, if not NULL */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  UnpackedRecord *pRec;     /* Probe for stat4 (if required) */
  int nRecValid;            /* Number of valid fields currently in pRec */
#endif

};





/*
** The WHERE clause processing routine has two halves.  The
** first part does the start of the WHERE loop and the second
** half does the tail of the WHERE loop.  An instance of
** this structure is returned by the first half and passed
** into the second half to give some continuity.
**







>


>
>
>
>







391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
  ExprList *pOrderBy;       /* ORDER BY clause */
  WhereLoop *pNew;          /* Template WhereLoop */
  WhereOrSet *pOrSet;       /* Record best loops here, if not NULL */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  UnpackedRecord *pRec;     /* Probe for stat4 (if required) */
  int nRecValid;            /* Number of valid fields currently in pRec */
#endif
  unsigned int bldFlags;    /* SQLITE_BLDF_* flags */
};

/* Allowed values for WhereLoopBuider.bldFlags */
#define SQLITE_BLDF_INDEXED  0x0001   /* An index is used */
#define SQLITE_BLDF_UNIQUE   0x0002   /* All keys of a UNIQUE index used */

/*
** The WHERE clause processing routine has two halves.  The
** first part does the start of the WHERE loop and the second
** half does the tail of the WHERE loop.  An instance of
** this structure is returned by the first half and passed
** into the second half to give some continuity.
**

Added test/autoanalyze1.test.

























































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# 2017-02-17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements tests for the logic used to estimate when
# running ANALYZE would be beneficial.
#
# Note that this test uses some hard-coded bitmask values from sqliteInt.h.
# If any of the following constants changes:
#
#    define TF_HasStat1   0x0010
#    define TF_SizeChng   0x0100
#    define TF_StatsUsed  0x0200
#
# then some of the magic numbers in test results below might need to be
# adjusted.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# There is nothing to test if ANALYZE is disable for this build.
# These tests also use "PRAGMA stats" which are only enabled for
# debugging builds.
#
ifcapable {!debug || !analyze} {
  finish_test
  return
}

do_execsql_test autoanalyze1-100 {
  -- Build up a test table with some indexes
  CREATE TABLE t1(a INTEGER PRIMARY KEY, b, c, d);
  CREATE UNIQUE INDEX t1bc ON t1(b,c);
  CREATE INDEX t1d ON t1(d);
  WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM c WHERE x<100)
    INSERT INTO t1(a,b,c,d) SELECT x, x, x, x FROM c;
  -- Verify that the hasStat1 flag is clear on on indexes
  SELECT idx, flgs FROM pragma_stats
   WHERE idx IS NOT NULL
   ORDER BY idx;
  -- Verify that the TF_HasStat1 flag is clear on the table
  SELECT tbl, (flgs & 0x10)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
} {t1bc 0 t1d 0 t1 0}

# No use of stat1 recorded so far
do_execsql_test autoanalyze1-110 {
  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
} {0}

# Access using a unique index does not set the TF_StatsUsed flag.
#
do_execsql_test autoanalyze1-200 {
  SELECT * FROM t1 WHERE a=55;
} {55 55 55 55}
do_execsql_test autoanalyze1-201 {
  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
} {0}

do_execsql_test autoanalyze1-210 {
  SELECT * FROM t1 WHERE a IN (55,199,299);
} {55 55 55 55}
do_execsql_test autoanalyze1-211 {
  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
} {0}

do_execsql_test autoanalyze1-220 {
  SELECT * FROM t1 WHERE (b,c)=(45,45);
} {45 45 45 45}
do_execsql_test autoanalyze1-221 {
  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
} {0}

# Any use of the non-unique t1d index triggers the use of stats.
#
sqlite3 db test.db
do_execsql_test autoanalyze1-300 {
  SELECT * FROM t1 WHERE d=45;
} {45 45 45 45}
do_execsql_test autoanalyze1-301 {
  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
} {1}

sqlite3 db test.db
do_execsql_test autoanalyze1-310 {
  SELECT * FROM t1 WHERE d=45 AND a=45;
} {45 45 45 45}
do_execsql_test autoanalyze1-311 {
  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
} {0}  ;# The ROWID lookup short-circuits the d=45 constraint

sqlite3 db test.db
do_execsql_test autoanalyze1-320 {
  SELECT * FROM t1 WHERE d=45 AND a IN (45,46);
} {45 45 45 45}
do_execsql_test autoanalyze1-321 {
  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
} {1}

# Any use of prefix of a unique index triggers the use of stats
#
sqlite3 db test.db
do_execsql_test autoanalyze1-400 {
  SELECT * FROM t1 WHERE b=45;
} {45 45 45 45}
do_execsql_test autoanalyze1-401 {
  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
} {1}

# The TF_StatsUsed flag is reset when the database is reopened
#
sqlite3 db test.db
do_execsql_test autoanalyze1-500 {
  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
} {0}

finish_test

Changes to tool/mkpragmatab.tcl.

221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
  NAME: table_info
  FLAG: NeedSchema Result1 SchemaOpt
  COLS: cid name type notnull dflt_value pk
  IF:   !defined(SQLITE_OMIT_SCHEMA_PRAGMAS)

  NAME: stats
  FLAG: NeedSchema Result0 SchemaReq
  COLS: table index width height flags
  IF:   !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) && defined(SQLITE_DEBUG)

  NAME: index_info
  TYPE: INDEX_INFO
  ARG:  0
  FLAG: NeedSchema Result1 SchemaOpt
  COLS: seqno cid name







|







221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
  NAME: table_info
  FLAG: NeedSchema Result1 SchemaOpt
  COLS: cid name type notnull dflt_value pk
  IF:   !defined(SQLITE_OMIT_SCHEMA_PRAGMAS)

  NAME: stats
  FLAG: NeedSchema Result0 SchemaReq
  COLS: tbl idx wdth hght flgs
  IF:   !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) && defined(SQLITE_DEBUG)

  NAME: index_info
  TYPE: INDEX_INFO
  ARG:  0
  FLAG: NeedSchema Result1 SchemaOpt
  COLS: seqno cid name