/ Check-in [bfbdd074]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The analyze_as_needed pragma now responds to table size growth and will automatically rerun the analysis after each 10x size increase.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | auto-analyze
Files: files | file ages | folders
SHA1: bfbdd07409688fac4ccddbab3639745f6152e23d
User & Date: drh 2017-02-17 19:24:06
Context
2017-02-18
02:19
In the analyze_as_needed pragma, avoid running unnecessary OP_LoadAnalysis and OP_Expire opcodes. Make the analyze_as_needed pragma responsive to the schema name. check-in: 882599a4 user: drh tags: auto-analyze
2017-02-17
19:24
The analyze_as_needed pragma now responds to table size growth and will automatically rerun the analysis after each 10x size increase. check-in: bfbdd074 user: drh tags: auto-analyze
16:26
Add the "PRAGMA analyze_as_needed" command. check-in: e93db237 user: drh tags: auto-analyze
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

   954    954   */
   955    955   static void analyzeOneTable(
   956    956     Parse *pParse,   /* Parser context */
   957    957     Table *pTab,     /* Table whose indices are to be analyzed */
   958    958     Index *pOnlyIdx, /* If not NULL, only analyze this one index */
   959    959     int iStatCur,    /* Index of VdbeCursor that writes the sqlite_stat1 table */
   960    960     int iMem,        /* Available memory locations begin here */
   961         -  int iTab         /* Next available cursor */
          961  +  int iTab,        /* Next available cursor */
          962  +  LogEst szOld     /* Run the analysis if table row count is larger than this */
   962    963   ){
   963    964     sqlite3 *db = pParse->db;    /* Database handle */
   964    965     Index *pIdx;                 /* An index to being analyzed */
          966  +  int addrSizeCk = 0;          /* Address of the IfSmaller */
   965    967     int iIdxCur;                 /* Cursor open on index being analyzed */
   966    968     int iTabCur;                 /* Table cursor */
   967    969     Vdbe *v;                     /* The virtual machine being built up */
   968    970     int i;                       /* Loop counter */
   969    971     int jZeroRows = -1;          /* Jump from here if number of rows is zero */
   970    972     int iDb;                     /* Index of database containing pTab */
   971    973     u8 needTableCnt = 1;         /* True to count the table */
................................................................................
  1010   1012     ** to use for scanning indexes (iIdxCur). No index cursor is opened at
  1011   1013     ** this time though.  */
  1012   1014     sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName);
  1013   1015     iTabCur = iTab++;
  1014   1016     iIdxCur = iTab++;
  1015   1017     pParse->nTab = MAX(pParse->nTab, iTab);
  1016   1018     sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead);
         1019  +  if( szOld>0 ){
         1020  +    addrSizeCk = sqlite3VdbeAddOp3(v, OP_IfSmaller, iTabCur, 0, szOld);
         1021  +   }
  1017   1022     sqlite3VdbeLoadString(v, regTabname, pTab->zName);
  1018   1023   
  1019   1024     for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
  1020   1025       int nCol;                     /* Number of columns in pIdx. "N" */
  1021   1026       int addrRewind;               /* Address of "OP_Rewind iIdxCur" */
  1022   1027       int addrNextRow;              /* Address of "next_row:" */
  1023   1028       const char *zIdxName;         /* Name of the index */
................................................................................
  1267   1272       assert( "BBB"[0]==SQLITE_AFF_TEXT );
  1268   1273       sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 3, regTemp, "BBB", 0);
  1269   1274       sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regNewRowid);
  1270   1275       sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regTemp, regNewRowid);
  1271   1276       sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
  1272   1277       sqlite3VdbeJumpHere(v, jZeroRows);
  1273   1278     }
         1279  +  sqlite3VdbeJumpHere(v, addrSizeCk);
  1274   1280   }
  1275   1281   
  1276   1282   
  1277   1283   /*
  1278   1284   ** Generate code that will cause the most recent index analysis to
  1279   1285   ** be loaded into internal hash tables where is can be used.
  1280   1286   */
................................................................................
  1282   1288     Vdbe *v = sqlite3GetVdbe(pParse);
  1283   1289     if( v ){
  1284   1290       sqlite3VdbeAddOp1(v, OP_LoadAnalysis, iDb);
  1285   1291     }
  1286   1292   }
  1287   1293   
  1288   1294   /*
  1289         -** Return true if table pTab is in need of being reanalyzed.
         1295  +** Return true if table pTab might need to being reanalyzed.  Return
         1296  +** false if we know that pTab should not be reanalyzed.
         1297  +**
         1298  +** If returning true, also set *pThreshold to a size threshold that
         1299  +** will determine at run-time whether or not the reanalysis occurs.
         1300  +** The reanalysis will only occur if the size of the table is greater
         1301  +** than the threshold. Not that the threshold is a logarithmic LogEst
         1302  +** value.
  1290   1303   */
  1291         -static int analyzeNeeded(Table *pTab){
         1304  +static int analyzeNeeded(Table *pTab, LogEst *pThreshold){
  1292   1305     Index *pIdx;
  1293   1306     if( (pTab->tabFlags & TF_StatsUsed)==0 ) return 0;
  1294         -  if( (pTab->tabFlags & TF_SizeChng)!=0 ) return 1;
         1307  +
         1308  +  /* If TF_StatsUsed is true, then we might need to reanalyze.  But
         1309  +  ** only reanalyze if the table size has grown by a factor of 10 or more */
         1310  +  *pThreshold = pTab->nRowLogEst + 33;  assert( sqlite3LogEst(10)==33 );
         1311  +
         1312  +  /* Except, if any of the indexes of the table do not have valid
         1313  +  ** sqlite_stat1 entries, then set the size threshold to zero to
         1314  +  ** ensure the analysis will always occur. */
  1295   1315     for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
  1296         -    if( !pIdx->hasStat1 ) return 1;
         1316  +    if( !pIdx->hasStat1 ){
         1317  +      *pThreshold = 0;
         1318  +      break;
         1319  +    }
  1297   1320     }
  1298         -  return 0;
         1321  +  return 1;
  1299   1322   }
  1300   1323   
  1301   1324   /*
  1302   1325   ** Generate code that will do an analysis of an entire database.
  1303   1326   **
  1304   1327   ** Return a count of the number of tables actually analyzed.  Return 0
  1305   1328   ** if nothing was analyzed.
................................................................................
  1325   1348     for(iDb=iDbFirst; iDb<=iDbLast; iDb++){
  1326   1349       if( iDb==1 ) continue;  /* Do not analyze the TEMP database */
  1327   1350       pSchema = db->aDb[iDb].pSchema;
  1328   1351       assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
  1329   1352       cnt = 0;
  1330   1353       for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){
  1331   1354         Table *pTab = (Table*)sqliteHashData(k);
  1332         -      if( !onlyIfNeeded || analyzeNeeded(pTab) ){
         1355  +      LogEst szThreshold = 0;
         1356  +      if( !onlyIfNeeded || analyzeNeeded(pTab, &szThreshold) ){
  1333   1357           if( cnt==0 ){
  1334   1358             sqlite3BeginWriteOperation(pParse, 0, iDb);
  1335   1359             iStatCur = pParse->nTab;
  1336   1360             pParse->nTab += 3;
  1337   1361             openStatTable(pParse, iDb, iStatCur, 0, 0);
  1338   1362             iMem = pParse->nMem+1;
  1339   1363             iTab = pParse->nTab;
  1340   1364           }
  1341         -        analyzeOneTable(pParse, pTab, 0, iStatCur, iMem, iTab);
         1365  +        analyzeOneTable(pParse, pTab, 0, iStatCur, iMem, iTab, szThreshold);
  1342   1366           cnt++;
  1343   1367           allCnt++;
  1344   1368         }
  1345   1369       }
  1346   1370       if( cnt ) loadAnalysis(pParse, iDb);
  1347   1371     }
  1348   1372     return allCnt;
................................................................................
  1364   1388     iStatCur = pParse->nTab;
  1365   1389     pParse->nTab += 3;
  1366   1390     if( pOnlyIdx ){
  1367   1391       openStatTable(pParse, iDb, iStatCur, pOnlyIdx->zName, "idx");
  1368   1392     }else{
  1369   1393       openStatTable(pParse, iDb, iStatCur, pTab->zName, "tbl");
  1370   1394     }
  1371         -  analyzeOneTable(pParse, pTab, pOnlyIdx, iStatCur,pParse->nMem+1,pParse->nTab);
         1395  +  analyzeOneTable(pParse, pTab, pOnlyIdx, iStatCur,pParse->nMem+1,
         1396  +                  pParse->nTab, 0);
  1372   1397     loadAnalysis(pParse, iDb);
  1373   1398   }
  1374   1399   
  1375   1400   /*
  1376   1401   ** Generate code for the ANALYZE command.  The parser calls this routine
  1377   1402   ** when it recognizes an ANALYZE command.
  1378   1403   **

Changes to src/btree.c.

  5314   5314   int sqlite3BtreeEof(BtCursor *pCur){
  5315   5315     /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries
  5316   5316     ** have been deleted? This API will need to change to return an error code
  5317   5317     ** as well as the boolean result value.
  5318   5318     */
  5319   5319     return (CURSOR_VALID!=pCur->eState);
  5320   5320   }
         5321  +
         5322  +/*
         5323  +** Return an estimate for the number of rows in the table that pCur is
         5324  +** pointing to.  Return a negative number if no estimate is currently 
         5325  +** available.
         5326  +*/
         5327  +i64 sqlite3BtreeRowCountEst(BtCursor *pCur){
         5328  +  i64 n;
         5329  +  u8 i;
         5330  +
         5331  +  assert( cursorOwnsBtShared(pCur) );
         5332  +  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
         5333  +  if( pCur->eState!=CURSOR_VALID ) return -1;
         5334  +  if( pCur->apPage[pCur->iPage-1]->leaf==0 ) return -1;
         5335  +  for(n=1, i=0; i<pCur->iPage; i++){
         5336  +    n *= pCur->apPage[i]->nCell;
         5337  +  }
         5338  +  return n;
         5339  +}
  5321   5340   
  5322   5341   /*
  5323   5342   ** Advance the cursor to the next entry in the database.  If
  5324   5343   ** successful then set *pRes=0.  If the cursor
  5325   5344   ** was already pointing to the last entry in the database before
  5326   5345   ** this routine was called, then set *pRes=1.
  5327   5346   **

Changes to src/btree.h.

   292    292   i64 sqlite3BtreeIntegerKey(BtCursor*);
   293    293   int sqlite3BtreePayload(BtCursor*, u32 offset, u32 amt, void*);
   294    294   const void *sqlite3BtreePayloadFetch(BtCursor*, u32 *pAmt);
   295    295   u32 sqlite3BtreePayloadSize(BtCursor*);
   296    296   
   297    297   char *sqlite3BtreeIntegrityCheck(Btree*, int *aRoot, int nRoot, int, int*);
   298    298   struct Pager *sqlite3BtreePager(Btree*);
          299  +i64 sqlite3BtreeRowCountEst(BtCursor*);
   299    300   
   300    301   #ifndef SQLITE_OMIT_INCRBLOB
   301    302   int sqlite3BtreePayloadChecked(BtCursor*, u32 offset, u32 amt, void*);
   302    303   int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*);
   303    304   void sqlite3BtreeIncrblobCursor(BtCursor *);
   304    305   #endif
   305    306   void sqlite3BtreeClearCursor(BtCursor *);

Changes to src/sqliteInt.h.

  1883   1883   #define TF_Ephemeral       0x0002    /* An ephemeral table */
  1884   1884   #define TF_HasPrimaryKey   0x0004    /* Table has a primary key */
  1885   1885   #define TF_Autoincrement   0x0008    /* Integer primary key is autoincrement */
  1886   1886   #define TF_HasStat1        0x0010    /* nRowLogEst set from sqlite_stat1 */
  1887   1887   #define TF_WithoutRowid    0x0020    /* No rowid.  PRIMARY KEY is the key */
  1888   1888   #define TF_NoVisibleRowid  0x0040    /* No user-visible "rowid" column */
  1889   1889   #define TF_OOOHidden       0x0080    /* Out-of-Order hidden columns */
  1890         -#define TF_SizeChng        0x0100    /* nRowLogEst might be inaccurate */
  1891         -#define TF_StatsUsed       0x0200    /* Query planner decisions affected by
         1890  +#define TF_StatsUsed       0x0100    /* Query planner decisions affected by
  1892   1891                                        ** Index.aiRowLogEst[] values */
  1893   1892   
  1894   1893   /*
  1895   1894   ** Test to see whether or not a table is a virtual table.  This is
  1896   1895   ** done as a macro so that it will be optimized out when virtual
  1897   1896   ** table support is omitted from the build.
  1898   1897   */

Changes to src/vdbe.c.

  4842   4842         if( res ) goto jump_to_p2;
  4843   4843       }
  4844   4844     }else{
  4845   4845       assert( pOp->p2==0 );
  4846   4846     }
  4847   4847     break;
  4848   4848   }
         4849  +
         4850  +/* Opcode: IfSmaller P1 P2 P3 * *
         4851  +**
         4852  +** Estimate the number of rows in the table P1.  Jump to P2 if that
         4853  +** estimate is less than approximately 2**(0.1*P3).
         4854  +*/
         4855  +case OP_IfSmaller: {        /* jump */
         4856  +  VdbeCursor *pC;
         4857  +  BtCursor *pCrsr;
         4858  +  int res;
         4859  +  i64 sz;
         4860  +
         4861  +  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
         4862  +  pC = p->apCsr[pOp->p1];
         4863  +  assert( pC!=0 );
         4864  +  pCrsr = pC->uc.pCursor;
         4865  +  assert( pCrsr );
         4866  +  rc = sqlite3BtreeFirst(pCrsr, &res);
         4867  +  if( rc ) goto abort_due_to_error;
         4868  +  if( res==0 ){
         4869  +    sz = sqlite3BtreeRowCountEst(pCrsr);
         4870  +    if( ALWAYS(sz>=0) && sqlite3LogEst((u64)sz)<pOp->p3 ) res = 1;
         4871  +  }
         4872  +  VdbeBranchTaken(res!=0,2);
         4873  +  if( res ) goto jump_to_p2;
         4874  +  break;
         4875  +}
  4849   4876   
  4850   4877   
  4851   4878   /* Opcode: SorterSort P1 P2 * * *
  4852   4879   **
  4853   4880   ** After all records have been inserted into the Sorter object
  4854   4881   ** identified by P1, invoke this opcode to actually do the sorting.
  4855   4882   ** Jump to P2 if there are no records to be sorted.

Changes to test/autoanalyze1.test.

    12     12   # This file implements tests for the logic used to estimate when
    13     13   # running ANALYZE would be beneficial.
    14     14   #
    15     15   # Note that this test uses some hard-coded bitmask values from sqliteInt.h.
    16     16   # If any of the following constants changes:
    17     17   #
    18     18   #    define TF_HasStat1   0x0010
    19         -#    define TF_SizeChng   0x0100
    20         -#    define TF_StatsUsed  0x0200
           19  +#    define TF_StatsUsed  0x0100
    21     20   #
    22     21   # then some of the magic numbers in test results below might need to be
    23     22   # adjusted.
    24     23   #
    25     24   
    26     25   set testdir [file dirname $argv0]
    27     26   source $testdir/tester.tcl
................................................................................
    48     47      ORDER BY idx;
    49     48     -- Verify that the TF_HasStat1 flag is clear on the table
    50     49     SELECT tbl, (flgs & 0x10)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
    51     50   } {t1bc 0 t1d 0 t1 0}
    52     51   
    53     52   # No use of stat1 recorded so far
    54     53   do_execsql_test autoanalyze1-110 {
    55         -  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
           54  +  SELECT (flgs & 0x0100)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
    56     55   } {0}
    57     56   
    58     57   # Access using a unique index does not set the TF_StatsUsed flag.
    59     58   #
    60     59   do_execsql_test autoanalyze1-200 {
    61     60     SELECT * FROM t1 WHERE a=55;
    62     61   } {55 55 55 55}
    63     62   do_execsql_test autoanalyze1-201 {
    64         -  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
           63  +  SELECT (flgs & 0x0100)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
    65     64   } {0}
    66     65   
    67     66   do_execsql_test autoanalyze1-210 {
    68     67     SELECT * FROM t1 WHERE a IN (55,199,299);
    69     68   } {55 55 55 55}
    70     69   do_execsql_test autoanalyze1-211 {
    71         -  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
           70  +  SELECT (flgs & 0x0100)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
    72     71   } {0}
    73     72   
    74     73   do_execsql_test autoanalyze1-220 {
    75     74     SELECT * FROM t1 WHERE (b,c)=(45,45);
    76     75   } {45 45 45 45}
    77     76   do_execsql_test autoanalyze1-221 {
    78         -  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
           77  +  SELECT (flgs & 0x0100)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
    79     78   } {0}
    80     79   
    81     80   # Any use of the non-unique t1d index triggers the use of stats.
    82     81   #
    83     82   sqlite3 db test.db
    84     83   do_execsql_test autoanalyze1-300 {
    85     84     SELECT * FROM t1 WHERE d=45;
    86     85   } {45 45 45 45}
    87     86   do_execsql_test autoanalyze1-301 {
    88         -  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
           87  +  SELECT (flgs & 0x0100)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
    89     88   } {1}
    90     89   
    91     90   sqlite3 db test.db
    92     91   do_execsql_test autoanalyze1-310 {
    93     92     SELECT * FROM t1 WHERE d=45 AND a=45;
    94     93   } {45 45 45 45}
    95     94   do_execsql_test autoanalyze1-311 {
    96         -  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
           95  +  SELECT (flgs & 0x0100)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
    97     96   } {0}  ;# The ROWID lookup short-circuits the d=45 constraint
    98     97   
    99     98   sqlite3 db test.db
   100     99   do_execsql_test autoanalyze1-320 {
   101    100     SELECT * FROM t1 WHERE d=45 AND a IN (45,46);
   102    101   } {45 45 45 45}
   103    102   do_execsql_test autoanalyze1-321 {
   104         -  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
          103  +  SELECT (flgs & 0x0100)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
   105    104   } {1}
   106    105   
   107    106   # Any use of prefix of a unique index triggers the use of stats
   108    107   #
   109    108   sqlite3 db test.db
   110    109   do_execsql_test autoanalyze1-400 {
   111    110     SELECT * FROM t1 WHERE b=45;
   112    111   } {45 45 45 45}
   113    112   do_execsql_test autoanalyze1-401 {
   114         -  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
          113  +  SELECT (flgs & 0x0100)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
   115    114   } {1}
   116    115   
   117    116   # The TF_StatsUsed flag is reset when the database is reopened
   118    117   #
   119    118   sqlite3 db test.db
   120    119   do_execsql_test autoanalyze1-500 {
   121         -  SELECT (flgs & 0x0200)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
          120  +  SELECT (flgs & 0x0100)!=0 FROM pragma_stats WHERE tbl='t1' AND idx IS NULL;
   122    121   } {0}
   123    122   
   124    123   finish_test