/ Check-in [3e1e79e1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve the performance of the ANALYZE command by taking advantage of the fact that every row of a UNIQUE index is distinct.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | faster-analyze
Files: files | file ages | folders
SHA1:3e1e79e1335f7ad33cd35f384f2a063c4aa2253b
User & Date: drh 2014-07-23 18:36:55
Context
2014-07-23
19:37
Ugh. Consecutive UNIQUE index entries are only distinct if the index is on NOT NULL columns. So the previous version was not quite right. This check-in fixes the problem. check-in: 30033f96 user: drh tags: faster-analyze
18:36
Improve the performance of the ANALYZE command by taking advantage of the fact that every row of a UNIQUE index is distinct. check-in: 3e1e79e1 user: drh tags: faster-analyze
15:51
Updated documentation on sqlite3_temp_directory. No changes to code. check-in: e6225a7b user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

   367    367   #endif
   368    368     sqlite3DbFree(p->db, p);
   369    369   }
   370    370   
   371    371   /*
   372    372   ** Implementation of the stat_init(N,K,C) SQL function. The three parameters
   373    373   ** are:
   374         -**     N:    The number of columns in the index including the rowid/pk
   375         -**     K:    The number of columns in the index excluding the rowid/pk
   376         -**     C:    The number of rows in the index
          374  +**     N:    The number of columns in the index including the rowid/pk (note 1)
          375  +**     K:    The number of columns in the index excluding the rowid/pk.
          376  +**     C:    The number of rows in the index (note 2)
   377    377   **
   378         -** C is only used for STAT3 and STAT4.
          378  +** Note 1:  In the special case of the covering index that implements a
          379  +** WITHOUT ROWID table, N is the number of PRIMARY KEY columns, not the
          380  +** total number of columns in the table.
   379    381   **
   380         -** For ordinary rowid tables, N==K+1.  But for WITHOUT ROWID tables,
   381         -** N=K+P where P is the number of columns in the primary key.  For the
   382         -** covering index that implements the original WITHOUT ROWID table, N==K.
          382  +** Note 2:  C is only used for STAT3 and STAT4.
          383  +**
          384  +** For indexes on ordinary rowid tables, N==K+1.  But for indexes on
          385  +** WITHOUT ROWID tables, N=K+P where P is the number of columns in the
          386  +** PRIMARY KEY of the table.  The covering index that implements the
          387  +** original WITHOUT ROWID table as N==K as a special case.
   383    388   **
   384    389   ** This routine allocates the Stat4Accum object in heap memory. The return 
   385    390   ** value is a pointer to the the Stat4Accum object encoded as a blob (i.e. 
   386    391   ** the size of the blob is sizeof(void*) bytes). 
   387    392   */
   388    393   static void statInit(
   389    394     sqlite3_context *context,
................................................................................
   685    690   ** Arguments:
   686    691   **
   687    692   **    P     Pointer to the Stat4Accum object created by stat_init()
   688    693   **    C     Index of left-most column to differ from previous row
   689    694   **    R     Rowid for the current row.  Might be a key record for
   690    695   **          WITHOUT ROWID tables.
   691    696   **
   692         -** The SQL function always returns NULL.
          697  +** This SQL function always returns NULL.  It's purpose it to accumulate
          698  +** statistical data and/or samples in the Stat4Accum object about the
          699  +** index being analyzed.  The stat_get() SQL function will later be used to
          700  +** extract relevant information for constructing the sqlite_statN tables.
   693    701   **
   694    702   ** The R parameter is only used for STAT3 and STAT4
   695    703   */
   696    704   static void statPush(
   697    705     sqlite3_context *context,
   698    706     int argc,
   699    707     sqlite3_value **argv
................................................................................
   779    787   #define STAT_GET_ROWID 1          /* "rowid" column of stat[34] entry */
   780    788   #define STAT_GET_NEQ   2          /* "neq" column of stat[34] entry */
   781    789   #define STAT_GET_NLT   3          /* "nlt" column of stat[34] entry */
   782    790   #define STAT_GET_NDLT  4          /* "ndlt" column of stat[34] entry */
   783    791   
   784    792   /*
   785    793   ** Implementation of the stat_get(P,J) SQL function.  This routine is
   786         -** used to query the results.  Content is returned for parameter J
          794  +** used to query statistical information that has been gathered into
          795  +** the Stat4Accum object by prior calls to stat_push().  The P parameter
          796  +** is a BLOB which is decoded into a pointer to the Stat4Accum objects.
          797  +** The content to returned is determined by the parameter J
   787    798   ** which is one of the STAT_GET_xxxx values defined above.
   788    799   **
   789    800   ** If neither STAT3 nor STAT4 are enabled, then J is always
   790    801   ** STAT_GET_STAT1 and is hence omitted and this routine becomes
   791    802   ** a one-parameter function, stat_get(P), that always returns the
   792    803   ** stat1 table entry information.
   793    804   */
................................................................................
   998   1009     iTabCur = iTab++;
   999   1010     iIdxCur = iTab++;
  1000   1011     pParse->nTab = MAX(pParse->nTab, iTab);
  1001   1012     sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead);
  1002   1013     sqlite3VdbeAddOp4(v, OP_String8, 0, regTabname, 0, pTab->zName, 0);
  1003   1014   
  1004   1015     for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
  1005         -    int nCol;                     /* Number of columns indexed by pIdx */
         1016  +    int nCol;                     /* Number of columns in pIdx. "N" */
  1006   1017       int *aGotoChng;               /* Array of jump instruction addresses */
  1007   1018       int addrRewind;               /* Address of "OP_Rewind iIdxCur" */
  1008         -    int addrGotoChng0;            /* Address of "Goto addr_chng_0" */
  1009   1019       int addrNextRow;              /* Address of "next_row:" */
  1010   1020       const char *zIdxName;         /* Name of the index */
         1021  +    int nColTest;                 /* Number of columns to test for changes */
  1011   1022   
  1012   1023       if( pOnlyIdx && pOnlyIdx!=pIdx ) continue;
  1013   1024       if( pIdx->pPartIdxWhere==0 ) needTableCnt = 0;
  1014   1025       if( !HasRowid(pTab) && IsPrimaryKeyIndex(pIdx) ){
  1015   1026         nCol = pIdx->nKeyCol;
  1016   1027         zIdxName = pTab->zName;
         1028  +      nColTest = nCol - 1;
  1017   1029       }else{
  1018   1030         nCol = pIdx->nColumn;
  1019   1031         zIdxName = pIdx->zName;
         1032  +      nColTest = pIdx->onError==OE_None ? nCol-1 : pIdx->nKeyCol-1;
  1020   1033       }
  1021         -    aGotoChng = sqlite3DbMallocRaw(db, sizeof(int)*(nCol+1));
         1034  +    aGotoChng = sqlite3DbMallocRaw(db, sizeof(int)*(nColTest+1));
  1022   1035       if( aGotoChng==0 ) continue;
  1023   1036   
  1024   1037       /* Populate the register containing the index name. */
  1025   1038       sqlite3VdbeAddOp4(v, OP_String8, 0, regIdxname, 0, zIdxName, 0);
  1026   1039       VdbeComment((v, "Analysis for %s.%s", pTab->zName, zIdxName));
  1027   1040   
  1028   1041       /*
................................................................................
  1057   1070       **  end_of_scan:
  1058   1071       */
  1059   1072   
  1060   1073       /* Make sure there are enough memory cells allocated to accommodate 
  1061   1074       ** the regPrev array and a trailing rowid (the rowid slot is required
  1062   1075       ** when building a record to insert into the sample column of 
  1063   1076       ** the sqlite_stat4 table.  */
  1064         -    pParse->nMem = MAX(pParse->nMem, regPrev+nCol);
         1077  +    pParse->nMem = MAX(pParse->nMem, regPrev+nColTest);
  1065   1078   
  1066   1079       /* Open a read-only cursor on the index being analyzed. */
  1067   1080       assert( iDb==sqlite3SchemaToIndex(db, pIdx->pSchema) );
  1068   1081       sqlite3VdbeAddOp3(v, OP_OpenRead, iIdxCur, pIdx->tnum, iDb);
  1069   1082       sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
  1070   1083       VdbeComment((v, "%s", pIdx->zName));
  1071   1084   
  1072   1085       /* Invoke the stat_init() function. The arguments are:
  1073   1086       ** 
  1074         -    **    (1) the number of columns in the index including the rowid,
  1075         -    **    (2) the number of rows in the index,
         1087  +    **    (1) the number of columns in the index including the rowid
         1088  +    **        (or for a WITHOUT ROWID table, the number of PK columns),
         1089  +    **    (2) the number of columns in the key without the rowid/pk
         1090  +    **    (3) the number of rows in the index,
         1091  +    **
  1076   1092       **
  1077         -    ** The second argument is only used for STAT3 and STAT4
         1093  +    ** The third argument is only used for STAT3 and STAT4
  1078   1094       */
  1079   1095   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  1080   1096       sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regStat4+3);
  1081   1097   #endif
  1082   1098       sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1);
  1083   1099       sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2);
  1084   1100       sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4);
................................................................................
  1092   1108       **   regChng = 0
  1093   1109       **   goto next_push_0;
  1094   1110       **
  1095   1111       */
  1096   1112       addrRewind = sqlite3VdbeAddOp1(v, OP_Rewind, iIdxCur);
  1097   1113       VdbeCoverage(v);
  1098   1114       sqlite3VdbeAddOp2(v, OP_Integer, 0, regChng);
  1099         -    addrGotoChng0 = sqlite3VdbeAddOp0(v, OP_Goto);
  1100         -
  1101         -    /*
  1102         -    **  next_row:
  1103         -    **   regChng = 0
  1104         -    **   if( idx(0) != regPrev(0) ) goto chng_addr_0
  1105         -    **   regChng = 1
  1106         -    **   if( idx(1) != regPrev(1) ) goto chng_addr_1
  1107         -    **   ...
  1108         -    **   regChng = N
  1109         -    **   goto chng_addr_N
  1110         -    */
  1111   1115       addrNextRow = sqlite3VdbeCurrentAddr(v);
  1112         -    for(i=0; i<nCol-1; i++){
  1113         -      char *pColl = (char*)sqlite3LocateCollSeq(pParse, pIdx->azColl[i]);
  1114         -      sqlite3VdbeAddOp2(v, OP_Integer, i, regChng);
  1115         -      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regTemp);
  1116         -      aGotoChng[i] = 
  1117         -      sqlite3VdbeAddOp4(v, OP_Ne, regTemp, 0, regPrev+i, pColl, P4_COLLSEQ);
  1118         -      sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
  1119         -      VdbeCoverage(v);
  1120         -    }
  1121         -    sqlite3VdbeAddOp2(v, OP_Integer, nCol-1, regChng);
  1122         -    aGotoChng[nCol] = sqlite3VdbeAddOp0(v, OP_Goto);
  1123         -
  1124         -    /*
  1125         -    **  chng_addr_0:
  1126         -    **   regPrev(0) = idx(0)
  1127         -    **  chng_addr_1:
  1128         -    **   regPrev(1) = idx(1)
  1129         -    **  ...
  1130         -    */
  1131         -    sqlite3VdbeJumpHere(v, addrGotoChng0);
  1132         -    for(i=0; i<nCol-1; i++){
  1133         -      sqlite3VdbeJumpHere(v, aGotoChng[i]);
  1134         -      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regPrev+i);
  1135         -    }
  1136         -
         1116  +
         1117  +    if( nColTest>0 ){
         1118  +      /*
         1119  +      **  next_row:
         1120  +      **   regChng = 0
         1121  +      **   if( idx(0) != regPrev(0) ) goto chng_addr_0
         1122  +      **   regChng = 1
         1123  +      **   if( idx(1) != regPrev(1) ) goto chng_addr_1
         1124  +      **   ...
         1125  +      **   regChng = N
         1126  +      **   goto chng_addr_N
         1127  +      */
         1128  +      sqlite3VdbeAddOp0(v, OP_Goto);
         1129  +      addrNextRow = sqlite3VdbeCurrentAddr(v);
         1130  +      for(i=0; i<nColTest; i++){
         1131  +        char *pColl = (char*)sqlite3LocateCollSeq(pParse, pIdx->azColl[i]);
         1132  +        sqlite3VdbeAddOp2(v, OP_Integer, i, regChng);
         1133  +        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regTemp);
         1134  +        aGotoChng[i] = 
         1135  +        sqlite3VdbeAddOp4(v, OP_Ne, regTemp, 0, regPrev+i, pColl, P4_COLLSEQ);
         1136  +        sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
         1137  +        VdbeCoverage(v);
         1138  +      }
         1139  +      sqlite3VdbeAddOp2(v, OP_Integer, nColTest, regChng);
         1140  +      aGotoChng[nColTest] = sqlite3VdbeAddOp0(v, OP_Goto);
         1141  +  
         1142  +  
         1143  +      /*
         1144  +      **  chng_addr_0:
         1145  +      **   regPrev(0) = idx(0)
         1146  +      **  chng_addr_1:
         1147  +      **   regPrev(1) = idx(1)
         1148  +      **  ...
         1149  +      */
         1150  +      sqlite3VdbeJumpHere(v, addrNextRow-1);
         1151  +      for(i=0; i<nColTest; i++){
         1152  +        sqlite3VdbeJumpHere(v, aGotoChng[i]);
         1153  +        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regPrev+i);
         1154  +      }
         1155  +      sqlite3VdbeJumpHere(v, aGotoChng[nColTest]);
         1156  +    }
         1157  +  
  1137   1158       /*
  1138   1159       **  chng_addr_N:
  1139   1160       **   regRowid = idx(rowid)            // STAT34 only
  1140   1161       **   stat_push(P, regChng, regRowid)  // 3rd parameter STAT34 only
  1141   1162       **   Next csr
  1142   1163       **   if !eof(csr) goto next_row;
  1143   1164       */
  1144         -    sqlite3VdbeJumpHere(v, aGotoChng[nCol]);
  1145   1165   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  1146   1166       assert( regRowid==(regStat4+2) );
  1147   1167       if( HasRowid(pTab) ){
  1148   1168         sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, regRowid);
  1149   1169       }else{
  1150   1170         Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable);
  1151   1171         int j, k, regKey;