/ Check-in [114dcf33]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve the performance of the ANALYZE command by taking advantage of UNIQUE constraints on indices.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 114dcf33670fd98a1ebbac0e44f66b2d8bcccddf
User & Date: drh 2014-07-24 23:23:26
Context
2014-07-25
18:01
Add constraints (enforced only when SQLITE_DEBUG is enabled) on the use of OP_Next and OP_Prev. check-in: 2230c74f user: drh tags: trunk
2014-07-24
23:23
Improve the performance of the ANALYZE command by taking advantage of UNIQUE constraints on indices. check-in: 114dcf33 user: drh tags: trunk
22:41
Fix a bug in the whereRangeSkipScanEst() procedure (added by check-in [d09ca6d5efad3e4cfa]) where it fails to consider the possibility of a ROWID column when computing the affinity of a table column. check-in: 6aea2258 user: drh tags: trunk
20:25
Avoid trying to allocation zero bytes when analyzing a unique non-null index. Closed-Leaf check-in: 85e2bade user: drh tags: faster-analyze
Changes
Hide Diffs Side-by-Side Diffs Show Whitespace Changes Patch

Changes to src/analyze.c.

   367    367   #endif
   368    368     sqlite3DbFree(p->db, p);
   369    369   }
   370    370   
   371    371   /*
   372    372   ** Implementation of the stat_init(N,K,C) SQL function. The three parameters
   373    373   ** are:
   374         -**     N:    The number of columns in the index including the rowid/pk
   375         -**     K:    The number of columns in the index excluding the rowid/pk
   376         -**     C:    The number of rows in the index
          374  +**     N:    The number of columns in the index including the rowid/pk (note 1)
          375  +**     K:    The number of columns in the index excluding the rowid/pk.
          376  +**     C:    The number of rows in the index (note 2)
   377    377   **
   378         -** C is only used for STAT3 and STAT4.
          378  +** Note 1:  In the special case of the covering index that implements a
          379  +** WITHOUT ROWID table, N is the number of PRIMARY KEY columns, not the
          380  +** total number of columns in the table.
   379    381   **
   380         -** For ordinary rowid tables, N==K+1.  But for WITHOUT ROWID tables,
   381         -** N=K+P where P is the number of columns in the primary key.  For the
   382         -** covering index that implements the original WITHOUT ROWID table, N==K.
          382  +** Note 2:  C is only used for STAT3 and STAT4.
          383  +**
          384  +** For indexes on ordinary rowid tables, N==K+1.  But for indexes on
          385  +** WITHOUT ROWID tables, N=K+P where P is the number of columns in the
          386  +** PRIMARY KEY of the table.  The covering index that implements the
          387  +** original WITHOUT ROWID table as N==K as a special case.
   383    388   **
   384    389   ** This routine allocates the Stat4Accum object in heap memory. The return 
   385    390   ** value is a pointer to the the Stat4Accum object encoded as a blob (i.e. 
   386    391   ** the size of the blob is sizeof(void*) bytes). 
   387    392   */
   388    393   static void statInit(
   389    394     sqlite3_context *context,
................................................................................
   685    690   ** Arguments:
   686    691   **
   687    692   **    P     Pointer to the Stat4Accum object created by stat_init()
   688    693   **    C     Index of left-most column to differ from previous row
   689    694   **    R     Rowid for the current row.  Might be a key record for
   690    695   **          WITHOUT ROWID tables.
   691    696   **
   692         -** The SQL function always returns NULL.
          697  +** This SQL function always returns NULL.  It's purpose it to accumulate
          698  +** statistical data and/or samples in the Stat4Accum object about the
          699  +** index being analyzed.  The stat_get() SQL function will later be used to
          700  +** extract relevant information for constructing the sqlite_statN tables.
   693    701   **
   694    702   ** The R parameter is only used for STAT3 and STAT4
   695    703   */
   696    704   static void statPush(
   697    705     sqlite3_context *context,
   698    706     int argc,
   699    707     sqlite3_value **argv
................................................................................
   779    787   #define STAT_GET_ROWID 1          /* "rowid" column of stat[34] entry */
   780    788   #define STAT_GET_NEQ   2          /* "neq" column of stat[34] entry */
   781    789   #define STAT_GET_NLT   3          /* "nlt" column of stat[34] entry */
   782    790   #define STAT_GET_NDLT  4          /* "ndlt" column of stat[34] entry */
   783    791   
   784    792   /*
   785    793   ** Implementation of the stat_get(P,J) SQL function.  This routine is
   786         -** used to query the results.  Content is returned for parameter J
          794  +** used to query statistical information that has been gathered into
          795  +** the Stat4Accum object by prior calls to stat_push().  The P parameter
          796  +** is a BLOB which is decoded into a pointer to the Stat4Accum objects.
          797  +** The content to returned is determined by the parameter J
   787    798   ** which is one of the STAT_GET_xxxx values defined above.
   788    799   **
   789    800   ** If neither STAT3 nor STAT4 are enabled, then J is always
   790    801   ** STAT_GET_STAT1 and is hence omitted and this routine becomes
   791    802   ** a one-parameter function, stat_get(P), that always returns the
   792    803   ** stat1 table entry information.
   793    804   */
................................................................................
   998   1009     iTabCur = iTab++;
   999   1010     iIdxCur = iTab++;
  1000   1011     pParse->nTab = MAX(pParse->nTab, iTab);
  1001   1012     sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead);
  1002   1013     sqlite3VdbeAddOp4(v, OP_String8, 0, regTabname, 0, pTab->zName, 0);
  1003   1014   
  1004   1015     for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
  1005         -    int nCol;                     /* Number of columns indexed by pIdx */
  1006         -    int *aGotoChng;               /* Array of jump instruction addresses */
         1016  +    int nCol;                     /* Number of columns in pIdx. "N" */
  1007   1017       int addrRewind;               /* Address of "OP_Rewind iIdxCur" */
  1008         -    int addrGotoChng0;            /* Address of "Goto addr_chng_0" */
  1009   1018       int addrNextRow;              /* Address of "next_row:" */
  1010   1019       const char *zIdxName;         /* Name of the index */
         1020  +    int nColTest;                 /* Number of columns to test for changes */
  1011   1021   
  1012   1022       if( pOnlyIdx && pOnlyIdx!=pIdx ) continue;
  1013   1023       if( pIdx->pPartIdxWhere==0 ) needTableCnt = 0;
  1014   1024       if( !HasRowid(pTab) && IsPrimaryKeyIndex(pIdx) ){
  1015   1025         nCol = pIdx->nKeyCol;
  1016   1026         zIdxName = pTab->zName;
         1027  +      nColTest = nCol - 1;
  1017   1028       }else{
  1018   1029         nCol = pIdx->nColumn;
  1019   1030         zIdxName = pIdx->zName;
         1031  +      nColTest = pIdx->uniqNotNull ? pIdx->nKeyCol-1 : nCol-1;
  1020   1032       }
  1021         -    aGotoChng = sqlite3DbMallocRaw(db, sizeof(int)*(nCol+1));
  1022         -    if( aGotoChng==0 ) continue;
  1023   1033   
  1024   1034       /* Populate the register containing the index name. */
  1025   1035       sqlite3VdbeAddOp4(v, OP_String8, 0, regIdxname, 0, zIdxName, 0);
  1026   1036       VdbeComment((v, "Analysis for %s.%s", pTab->zName, zIdxName));
  1027   1037   
  1028   1038       /*
  1029   1039       ** Pseudo-code for loop that calls stat_push():
................................................................................
  1044   1054       **
  1045   1055       **  chng_addr_0:
  1046   1056       **   regPrev(0) = idx(0)
  1047   1057       **  chng_addr_1:
  1048   1058       **   regPrev(1) = idx(1)
  1049   1059       **  ...
  1050   1060       **
  1051         -    **  chng_addr_N:
         1061  +    **  endDistinctTest:
  1052   1062       **   regRowid = idx(rowid)
  1053   1063       **   stat_push(P, regChng, regRowid)
  1054   1064       **   Next csr
  1055   1065       **   if !eof(csr) goto next_row;
  1056   1066       **
  1057   1067       **  end_of_scan:
  1058   1068       */
  1059   1069   
  1060   1070       /* Make sure there are enough memory cells allocated to accommodate 
  1061   1071       ** the regPrev array and a trailing rowid (the rowid slot is required
  1062   1072       ** when building a record to insert into the sample column of 
  1063   1073       ** the sqlite_stat4 table.  */
  1064         -    pParse->nMem = MAX(pParse->nMem, regPrev+nCol);
         1074  +    pParse->nMem = MAX(pParse->nMem, regPrev+nColTest);
  1065   1075   
  1066   1076       /* Open a read-only cursor on the index being analyzed. */
  1067   1077       assert( iDb==sqlite3SchemaToIndex(db, pIdx->pSchema) );
  1068   1078       sqlite3VdbeAddOp3(v, OP_OpenRead, iIdxCur, pIdx->tnum, iDb);
  1069   1079       sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
  1070   1080       VdbeComment((v, "%s", pIdx->zName));
  1071   1081   
  1072   1082       /* Invoke the stat_init() function. The arguments are:
  1073   1083       ** 
  1074         -    **    (1) the number of columns in the index including the rowid,
  1075         -    **    (2) the number of rows in the index,
         1084  +    **    (1) the number of columns in the index including the rowid
         1085  +    **        (or for a WITHOUT ROWID table, the number of PK columns),
         1086  +    **    (2) the number of columns in the key without the rowid/pk
         1087  +    **    (3) the number of rows in the index,
  1076   1088       **
  1077         -    ** The second argument is only used for STAT3 and STAT4
         1089  +    **
         1090  +    ** The third argument is only used for STAT3 and STAT4
  1078   1091       */
  1079   1092   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  1080   1093       sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regStat4+3);
  1081   1094   #endif
  1082   1095       sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1);
  1083   1096       sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2);
  1084   1097       sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4);
................................................................................
  1092   1105       **   regChng = 0
  1093   1106       **   goto next_push_0;
  1094   1107       **
  1095   1108       */
  1096   1109       addrRewind = sqlite3VdbeAddOp1(v, OP_Rewind, iIdxCur);
  1097   1110       VdbeCoverage(v);
  1098   1111       sqlite3VdbeAddOp2(v, OP_Integer, 0, regChng);
  1099         -    addrGotoChng0 = sqlite3VdbeAddOp0(v, OP_Goto);
         1112  +    addrNextRow = sqlite3VdbeCurrentAddr(v);
         1113  +
         1114  +    if( nColTest>0 ){
         1115  +      int endDistinctTest = sqlite3VdbeMakeLabel(v);
         1116  +      int *aGotoChng;               /* Array of jump instruction addresses */
         1117  +      aGotoChng = sqlite3DbMallocRaw(db, sizeof(int)*nColTest);
         1118  +      if( aGotoChng==0 ) continue;
  1100   1119   
  1101   1120       /*
  1102   1121       **  next_row:
  1103   1122       **   regChng = 0
  1104   1123       **   if( idx(0) != regPrev(0) ) goto chng_addr_0
  1105   1124       **   regChng = 1
  1106   1125       **   if( idx(1) != regPrev(1) ) goto chng_addr_1
  1107   1126       **   ...
  1108   1127       **   regChng = N
  1109         -    **   goto chng_addr_N
         1128  +      **   goto endDistinctTest
  1110   1129       */
         1130  +      sqlite3VdbeAddOp0(v, OP_Goto);
  1111   1131       addrNextRow = sqlite3VdbeCurrentAddr(v);
  1112         -    for(i=0; i<nCol-1; i++){
         1132  +      if( nColTest==1 && pIdx->nKeyCol==1 && pIdx->onError!=OE_None ){
         1133  +        /* For a single-column UNIQUE index, once we have found a non-NULL
         1134  +        ** row, we know that all the rest will be distinct, so skip 
         1135  +        ** subsequent distinctness tests. */
         1136  +        sqlite3VdbeAddOp2(v, OP_NotNull, regPrev, endDistinctTest);
         1137  +        VdbeCoverage(v);
         1138  +      }
         1139  +      for(i=0; i<nColTest; i++){
  1113   1140         char *pColl = (char*)sqlite3LocateCollSeq(pParse, pIdx->azColl[i]);
  1114   1141         sqlite3VdbeAddOp2(v, OP_Integer, i, regChng);
  1115   1142         sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regTemp);
  1116   1143         aGotoChng[i] = 
  1117   1144         sqlite3VdbeAddOp4(v, OP_Ne, regTemp, 0, regPrev+i, pColl, P4_COLLSEQ);
  1118   1145         sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
  1119   1146         VdbeCoverage(v);
  1120   1147       }
  1121         -    sqlite3VdbeAddOp2(v, OP_Integer, nCol-1, regChng);
  1122         -    aGotoChng[nCol] = sqlite3VdbeAddOp0(v, OP_Goto);
         1148  +      sqlite3VdbeAddOp2(v, OP_Integer, nColTest, regChng);
         1149  +      sqlite3VdbeAddOp2(v, OP_Goto, 0, endDistinctTest);
         1150  +  
  1123   1151   
  1124   1152       /*
  1125   1153       **  chng_addr_0:
  1126   1154       **   regPrev(0) = idx(0)
  1127   1155       **  chng_addr_1:
  1128   1156       **   regPrev(1) = idx(1)
  1129   1157       **  ...
  1130   1158       */
  1131         -    sqlite3VdbeJumpHere(v, addrGotoChng0);
  1132         -    for(i=0; i<nCol-1; i++){
         1159  +      sqlite3VdbeJumpHere(v, addrNextRow-1);
         1160  +      for(i=0; i<nColTest; i++){
  1133   1161         sqlite3VdbeJumpHere(v, aGotoChng[i]);
  1134   1162         sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regPrev+i);
         1163  +      }
         1164  +      sqlite3VdbeResolveLabel(v, endDistinctTest);
         1165  +      sqlite3DbFree(db, aGotoChng);
  1135   1166       }
  1136   1167   
  1137   1168       /*
  1138   1169       **  chng_addr_N:
  1139   1170       **   regRowid = idx(rowid)            // STAT34 only
  1140   1171       **   stat_push(P, regChng, regRowid)  // 3rd parameter STAT34 only
  1141   1172       **   Next csr
  1142   1173       **   if !eof(csr) goto next_row;
  1143   1174       */
  1144         -    sqlite3VdbeJumpHere(v, aGotoChng[nCol]);
  1145   1175   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  1146   1176       assert( regRowid==(regStat4+2) );
  1147   1177       if( HasRowid(pTab) ){
  1148   1178         sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, regRowid);
  1149   1179       }else{
  1150   1180         Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable);
  1151   1181         int j, k, regKey;
................................................................................
  1215   1245         sqlite3VdbeAddOp2(v, OP_Goto, 1, addrNext); /* P1==1 for end-of-loop */
  1216   1246         sqlite3VdbeJumpHere(v, addrIsNull);
  1217   1247       }
  1218   1248   #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
  1219   1249   
  1220   1250       /* End of analysis */
  1221   1251       sqlite3VdbeJumpHere(v, addrRewind);
  1222         -    sqlite3DbFree(db, aGotoChng);
  1223   1252     }
  1224   1253   
  1225   1254   
  1226   1255     /* Create a single sqlite_stat1 entry containing NULL as the index
  1227   1256     ** name and the row count as the content.
  1228   1257     */
  1229   1258     if( pOnlyIdx==0 && needTableCnt ){