/ Check-in [3e1e79e1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve the performance of the ANALYZE command by taking advantage of the fact that every row of a UNIQUE index is distinct.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | faster-analyze
Files: files | file ages | folders
SHA1:3e1e79e1335f7ad33cd35f384f2a063c4aa2253b
User & Date: drh 2014-07-23 18:36:55
Context
2014-07-23
19:37
Ugh. Consecutive UNIQUE index entries are only distinct if the index is on NOT NULL columns. So the previous version was not quite right. This check-in fixes the problem. check-in: 30033f96 user: drh tags: faster-analyze
18:36
Improve the performance of the ANALYZE command by taking advantage of the fact that every row of a UNIQUE index is distinct. check-in: 3e1e79e1 user: drh tags: faster-analyze
15:51
Updated documentation on sqlite3_temp_directory. No changes to code. check-in: e6225a7b user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

367
368
369
370
371
372
373
374
375
376
377




378
379
380
381
382

383
384
385
386
387
388
389
...
685
686
687
688
689
690
691
692



693
694
695
696
697
698
699
...
779
780
781
782
783
784
785
786



787
788
789
790
791
792
793
...
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010

1011
1012
1013
1014
1015
1016

1017
1018
1019

1020
1021
1022
1023
1024
1025
1026
1027
1028
....
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074


1075
1076

1077
1078
1079
1080
1081
1082
1083
1084
....
1092
1093
1094
1095
1096
1097
1098
1099
1100

1101
1102
1103
1104
1105
1106
1107
1108
1109
1110

1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123

1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135

1136

1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
#endif
  sqlite3DbFree(p->db, p);
}

/*
** Implementation of the stat_init(N,K,C) SQL function. The three parameters
** are:
**     N:    The number of columns in the index including the rowid/pk
**     K:    The number of columns in the index excluding the rowid/pk
**     C:    The number of rows in the index
**




** C is only used for STAT3 and STAT4.
**
** For ordinary rowid tables, N==K+1.  But for WITHOUT ROWID tables,
** N=K+P where P is the number of columns in the primary key.  For the
** covering index that implements the original WITHOUT ROWID table, N==K.

**
** This routine allocates the Stat4Accum object in heap memory. The return 
** value is a pointer to the the Stat4Accum object encoded as a blob (i.e. 
** the size of the blob is sizeof(void*) bytes). 
*/
static void statInit(
  sqlite3_context *context,
................................................................................
** Arguments:
**
**    P     Pointer to the Stat4Accum object created by stat_init()
**    C     Index of left-most column to differ from previous row
**    R     Rowid for the current row.  Might be a key record for
**          WITHOUT ROWID tables.
**
** The SQL function always returns NULL.



**
** The R parameter is only used for STAT3 and STAT4
*/
static void statPush(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
................................................................................
#define STAT_GET_ROWID 1          /* "rowid" column of stat[34] entry */
#define STAT_GET_NEQ   2          /* "neq" column of stat[34] entry */
#define STAT_GET_NLT   3          /* "nlt" column of stat[34] entry */
#define STAT_GET_NDLT  4          /* "ndlt" column of stat[34] entry */

/*
** Implementation of the stat_get(P,J) SQL function.  This routine is
** used to query the results.  Content is returned for parameter J



** which is one of the STAT_GET_xxxx values defined above.
**
** If neither STAT3 nor STAT4 are enabled, then J is always
** STAT_GET_STAT1 and is hence omitted and this routine becomes
** a one-parameter function, stat_get(P), that always returns the
** stat1 table entry information.
*/
................................................................................
  iTabCur = iTab++;
  iIdxCur = iTab++;
  pParse->nTab = MAX(pParse->nTab, iTab);
  sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead);
  sqlite3VdbeAddOp4(v, OP_String8, 0, regTabname, 0, pTab->zName, 0);

  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
    int nCol;                     /* Number of columns indexed by pIdx */
    int *aGotoChng;               /* Array of jump instruction addresses */
    int addrRewind;               /* Address of "OP_Rewind iIdxCur" */
    int addrGotoChng0;            /* Address of "Goto addr_chng_0" */
    int addrNextRow;              /* Address of "next_row:" */
    const char *zIdxName;         /* Name of the index */


    if( pOnlyIdx && pOnlyIdx!=pIdx ) continue;
    if( pIdx->pPartIdxWhere==0 ) needTableCnt = 0;
    if( !HasRowid(pTab) && IsPrimaryKeyIndex(pIdx) ){
      nCol = pIdx->nKeyCol;
      zIdxName = pTab->zName;

    }else{
      nCol = pIdx->nColumn;
      zIdxName = pIdx->zName;

    }
    aGotoChng = sqlite3DbMallocRaw(db, sizeof(int)*(nCol+1));
    if( aGotoChng==0 ) continue;

    /* Populate the register containing the index name. */
    sqlite3VdbeAddOp4(v, OP_String8, 0, regIdxname, 0, zIdxName, 0);
    VdbeComment((v, "Analysis for %s.%s", pTab->zName, zIdxName));

    /*
................................................................................
    **  end_of_scan:
    */

    /* Make sure there are enough memory cells allocated to accommodate 
    ** the regPrev array and a trailing rowid (the rowid slot is required
    ** when building a record to insert into the sample column of 
    ** the sqlite_stat4 table.  */
    pParse->nMem = MAX(pParse->nMem, regPrev+nCol);

    /* Open a read-only cursor on the index being analyzed. */
    assert( iDb==sqlite3SchemaToIndex(db, pIdx->pSchema) );
    sqlite3VdbeAddOp3(v, OP_OpenRead, iIdxCur, pIdx->tnum, iDb);
    sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
    VdbeComment((v, "%s", pIdx->zName));

    /* Invoke the stat_init() function. The arguments are:
    ** 
    **    (1) the number of columns in the index including the rowid,


    **    (2) the number of rows in the index,
    **

    ** The second argument is only used for STAT3 and STAT4
    */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regStat4+3);
#endif
    sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1);
    sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2);
    sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4);
................................................................................
    **   regChng = 0
    **   goto next_push_0;
    **
    */
    addrRewind = sqlite3VdbeAddOp1(v, OP_Rewind, iIdxCur);
    VdbeCoverage(v);
    sqlite3VdbeAddOp2(v, OP_Integer, 0, regChng);
    addrGotoChng0 = sqlite3VdbeAddOp0(v, OP_Goto);


    /*
    **  next_row:
    **   regChng = 0
    **   if( idx(0) != regPrev(0) ) goto chng_addr_0
    **   regChng = 1
    **   if( idx(1) != regPrev(1) ) goto chng_addr_1
    **   ...
    **   regChng = N
    **   goto chng_addr_N
    */

    addrNextRow = sqlite3VdbeCurrentAddr(v);
    for(i=0; i<nCol-1; i++){
      char *pColl = (char*)sqlite3LocateCollSeq(pParse, pIdx->azColl[i]);
      sqlite3VdbeAddOp2(v, OP_Integer, i, regChng);
      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regTemp);
      aGotoChng[i] = 
      sqlite3VdbeAddOp4(v, OP_Ne, regTemp, 0, regPrev+i, pColl, P4_COLLSEQ);
      sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
      VdbeCoverage(v);
    }
    sqlite3VdbeAddOp2(v, OP_Integer, nCol-1, regChng);
    aGotoChng[nCol] = sqlite3VdbeAddOp0(v, OP_Goto);


    /*
    **  chng_addr_0:
    **   regPrev(0) = idx(0)
    **  chng_addr_1:
    **   regPrev(1) = idx(1)
    **  ...
    */
    sqlite3VdbeJumpHere(v, addrGotoChng0);
    for(i=0; i<nCol-1; i++){
      sqlite3VdbeJumpHere(v, aGotoChng[i]);
      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regPrev+i);
    }



    /*
    **  chng_addr_N:
    **   regRowid = idx(rowid)            // STAT34 only
    **   stat_push(P, regChng, regRowid)  // 3rd parameter STAT34 only
    **   Next csr
    **   if !eof(csr) goto next_row;
    */
    sqlite3VdbeJumpHere(v, aGotoChng[nCol]);
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    assert( regRowid==(regStat4+2) );
    if( HasRowid(pTab) ){
      sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, regRowid);
    }else{
      Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable);
      int j, k, regKey;







|
|
|

>
>
>
>
|

|
|
|
>







 







|
>
>
>







 







|
>
>
>







 







|


<


>






>



>

|







 







|









|
>
>
|

>
|







 







|

>
|
|
|
|
|
|
|
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
>
|
>







<







367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
...
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
...
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
....
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018

1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
....
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
....
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164

1165
1166
1167
1168
1169
1170
1171
#endif
  sqlite3DbFree(p->db, p);
}

/*
** Implementation of the stat_init(N,K,C) SQL function. The three parameters
** are:
**     N:    The number of columns in the index including the rowid/pk (note 1)
**     K:    The number of columns in the index excluding the rowid/pk.
**     C:    The number of rows in the index (note 2)
**
** Note 1:  In the special case of the covering index that implements a
** WITHOUT ROWID table, N is the number of PRIMARY KEY columns, not the
** total number of columns in the table.
**
** Note 2:  C is only used for STAT3 and STAT4.
**
** For indexes on ordinary rowid tables, N==K+1.  But for indexes on
** WITHOUT ROWID tables, N=K+P where P is the number of columns in the
** PRIMARY KEY of the table.  The covering index that implements the
** original WITHOUT ROWID table as N==K as a special case.
**
** This routine allocates the Stat4Accum object in heap memory. The return 
** value is a pointer to the the Stat4Accum object encoded as a blob (i.e. 
** the size of the blob is sizeof(void*) bytes). 
*/
static void statInit(
  sqlite3_context *context,
................................................................................
** Arguments:
**
**    P     Pointer to the Stat4Accum object created by stat_init()
**    C     Index of left-most column to differ from previous row
**    R     Rowid for the current row.  Might be a key record for
**          WITHOUT ROWID tables.
**
** This SQL function always returns NULL.  It's purpose it to accumulate
** statistical data and/or samples in the Stat4Accum object about the
** index being analyzed.  The stat_get() SQL function will later be used to
** extract relevant information for constructing the sqlite_statN tables.
**
** The R parameter is only used for STAT3 and STAT4
*/
static void statPush(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
................................................................................
#define STAT_GET_ROWID 1          /* "rowid" column of stat[34] entry */
#define STAT_GET_NEQ   2          /* "neq" column of stat[34] entry */
#define STAT_GET_NLT   3          /* "nlt" column of stat[34] entry */
#define STAT_GET_NDLT  4          /* "ndlt" column of stat[34] entry */

/*
** Implementation of the stat_get(P,J) SQL function.  This routine is
** used to query statistical information that has been gathered into
** the Stat4Accum object by prior calls to stat_push().  The P parameter
** is a BLOB which is decoded into a pointer to the Stat4Accum objects.
** The content to returned is determined by the parameter J
** which is one of the STAT_GET_xxxx values defined above.
**
** If neither STAT3 nor STAT4 are enabled, then J is always
** STAT_GET_STAT1 and is hence omitted and this routine becomes
** a one-parameter function, stat_get(P), that always returns the
** stat1 table entry information.
*/
................................................................................
  iTabCur = iTab++;
  iIdxCur = iTab++;
  pParse->nTab = MAX(pParse->nTab, iTab);
  sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead);
  sqlite3VdbeAddOp4(v, OP_String8, 0, regTabname, 0, pTab->zName, 0);

  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
    int nCol;                     /* Number of columns in pIdx. "N" */
    int *aGotoChng;               /* Array of jump instruction addresses */
    int addrRewind;               /* Address of "OP_Rewind iIdxCur" */

    int addrNextRow;              /* Address of "next_row:" */
    const char *zIdxName;         /* Name of the index */
    int nColTest;                 /* Number of columns to test for changes */

    if( pOnlyIdx && pOnlyIdx!=pIdx ) continue;
    if( pIdx->pPartIdxWhere==0 ) needTableCnt = 0;
    if( !HasRowid(pTab) && IsPrimaryKeyIndex(pIdx) ){
      nCol = pIdx->nKeyCol;
      zIdxName = pTab->zName;
      nColTest = nCol - 1;
    }else{
      nCol = pIdx->nColumn;
      zIdxName = pIdx->zName;
      nColTest = pIdx->onError==OE_None ? nCol-1 : pIdx->nKeyCol-1;
    }
    aGotoChng = sqlite3DbMallocRaw(db, sizeof(int)*(nColTest+1));
    if( aGotoChng==0 ) continue;

    /* Populate the register containing the index name. */
    sqlite3VdbeAddOp4(v, OP_String8, 0, regIdxname, 0, zIdxName, 0);
    VdbeComment((v, "Analysis for %s.%s", pTab->zName, zIdxName));

    /*
................................................................................
    **  end_of_scan:
    */

    /* Make sure there are enough memory cells allocated to accommodate 
    ** the regPrev array and a trailing rowid (the rowid slot is required
    ** when building a record to insert into the sample column of 
    ** the sqlite_stat4 table.  */
    pParse->nMem = MAX(pParse->nMem, regPrev+nColTest);

    /* Open a read-only cursor on the index being analyzed. */
    assert( iDb==sqlite3SchemaToIndex(db, pIdx->pSchema) );
    sqlite3VdbeAddOp3(v, OP_OpenRead, iIdxCur, pIdx->tnum, iDb);
    sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
    VdbeComment((v, "%s", pIdx->zName));

    /* Invoke the stat_init() function. The arguments are:
    ** 
    **    (1) the number of columns in the index including the rowid
    **        (or for a WITHOUT ROWID table, the number of PK columns),
    **    (2) the number of columns in the key without the rowid/pk
    **    (3) the number of rows in the index,
    **
    **
    ** The third argument is only used for STAT3 and STAT4
    */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regStat4+3);
#endif
    sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1);
    sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2);
    sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4);
................................................................................
    **   regChng = 0
    **   goto next_push_0;
    **
    */
    addrRewind = sqlite3VdbeAddOp1(v, OP_Rewind, iIdxCur);
    VdbeCoverage(v);
    sqlite3VdbeAddOp2(v, OP_Integer, 0, regChng);
    addrNextRow = sqlite3VdbeCurrentAddr(v);

    if( nColTest>0 ){
      /*
      **  next_row:
      **   regChng = 0
      **   if( idx(0) != regPrev(0) ) goto chng_addr_0
      **   regChng = 1
      **   if( idx(1) != regPrev(1) ) goto chng_addr_1
      **   ...
      **   regChng = N
      **   goto chng_addr_N
      */
      sqlite3VdbeAddOp0(v, OP_Goto);
      addrNextRow = sqlite3VdbeCurrentAddr(v);
      for(i=0; i<nColTest; i++){
        char *pColl = (char*)sqlite3LocateCollSeq(pParse, pIdx->azColl[i]);
        sqlite3VdbeAddOp2(v, OP_Integer, i, regChng);
        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regTemp);
        aGotoChng[i] = 
        sqlite3VdbeAddOp4(v, OP_Ne, regTemp, 0, regPrev+i, pColl, P4_COLLSEQ);
        sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
        VdbeCoverage(v);
      }
      sqlite3VdbeAddOp2(v, OP_Integer, nColTest, regChng);
      aGotoChng[nColTest] = sqlite3VdbeAddOp0(v, OP_Goto);
  
  
      /*
      **  chng_addr_0:
      **   regPrev(0) = idx(0)
      **  chng_addr_1:
      **   regPrev(1) = idx(1)
      **  ...
      */
      sqlite3VdbeJumpHere(v, addrNextRow-1);
      for(i=0; i<nColTest; i++){
        sqlite3VdbeJumpHere(v, aGotoChng[i]);
        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regPrev+i);
      }
      sqlite3VdbeJumpHere(v, aGotoChng[nColTest]);
    }
  
    /*
    **  chng_addr_N:
    **   regRowid = idx(rowid)            // STAT34 only
    **   stat_push(P, regChng, regRowid)  // 3rd parameter STAT34 only
    **   Next csr
    **   if !eof(csr) goto next_row;
    */

#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    assert( regRowid==(regStat4+2) );
    if( HasRowid(pTab) ){
      sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, regRowid);
    }else{
      Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable);
      int j, k, regKey;