/ Check-in [13ed5ac1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the way samples for the sqlite_stat4 table are collected.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | sqlite_stat4
Files: files | file ages | folders
SHA1: 13ed5ac13562e7a39905d70fd47059f4d8001bba
User & Date: dan 2013-08-07 16:15:32
Context
2013-08-07
16:38
Fix typos in a comment in analyze.c. No code changes. check-in: 812ed0c5 user: dan tags: sqlite_stat4
16:15
Change the way samples for the sqlite_stat4 table are collected. check-in: 13ed5ac1 user: dan tags: sqlite_stat4
16:04
Fix the ".dump" command on the command-line shell so that it works for "sqlite_stat4" in addition to "sqlite_stat1". check-in: 1e80c4b1 user: drh tags: sqlite_stat4
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

   214    214   ** share an instance of the following structure to hold their state
   215    215   ** information.
   216    216   */
   217    217   typedef struct Stat4Accum Stat4Accum;
   218    218   struct Stat4Accum {
   219    219     tRowcnt nRow;             /* Number of rows in the entire table */
   220    220     tRowcnt nPSample;         /* How often to do a periodic sample */
   221         -  int iMin;                 /* Index of entry with minimum nSumEq and hash */
          221  +  int iMin;                 /* Index of entry with minimum nEq and hash */
   222    222     int mxSample;             /* Maximum number of samples to accumulate */
   223    223     int nSample;              /* Current number of samples */
   224    224     int nCol;                 /* Number of columns in the index */
   225    225     u32 iPrn;                 /* Pseudo-random number used for sampling */
   226    226     struct Stat4Sample {
   227    227       i64 iRowid;                /* Rowid in main table of the key */
   228         -    tRowcnt nSumEq;            /* Sum of anEq[] values */
   229    228       tRowcnt *anEq;             /* sqlite_stat4.nEq */
   230    229       tRowcnt *anLt;             /* sqlite_stat4.nLt */
   231    230       tRowcnt *anDLt;            /* sqlite_stat4.nDLt */
   232    231       u8 isPSample;              /* True if a periodic sample */
   233    232       u32 iHash;                 /* Tiebreaker hash */
   234    233     } *a;                     /* An array of samples */
   235    234   };
................................................................................
   345    344   
   346    345     UNUSED_PARAMETER(context);
   347    346     UNUSED_PARAMETER(argc);
   348    347   
   349    348     assert( p->nCol>0 );
   350    349     assert( argc==(2 + 3*p->nCol) );
   351    350   
   352         -  /* Set nSumEq to the sum of all nEq parameters. */
   353         -  for(i=0; i<p->nCol; i++){
   354         -    nSumEq += sqlite3_value_int64(aEq[i]);
   355         -  }
   356         -  if( nSumEq==0 ) return;
   357         -
   358         -  /* Figure out if this sample will be used. Set isPSample to true if this
   359         -  ** is a periodic sample, or false if it is being captured because of a
   360         -  ** large nSumEq value. If the sample will not be used, return early.  */
          351  +  /* Figure out if this sample will be used. There are two reasons a
          352  +  ** sample may be used:
          353  +  **
          354  +  **   1. It may be a periodic sample. In this case set isPSample to true
          355  +  **      as well. Or,
          356  +  **
          357  +  **   2. Less than p->mxSample samples have been collected so far, or
          358  +  **
          359  +  **   3. It is more desirable than some other non-periodic sample that has
          360  +  **      already been collected. Samples are compared based on the values
          361  +  **      in the anEq array, starting from last (right-most index column)
          362  +  **      to first (left-most index column). If all elements of the anEq
          363  +  **      array are equal, samples are compared by hash value.
          364  +  */
   361    365     h = p->iPrn = p->iPrn*1103515245 + 12345;
   362    366     if( (nLt/p->nPSample)!=((nEq+nLt)/p->nPSample) ){
   363    367       doInsert = isPSample = 1;
   364         -  }else if( (p->nSample<p->mxSample)
   365         -         || (nSumEq>p->a[iMin].nSumEq)
   366         -         || (nSumEq==p->a[iMin].nSumEq && h>p->a[iMin].iHash) 
   367         -  ){
          368  +  }else if( p->nSample<p->mxSample ){
   368    369       doInsert = 1;
          370  +  }else{
          371  +    tRowcnt *aMinEq = p->a[iMin].anEq;
          372  +    for(i=p->nCol-1; i>=0; i--){
          373  +      i64 nEq = sqlite3_value_int64(aEq[i]);
          374  +      if( nEq<aMinEq[i] ) break;
          375  +      if( nEq>aMinEq[i] ){
          376  +        doInsert = 1;
          377  +        break;
          378  +      }
          379  +    }
          380  +    if( i<0 && h>p->a[iMin].iHash ){
          381  +      doInsert = 1;
          382  +    }
   369    383     }
   370    384     if( !doInsert ) return;
   371    385   
   372    386     /* Fill in the new Stat4Sample object. */
   373    387     if( p->nSample==p->mxSample ){
   374    388       struct Stat4Sample *pMin = &p->a[iMin];
   375    389       tRowcnt *anEq = pMin->anEq;
................................................................................
   383    397       pSample->anLt = anLt;
   384    398     }else{
   385    399       pSample = &p->a[p->nSample++];
   386    400     }
   387    401     pSample->iRowid = rowid;
   388    402     pSample->iHash = h;
   389    403     pSample->isPSample = isPSample;
   390         -  pSample->nSumEq = nSumEq;
   391    404     for(i=0; i<p->nCol; i++){
   392    405       pSample->anEq[i] = sqlite3_value_int64(aEq[i]);
   393    406       pSample->anLt[i] = sqlite3_value_int64(aLt[i]);
   394    407       pSample->anDLt[i] = sqlite3_value_int64(aDLt[i])-1;
   395    408       assert( sqlite3_value_int64(aDLt[i])>0 );
   396    409     } 
   397    410   
   398    411     /* Find the new minimum */
   399    412     if( p->nSample==p->mxSample ){
   400         -    u32 iHash = 0;                /* Hash corresponding to iMin/nSumEq entry */
   401         -    i64 nMinEq = LARGEST_INT64;   /* Smallest nSumEq seen so far */
   402         -    assert( iMin = -1 );
   403         -
          413  +    iMin = -1;
   404    414       for(i=0; i<p->mxSample; i++){
   405    415         if( p->a[i].isPSample ) continue;
   406         -      if( (p->a[i].nSumEq<nMinEq)
   407         -       || (p->a[i].nSumEq==nMinEq && p->a[i].iHash<iHash)
   408         -      ){
          416  +      if( iMin<0 ){
   409    417           iMin = i;
   410         -        nMinEq = p->a[i].nSumEq;
   411         -        iHash = p->a[i].iHash;
          418  +      }else{
          419  +        int j;
          420  +        for(j=p->nCol-1; j>=0; j++){
          421  +          i64 iCmp = (p->a[iMin].anEq[j] - p->a[i].anEq[j]);
          422  +          if( iCmp<0 ){ iMin = i; }
          423  +          if( iCmp ) break;
          424  +        }
          425  +        if( j==0 && p->a[iMin].iHash<p->a[i].iHash ){
          426  +          iMin = i;
          427  +        }
   412    428         }
   413    429       }
   414    430       assert( iMin>=0 );
   415    431       p->iMin = iMin;
   416    432     }
   417    433   }
   418    434   static const FuncDef stat4PushFuncdef = {