/ Check-in [5a0143c9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modify the ANALYZE command to store worst-case statistics in sqlite_stat1, rather thn average case.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | analyze-worst-case
Files: files | file ages | folders
SHA1:5a0143c94ec0682798f3c09fba63593e695d2e2d
User & Date: drh 2016-02-29 18:30:30
Context
2016-02-29
21:27
The ANALYZE command automatically appends "noskipscan" to sqlite_stat1 entries that have large worst-case repeat estimates but small average repeat estimates. check-in: 6326ba58 user: drh tags: analyze-worst-case
18:30
Modify the ANALYZE command to store worst-case statistics in sqlite_stat1, rather thn average case. check-in: 5a0143c9 user: drh tags: analyze-worst-case
17:34
Fix a problem in fts5 where a corrupt db could lead to a (huge) buffer overread. check-in: c9a30e11 user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

263
264
265
266
267
268
269

270
271
272
273
274
275
276
...
414
415
416
417
418
419
420
421
422

423
424
425
426
427
428
429
...
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449

450
451
452
453
454
455
456
457
...
717
718
719
720
721
722
723
724



725
726
727
728
729
730
731

732
733
734
735
736

737
738
739
740
741
742
743
...
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
** information.
*/
typedef struct Stat4Accum Stat4Accum;
typedef struct Stat4Sample Stat4Sample;
struct Stat4Sample {
  tRowcnt *anEq;                  /* sqlite_stat4.nEq */
  tRowcnt *anDLt;                 /* sqlite_stat4.nDLt */

#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  tRowcnt *anLt;                  /* sqlite_stat4.nLt */
  union {
    i64 iRowid;                     /* Rowid in main table of the key */
    u8 *aRowid;                     /* Key for WITHOUT ROWID tables */
  } u;
  u32 nRowid;                     /* Sizeof aRowid[] */
................................................................................
  nKeyCol = sqlite3_value_int(argv[1]);
  assert( nKeyCol<=nCol );
  assert( nKeyCol>0 );

  /* Allocate the space required for the Stat4Accum object */
  n = sizeof(*p) 
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anEq */
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anDLt */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4

    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anLt */
    + sizeof(Stat4Sample)*(nCol+mxSample)     /* Stat4Accum.aBest[], a[] */
    + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample)
#endif
  ;
  db = sqlite3_context_db_handle(context);
  p = sqlite3DbMallocZero(db, n);
................................................................................
    return;
  }

  p->db = db;
  p->nRow = 0;
  p->nCol = nCol;
  p->nKeyCol = nKeyCol;
  p->current.anDLt = (tRowcnt*)&p[1];
  p->current.anEq = &p->current.anDLt[nColUp];

#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  {
    u8 *pSpace;                     /* Allocated space not yet assigned */
    int i;                          /* Used to iterate through p->aSample[] */

    p->iGet = -1;
    p->mxSample = mxSample;
    p->nPSample = (tRowcnt)(sqlite3_value_int64(argv[2])/(mxSample/3+1) + 1);

    p->current.anLt = &p->current.anEq[nColUp];
    p->iPrn = 0x689e962d*(u32)nCol ^ 0xd0944565*(u32)sqlite3_value_int(argv[2]);
  
    /* Set up the Stat4Accum.a[] and aBest[] arrays */
    p->a = (struct Stat4Sample*)&p->current.anLt[nColUp];
    p->aBest = &p->a[mxSample];
    pSpace = (u8*)(&p->a[mxSample+nCol]);
    for(i=0; i<(mxSample+nCol); i++){
................................................................................
  UNUSED_PARAMETER( argc );
  UNUSED_PARAMETER( context );
  assert( p->nCol>0 );
  assert( iChng<p->nCol );

  if( p->nRow==0 ){
    /* This is the first call to this function. Do initialization. */
    for(i=0; i<p->nCol; i++) p->current.anEq[i] = 1;



  }else{
    /* Second and subsequent calls get processed here */
    samplePushPrevious(p, iChng);

    /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply
    ** to the current row of the index. */
    for(i=0; i<iChng; i++){

      p->current.anEq[i]++;
    }
    for(i=iChng; i<p->nCol; i++){
      p->current.anDLt[i]++;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4

      p->current.anLt[i] += p->current.anEq[i];
#endif
      p->current.anEq[i] = 1;
    }
  }
  p->nRow++;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
................................................................................
    /* Return the value to store in the "stat" column of the sqlite_stat1
    ** table for this index.
    **
    ** The value is a string composed of a list of integers describing 
    ** the index. The first integer in the list is the total number of 
    ** entries in the index. There is one additional integer in the list 
    ** for each indexed column. This additional integer is an estimate of
    ** the number of rows matched by a stabbing query on the index using
    ** a key with the corresponding number of fields. In other words,
    ** if the index is on columns (a,b) and the sqlite_stat1 value is 
    ** "100 10 2", then SQLite estimates that:
    **
    **   * the index contains 100 rows,
    **   * "WHERE a=?" matches 10 rows, and
    **   * "WHERE a=? AND b=?" matches 2 rows.
    **
    ** If D is the count of distinct values and K is the total number of 
    ** rows, then each estimate is computed as:
    **
    **        I = (K+D-1)/D
    */
    char *z;
    int i;

    char *zRet = sqlite3MallocZero( (p->nKeyCol+1)*25 );
    if( zRet==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }

    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
    z = zRet + sqlite3Strlen30(zRet);
    for(i=0; i<p->nKeyCol; i++){
      u64 nDistinct = p->current.anDLt[i] + 1;
      u64 iVal = (p->nRow + nDistinct - 1) / nDistinct;
      sqlite3_snprintf(24, z, " %llu", iVal);
      z += sqlite3Strlen30(z);
      assert( p->current.anEq[i] );
    }
    assert( z[0]=='\0' && z>zRet );

    sqlite3_result_text(context, zRet, -1, sqlite3_free);







>







 







|

>







 







|
|




|




>
|







 







|
>
>
>







>



<

>







 







|








|
|
<
<













|
<







263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
...
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
...
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
...
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741

742
743
744
745
746
747
748
749
750
...
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848


849
850
851
852
853
854
855
856
857
858
859
860
861
862

863
864
865
866
867
868
869
** information.
*/
typedef struct Stat4Accum Stat4Accum;
typedef struct Stat4Sample Stat4Sample;
struct Stat4Sample {
  tRowcnt *anEq;                  /* sqlite_stat4.nEq */
  tRowcnt *anDLt;                 /* sqlite_stat4.nDLt */
  tRowcnt *amxEq;                 /* Maximum length run of equal values */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  tRowcnt *anLt;                  /* sqlite_stat4.nLt */
  union {
    i64 iRowid;                     /* Rowid in main table of the key */
    u8 *aRowid;                     /* Key for WITHOUT ROWID tables */
  } u;
  u32 nRowid;                     /* Sizeof aRowid[] */
................................................................................
  nKeyCol = sqlite3_value_int(argv[1]);
  assert( nKeyCol<=nCol );
  assert( nKeyCol>0 );

  /* Allocate the space required for the Stat4Accum object */
  n = sizeof(*p) 
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anEq */
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.amxEq */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anDLt */
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anLt */
    + sizeof(Stat4Sample)*(nCol+mxSample)     /* Stat4Accum.aBest[], a[] */
    + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample)
#endif
  ;
  db = sqlite3_context_db_handle(context);
  p = sqlite3DbMallocZero(db, n);
................................................................................
    return;
  }

  p->db = db;
  p->nRow = 0;
  p->nCol = nCol;
  p->nKeyCol = nKeyCol;
  p->current.anEq = (tRowcnt*)&p[1];
  p->current.amxEq = &p->current.anEq[nColUp];

#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  {
    u8 *pSpace;                     /* Allocated space not yet assigned */
    int i;                          /* Loop counter */

    p->iGet = -1;
    p->mxSample = mxSample;
    p->nPSample = (tRowcnt)(sqlite3_value_int64(argv[2])/(mxSample/3+1) + 1);
    p->current.anDLt = &p->current.amxEq[nColUp];
    p->current.anLt = &p->current.anDLt[nColUp];
    p->iPrn = 0x689e962d*(u32)nCol ^ 0xd0944565*(u32)sqlite3_value_int(argv[2]);
  
    /* Set up the Stat4Accum.a[] and aBest[] arrays */
    p->a = (struct Stat4Sample*)&p->current.anLt[nColUp];
    p->aBest = &p->a[mxSample];
    pSpace = (u8*)(&p->a[mxSample+nCol]);
    for(i=0; i<(mxSample+nCol); i++){
................................................................................
  UNUSED_PARAMETER( argc );
  UNUSED_PARAMETER( context );
  assert( p->nCol>0 );
  assert( iChng<p->nCol );

  if( p->nRow==0 ){
    /* This is the first call to this function. Do initialization. */
    for(i=0; i<p->nCol; i++){
      p->current.anEq[i] = 1;
      p->current.amxEq[i] = 1;
    }
  }else{
    /* Second and subsequent calls get processed here */
    samplePushPrevious(p, iChng);

    /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply
    ** to the current row of the index. */
    for(i=0; i<iChng; i++){
      if( p->current.amxEq[i]==p->current.anEq[i] ) p->current.amxEq[i]++;
      p->current.anEq[i]++;
    }
    for(i=iChng; i<p->nCol; i++){

#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
      p->current.anDLt[i]++;
      p->current.anLt[i] += p->current.anEq[i];
#endif
      p->current.anEq[i] = 1;
    }
  }
  p->nRow++;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
................................................................................
    /* Return the value to store in the "stat" column of the sqlite_stat1
    ** table for this index.
    **
    ** The value is a string composed of a list of integers describing 
    ** the index. The first integer in the list is the total number of 
    ** entries in the index. There is one additional integer in the list 
    ** for each indexed column. This additional integer is an estimate of
    ** the number of rows matched by a query on the index using
    ** a key with the corresponding number of fields. In other words,
    ** if the index is on columns (a,b) and the sqlite_stat1 value is 
    ** "100 10 2", then SQLite estimates that:
    **
    **   * the index contains 100 rows,
    **   * "WHERE a=?" matches 10 rows, and
    **   * "WHERE a=? AND b=?" matches 2 rows.
    **
    ** Use the worst-case estimate: the maximum number of repeated entries
    ** in the index.


    */
    char *z;
    int i;

    char *zRet = sqlite3MallocZero( (p->nKeyCol+1)*25 );
    if( zRet==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }

    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
    z = zRet + sqlite3Strlen30(zRet);
    for(i=0; i<p->nKeyCol; i++){
      u64 iVal = p->current.amxEq[i];

      sqlite3_snprintf(24, z, " %llu", iVal);
      z += sqlite3Strlen30(z);
      assert( p->current.anEq[i] );
    }
    assert( z[0]=='\0' && z>zRet );

    sqlite3_result_text(context, zRet, -1, sqlite3_free);