/ Check-in [6326ba58]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The ANALYZE command automatically appends "noskipscan" to sqlite_stat1 entries that have large worst-case repeat estimates but small average repeat estimates.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | analyze-worst-case
Files: files | file ages | folders
SHA1: 6326ba5891fae9c6be0c0c51cebcbe44c9f3f057
User & Date: drh 2016-02-29 21:27:16
Context
2016-02-29
23:02
Improvements to the logic for adding the "noskipscan" flag to stat1 entries. check-in: 421b5b54 user: drh tags: analyze-worst-case
21:27
The ANALYZE command automatically appends "noskipscan" to sqlite_stat1 entries that have large worst-case repeat estimates but small average repeat estimates. check-in: 6326ba58 user: drh tags: analyze-worst-case
18:30
Modify the ANALYZE command to store worst-case statistics in sqlite_stat1, rather thn average case. check-in: 5a0143c9 user: drh tags: analyze-worst-case
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

416
417
418
419
420
421
422
423
424

425
426
427
428
429
430
431
...
436
437
438
439
440
441
442

443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
...
735
736
737
738
739
740
741
742
743

744
745
746
747
748
749
750
...
841
842
843
844
845
846
847
848





849
850
851

852
853
854
855
856
857
858
859
860
861
862
863
864
865




866
867
868

869
870
871
872
873
874
875
  assert( nKeyCol<=nCol );
  assert( nKeyCol>0 );

  /* Allocate the space required for the Stat4Accum object */
  n = sizeof(*p) 
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anEq */
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.amxEq */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anDLt */

    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anLt */
    + sizeof(Stat4Sample)*(nCol+mxSample)     /* Stat4Accum.aBest[], a[] */
    + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample)
#endif
  ;
  db = sqlite3_context_db_handle(context);
  p = sqlite3DbMallocZero(db, n);
................................................................................

  p->db = db;
  p->nRow = 0;
  p->nCol = nCol;
  p->nKeyCol = nKeyCol;
  p->current.anEq = (tRowcnt*)&p[1];
  p->current.amxEq = &p->current.anEq[nColUp];


#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  {
    u8 *pSpace;                     /* Allocated space not yet assigned */
    int i;                          /* Loop counter */

    p->iGet = -1;
    p->mxSample = mxSample;
    p->nPSample = (tRowcnt)(sqlite3_value_int64(argv[2])/(mxSample/3+1) + 1);
    p->current.anDLt = &p->current.amxEq[nColUp];
    p->current.anLt = &p->current.anDLt[nColUp];
    p->iPrn = 0x689e962d*(u32)nCol ^ 0xd0944565*(u32)sqlite3_value_int(argv[2]);
  
    /* Set up the Stat4Accum.a[] and aBest[] arrays */
    p->a = (struct Stat4Sample*)&p->current.anLt[nColUp];
    p->aBest = &p->a[mxSample];
    pSpace = (u8*)(&p->a[mxSample+nCol]);
................................................................................
    /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply
    ** to the current row of the index. */
    for(i=0; i<iChng; i++){
      if( p->current.amxEq[i]==p->current.anEq[i] ) p->current.amxEq[i]++;
      p->current.anEq[i]++;
    }
    for(i=iChng; i<p->nCol; i++){
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
      p->current.anDLt[i]++;

      p->current.anLt[i] += p->current.anEq[i];
#endif
      p->current.anEq[i] = 1;
    }
  }
  p->nRow++;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
................................................................................
    ** "100 10 2", then SQLite estimates that:
    **
    **   * the index contains 100 rows,
    **   * "WHERE a=?" matches 10 rows, and
    **   * "WHERE a=? AND b=?" matches 2 rows.
    **
    ** Use the worst-case estimate: the maximum number of repeated entries
    ** in the index.





    */
    char *z;
    int i;


    char *zRet = sqlite3MallocZero( (p->nKeyCol+1)*25 );
    if( zRet==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }

    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
    z = zRet + sqlite3Strlen30(zRet);
    for(i=0; i<p->nKeyCol; i++){
      u64 iVal = p->current.amxEq[i];
      sqlite3_snprintf(24, z, " %llu", iVal);
      z += sqlite3Strlen30(z);
      assert( p->current.anEq[i] );




    }
    assert( z[0]=='\0' && z>zRet );


    sqlite3_result_text(context, zRet, -1, sqlite3_free);
  }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  else if( eCall==STAT_GET_ROWID ){
    if( p->iGet<0 ){
      samplePushPrevious(p, 0);
      p->iGet = 0;







<

>







 







>









<







 







<

>







 







|
>
>
>
>
>



>

|












>
>
>
>
|
<
|
>







416
417
418
419
420
421
422

423
424
425
426
427
428
429
430
431
...
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452

453
454
455
456
457
458
459
...
735
736
737
738
739
740
741

742
743
744
745
746
747
748
749
750
...
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876

877
878
879
880
881
882
883
884
885
  assert( nKeyCol<=nCol );
  assert( nKeyCol>0 );

  /* Allocate the space required for the Stat4Accum object */
  n = sizeof(*p) 
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anEq */
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.amxEq */

    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anDLt */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anLt */
    + sizeof(Stat4Sample)*(nCol+mxSample)     /* Stat4Accum.aBest[], a[] */
    + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample)
#endif
  ;
  db = sqlite3_context_db_handle(context);
  p = sqlite3DbMallocZero(db, n);
................................................................................

  p->db = db;
  p->nRow = 0;
  p->nCol = nCol;
  p->nKeyCol = nKeyCol;
  p->current.anEq = (tRowcnt*)&p[1];
  p->current.amxEq = &p->current.anEq[nColUp];
  p->current.anDLt = &p->current.amxEq[nColUp];

#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  {
    u8 *pSpace;                     /* Allocated space not yet assigned */
    int i;                          /* Loop counter */

    p->iGet = -1;
    p->mxSample = mxSample;
    p->nPSample = (tRowcnt)(sqlite3_value_int64(argv[2])/(mxSample/3+1) + 1);

    p->current.anLt = &p->current.anDLt[nColUp];
    p->iPrn = 0x689e962d*(u32)nCol ^ 0xd0944565*(u32)sqlite3_value_int(argv[2]);
  
    /* Set up the Stat4Accum.a[] and aBest[] arrays */
    p->a = (struct Stat4Sample*)&p->current.anLt[nColUp];
    p->aBest = &p->a[mxSample];
    pSpace = (u8*)(&p->a[mxSample+nCol]);
................................................................................
    /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply
    ** to the current row of the index. */
    for(i=0; i<iChng; i++){
      if( p->current.amxEq[i]==p->current.anEq[i] ) p->current.amxEq[i]++;
      p->current.anEq[i]++;
    }
    for(i=iChng; i<p->nCol; i++){

      p->current.anDLt[i]++;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
      p->current.anLt[i] += p->current.anEq[i];
#endif
      p->current.anEq[i] = 1;
    }
  }
  p->nRow++;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
................................................................................
    ** "100 10 2", then SQLite estimates that:
    **
    **   * the index contains 100 rows,
    **   * "WHERE a=?" matches 10 rows, and
    **   * "WHERE a=? AND b=?" matches 2 rows.
    **
    ** Use the worst-case estimate: the maximum number of repeated entries
    ** in the index.  The worst-case estimate is best for picking indexes.
    ** But for skip-scan, we want an average case estimate.  The worst-case
    ** estimate might be too high.  To avoid undesirable skip-scans, if the
    ** worst-case estimate is above the WHERE_SKIPSCAN_ONSET but the average
    ** estimate is below, simply disable skipscans on this index by adding
    ** the "noskipscan" modifier onto the end of the stat line.
    */
    char *z;
    int i;
    int noSkipScan = 0;

    char *zRet = sqlite3MallocZero( (p->nKeyCol+2)*25 );
    if( zRet==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }

    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
    z = zRet + sqlite3Strlen30(zRet);
    for(i=0; i<p->nKeyCol; i++){
      u64 iVal = p->current.amxEq[i];
      sqlite3_snprintf(24, z, " %llu", iVal);
      z += sqlite3Strlen30(z);
      assert( p->current.anEq[i] );
      if( i>0 && iVal>=WHERE_SKIPSCAN_ONSET+5 ){
        iVal = p->current.anDLt[i];
        iVal = (p->nRow + iVal)/(iVal + 1);
        if( iVal<WHERE_SKIPSCAN_ONSET-5 ) noSkipScan = 1;
      }

    }
    if( noSkipScan ) sqlite3_snprintf(14, z, " noskipscan");
    sqlite3_result_text(context, zRet, -1, sqlite3_free);
  }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  else if( eCall==STAT_GET_ROWID ){
    if( p->iGet<0 ){
      samplePushPrevious(p, 0);
      p->iGet = 0;

Changes to src/sqliteInt.h.

2418
2419
2420
2421
2422
2423
2424









2425
2426
2427
2428
2429
2430
2431
#define JT_CROSS     0x0002    /* Explicit use of the CROSS keyword */
#define JT_NATURAL   0x0004    /* True for a "natural" join */
#define JT_LEFT      0x0008    /* Left outer join */
#define JT_RIGHT     0x0010    /* Right outer join */
#define JT_OUTER     0x0020    /* The "OUTER" keyword is present */
#define JT_ERROR     0x0040    /* unknown or unsupported join type */











/*
** Flags appropriate for the wctrlFlags parameter of sqlite3WhereBegin()
** and the WhereInfo.wctrlFlags member.
*/
#define WHERE_ORDERBY_NORMAL   0x0000 /* No-op */
#define WHERE_ORDERBY_MIN      0x0001 /* ORDER BY processing for min() func */







>
>
>
>
>
>
>
>
>







2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
#define JT_CROSS     0x0002    /* Explicit use of the CROSS keyword */
#define JT_NATURAL   0x0004    /* True for a "natural" join */
#define JT_LEFT      0x0008    /* Left outer join */
#define JT_RIGHT     0x0010    /* Right outer join */
#define JT_OUTER     0x0020    /* The "OUTER" keyword is present */
#define JT_ERROR     0x0040    /* unknown or unsupported join type */

/*
** TUNING: The skip-scan optimization is only profitable if the average 
** number of repeats of an entry in the index is greater than or equal to
** WHERE_SKIPSCAN_ONSET.  If the average numbe of repeats is less
** than WHERE_SKIPSCAN_ONSET, then it is faster to do a full table
** scan.
*/
#define WHERE_SKIPSCAN_ONSET         18
#define WHERE_SKIPSCAN_ONSET_LOG     42

/*
** Flags appropriate for the wctrlFlags parameter of sqlite3WhereBegin()
** and the WhereInfo.wctrlFlags member.
*/
#define WHERE_ORDERBY_NORMAL   0x0000 /* No-op */
#define WHERE_ORDERBY_MIN      0x0001 /* ORDER BY processing for min() func */

Changes to src/where.c.

2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
  **
  ** The magic number 18 is selected on the basis that scanning 17 rows
  ** is almost always quicker than an index seek (even though if the index
  ** contains fewer than 2^17 rows we assume otherwise in other parts of
  ** the code). And, even if it is not, it should not be too much slower. 
  ** On the other hand, the extra seeks could end up being significantly
  ** more expensive.  */
  assert( 42==sqlite3LogEst(18) );
  if( saved_nEq==saved_nSkip
   && saved_nEq+1<pProbe->nKeyCol
   && pProbe->noSkipScan==0
   && pProbe->aiRowLogEst[saved_nEq+1]>=42  /* TUNING: Minimum for skip-scan */
   && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
  ){
    LogEst nIter;
    pNew->u.btree.nEq++;
    pNew->nSkip++;
    pNew->aLTerm[pNew->nLTerm++] = 0;
    pNew->wsFlags |= WHERE_SKIPSCAN;







|



|







2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
  **
  ** The magic number 18 is selected on the basis that scanning 17 rows
  ** is almost always quicker than an index seek (even though if the index
  ** contains fewer than 2^17 rows we assume otherwise in other parts of
  ** the code). And, even if it is not, it should not be too much slower. 
  ** On the other hand, the extra seeks could end up being significantly
  ** more expensive.  */
  assert( WHERE_SKIPSCAN_ONSET_LOG==sqlite3LogEst(WHERE_SKIPSCAN_ONSET) );
  if( saved_nEq==saved_nSkip
   && saved_nEq+1<pProbe->nKeyCol
   && pProbe->noSkipScan==0
   && pProbe->aiRowLogEst[saved_nEq+1]>=WHERE_SKIPSCAN_ONSET_LOG
   && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
  ){
    LogEst nIter;
    pNew->u.btree.nEq++;
    pNew->nSkip++;
    pNew->aLTerm[pNew->nLTerm++] = 0;
    pNew->wsFlags |= WHERE_SKIPSCAN;