SQLite

Changes On Branch analyze-worst-case
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch analyze-worst-case Excluding Merge-Ins

This is equivalent to a diff from cb9302cc to c4488730

2016-03-04
21:18
Fix an assert() in sqlite3VarintLen(), even though it is impossible to hit in SQLite due to the way sqlite3VarintLen() is used. (check-in: 251424c5 user: drh tags: trunk)
19:55
Simplify the computation of Index.aAvgEq. (Leaf check-in: c4488730 user: drh tags: analyze-worst-case)
18:45
Merge changes from trunk. (check-in: 5294c977 user: drh tags: analyze-worst-case)
16:42
Merge recent enhancements from trunk. Default page size is 4096. Writes to statement journals are avoided. (check-in: 456df336 user: drh tags: sessions)
14:57
Merge recent enhancements from trunk, and especially the changes that reduce the heap-memory footprint of schemas, and defer opening and writing to statement journals. (check-in: 2f0c195c user: drh tags: apple-osx)
14:43
Defer opening and writing statement journals until the size reaches a threshold (currently 64KiB). (check-in: cb9302cc user: drh tags: trunk)
14:23
Update test cases to taken deferred statement-journal opening into account. (Closed-Leaf check-in: 5b2fe521 user: drh tags: memjournal-exp)
04:01
Change the default cache_size to -2000 (which means 2000*1024 bytes independent of page_size). (check-in: 2682e8e4 user: drh tags: trunk)

Changes to src/analyze.c.

263
264
265
266
267
268
269

270
271
272
273
274
275
276
** information.
*/
typedef struct Stat4Accum Stat4Accum;
typedef struct Stat4Sample Stat4Sample;
struct Stat4Sample {
  tRowcnt *anEq;                  /* sqlite_stat4.nEq */
  tRowcnt *anDLt;                 /* sqlite_stat4.nDLt */

#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  tRowcnt *anLt;                  /* sqlite_stat4.nLt */
  union {
    i64 iRowid;                     /* Rowid in main table of the key */
    u8 *aRowid;                     /* Key for WITHOUT ROWID tables */
  } u;
  u32 nRowid;                     /* Sizeof aRowid[] */







>







263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
** information.
*/
typedef struct Stat4Accum Stat4Accum;
typedef struct Stat4Sample Stat4Sample;
struct Stat4Sample {
  tRowcnt *anEq;                  /* sqlite_stat4.nEq */
  tRowcnt *anDLt;                 /* sqlite_stat4.nDLt */
  tRowcnt *amxEq;                 /* Maximum length run of equal values */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  tRowcnt *anLt;                  /* sqlite_stat4.nLt */
  union {
    i64 iRowid;                     /* Rowid in main table of the key */
    u8 *aRowid;                     /* Key for WITHOUT ROWID tables */
  } u;
  u32 nRowid;                     /* Sizeof aRowid[] */
414
415
416
417
418
419
420

421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440

441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
  nKeyCol = sqlite3_value_int(argv[1]);
  assert( nKeyCol<=nCol );
  assert( nKeyCol>0 );

  /* Allocate the space required for the Stat4Accum object */
  n = sizeof(*p) 
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anEq */

    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anDLt */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anLt */
    + sizeof(Stat4Sample)*(nCol+mxSample)     /* Stat4Accum.aBest[], a[] */
    + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample)
#endif
  ;
  db = sqlite3_context_db_handle(context);
  p = sqlite3DbMallocZero(db, n);
  if( p==0 ){
    sqlite3_result_error_nomem(context);
    return;
  }

  p->db = db;
  p->nRow = 0;
  p->nCol = nCol;
  p->nKeyCol = nKeyCol;
  p->current.anDLt = (tRowcnt*)&p[1];
  p->current.anEq = &p->current.anDLt[nColUp];


#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  {
    u8 *pSpace;                     /* Allocated space not yet assigned */
    int i;                          /* Used to iterate through p->aSample[] */

    p->iGet = -1;
    p->mxSample = mxSample;
    p->nPSample = (tRowcnt)(sqlite3_value_int64(argv[2])/(mxSample/3+1) + 1);
    p->current.anLt = &p->current.anEq[nColUp];
    p->iPrn = 0x689e962d*(u32)nCol ^ 0xd0944565*(u32)sqlite3_value_int(argv[2]);
  
    /* Set up the Stat4Accum.a[] and aBest[] arrays */
    p->a = (struct Stat4Sample*)&p->current.anLt[nColUp];
    p->aBest = &p->a[mxSample];
    pSpace = (u8*)(&p->a[mxSample+nCol]);
    for(i=0; i<(mxSample+nCol); i++){







>


















|
|
>




|




|







415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
  nKeyCol = sqlite3_value_int(argv[1]);
  assert( nKeyCol<=nCol );
  assert( nKeyCol>0 );

  /* Allocate the space required for the Stat4Accum object */
  n = sizeof(*p) 
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anEq */
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.amxEq */
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anDLt */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anLt */
    + sizeof(Stat4Sample)*(nCol+mxSample)     /* Stat4Accum.aBest[], a[] */
    + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample)
#endif
  ;
  db = sqlite3_context_db_handle(context);
  p = sqlite3DbMallocZero(db, n);
  if( p==0 ){
    sqlite3_result_error_nomem(context);
    return;
  }

  p->db = db;
  p->nRow = 0;
  p->nCol = nCol;
  p->nKeyCol = nKeyCol;
  p->current.anEq = (tRowcnt*)&p[1];
  p->current.amxEq = &p->current.anEq[nColUp];
  p->current.anDLt = &p->current.amxEq[nColUp];

#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  {
    u8 *pSpace;                     /* Allocated space not yet assigned */
    int i;                          /* Loop counter */

    p->iGet = -1;
    p->mxSample = mxSample;
    p->nPSample = (tRowcnt)(sqlite3_value_int64(argv[2])/(mxSample/3+1) + 1);
    p->current.anLt = &p->current.anDLt[nColUp];
    p->iPrn = 0x689e962d*(u32)nCol ^ 0xd0944565*(u32)sqlite3_value_int(argv[2]);
  
    /* Set up the Stat4Accum.a[] and aBest[] arrays */
    p->a = (struct Stat4Sample*)&p->current.anLt[nColUp];
    p->aBest = &p->a[mxSample];
    pSpace = (u8*)(&p->a[mxSample+nCol]);
    for(i=0; i<(mxSample+nCol); i++){
717
718
719
720
721
722
723
724



725
726
727
728
729
730
731

732
733
734
735
736
737
738
  UNUSED_PARAMETER( argc );
  UNUSED_PARAMETER( context );
  assert( p->nCol>0 );
  assert( iChng<p->nCol );

  if( p->nRow==0 ){
    /* This is the first call to this function. Do initialization. */
    for(i=0; i<p->nCol; i++) p->current.anEq[i] = 1;



  }else{
    /* Second and subsequent calls get processed here */
    samplePushPrevious(p, iChng);

    /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply
    ** to the current row of the index. */
    for(i=0; i<iChng; i++){

      p->current.anEq[i]++;
    }
    for(i=iChng; i<p->nCol; i++){
      p->current.anDLt[i]++;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
      p->current.anLt[i] += p->current.anEq[i];
#endif







|
>
>
>







>







720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
  UNUSED_PARAMETER( argc );
  UNUSED_PARAMETER( context );
  assert( p->nCol>0 );
  assert( iChng<p->nCol );

  if( p->nRow==0 ){
    /* This is the first call to this function. Do initialization. */
    for(i=0; i<p->nCol; i++){
      p->current.anEq[i] = 1;
      p->current.amxEq[i] = 1;
    }
  }else{
    /* Second and subsequent calls get processed here */
    samplePushPrevious(p, iChng);

    /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply
    ** to the current row of the index. */
    for(i=0; i<iChng; i++){
      if( p->current.amxEq[i]==p->current.anEq[i] ) p->current.amxEq[i]++;
      p->current.anEq[i]++;
    }
    for(i=iChng; i<p->nCol; i++){
      p->current.anDLt[i]++;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
      p->current.anLt[i] += p->current.anEq[i];
#endif
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832

833
834
835
836
837

838
839
840

841
842
843



















844
845
846

847
848
849
850
851
852
853
854
855
856
857


858
859
860
861


862
863
864

865
866
867
868
869
870
871
       || eCall==STAT_GET_NDLT 
  );
  if( eCall==STAT_GET_STAT1 )
#else
  assert( argc==1 );
#endif
  {
    /* Return the value to store in the "stat" column of the sqlite_stat1
    ** table for this index.
    **
    ** The value is a string composed of a list of integers describing 
    ** the index. The first integer in the list is the total number of 
    ** entries in the index. There is one additional integer in the list 
    ** for each indexed column. This additional integer is an estimate of
    ** the number of rows matched by a stabbing query on the index using
    ** a key with the corresponding number of fields. In other words,

    ** if the index is on columns (a,b) and the sqlite_stat1 value is 
    ** "100 10 2", then SQLite estimates that:
    **
    **   * the index contains 100 rows,
    **   * "WHERE a=?" matches 10 rows, and

    **   * "WHERE a=? AND b=?" matches 2 rows.
    **
    ** If D is the count of distinct values and K is the total number of 

    ** rows, then each estimate is computed as:
    **
    **        I = (K+D-1)/D



















    */
    char *z;
    int i;


    char *zRet = sqlite3MallocZero( (p->nKeyCol+1)*25 );
    if( zRet==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }

    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
    z = zRet + sqlite3Strlen30(zRet);
    for(i=0; i<p->nKeyCol; i++){
      u64 nDistinct = p->current.anDLt[i] + 1;


      u64 iVal = (p->nRow + nDistinct - 1) / nDistinct;
      sqlite3_snprintf(24, z, " %llu", iVal);
      z += sqlite3Strlen30(z);
      assert( p->current.anEq[i] );


    }
    assert( z[0]=='\0' && z>zRet );


    sqlite3_result_text(context, zRet, -1, sqlite3_free);
  }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  else if( eCall==STAT_GET_ROWID ){
    if( p->iGet<0 ){
      samplePushPrevious(p, 0);
      p->iGet = 0;







|



|
|
|
|
|
>
|
<

|
|
>
|

|
>
|

|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



>

|








|
>
>
|



>
>
|
<
|
>







824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841

842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895

896
897
898
899
900
901
902
903
904
       || eCall==STAT_GET_NDLT 
  );
  if( eCall==STAT_GET_STAT1 )
#else
  assert( argc==1 );
#endif
  {
    /* Return a value for the "stat" column of the sqlite_stat1
    ** table for this index.
    **
    ** The value is a string composed of a list of integers describing 
    ** the index. The first integer is the (estimated) total number
    ** entries in the index. There is one additional integer for each
    ** column in the index.  The first added integer is an estimate of
    ** the number of rows that match a single key in the first column of
    ** the index.  The second added integer is an estimate on of the number
    ** of rows that match a single key consisting of the first two columns
    ** of the index.  And so forth.

    **
    ** For example, for an index on columns (a,b), if the sqlite_stat1.stat
    ** values is "100 10 2", that means there are about 100 rows in the
    ** index, and that a query against a=$key1 will match about 10 rows
    ** and a query against "a=$key1 AND b=$key2" will match about 2 rows.
    **
    ** Let V be the average number of rows that match a key, and let M
    ** be the most number of rows that match the key for any possible value
    ** of that key.  The estimate is computed as:
    **
    **     E = (2*V + M)/3
    **
    ** Consider two indexes.  Index X has with 100 values of exactly 0 and 
    ** 100 singleton values between 1 and 100.  Index Y has 200 values
    ** evenly distributed between 1 and 20.  If only the average (V) is
    ** used in the estimate, X would have "200 2" and Y would have "200 10"
    ** and so the planner would think X is the more selective index.  And
    ** X often would be more selective.  But when searching for 0, index X
    ** would perform badly.  To avoid this problem, the M is added into the
    ** estimate so that the stat for X is "200 34" and Y is still "200 10".
    ** In this way, Y is the preferred index (all else being equal) and
    ** the pathological case is avoided.
    **
    ** For deciding whether or not to do a skip-scan, we want to know the
    ** average number of rows (V) with the same key, not the mixed estimate
    ** E shown above.  Usually E will be close enough.  However, if E is
    ** large but V is small, that could trick the query planner into thinking
    ** that a skip-scan might work well on this index.  To avoid that, the
    ** "noskipscan" flag is added in cases where the divergence between E
    ** and V might mislead the query planner.
    */
    char *z;
    int i;
    int noSkipScan = 0;

    char *zRet = sqlite3MallocZero( (p->nKeyCol+2)*25 );
    if( zRet==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }

    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
    z = zRet + sqlite3Strlen30(zRet);
    for(i=0; i<p->nKeyCol; i++){
      u64 nDistinct = p->current.anDLt[i];
      u64 iMx = p->current.amxEq[i];                /* M: Most rows per key */
      u64 iAvg = (p->nRow+nDistinct)/(nDistinct+1); /* V: Average per key */
      u64 iVal = (iMx+iAvg*2)/3;                    /* E: The estimate */
      sqlite3_snprintf(24, z, " %llu", iVal);
      z += sqlite3Strlen30(z);
      assert( p->current.anEq[i] );
      if( iVal>=WHERE_SKIPSCAN_ONSET && iAvg<(WHERE_SKIPSCAN_ONSET*2/3) ){
        noSkipScan = 1;
      }

    }
    if( noSkipScan ) sqlite3_snprintf(14, z, " noskipscan");
    sqlite3_result_text(context, zRet, -1, sqlite3_free);
  }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  else if( eCall==STAT_GET_ROWID ){
    if( p->iGet<0 ){
      samplePushPrevious(p, 0);
      p->iGet = 0;
1567
1568
1569
1570
1571
1572
1573


1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588

1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614

1615

1616
1617
1618


1619
1620
1621
1622
1623
1624
1625
1626
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
/*
** Populate the pIdx->aAvgEq[] array based on the samples currently
** stored in pIdx->aSample[]. 
*/
static void initAvgEq(Index *pIdx){
  if( pIdx ){


    IndexSample *aSample = pIdx->aSample;
    IndexSample *pFinal = &aSample[pIdx->nSample-1];
    int iCol;
    int nCol = 1;
    if( pIdx->nSampleCol>1 ){
      /* If this is stat4 data, then calculate aAvgEq[] values for all
      ** sample columns except the last. The last is always set to 1, as
      ** once the trailing PK fields are considered all index keys are
      ** unique.  */
      nCol = pIdx->nSampleCol-1;
      pIdx->aAvgEq[nCol] = 1;
    }
    for(iCol=0; iCol<nCol; iCol++){
      int nSample = pIdx->nSample;
      int i;                    /* Used to iterate through samples */

      tRowcnt sumEq = 0;        /* Sum of the nEq values */
      tRowcnt avgEq = 0;
      tRowcnt nRow;             /* Number of rows in index */
      i64 nSum100 = 0;          /* Number of terms contributing to sumEq */
      i64 nDist100;             /* Number of distinct values in index */

      if( !pIdx->aiRowEst || iCol>=pIdx->nKeyCol || pIdx->aiRowEst[iCol+1]==0 ){
        nRow = pFinal->anLt[iCol];
        nDist100 = (i64)100 * pFinal->anDLt[iCol];
        nSample--;
      }else{
        nRow = pIdx->aiRowEst[0];
        nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1];
      }
      pIdx->nRowEst0 = nRow;

      /* Set nSum to the number of distinct (iCol+1) field prefixes that
      ** occur in the stat4 table for this index. Set sumEq to the sum of 
      ** the nEq values for column iCol for the same set (adding the value 
      ** only once where there exist duplicate prefixes).  */
      for(i=0; i<nSample; i++){
        if( i==(pIdx->nSample-1)
         || aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] 
        ){
          sumEq += aSample[i].anEq[iCol];
          nSum100 += 100;

        }

      }

      if( nDist100>nSum100 ){


        avgEq = ((i64)100 * (nRow - sumEq))/(nDist100 - nSum100);
      }
      if( avgEq==0 ) avgEq = 1;
      pIdx->aAvgEq[iCol] = avgEq;
    }
  }
}








>
>
|
|
|
<
|




|



<

>

|

|
|


|
<
<


<


|
<
<
<
<
<
|
<
<

<
>

>

|
<
>
>
|







1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611

1612
1613
1614
1615
1616
1617
1618
1619
1620

1621
1622
1623
1624
1625
1626
1627
1628
1629
1630


1631
1632

1633
1634
1635





1636


1637

1638
1639
1640
1641
1642

1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
/*
** Populate the pIdx->aAvgEq[] array based on the samples currently
** stored in pIdx->aSample[]. 
*/
static void initAvgEq(Index *pIdx){
  if( pIdx ){
    int nSample = pIdx->nSample;                /* Number of samples */
    int nCol = pIdx->nSampleCol;                /* Number of columns sampled */
    IndexSample *aSample = pIdx->aSample;       /* The samples */
    IndexSample *pFinal = &aSample[nSample-1];  /* The last sample */
    int iCol;                                   /* Loop counter of columns */

    if( nCol>1 ){
      /* If this is stat4 data, then calculate aAvgEq[] values for all
      ** sample columns except the last. The last is always set to 1, as
      ** once the trailing PK fields are considered all index keys are
      ** unique.  */
      nCol--;
      pIdx->aAvgEq[nCol] = 1;
    }
    for(iCol=0; iCol<nCol; iCol++){

      int i;                    /* Used to iterate through samples */
      u32 nDSample = 0;         /* Number of distinct samples less than pFinal*/
      tRowcnt sumEq = 0;        /* Sum of the nEq values */
      tRowcnt avgEq;            /* Average repeats for unsampled entries */
      tRowcnt nRow;             /* Number of rows in index */
      tRowcnt prevLt = 0;       /* Number of values less than previous sample */
      tRowcnt thisLt;           /* Number of values less than current sample */

      if( !pIdx->aiRowEst || iCol>=pIdx->nKeyCol || pIdx->aiRowEst[iCol+1]==0 ){
        nRow = pFinal->anLt[iCol] + pFinal->anEq[iCol];


      }else{
        nRow = pIdx->aiRowEst[0];

      }
      pIdx->nRowEst0 = nRow;
      for(i=0; (thisLt = aSample[i].anLt[iCol])<pFinal->anLt[iCol]; i++){





        if( i==0 || thisLt!=prevLt ){


          sumEq += aSample[i].anEq[iCol];

          nDSample++;
        }
        prevLt = thisLt;
      }
      if( pFinal->anDLt[iCol] > nDSample ){

        avgEq = (pFinal->anLt[iCol] - sumEq)/(pFinal->anDLt[iCol] - nDSample);
      }else{
        avgEq = 1;
      }
      if( avgEq==0 ) avgEq = 1;
      pIdx->aAvgEq[iCol] = avgEq;
    }
  }
}

Changes to src/sqliteInt.h.

2417
2418
2419
2420
2421
2422
2423









2424
2425
2426
2427
2428
2429
2430
#define JT_CROSS     0x0002    /* Explicit use of the CROSS keyword */
#define JT_NATURAL   0x0004    /* True for a "natural" join */
#define JT_LEFT      0x0008    /* Left outer join */
#define JT_RIGHT     0x0010    /* Right outer join */
#define JT_OUTER     0x0020    /* The "OUTER" keyword is present */
#define JT_ERROR     0x0040    /* unknown or unsupported join type */











/*
** Flags appropriate for the wctrlFlags parameter of sqlite3WhereBegin()
** and the WhereInfo.wctrlFlags member.
*/
#define WHERE_ORDERBY_NORMAL   0x0000 /* No-op */
#define WHERE_ORDERBY_MIN      0x0001 /* ORDER BY processing for min() func */







>
>
>
>
>
>
>
>
>







2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
#define JT_CROSS     0x0002    /* Explicit use of the CROSS keyword */
#define JT_NATURAL   0x0004    /* True for a "natural" join */
#define JT_LEFT      0x0008    /* Left outer join */
#define JT_RIGHT     0x0010    /* Right outer join */
#define JT_OUTER     0x0020    /* The "OUTER" keyword is present */
#define JT_ERROR     0x0040    /* unknown or unsupported join type */

/*
** TUNING: The skip-scan optimization is only profitable if the average 
** number of repeats of an entry in the index is greater than or equal to
** WHERE_SKIPSCAN_ONSET.  If the average numbe of repeats is less
** than WHERE_SKIPSCAN_ONSET, then it is faster to do a full table
** scan.
*/
#define WHERE_SKIPSCAN_ONSET         18
#define WHERE_SKIPSCAN_ONSET_LOG     42

/*
** Flags appropriate for the wctrlFlags parameter of sqlite3WhereBegin()
** and the WhereInfo.wctrlFlags member.
*/
#define WHERE_ORDERBY_NORMAL   0x0000 /* No-op */
#define WHERE_ORDERBY_MIN      0x0001 /* ORDER BY processing for min() func */

Changes to src/where.c.

2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
  **
  ** The magic number 18 is selected on the basis that scanning 17 rows
  ** is almost always quicker than an index seek (even though if the index
  ** contains fewer than 2^17 rows we assume otherwise in other parts of
  ** the code). And, even if it is not, it should not be too much slower. 
  ** On the other hand, the extra seeks could end up being significantly
  ** more expensive.  */
  assert( 42==sqlite3LogEst(18) );
  if( saved_nEq==saved_nSkip
   && saved_nEq+1<pProbe->nKeyCol
   && pProbe->noSkipScan==0
   && pProbe->aiRowLogEst[saved_nEq+1]>=42  /* TUNING: Minimum for skip-scan */
   && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
  ){
    LogEst nIter;
    pNew->u.btree.nEq++;
    pNew->nSkip++;
    pNew->aLTerm[pNew->nLTerm++] = 0;
    pNew->wsFlags |= WHERE_SKIPSCAN;







|



|







2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
  **
  ** The magic number 18 is selected on the basis that scanning 17 rows
  ** is almost always quicker than an index seek (even though if the index
  ** contains fewer than 2^17 rows we assume otherwise in other parts of
  ** the code). And, even if it is not, it should not be too much slower. 
  ** On the other hand, the extra seeks could end up being significantly
  ** more expensive.  */
  assert( WHERE_SKIPSCAN_ONSET_LOG==sqlite3LogEst(WHERE_SKIPSCAN_ONSET) );
  if( saved_nEq==saved_nSkip
   && saved_nEq+1<pProbe->nKeyCol
   && pProbe->noSkipScan==0
   && pProbe->aiRowLogEst[saved_nEq+1]>=WHERE_SKIPSCAN_ONSET_LOG
   && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
  ){
    LogEst nIter;
    pNew->u.btree.nEq++;
    pNew->nSkip++;
    pNew->aLTerm[pNew->nLTerm++] = 0;
    pNew->wsFlags |= WHERE_SKIPSCAN;

Changes to test/skipscan2.test.

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122

123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
  INSERT INTO people VALUES('Nathan','student',163);
  INSERT INTO people VALUES('Olivia','student',161);
  INSERT INTO people VALUES('Patrick','teacher',180);
  INSERT INTO people VALUES('Quiana','student',182);
  INSERT INTO people VALUES('Robert','student',159);
  INSERT INTO people VALUES('Sally','student',166);
  INSERT INTO people VALUES('Tom','student',171);
  INSERT INTO people VALUES('Ursula','student',170);
  INSERT INTO people VALUES('Vance','student',179);
  INSERT INTO people VALUES('Willma','student',175);
  INSERT INTO people VALUES('Xavier','teacher',185);
  INSERT INTO people VALUES('Yvonne','student',149);
  INSERT INTO people VALUES('Zach','student',170);
}

# Without ANALYZE, a skip-scan is not used
#
do_execsql_test skipscan2-1.3 {
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-1.3eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {~/*INDEX people_idx1 */}

# Now do an ANALYZE.  A skip-scan can be used after ANALYZE.
#
do_execsql_test skipscan2-1.4 {
  ANALYZE;
  -- We do not have enough people above to actually force the use
  -- of a skip-scan.  So make a manual adjustment to the stat1 table
  -- to make it seem like there are many more.
  UPDATE sqlite_stat1 SET stat='10000 5000 20' WHERE idx='people_idx1';
  UPDATE sqlite_stat1 SET stat='10000 1' WHERE idx='sqlite_autoindex_people_1';
  ANALYZE sqlite_master;
}
db cache flush
do_execsql_test skipscan2-1.5 {
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-1.5eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {/*INDEX people_idx1 */}

# Same answer with other formulations of the same query
#
do_execsql_test skipscan2-1.6 {
  SELECT name FROM people
   WHERE role IN (SELECT DISTINCT role FROM people)
     AND height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-1.7 {
  SELECT name FROM people WHERE role='teacher' AND height>=180
  UNION ALL
  SELECT name FROM people WHERE role='student' AND height>=180
  ORDER BY 1;
} {David Jack Patrick Quiana Xavier}

# Add 8 more people, bringing the total to 34.  Then the number of
# duplicates in the left-column of the index will be 17 and 
# skip-scan should not be used after an (unfudged) ANALYZE.
#
do_execsql_test skipscan2-1.8 {
  INSERT INTO people VALUES('Angie','student',166);
  INSERT INTO people VALUES('Brad','student',176);
  INSERT INTO people VALUES('Claire','student',168);
  INSERT INTO people VALUES('Donald','student',162);
  INSERT INTO people VALUES('Elaine','student',177);
  INSERT INTO people VALUES('Frazier','student',159);
  INSERT INTO people VALUES('Grace','student',179);
  INSERT INTO people VALUES('Horace','student',166);
  ANALYZE;
  SELECT stat FROM sqlite_stat1 WHERE idx='people_idx1';
} {{34 17 2}}
db cache flush
do_execsql_test skipscan2-1.9 {
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}

do_execsql_test skipscan2-1.9eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {~/*INDEX people_idx1 */}

# Add 2 more people, bringing the total to 36.  Then the number of
# duplicates in the left-column of the index will be 18 and 
# skip-scan will be used after an (unfudged) ANALYZE.
#
do_execsql_test skipscan2-1.10 {
  INSERT INTO people VALUES('Ingrad','student',155);
  INSERT INTO people VALUES('Jacob','student',179);
  ANALYZE;
  SELECT stat FROM sqlite_stat1 WHERE idx='people_idx1';
} {{36 18 2}}
db cache flush
do_execsql_test skipscan2-1.11 {
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-1.11eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height>=180 ORDER BY +name;
} {/*INDEX people_idx1 */}


# Repeat using a WITHOUT ROWID table.
#
do_execsql_test skipscan2-2.1 {
  CREATE TABLE peoplew(
    name TEXT PRIMARY KEY,
    role TEXT NOT NULL,
    height INT NOT NULL, -- in cm
    CHECK( role IN ('student','teacher') )
  ) WITHOUT ROWID;
  CREATE INDEX peoplew_idx1 ON peoplew(role, height);
  INSERT INTO peoplew(name,role,height)
     SELECT name, role, height FROM  people;
  ALTER TABLE people RENAME TO old_people;
  SELECT name FROM peoplew WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-2.2 {
  SELECT name FROM peoplew
   WHERE role IN (SELECT DISTINCT role FROM peoplew)
     AND height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-2.2 {
  SELECT name FROM peoplew WHERE role='teacher' AND height>=180
  UNION ALL
  SELECT name FROM peoplew WHERE role='student' AND height>=180
  ORDER BY 1;
} {David Jack Patrick Quiana Xavier}

# Now do an ANALYZE.  A skip-scan can be used after ANALYZE.
#
do_execsql_test skipscan2-2.4 {
  ANALYZE;
}
db cache flush
do_execsql_test skipscan2-2.5 {
  SELECT name FROM peoplew WHERE height>=180 ORDER BY +name;
} {David Jack Patrick Quiana Xavier}
do_execsql_test skipscan2-2.5eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM peoplew WHERE height>=180 ORDER BY +name;
} {/*INDEX peoplew_idx1 */}

# A skip-scan on a PK index of a WITHOUT ROWID table.
#
do_execsql_test skipscan2-3.1 {
  CREATE TABLE t3(a, b, c, PRIMARY KEY(a, b)) WITHOUT ROWID;
}
do_test skipscan2-3.2 {







<
<
<
<
<
<





|
|


|















|
|


|







|
|

|

|

|

|
|
|


|
<
<
<
<
<
<
<


|


|
<
>


|


<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<














|
|



|
|

|

|

|








|
|


|
|







44
45
46
47
48
49
50






51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102







103
104
105
106
107
108

109
110
111
112
113
114



















115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
  INSERT INTO people VALUES('Nathan','student',163);
  INSERT INTO people VALUES('Olivia','student',161);
  INSERT INTO people VALUES('Patrick','teacher',180);
  INSERT INTO people VALUES('Quiana','student',182);
  INSERT INTO people VALUES('Robert','student',159);
  INSERT INTO people VALUES('Sally','student',166);
  INSERT INTO people VALUES('Tom','student',171);






}

# Without ANALYZE, a skip-scan is not used
#
do_execsql_test skipscan2-1.3 {
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-1.3eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {~/*INDEX people_idx1 */}

# Now do an ANALYZE.  A skip-scan can be used after ANALYZE.
#
do_execsql_test skipscan2-1.4 {
  ANALYZE;
  -- We do not have enough people above to actually force the use
  -- of a skip-scan.  So make a manual adjustment to the stat1 table
  -- to make it seem like there are many more.
  UPDATE sqlite_stat1 SET stat='10000 5000 20' WHERE idx='people_idx1';
  UPDATE sqlite_stat1 SET stat='10000 1' WHERE idx='sqlite_autoindex_people_1';
  ANALYZE sqlite_master;
}
db cache flush
do_execsql_test skipscan2-1.5 {
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-1.5eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {/*INDEX people_idx1 */}

# Same answer with other formulations of the same query
#
do_execsql_test skipscan2-1.6 {
  SELECT name FROM people
   WHERE role IN (SELECT DISTINCT role FROM people)
     AND height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-1.7 {
  SELECT name FROM people WHERE role='teacher' AND height<=161
  UNION ALL
  SELECT name FROM people WHERE role='student' AND height<=161
  ORDER BY 1;
} {Alice Bob Cindy Emily Megan Olivia Robert}

# Add more people so that the estimated number of rows per key is
# equal to the skip-scan threshold of 18.  Then run an (unfudged)
# ANALYZE.  skip-scan should work.
#
do_execsql_test skipscan2-1.8 {
  INSERT INTO people VALUES('Ursula','student',170);







  ANALYZE;
  SELECT stat FROM sqlite_stat1 WHERE idx='people_idx1';
} {{21 13 2}}
db cache flush
do_execsql_test skipscan2-1.9 {
  SELECT name FROM people WHERE height<=161 ORDER BY +name;

} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-1.9eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {~/*INDEX people_idx1 */}





















# Repeat using a WITHOUT ROWID table.
#
do_execsql_test skipscan2-2.1 {
  CREATE TABLE peoplew(
    name TEXT PRIMARY KEY,
    role TEXT NOT NULL,
    height INT NOT NULL, -- in cm
    CHECK( role IN ('student','teacher') )
  ) WITHOUT ROWID;
  CREATE INDEX peoplew_idx1 ON peoplew(role, height);
  INSERT INTO peoplew(name,role,height)
     SELECT name, role, height FROM  people;
  ALTER TABLE people RENAME TO old_people;
  SELECT name FROM peoplew WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-2.2 {
  SELECT name FROM peoplew
   WHERE role IN (SELECT DISTINCT role FROM peoplew)
     AND height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-2.2 {
  SELECT name FROM peoplew WHERE role='teacher' AND height<=161
  UNION ALL
  SELECT name FROM peoplew WHERE role='student' AND height<=161
  ORDER BY 1;
} {Alice Bob Cindy Emily Megan Olivia Robert}

# Now do an ANALYZE.  A skip-scan can be used after ANALYZE.
#
do_execsql_test skipscan2-2.4 {
  ANALYZE;
}
db cache flush
do_execsql_test skipscan2-2.5 {
  SELECT name FROM peoplew WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-2.5eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM peoplew WHERE height<=161 ORDER BY +name;
} {/*INDEX peoplew_idx1*/}

# A skip-scan on a PK index of a WITHOUT ROWID table.
#
do_execsql_test skipscan2-3.1 {
  CREATE TABLE t3(a, b, c, PRIMARY KEY(a, b)) WITHOUT ROWID;
}
do_test skipscan2-3.2 {