/ Check-in [c4488730]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Simplify the computation of Index.aAvgEq.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | analyze-worst-case
Files: files | file ages | folders
SHA1: c448873006cda294cdaf6b61e75a07f121ba34ea
User & Date: drh 2016-03-04 19:55:02
Context
2016-03-04
19:55
Simplify the computation of Index.aAvgEq. Leaf check-in: c4488730 user: drh tags: analyze-worst-case
18:45
Merge changes from trunk. check-in: 5294c977 user: drh tags: analyze-worst-case
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611

1612
1613
1614
1615
1616
1617
1618
1619
1620
1621

1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645


1646
1647

1648

1649
1650
1651
1652




1653
1654
1655
1656
1657
1658
1659
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
/*
** Populate the pIdx->aAvgEq[] array based on the samples currently
** stored in pIdx->aSample[]. 
*/
static void initAvgEq(Index *pIdx){
  if( pIdx ){
    IndexSample *aSample = pIdx->aSample;
    IndexSample *pFinal = &aSample[pIdx->nSample-1];
    int iCol;
    int nCol = 1;
    if( pIdx->nSampleCol>1 ){

      /* If this is stat4 data, then calculate aAvgEq[] values for all
      ** sample columns except the last. The last is always set to 1, as
      ** once the trailing PK fields are considered all index keys are
      ** unique.  */
      nCol = pIdx->nSampleCol-1;
      pIdx->aAvgEq[nCol] = 1;
    }
    for(iCol=0; iCol<nCol; iCol++){
      int nSample = pIdx->nSample;
      int i;                    /* Used to iterate through samples */

      tRowcnt sumEq = 0;        /* Sum of the nEq values */
      tRowcnt avgEq = 0;
      tRowcnt nRow;             /* Number of rows in index */
      i64 nSum100 = 0;          /* Number of terms contributing to sumEq */
      i64 nDist100;             /* Number of distinct values in index */

      if( !pIdx->aiRowEst || iCol>=pIdx->nKeyCol || pIdx->aiRowEst[iCol+1]==0 ){
        nRow = pFinal->anLt[iCol];
        nDist100 = (i64)100 * pFinal->anDLt[iCol];
        nSample--;
      }else{
        nRow = pIdx->aiRowEst[0];
        nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1];
      }
      pIdx->nRowEst0 = nRow;

      /* Set nSum to the number of distinct (iCol+1) field prefixes that
      ** occur in the stat4 table for this index. Set sumEq to the sum of 
      ** the nEq values for column iCol for the same set (adding the value 
      ** only once where there exist duplicate prefixes).  */
      for(i=0; i<nSample; i++){
        if( i==(pIdx->nSample-1)
         || aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] 
        ){


          sumEq += aSample[i].anEq[iCol];
          nSum100 += 100;

        }

      }

      if( nDist100>nSum100 ){
        avgEq = ((i64)100 * (nRow - sumEq))/(nDist100 - nSum100);




      }
      if( avgEq==0 ) avgEq = 1;
      pIdx->aAvgEq[iCol] = avgEq;
    }
  }
}








|
|
|
|
|
>




|



<

>

|

|
|


|
<
<


<


<
<
<
<
<
<
<
<
<
>
>

<
>

>

<
<
<
>
>
>
>







1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620

1621
1622
1623
1624
1625
1626
1627
1628
1629
1630


1631
1632

1633
1634









1635
1636
1637

1638
1639
1640
1641



1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
/*
** Populate the pIdx->aAvgEq[] array based on the samples currently
** stored in pIdx->aSample[]. 
*/
static void initAvgEq(Index *pIdx){
  if( pIdx ){
    int nSample = pIdx->nSample;                /* Number of samples */
    int nCol = pIdx->nSampleCol;                /* Number of columns sampled */
    IndexSample *aSample = pIdx->aSample;       /* The samples */
    IndexSample *pFinal = &aSample[nSample-1];  /* The last sample */
    int iCol;                                   /* Loop counter of columns */
    if( nCol>1 ){
      /* If this is stat4 data, then calculate aAvgEq[] values for all
      ** sample columns except the last. The last is always set to 1, as
      ** once the trailing PK fields are considered all index keys are
      ** unique.  */
      nCol--;
      pIdx->aAvgEq[nCol] = 1;
    }
    for(iCol=0; iCol<nCol; iCol++){

      int i;                    /* Used to iterate through samples */
      u32 nDSample = 0;         /* Number of distinct samples less than pFinal*/
      tRowcnt sumEq = 0;        /* Sum of the nEq values */
      tRowcnt avgEq;            /* Average repeats for unsampled entries */
      tRowcnt nRow;             /* Number of rows in index */
      tRowcnt prevLt = 0;       /* Number of values less than previous sample */
      tRowcnt thisLt;           /* Number of values less than current sample */

      if( !pIdx->aiRowEst || iCol>=pIdx->nKeyCol || pIdx->aiRowEst[iCol+1]==0 ){
        nRow = pFinal->anLt[iCol] + pFinal->anEq[iCol];


      }else{
        nRow = pIdx->aiRowEst[0];

      }
      pIdx->nRowEst0 = nRow;









      for(i=0; (thisLt = aSample[i].anLt[iCol])<pFinal->anLt[iCol]; i++){
        if( i==0 || thisLt!=prevLt ){
          sumEq += aSample[i].anEq[iCol];

          nDSample++;
        }
        prevLt = thisLt;
      }



      if( pFinal->anDLt[iCol] > nDSample ){
        avgEq = (pFinal->anLt[iCol] - sumEq)/(pFinal->anDLt[iCol] - nDSample);
      }else{
        avgEq = 1;
      }
      if( avgEq==0 ) avgEq = 1;
      pIdx->aAvgEq[iCol] = avgEq;
    }
  }
}

Changes to test/skipscan2.test.

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
...
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
...
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
  INSERT INTO people VALUES('Nathan','student',163);
  INSERT INTO people VALUES('Olivia','student',161);
  INSERT INTO people VALUES('Patrick','teacher',180);
  INSERT INTO people VALUES('Quiana','student',182);
  INSERT INTO people VALUES('Robert','student',159);
  INSERT INTO people VALUES('Sally','student',166);
  INSERT INTO people VALUES('Tom','student',171);
/*
  INSERT INTO people VALUES('Ursula','student',170);
  INSERT INTO people VALUES('Vance','student',179);
  INSERT INTO people VALUES('Willma','student',175);
  INSERT INTO people VALUES('Xavier','teacher',185);
  INSERT INTO people VALUES('Yvonne','student',149);
  INSERT INTO people VALUES('Zach','student',170);
*/
}

# Without ANALYZE, a skip-scan is not used
#
do_execsql_test skipscan2-1.3 {
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
................................................................................
# equal to the skip-scan threshold of 18.  Then run an (unfudged)
# ANALYZE.  skip-scan should work.
#
do_execsql_test skipscan2-1.8 {
  INSERT INTO people VALUES('Ursula','student',170);
  ANALYZE;
  SELECT stat FROM sqlite_stat1 WHERE idx='people_idx1';
} {{21 18 3}}
db cache flush
do_execsql_test skipscan2-1.9 {
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-1.9eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
................................................................................
db cache flush
do_execsql_test skipscan2-2.5 {
  SELECT name FROM peoplew WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-2.5eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM peoplew WHERE height<=161 ORDER BY +name;
} {/*INDEX peoplew_idx1 */}

# A skip-scan on a PK index of a WITHOUT ROWID table.
#
do_execsql_test skipscan2-3.1 {
  CREATE TABLE t3(a, b, c, PRIMARY KEY(a, b)) WITHOUT ROWID;
}
do_test skipscan2-3.2 {







<
<
<
<
<
<
<
<







 







|







 







|







44
45
46
47
48
49
50








51
52
53
54
55
56
57
..
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
...
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
  INSERT INTO people VALUES('Nathan','student',163);
  INSERT INTO people VALUES('Olivia','student',161);
  INSERT INTO people VALUES('Patrick','teacher',180);
  INSERT INTO people VALUES('Quiana','student',182);
  INSERT INTO people VALUES('Robert','student',159);
  INSERT INTO people VALUES('Sally','student',166);
  INSERT INTO people VALUES('Tom','student',171);








}

# Without ANALYZE, a skip-scan is not used
#
do_execsql_test skipscan2-1.3 {
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
................................................................................
# equal to the skip-scan threshold of 18.  Then run an (unfudged)
# ANALYZE.  skip-scan should work.
#
do_execsql_test skipscan2-1.8 {
  INSERT INTO people VALUES('Ursula','student',170);
  ANALYZE;
  SELECT stat FROM sqlite_stat1 WHERE idx='people_idx1';
} {{21 13 2}}
db cache flush
do_execsql_test skipscan2-1.9 {
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-1.9eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM people WHERE height<=161 ORDER BY +name;
................................................................................
db cache flush
do_execsql_test skipscan2-2.5 {
  SELECT name FROM peoplew WHERE height<=161 ORDER BY +name;
} {Alice Bob Cindy Emily Megan Olivia Robert}
do_execsql_test skipscan2-2.5eqp {
  EXPLAIN QUERY PLAN
  SELECT name FROM peoplew WHERE height<=161 ORDER BY +name;
} {/*INDEX peoplew_idx1*/}

# A skip-scan on a PK index of a WITHOUT ROWID table.
#
do_execsql_test skipscan2-3.1 {
  CREATE TABLE t3(a, b, c, PRIMARY KEY(a, b)) WITHOUT ROWID;
}
do_test skipscan2-3.2 {