SQLite

Changes On Branch stat4-avgeq
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch stat4-avgeq Excluding Merge-Ins

This is equivalent to a diff from d2fc3227 to fc619be0

2014-10-06
14:37
Improve the accuracy of the estimates used when searching an index for values not present in any stat4 samples. (check-in: 3aff9a9c user: dan tags: trunk)
2014-10-04
11:59
Updates to documentation and requirements marks. No code changes. (check-in: 0f8102d7 user: drh tags: trunk)
10:22
Add a test to show that the change on this branch is effective. (Closed-Leaf check-in: fc619be0 user: dan tags: stat4-avgeq)
00:07
Avoid leaking Index.aiRowEst memory if an OOM causes a rollback which deletes the index before the aiRowEst deletion code in sqlite3AnalysisLoad() routine has a chance to run. Since the aiRowEst now might be deleted from freeIndex() which does not always have a db pointer, make sure the aiRowEst memory is not held in lookaside. (check-in: efd87ba1 user: drh tags: stat4-avgeq)
2014-10-03
19:16
Improve the accuracy of the estimates used when searching an index for values not present in any stat4 samples under some circumstances. (check-in: e6f7f97d user: dan tags: stat4-avgeq)
16:00
Add requirements marks on the sqlite3_db_status() interface implementation. Fix a typo in the documentation. Fix the new sqlite3_result_text64() routine so that it works correctly with an encoding parameter of SQLITE_UTF16. (check-in: d2fc3227 user: drh tags: trunk)
14:54
Update to requirements marks related to changes in the memory allocation interface and enhancement of the documentation regarding DEFAULT clauses in CREATE TABLE. (check-in: 440705b9 user: drh tags: trunk)

Changes to src/analyze.c.

1444
1445
1446
1447
1448
1449
1450
1451

1452
1453
1454
1455
1456

1457
1458
1459
1460
1461
1462
1463
    while( (c=z[0])>='0' && c<='9' ){
      v = v*10 + c - '0';
      z++;
    }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    if( aOut ){
      aOut[i] = v;
    }else

#else
    assert( aOut==0 );
    UNUSED_PARAMETER(aOut);
#endif
    {

      aLog[i] = sqlite3LogEst(v);
    }
    if( *z==' ' ) z++;
  }
#ifndef SQLITE_ENABLE_STAT3_OR_STAT4
  assert( pIndex!=0 );
#else







<
>




<
>







1444
1445
1446
1447
1448
1449
1450

1451
1452
1453
1454
1455

1456
1457
1458
1459
1460
1461
1462
1463
    while( (c=z[0])>='0' && c<='9' ){
      v = v*10 + c - '0';
      z++;
    }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    if( aOut ){
      aOut[i] = v;

    }
#else
    assert( aOut==0 );
    UNUSED_PARAMETER(aOut);
#endif

    if( aLog ){
      aLog[i] = sqlite3LogEst(v);
    }
    if( *z==' ' ) z++;
  }
#ifndef SQLITE_ENABLE_STAT3_OR_STAT4
  assert( pIndex!=0 );
#else
1512
1513
1514
1515
1516
1517
1518









1519
1520
1521
1522
1523
1524
1525
1526
1527
    pIndex = sqlite3PrimaryKeyIndex(pTable);
  }else{
    pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase);
  }
  z = argv[2];

  if( pIndex ){









    pIndex->bUnordered = 0;
    decodeIntArray((char*)z, pIndex->nKeyCol+1, 0, pIndex->aiRowLogEst, pIndex);
    if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
  }else{
    Index fakeIdx;
    fakeIdx.szIdxRow = pTable->szTabRow;
#ifdef SQLITE_ENABLE_COSTMULT
    fakeIdx.pTable = pTable;
#endif







>
>
>
>
>
>
>
>
>

|







1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
    pIndex = sqlite3PrimaryKeyIndex(pTable);
  }else{
    pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase);
  }
  z = argv[2];

  if( pIndex ){
    int nCol = pIndex->nKeyCol+1;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    tRowcnt * const aiRowEst = pIndex->aiRowEst = (tRowcnt*)sqlite3MallocZero(
        sizeof(tRowcnt) * nCol
    );
    if( aiRowEst==0 ) pInfo->db->mallocFailed = 1;
#else
    tRowcnt * const aiRowEst = 0;
#endif
    pIndex->bUnordered = 0;
    decodeIntArray((char*)z, nCol, aiRowEst, pIndex->aiRowLogEst, pIndex);
    if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
  }else{
    Index fakeIdx;
    fakeIdx.szIdxRow = pTable->szTabRow;
#ifdef SQLITE_ENABLE_COSTMULT
    fakeIdx.pTable = pTable;
#endif
1572
1573
1574
1575
1576
1577
1578

1579
1580
1581
1582






1583




1584

1585
1586
1587
1588
1589

1590
1591

1592
1593
1594
1595
1596

1597
1598
1599
1600
1601
1602
1603
1604
      ** sample columns except the last. The last is always set to 1, as
      ** once the trailing PK fields are considered all index keys are
      ** unique.  */
      nCol = pIdx->nSampleCol-1;
      pIdx->aAvgEq[nCol] = 1;
    }
    for(iCol=0; iCol<nCol; iCol++){

      int i;                    /* Used to iterate through samples */
      tRowcnt sumEq = 0;        /* Sum of the nEq values */
      tRowcnt nSum = 0;         /* Number of terms contributing to sumEq */
      tRowcnt avgEq = 0;






      tRowcnt nDLt = pFinal->anDLt[iCol];






      /* Set nSum to the number of distinct (iCol+1) field prefixes that
      ** occur in the stat4 table for this index before pFinal. Set
      ** sumEq to the sum of the nEq values for column iCol for the same
      ** set (adding the value only once where there exist duplicate 
      ** prefixes).  */

      for(i=0; i<(pIdx->nSample-1); i++){
        if( aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] ){

          sumEq += aSample[i].anEq[iCol];
          nSum++;
        }
      }
      if( nDLt>nSum ){

        avgEq = (pFinal->anLt[iCol] - sumEq)/(nDLt - nSum);
      }
      if( avgEq==0 ) avgEq = 1;
      pIdx->aAvgEq[iCol] = avgEq;
    }
  }
}








>


<

>
>
>
>
>
>
|
>
>
>
>
|
>

|
|
|
<
>
|
|
>

|


|
>
|







1581
1582
1583
1584
1585
1586
1587
1588
1589
1590

1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608

1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
      ** sample columns except the last. The last is always set to 1, as
      ** once the trailing PK fields are considered all index keys are
      ** unique.  */
      nCol = pIdx->nSampleCol-1;
      pIdx->aAvgEq[nCol] = 1;
    }
    for(iCol=0; iCol<nCol; iCol++){
      int nSample = pIdx->nSample;
      int i;                    /* Used to iterate through samples */
      tRowcnt sumEq = 0;        /* Sum of the nEq values */

      tRowcnt avgEq = 0;
      tRowcnt nRow;             /* Number of rows in index */
      i64 nSum100 = 0;          /* Number of terms contributing to sumEq */
      i64 nDist100;             /* Number of distinct values in index */

      if( pIdx->aiRowEst==0 || pIdx->aiRowEst[iCol+1]==0 ){
        nRow = pFinal->anLt[iCol];
        nDist100 = (i64)100 * pFinal->anDLt[iCol];
        nSample--;
      }else{
        nRow = pIdx->aiRowEst[0];
        nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1];
      }

      /* Set nSum to the number of distinct (iCol+1) field prefixes that
      ** occur in the stat4 table for this index. Set sumEq to the sum of 
      ** the nEq values for column iCol for the same set (adding the value 
      ** only once where there exist duplicate prefixes).  */

      for(i=0; i<nSample; i++){
        if( i==(pIdx->nSample-1)
         || aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] 
        ){
          sumEq += aSample[i].anEq[iCol];
          nSum100 += 100;
        }
      }

      if( nDist100>nSum100 ){
        avgEq = ((i64)100 * (nRow - sumEq))/(nDist100 - nSum100);
      }
      if( avgEq==0 ) avgEq = 1;
      pIdx->aAvgEq[iCol] = avgEq;
    }
  }
}

1841
1842
1843
1844
1845
1846
1847





1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
  /* Load the statistics from the sqlite_stat4 table. */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  if( rc==SQLITE_OK ){
    int lookasideEnabled = db->lookaside.bEnabled;
    db->lookaside.bEnabled = 0;
    rc = loadStat4(db, sInfo.zDatabase);
    db->lookaside.bEnabled = lookasideEnabled;





  }
#endif

  if( rc==SQLITE_NOMEM ){
    db->mallocFailed = 1;
  }
  return rc;
}


#endif /* SQLITE_OMIT_ANALYZE */







>
>
>
>
>











1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
  /* Load the statistics from the sqlite_stat4 table. */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  if( rc==SQLITE_OK ){
    int lookasideEnabled = db->lookaside.bEnabled;
    db->lookaside.bEnabled = 0;
    rc = loadStat4(db, sInfo.zDatabase);
    db->lookaside.bEnabled = lookasideEnabled;
  }
  for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
    Index *pIdx = sqliteHashData(i);
    sqlite3_free(pIdx->aiRowEst);
    pIdx->aiRowEst = 0;
  }
#endif

  if( rc==SQLITE_NOMEM ){
    db->mallocFailed = 1;
  }
  return rc;
}


#endif /* SQLITE_OMIT_ANALYZE */

Changes to src/build.c.

431
432
433
434
435
436
437



438
439
440
441
442
443
444
#ifndef SQLITE_OMIT_ANALYZE
  sqlite3DeleteIndexSamples(db, p);
#endif
  if( db==0 || db->pnBytesFreed==0 ) sqlite3KeyInfoUnref(p->pKeyInfo);
  sqlite3ExprDelete(db, p->pPartIdxWhere);
  sqlite3DbFree(db, p->zColAff);
  if( p->isResized ) sqlite3DbFree(db, p->azColl);



  sqlite3DbFree(db, p);
}

/*
** For the index called zIdxName which is found in the database iDb,
** unlike that index from its Table then remove the index from
** the index hash table and free all memory structures associated







>
>
>







431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
#ifndef SQLITE_OMIT_ANALYZE
  sqlite3DeleteIndexSamples(db, p);
#endif
  if( db==0 || db->pnBytesFreed==0 ) sqlite3KeyInfoUnref(p->pKeyInfo);
  sqlite3ExprDelete(db, p->pPartIdxWhere);
  sqlite3DbFree(db, p->zColAff);
  if( p->isResized ) sqlite3DbFree(db, p->azColl);
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  sqlite3_free(p->aiRowEst);
#endif
  sqlite3DbFree(db, p);
}

/*
** For the index called zIdxName which is found in the database iDb,
** unlike that index from its Table then remove the index from
** the index hash table and free all memory structures associated

Changes to src/sqliteInt.h.

1797
1798
1799
1800
1801
1802
1803

1804
1805
1806
1807
1808
1809
1810
  unsigned isResized:1;    /* True if resizeIndexObject() has been called */
  unsigned isCovering:1;   /* True if this is a covering index */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  int nSample;             /* Number of elements in aSample[] */
  int nSampleCol;          /* Size of IndexSample.anEq[] and so on */
  tRowcnt *aAvgEq;         /* Average nEq values for keys not in aSample */
  IndexSample *aSample;    /* Samples of the left-most key */

#endif
};

/*
** Allowed values for Index.idxType
*/
#define SQLITE_IDXTYPE_APPDEF      0   /* Created using CREATE INDEX */







>







1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
  unsigned isResized:1;    /* True if resizeIndexObject() has been called */
  unsigned isCovering:1;   /* True if this is a covering index */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  int nSample;             /* Number of elements in aSample[] */
  int nSampleCol;          /* Size of IndexSample.anEq[] and so on */
  tRowcnt *aAvgEq;         /* Average nEq values for keys not in aSample */
  IndexSample *aSample;    /* Samples of the left-most key */
  tRowcnt *aiRowEst;       /* Non-logarithmic stat1 data for this table */
#endif
};

/*
** Allowed values for Index.idxType
*/
#define SQLITE_IDXTYPE_APPDEF      0   /* Created using CREATE INDEX */

Added test/analyzeD.test.











































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# 2005 July 22
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.
# This file implements tests for the ANALYZE command.
#
# $Id: analyze.test,v 1.9 2008/08/11 18:44:58 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set ::testprefix analyzeD

ifcapable {!stat4} {
  finish_test
  return
}


# Set up a table with the following properties:
#
#    * Contains 1000 rows.
#    * Column a contains even integers between 0 and 18, inclusive (so that
#      a=? for any such integer matches 100 rows).
#    * Column b contains integers between 0 and 9, inclusive.
#    * Column c contains integers between 0 and 199, inclusive (so that
#      for any such integer, c=? matches 5 rows).
#    * Then add 7 rows with a new value for "a" - 3001. The stat4 table will
#      not contain any samples with a=3001.
#
do_execsql_test 1.0 {
  CREATE TABLE t1(a, b, c);
}
do_test 1.1 {
  for {set i 1} {$i < 1000} {incr i} {
    set c [expr $i % 200]
    execsql { INSERT INTO t1(a, b, c) VALUES( 2*($i/100), $i%10, $c ) }
  }

  execsql {
    INSERT INTO t1 VALUES(3001, 3001, 3001);
    INSERT INTO t1 VALUES(3001, 3001, 3002);
    INSERT INTO t1 VALUES(3001, 3001, 3003);
    INSERT INTO t1 VALUES(3001, 3001, 3004);
    INSERT INTO t1 VALUES(3001, 3001, 3005);
    INSERT INTO t1 VALUES(3001, 3001, 3006);
    INSERT INTO t1 VALUES(3001, 3001, 3007);

    CREATE INDEX t1_ab ON t1(a, b);
    CREATE INDEX t1_c ON t1(c);

    ANALYZE;
  }
} {}

# With full ANALYZE data, SQLite sees that c=150 (5 rows) is better than
# a=3001 (7 rows).
#
do_eqp_test 1.2 {
  SELECT * FROM t1 WHERE a=3001 AND c=150;
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_c (c=?)}
}

do_test 1.3 {
  execsql { DELETE FROM sqlite_stat1 }
  db close
  sqlite3 db test.db
} {}

# Without stat1, because 3001 is larger than all samples in the stat4
# table, SQLite things that a=3001 matches just 1 row. So it (incorrectly)
# chooses it over the c=150 index (5 rows). Even with stat1 data, things
# worked this way before commit [e6f7f97dbc].
#
do_eqp_test 1.4 {
  SELECT * FROM t1 WHERE a=3001 AND c=150;
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_ab (a=?)}
}

do_test 1.5 {
  execsql { 
    UPDATE t1 SET a=13 WHERE a = 3001;
    ANALYZE;
  }
} {}

do_eqp_test 1.6 {
  SELECT * FROM t1 WHERE a=13 AND c=150;
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_c (c=?)}
}

do_test 1.7 {
  execsql { DELETE FROM sqlite_stat1 }
  db close
  sqlite3 db test.db
} {}

# Same test as 1.4, except this time the 7 rows that match the a=? condition 
# do not feature larger values than all rows in the stat4 table. So SQLite
# gets this right, even without stat1 data.
do_eqp_test 1.8 {
  SELECT * FROM t1 WHERE a=13 AND c=150;
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_c (c=?)}
}

finish_test