SQLite

Check-in [e6f7f97dbc]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve the accuracy of the estimates used when searching an index for values not present in any stat4 samples under some circumstances.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | stat4-avgeq
Files: files | file ages | folders
SHA1: e6f7f97dbc677c9f01b23142928c3fa7307c2fba
User & Date: dan 2014-10-03 19:16:53.018
Context
2014-10-03
19:29
Fix a division-by-zero error that might occur if the sqlite_stat1 table is corrupt. (check-in: f9c053b23e user: dan tags: stat4-avgeq)
19:16
Improve the accuracy of the estimates used when searching an index for values not present in any stat4 samples under some circumstances. (check-in: e6f7f97dbc user: dan tags: stat4-avgeq)
16:00
Add requirements marks on the sqlite3_db_status() interface implementation. Fix a typo in the documentation. Fix the new sqlite3_result_text64() routine so that it works correctly with an encoding parameter of SQLITE_UTF16. (check-in: d2fc322728 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/analyze.c.
1444
1445
1446
1447
1448
1449
1450
1451

1452
1453
1454
1455
1456

1457
1458
1459
1460
1461
1462
1463
    while( (c=z[0])>='0' && c<='9' ){
      v = v*10 + c - '0';
      z++;
    }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    if( aOut ){
      aOut[i] = v;
    }else

#else
    assert( aOut==0 );
    UNUSED_PARAMETER(aOut);
#endif
    {

      aLog[i] = sqlite3LogEst(v);
    }
    if( *z==' ' ) z++;
  }
#ifndef SQLITE_ENABLE_STAT3_OR_STAT4
  assert( pIndex!=0 );
#else







<
>




<
>







1444
1445
1446
1447
1448
1449
1450

1451
1452
1453
1454
1455

1456
1457
1458
1459
1460
1461
1462
1463
    while( (c=z[0])>='0' && c<='9' ){
      v = v*10 + c - '0';
      z++;
    }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    if( aOut ){
      aOut[i] = v;

    }
#else
    assert( aOut==0 );
    UNUSED_PARAMETER(aOut);
#endif

    if( aLog ){
      aLog[i] = sqlite3LogEst(v);
    }
    if( *z==' ' ) z++;
  }
#ifndef SQLITE_ENABLE_STAT3_OR_STAT4
  assert( pIndex!=0 );
#else
1512
1513
1514
1515
1516
1517
1518








1519
1520
1521
1522
1523
1524
1525
1526
1527
    pIndex = sqlite3PrimaryKeyIndex(pTable);
  }else{
    pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase);
  }
  z = argv[2];

  if( pIndex ){








    pIndex->bUnordered = 0;
    decodeIntArray((char*)z, pIndex->nKeyCol+1, 0, pIndex->aiRowLogEst, pIndex);
    if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
  }else{
    Index fakeIdx;
    fakeIdx.szIdxRow = pTable->szTabRow;
#ifdef SQLITE_ENABLE_COSTMULT
    fakeIdx.pTable = pTable;
#endif







>
>
>
>
>
>
>
>

|







1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
    pIndex = sqlite3PrimaryKeyIndex(pTable);
  }else{
    pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase);
  }
  z = argv[2];

  if( pIndex ){
    int nCol = pIndex->nKeyCol+1;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    tRowcnt * const aiRowEst = pIndex->aiRowEst = (tRowcnt*)sqlite3DbMallocZero(
        pInfo->db, sizeof(tRowcnt) * nCol
    );
#else
    tRowcnt * const aiRowEst = 0;
#endif
    pIndex->bUnordered = 0;
    decodeIntArray((char*)z, nCol, aiRowEst, pIndex->aiRowLogEst, pIndex);
    if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
  }else{
    Index fakeIdx;
    fakeIdx.szIdxRow = pTable->szTabRow;
#ifdef SQLITE_ENABLE_COSTMULT
    fakeIdx.pTable = pTable;
#endif
1572
1573
1574
1575
1576
1577
1578

1579
1580
1581
1582






1583




1584

1585
1586
1587
1588
1589

1590
1591

1592
1593
1594
1595
1596

1597
1598
1599
1600
1601
1602
1603
1604
      ** sample columns except the last. The last is always set to 1, as
      ** once the trailing PK fields are considered all index keys are
      ** unique.  */
      nCol = pIdx->nSampleCol-1;
      pIdx->aAvgEq[nCol] = 1;
    }
    for(iCol=0; iCol<nCol; iCol++){

      int i;                    /* Used to iterate through samples */
      tRowcnt sumEq = 0;        /* Sum of the nEq values */
      tRowcnt nSum = 0;         /* Number of terms contributing to sumEq */
      tRowcnt avgEq = 0;






      tRowcnt nDLt = pFinal->anDLt[iCol];






      /* Set nSum to the number of distinct (iCol+1) field prefixes that
      ** occur in the stat4 table for this index before pFinal. Set
      ** sumEq to the sum of the nEq values for column iCol for the same
      ** set (adding the value only once where there exist duplicate 
      ** prefixes).  */

      for(i=0; i<(pIdx->nSample-1); i++){
        if( aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] ){

          sumEq += aSample[i].anEq[iCol];
          nSum++;
        }
      }
      if( nDLt>nSum ){

        avgEq = (pFinal->anLt[iCol] - sumEq)/(nDLt - nSum);
      }
      if( avgEq==0 ) avgEq = 1;
      pIdx->aAvgEq[iCol] = avgEq;
    }
  }
}








>


<

>
>
>
>
>
>
|
>
>
>
>
|
>

|
|
|
<
>
|
|
>

|


|
>
|







1580
1581
1582
1583
1584
1585
1586
1587
1588
1589

1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607

1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
      ** sample columns except the last. The last is always set to 1, as
      ** once the trailing PK fields are considered all index keys are
      ** unique.  */
      nCol = pIdx->nSampleCol-1;
      pIdx->aAvgEq[nCol] = 1;
    }
    for(iCol=0; iCol<nCol; iCol++){
      int nSample = pIdx->nSample;
      int i;                    /* Used to iterate through samples */
      tRowcnt sumEq = 0;        /* Sum of the nEq values */

      tRowcnt avgEq = 0;
      tRowcnt nRow;             /* Number of rows in index */
      i64 nSum100 = 0;          /* Number of terms contributing to sumEq */
      i64 nDist100;             /* Number of distinct values in index */

      if( pIdx->aiRowEst==0 ){
        nRow = pFinal->anLt[iCol];
        nDist100 = (i64)100 * pFinal->anDLt[iCol];
        nSample--;
      }else{
        nRow = pIdx->aiRowEst[0];
        nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1];
      }

      /* Set nSum to the number of distinct (iCol+1) field prefixes that
      ** occur in the stat4 table for this index. Set sumEq to the sum of 
      ** the nEq values for column iCol for the same set (adding the value 
      ** only once where there exist duplicate prefixes).  */

      for(i=0; i<nSample; i++){
        if( i==(pIdx->nSample-1)
         || aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] 
        ){
          sumEq += aSample[i].anEq[iCol];
          nSum100 += 100;
        }
      }

      if( nDist100>nSum100 ){
        avgEq = ((i64)100 * (nRow - sumEq))/(nDist100 - nSum100);
      }
      if( avgEq==0 ) avgEq = 1;
      pIdx->aAvgEq[iCol] = avgEq;
    }
  }
}

1841
1842
1843
1844
1845
1846
1847





1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
  /* Load the statistics from the sqlite_stat4 table. */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  if( rc==SQLITE_OK ){
    int lookasideEnabled = db->lookaside.bEnabled;
    db->lookaside.bEnabled = 0;
    rc = loadStat4(db, sInfo.zDatabase);
    db->lookaside.bEnabled = lookasideEnabled;





  }
#endif

  if( rc==SQLITE_NOMEM ){
    db->mallocFailed = 1;
  }
  return rc;
}


#endif /* SQLITE_OMIT_ANALYZE */







>
>
>
>
>











1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
  /* Load the statistics from the sqlite_stat4 table. */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  if( rc==SQLITE_OK ){
    int lookasideEnabled = db->lookaside.bEnabled;
    db->lookaside.bEnabled = 0;
    rc = loadStat4(db, sInfo.zDatabase);
    db->lookaside.bEnabled = lookasideEnabled;
  }
  for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
    Index *pIdx = sqliteHashData(i);
    sqlite3DbFree(db, pIdx->aiRowEst);
    pIdx->aiRowEst = 0;
  }
#endif

  if( rc==SQLITE_NOMEM ){
    db->mallocFailed = 1;
  }
  return rc;
}


#endif /* SQLITE_OMIT_ANALYZE */
Changes to src/sqliteInt.h.
1797
1798
1799
1800
1801
1802
1803

1804
1805
1806
1807
1808
1809
1810
  unsigned isResized:1;    /* True if resizeIndexObject() has been called */
  unsigned isCovering:1;   /* True if this is a covering index */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  int nSample;             /* Number of elements in aSample[] */
  int nSampleCol;          /* Size of IndexSample.anEq[] and so on */
  tRowcnt *aAvgEq;         /* Average nEq values for keys not in aSample */
  IndexSample *aSample;    /* Samples of the left-most key */

#endif
};

/*
** Allowed values for Index.idxType
*/
#define SQLITE_IDXTYPE_APPDEF      0   /* Created using CREATE INDEX */







>







1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
  unsigned isResized:1;    /* True if resizeIndexObject() has been called */
  unsigned isCovering:1;   /* True if this is a covering index */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  int nSample;             /* Number of elements in aSample[] */
  int nSampleCol;          /* Size of IndexSample.anEq[] and so on */
  tRowcnt *aAvgEq;         /* Average nEq values for keys not in aSample */
  IndexSample *aSample;    /* Samples of the left-most key */
  tRowcnt *aiRowEst;       /* Non-logarithmic stat1 data for this table */
#endif
};

/*
** Allowed values for Index.idxType
*/
#define SQLITE_IDXTYPE_APPDEF      0   /* Created using CREATE INDEX */