/ Check-in [e6f7f97d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve the accuracy of the estimates used when searching an index for values not present in any stat4 samples under some circumstances.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | stat4-avgeq
Files: files | file ages | folders
SHA1: e6f7f97dbc677c9f01b23142928c3fa7307c2fba
User & Date: dan 2014-10-03 19:16:53
Context
2014-10-03
19:29
Fix a division-by-zero error that might occur if the sqlite_stat1 table is corrupt. check-in: f9c053b2 user: dan tags: stat4-avgeq
19:16
Improve the accuracy of the estimates used when searching an index for values not present in any stat4 samples under some circumstances. check-in: e6f7f97d user: dan tags: stat4-avgeq
16:00
Add requirements marks on the sqlite3_db_status() interface implementation. Fix a typo in the documentation. Fix the new sqlite3_result_text64() routine so that it works correctly with an encoding parameter of SQLITE_UTF16. check-in: d2fc3227 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

  1444   1444       while( (c=z[0])>='0' && c<='9' ){
  1445   1445         v = v*10 + c - '0';
  1446   1446         z++;
  1447   1447       }
  1448   1448   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  1449   1449       if( aOut ){
  1450   1450         aOut[i] = v;
  1451         -    }else
         1451  +    }
  1452   1452   #else
  1453   1453       assert( aOut==0 );
  1454   1454       UNUSED_PARAMETER(aOut);
  1455   1455   #endif
  1456         -    {
         1456  +    if( aLog ){
  1457   1457         aLog[i] = sqlite3LogEst(v);
  1458   1458       }
  1459   1459       if( *z==' ' ) z++;
  1460   1460     }
  1461   1461   #ifndef SQLITE_ENABLE_STAT3_OR_STAT4
  1462   1462     assert( pIndex!=0 );
  1463   1463   #else
................................................................................
  1512   1512       pIndex = sqlite3PrimaryKeyIndex(pTable);
  1513   1513     }else{
  1514   1514       pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase);
  1515   1515     }
  1516   1516     z = argv[2];
  1517   1517   
  1518   1518     if( pIndex ){
         1519  +    int nCol = pIndex->nKeyCol+1;
         1520  +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
         1521  +    tRowcnt * const aiRowEst = pIndex->aiRowEst = (tRowcnt*)sqlite3DbMallocZero(
         1522  +        pInfo->db, sizeof(tRowcnt) * nCol
         1523  +    );
         1524  +#else
         1525  +    tRowcnt * const aiRowEst = 0;
         1526  +#endif
  1519   1527       pIndex->bUnordered = 0;
  1520         -    decodeIntArray((char*)z, pIndex->nKeyCol+1, 0, pIndex->aiRowLogEst, pIndex);
         1528  +    decodeIntArray((char*)z, nCol, aiRowEst, pIndex->aiRowLogEst, pIndex);
  1521   1529       if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
  1522   1530     }else{
  1523   1531       Index fakeIdx;
  1524   1532       fakeIdx.szIdxRow = pTable->szTabRow;
  1525   1533   #ifdef SQLITE_ENABLE_COSTMULT
  1526   1534       fakeIdx.pTable = pTable;
  1527   1535   #endif
................................................................................
  1572   1580         ** sample columns except the last. The last is always set to 1, as
  1573   1581         ** once the trailing PK fields are considered all index keys are
  1574   1582         ** unique.  */
  1575   1583         nCol = pIdx->nSampleCol-1;
  1576   1584         pIdx->aAvgEq[nCol] = 1;
  1577   1585       }
  1578   1586       for(iCol=0; iCol<nCol; iCol++){
         1587  +      int nSample = pIdx->nSample;
  1579   1588         int i;                    /* Used to iterate through samples */
  1580   1589         tRowcnt sumEq = 0;        /* Sum of the nEq values */
  1581         -      tRowcnt nSum = 0;         /* Number of terms contributing to sumEq */
  1582   1590         tRowcnt avgEq = 0;
  1583         -      tRowcnt nDLt = pFinal->anDLt[iCol];
         1591  +      tRowcnt nRow;             /* Number of rows in index */
         1592  +      i64 nSum100 = 0;          /* Number of terms contributing to sumEq */
         1593  +      i64 nDist100;             /* Number of distinct values in index */
         1594  +
         1595  +      if( pIdx->aiRowEst==0 ){
         1596  +        nRow = pFinal->anLt[iCol];
         1597  +        nDist100 = (i64)100 * pFinal->anDLt[iCol];
         1598  +        nSample--;
         1599  +      }else{
         1600  +        nRow = pIdx->aiRowEst[0];
         1601  +        nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1];
         1602  +      }
  1584   1603   
  1585   1604         /* Set nSum to the number of distinct (iCol+1) field prefixes that
  1586         -      ** occur in the stat4 table for this index before pFinal. Set
  1587         -      ** sumEq to the sum of the nEq values for column iCol for the same
  1588         -      ** set (adding the value only once where there exist duplicate 
  1589         -      ** prefixes).  */
  1590         -      for(i=0; i<(pIdx->nSample-1); i++){
  1591         -        if( aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] ){
         1605  +      ** occur in the stat4 table for this index. Set sumEq to the sum of 
         1606  +      ** the nEq values for column iCol for the same set (adding the value 
         1607  +      ** only once where there exist duplicate prefixes).  */
         1608  +      for(i=0; i<nSample; i++){
         1609  +        if( i==(pIdx->nSample-1)
         1610  +         || aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] 
         1611  +        ){
  1592   1612             sumEq += aSample[i].anEq[iCol];
  1593         -          nSum++;
         1613  +          nSum100 += 100;
  1594   1614           }
  1595   1615         }
  1596         -      if( nDLt>nSum ){
  1597         -        avgEq = (pFinal->anLt[iCol] - sumEq)/(nDLt - nSum);
         1616  +
         1617  +      if( nDist100>nSum100 ){
         1618  +        avgEq = ((i64)100 * (nRow - sumEq))/(nDist100 - nSum100);
  1598   1619         }
  1599   1620         if( avgEq==0 ) avgEq = 1;
  1600   1621         pIdx->aAvgEq[iCol] = avgEq;
  1601   1622       }
  1602   1623     }
  1603   1624   }
  1604   1625   
................................................................................
  1841   1862     /* Load the statistics from the sqlite_stat4 table. */
  1842   1863   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  1843   1864     if( rc==SQLITE_OK ){
  1844   1865       int lookasideEnabled = db->lookaside.bEnabled;
  1845   1866       db->lookaside.bEnabled = 0;
  1846   1867       rc = loadStat4(db, sInfo.zDatabase);
  1847   1868       db->lookaside.bEnabled = lookasideEnabled;
         1869  +  }
         1870  +  for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
         1871  +    Index *pIdx = sqliteHashData(i);
         1872  +    sqlite3DbFree(db, pIdx->aiRowEst);
         1873  +    pIdx->aiRowEst = 0;
  1848   1874     }
  1849   1875   #endif
  1850   1876   
  1851   1877     if( rc==SQLITE_NOMEM ){
  1852   1878       db->mallocFailed = 1;
  1853   1879     }
  1854   1880     return rc;
  1855   1881   }
  1856   1882   
  1857   1883   
  1858   1884   #endif /* SQLITE_OMIT_ANALYZE */

Changes to src/sqliteInt.h.

  1797   1797     unsigned isResized:1;    /* True if resizeIndexObject() has been called */
  1798   1798     unsigned isCovering:1;   /* True if this is a covering index */
  1799   1799   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  1800   1800     int nSample;             /* Number of elements in aSample[] */
  1801   1801     int nSampleCol;          /* Size of IndexSample.anEq[] and so on */
  1802   1802     tRowcnt *aAvgEq;         /* Average nEq values for keys not in aSample */
  1803   1803     IndexSample *aSample;    /* Samples of the left-most key */
         1804  +  tRowcnt *aiRowEst;       /* Non-logarithmic stat1 data for this table */
  1804   1805   #endif
  1805   1806   };
  1806   1807   
  1807   1808   /*
  1808   1809   ** Allowed values for Index.idxType
  1809   1810   */
  1810   1811   #define SQLITE_IDXTYPE_APPDEF      0   /* Created using CREATE INDEX */