/ Check-in [90e36676]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modify the way the costs of various query plans are estimated. If the user supplies a likelihood() value (or equivalent) on an indexed WHERE constraint, use it to estimate the number of index rows visited.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 90e36676476e8db00658772e6c938242f766d306
User & Date: dan 2014-04-30 15:22:25
References
2019-08-15
14:28 New ticket [e4598ecb] Division by zero in the query planner.. artifact: d93508fc user: drh
2015-04-11
11:06 New ticket [7b4fee9f] Expressions like (a IS NULL AND b = ?) optimized by a UNIQUE index matching a single row only. artifact: a72fb3c9 user: dan
Context
2014-04-30
18:11
Fix a problem in calculating the costs of "OR" scans. check-in: 9bbca48b user: dan tags: trunk
15:22
Modify the way the costs of various query plans are estimated. If the user supplies a likelihood() value (or equivalent) on an indexed WHERE constraint, use it to estimate the number of index rows visited. check-in: 90e36676 user: dan tags: trunk
15:00
Add text to the header comment of whereLoopAddBtree() describing how the costs of various b-tree loops are estimated. Closed-Leaf check-in: 05e6e16c user: dan tags: experimental-costs
2014-04-28
17:56
Add the sqlite3_rtree_query_callback() API to the RTree virtual table. (Cherrypick from the sessions branch.) check-in: af2cbe64 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

  1367   1367   ** list of space separated integers. Read the first nOut of these into
  1368   1368   ** the array aOut[].
  1369   1369   */
  1370   1370   static void decodeIntArray(
  1371   1371     char *zIntArray,       /* String containing int array to decode */
  1372   1372     int nOut,              /* Number of slots in aOut[] */
  1373   1373     tRowcnt *aOut,         /* Store integers here */
         1374  +  LogEst *aLog,          /* Or, if aOut==0, here */
  1374   1375     Index *pIndex          /* Handle extra flags for this index, if not NULL */
  1375   1376   ){
  1376   1377     char *z = zIntArray;
  1377   1378     int c;
  1378   1379     int i;
  1379   1380     tRowcnt v;
  1380   1381   
................................................................................
  1385   1386   #endif
  1386   1387     for(i=0; *z && i<nOut; i++){
  1387   1388       v = 0;
  1388   1389       while( (c=z[0])>='0' && c<='9' ){
  1389   1390         v = v*10 + c - '0';
  1390   1391         z++;
  1391   1392       }
  1392         -    aOut[i] = v;
         1393  +    if( aOut ){
         1394  +      aOut[i] = v;
         1395  +    }else{
         1396  +      aLog[i] = sqlite3LogEst(v);
         1397  +    }
  1393   1398       if( *z==' ' ) z++;
  1394   1399     }
  1395   1400   #ifndef SQLITE_ENABLE_STAT3_OR_STAT4
  1396   1401     assert( pIndex!=0 );
  1397   1402   #else
  1398   1403     if( pIndex )
  1399   1404   #endif
................................................................................
  1441   1446       pIndex = sqlite3PrimaryKeyIndex(pTable);
  1442   1447     }else{
  1443   1448       pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase);
  1444   1449     }
  1445   1450     z = argv[2];
  1446   1451   
  1447   1452     if( pIndex ){
  1448         -    decodeIntArray((char*)z, pIndex->nKeyCol+1, pIndex->aiRowEst, pIndex);
  1449         -    if( pIndex->pPartIdxWhere==0 ) pTable->nRowEst = pIndex->aiRowEst[0];
         1453  +    decodeIntArray((char*)z, pIndex->nKeyCol+1, 0, pIndex->aiRowLogEst, pIndex);
         1454  +    if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
  1450   1455     }else{
  1451   1456       Index fakeIdx;
  1452   1457       fakeIdx.szIdxRow = pTable->szTabRow;
  1453         -    decodeIntArray((char*)z, 1, &pTable->nRowEst, &fakeIdx);
         1458  +    decodeIntArray((char*)z, 1, 0, &pTable->nRowLogEst, &fakeIdx);
  1454   1459       pTable->szTabRow = fakeIdx.szIdxRow;
  1455   1460     }
  1456   1461   
  1457   1462     return 0;
  1458   1463   }
  1459   1464   
  1460   1465   /*
................................................................................
  1638   1643       nCol = pIdx->nSampleCol;
  1639   1644       if( bStat3 && nCol>1 ) continue;
  1640   1645       if( pIdx!=pPrevIdx ){
  1641   1646         initAvgEq(pPrevIdx);
  1642   1647         pPrevIdx = pIdx;
  1643   1648       }
  1644   1649       pSample = &pIdx->aSample[pIdx->nSample];
  1645         -    decodeIntArray((char*)sqlite3_column_text(pStmt,1), nCol, pSample->anEq, 0);
  1646         -    decodeIntArray((char*)sqlite3_column_text(pStmt,2), nCol, pSample->anLt, 0);
  1647         -    decodeIntArray((char*)sqlite3_column_text(pStmt,3), nCol, pSample->anDLt,0);
         1650  +    decodeIntArray((char*)sqlite3_column_text(pStmt,1),nCol,pSample->anEq,0,0);
         1651  +    decodeIntArray((char*)sqlite3_column_text(pStmt,2),nCol,pSample->anLt,0,0);
         1652  +    decodeIntArray((char*)sqlite3_column_text(pStmt,3),nCol,pSample->anDLt,0,0);
  1648   1653   
  1649   1654       /* Take a copy of the sample. Add two 0x00 bytes the end of the buffer.
  1650   1655       ** This is in case the sample record is corrupted. In that case, the
  1651   1656       ** sqlite3VdbeRecordCompare() may read up to two varints past the
  1652   1657       ** end of the allocated buffer before it realizes it is dealing with
  1653   1658       ** a corrupt record. Adding the two 0x00 bytes prevents this from causing
  1654   1659       ** a buffer overread.  */

Changes to src/build.c.

   901    901       pParse->nErr++;
   902    902       goto begin_table_error;
   903    903     }
   904    904     pTable->zName = zName;
   905    905     pTable->iPKey = -1;
   906    906     pTable->pSchema = db->aDb[iDb].pSchema;
   907    907     pTable->nRef = 1;
   908         -  pTable->nRowEst = 1048576;
          908  +  pTable->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
   909    909     assert( pParse->pNewTable==0 );
   910    910     pParse->pNewTable = pTable;
   911    911   
   912    912     /* If this is the magic sqlite_sequence table used by autoincrement,
   913    913     ** then record a pointer to this table in the main database structure
   914    914     ** so that INSERT can find the table easily.
   915    915     */
................................................................................
  2726   2726     char **ppExtra       /* Pointer to the "extra" space */
  2727   2727   ){
  2728   2728     Index *p;            /* Allocated index object */
  2729   2729     int nByte;           /* Bytes of space for Index object + arrays */
  2730   2730   
  2731   2731     nByte = ROUND8(sizeof(Index)) +              /* Index structure  */
  2732   2732             ROUND8(sizeof(char*)*nCol) +         /* Index.azColl     */
  2733         -          ROUND8(sizeof(tRowcnt)*(nCol+1) +    /* Index.aiRowEst   */
         2733  +          ROUND8(sizeof(LogEst)*(nCol+1) +     /* Index.aiRowLogEst   */
  2734   2734                    sizeof(i16)*nCol +            /* Index.aiColumn   */
  2735   2735                    sizeof(u8)*nCol);             /* Index.aSortOrder */
  2736   2736     p = sqlite3DbMallocZero(db, nByte + nExtra);
  2737   2737     if( p ){
  2738   2738       char *pExtra = ((char*)p)+ROUND8(sizeof(Index));
  2739         -    p->azColl = (char**)pExtra;      pExtra += ROUND8(sizeof(char*)*nCol);
  2740         -    p->aiRowEst = (tRowcnt*)pExtra;  pExtra += sizeof(tRowcnt)*(nCol+1);
  2741         -    p->aiColumn = (i16*)pExtra;      pExtra += sizeof(i16)*nCol;
         2739  +    p->azColl = (char**)pExtra;       pExtra += ROUND8(sizeof(char*)*nCol);
         2740  +    p->aiRowLogEst = (LogEst*)pExtra; pExtra += sizeof(LogEst)*(nCol+1);
         2741  +    p->aiColumn = (i16*)pExtra;       pExtra += sizeof(i16)*nCol;
  2742   2742       p->aSortOrder = (u8*)pExtra;
  2743   2743       p->nColumn = nCol;
  2744   2744       p->nKeyCol = nCol - 1;
  2745   2745       *ppExtra = ((char*)p) + nByte;
  2746   2746     }
  2747   2747     return p;
  2748   2748   }
................................................................................
  2964   2964     nName = sqlite3Strlen30(zName);
  2965   2965     nExtraCol = pPk ? pPk->nKeyCol : 1;
  2966   2966     pIndex = sqlite3AllocateIndexObject(db, pList->nExpr + nExtraCol,
  2967   2967                                         nName + nExtra + 1, &zExtra);
  2968   2968     if( db->mallocFailed ){
  2969   2969       goto exit_create_index;
  2970   2970     }
  2971         -  assert( EIGHT_BYTE_ALIGNMENT(pIndex->aiRowEst) );
         2971  +  assert( EIGHT_BYTE_ALIGNMENT(pIndex->aiRowLogEst) );
  2972   2972     assert( EIGHT_BYTE_ALIGNMENT(pIndex->azColl) );
  2973   2973     pIndex->zName = zExtra;
  2974   2974     zExtra += nName + 1;
  2975   2975     memcpy(pIndex->zName, zName, nName+1);
  2976   2976     pIndex->pTable = pTab;
  2977   2977     pIndex->onError = (u8)onError;
  2978   2978     pIndex->uniqNotNull = onError!=OE_None;
................................................................................
  3245   3245   ** Fill the Index.aiRowEst[] array with default information - information
  3246   3246   ** to be used when we have not run the ANALYZE command.
  3247   3247   **
  3248   3248   ** aiRowEst[0] is suppose to contain the number of elements in the index.
  3249   3249   ** Since we do not know, guess 1 million.  aiRowEst[1] is an estimate of the
  3250   3250   ** number of rows in the table that match any particular value of the
  3251   3251   ** first column of the index.  aiRowEst[2] is an estimate of the number
  3252         -** of rows that match any particular combiniation of the first 2 columns
         3252  +** of rows that match any particular combination of the first 2 columns
  3253   3253   ** of the index.  And so forth.  It must always be the case that
  3254   3254   *
  3255   3255   **           aiRowEst[N]<=aiRowEst[N-1]
  3256   3256   **           aiRowEst[N]>=1
  3257   3257   **
  3258   3258   ** Apart from that, we have little to go on besides intuition as to
  3259   3259   ** how aiRowEst[] should be initialized.  The numbers generated here
  3260   3260   ** are based on typical values found in actual indices.
  3261   3261   */
  3262   3262   void sqlite3DefaultRowEst(Index *pIdx){
  3263         -  tRowcnt *a = pIdx->aiRowEst;
         3263  +  /*                10,  9,  8,  7,  6 */
         3264  +  LogEst aVal[] = { 33, 32, 30, 28, 26 };
         3265  +  LogEst *a = pIdx->aiRowLogEst;
         3266  +  int nCopy = MIN(ArraySize(aVal), pIdx->nKeyCol);
  3264   3267     int i;
  3265         -  tRowcnt n;
  3266         -  assert( a!=0 );
  3267         -  a[0] = pIdx->pTable->nRowEst;
  3268         -  if( a[0]<10 ) a[0] = 10;
  3269         -  n = 10;
  3270         -  for(i=1; i<=pIdx->nKeyCol; i++){
  3271         -    a[i] = n;
  3272         -    if( n>5 ) n--;
         3268  +
         3269  +  /* Set the first entry (number of rows in the index) to the estimated 
         3270  +  ** number of rows in the table. Or 10, if the estimated number of rows 
         3271  +  ** in the table is less than that.  */
         3272  +  a[0] = pIdx->pTable->nRowLogEst;
         3273  +  if( a[0]<33 ) a[0] = 33;        assert( 33==sqlite3LogEst(10) );
         3274  +
         3275  +  /* Estimate that a[1] is 10, a[2] is 9, a[3] is 8, a[4] is 7, a[5] is
         3276  +  ** 6 and each subsequent value (if any) is 5.  */
         3277  +  memcpy(&a[1], aVal, nCopy*sizeof(LogEst));
         3278  +  for(i=nCopy+1; i<=pIdx->nKeyCol; i++){
         3279  +    a[i] = 23;                    assert( 23==sqlite3LogEst(5) );
  3273   3280     }
  3274         -  if( pIdx->onError!=OE_None ){
  3275         -    a[pIdx->nKeyCol] = 1;
  3276         -  }
         3281  +
         3282  +  assert( 0==sqlite3LogEst(1) );
         3283  +  if( pIdx->onError!=OE_None ) a[pIdx->nKeyCol] = 0;
  3277   3284   }
  3278   3285   
  3279   3286   /*
  3280   3287   ** This routine will drop an existing named index.  This routine
  3281   3288   ** implements the DROP INDEX statement.
  3282   3289   */
  3283   3290   void sqlite3DropIndex(Parse *pParse, SrcList *pName, int ifExists){

Changes to src/pragma.c.

  1484   1484       sqlite3VdbeSetColName(v, 3, COLNAME_NAME, "height", SQLITE_STATIC);
  1485   1485       for(i=sqliteHashFirst(&pDb->pSchema->tblHash); i; i=sqliteHashNext(i)){
  1486   1486         Table *pTab = sqliteHashData(i);
  1487   1487         sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, pTab->zName, 0);
  1488   1488         sqlite3VdbeAddOp2(v, OP_Null, 0, 2);
  1489   1489         sqlite3VdbeAddOp2(v, OP_Integer,
  1490   1490                              (int)sqlite3LogEstToInt(pTab->szTabRow), 3);
  1491         -      sqlite3VdbeAddOp2(v, OP_Integer, (int)pTab->nRowEst, 4);
         1491  +      sqlite3VdbeAddOp2(v, OP_Integer, 
         1492  +          (int)sqlite3LogEstToInt(pTab->nRowLogEst), 4);
  1492   1493         sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4);
  1493   1494         for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
  1494   1495           sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, pIdx->zName, 0);
  1495   1496           sqlite3VdbeAddOp2(v, OP_Integer,
  1496   1497                                (int)sqlite3LogEstToInt(pIdx->szIdxRow), 3);
  1497         -        sqlite3VdbeAddOp2(v, OP_Integer, (int)pIdx->aiRowEst[0], 4);
         1498  +        sqlite3VdbeAddOp2(v, OP_Integer, 
         1499  +            (int)sqlite3LogEstToInt(pIdx->aiRowLogEst[0]), 4);
  1498   1500           sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4);
  1499   1501         }
  1500   1502       }
  1501   1503     }
  1502   1504     break;
  1503   1505   
  1504   1506     case PragTyp_INDEX_INFO: if( zRight ){

Changes to src/select.c.

  1686   1686       return 0;
  1687   1687     }
  1688   1688     /* The sqlite3ResultSetOfSelect() is only used n contexts where lookaside
  1689   1689     ** is disabled */
  1690   1690     assert( db->lookaside.bEnabled==0 );
  1691   1691     pTab->nRef = 1;
  1692   1692     pTab->zName = 0;
  1693         -  pTab->nRowEst = 1048576;
         1693  +  pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
  1694   1694     selectColumnsFromExprList(pParse, pSelect->pEList, &pTab->nCol, &pTab->aCol);
  1695   1695     selectAddColumnTypeAndCollation(pParse, pTab, pSelect);
  1696   1696     pTab->iPKey = -1;
  1697   1697     if( db->mallocFailed ){
  1698   1698       sqlite3DeleteTable(db, pTab);
  1699   1699       return 0;
  1700   1700     }
................................................................................
  3825   3825   
  3826   3826       assert( pFrom->pTab==0 );
  3827   3827       pFrom->pTab = pTab = sqlite3DbMallocZero(db, sizeof(Table));
  3828   3828       if( pTab==0 ) return WRC_Abort;
  3829   3829       pTab->nRef = 1;
  3830   3830       pTab->zName = sqlite3DbStrDup(db, pCte->zName);
  3831   3831       pTab->iPKey = -1;
  3832         -    pTab->nRowEst = 1048576;
         3832  +    pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
  3833   3833       pTab->tabFlags |= TF_Ephemeral;
  3834   3834       pFrom->pSelect = sqlite3SelectDup(db, pCte->pSelect, 0);
  3835   3835       if( db->mallocFailed ) return SQLITE_NOMEM;
  3836   3836       assert( pFrom->pSelect );
  3837   3837   
  3838   3838       /* Check if this is a recursive CTE. */
  3839   3839       pSel = pFrom->pSelect;
................................................................................
  4001   4001         pFrom->pTab = pTab = sqlite3DbMallocZero(db, sizeof(Table));
  4002   4002         if( pTab==0 ) return WRC_Abort;
  4003   4003         pTab->nRef = 1;
  4004   4004         pTab->zName = sqlite3MPrintf(db, "sqlite_sq_%p", (void*)pTab);
  4005   4005         while( pSel->pPrior ){ pSel = pSel->pPrior; }
  4006   4006         selectColumnsFromExprList(pParse, pSel->pEList, &pTab->nCol, &pTab->aCol);
  4007   4007         pTab->iPKey = -1;
  4008         -      pTab->nRowEst = 1048576;
         4008  +      pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
  4009   4009         pTab->tabFlags |= TF_Ephemeral;
  4010   4010   #endif
  4011   4011       }else{
  4012   4012         /* An ordinary table or view name in the FROM clause */
  4013   4013         assert( pFrom->pTab==0 );
  4014   4014         pFrom->pTab = pTab = sqlite3LocateTableItem(pParse, 0, pFrom);
  4015   4015         if( pTab==0 ) return WRC_Abort;
................................................................................
  4651   4651         pItem->regReturn = ++pParse->nMem;
  4652   4652         sqlite3VdbeAddOp3(v, OP_InitCoroutine, pItem->regReturn, 0, addrTop);
  4653   4653         VdbeComment((v, "%s", pItem->pTab->zName));
  4654   4654         pItem->addrFillSub = addrTop;
  4655   4655         sqlite3SelectDestInit(&dest, SRT_Coroutine, pItem->regReturn);
  4656   4656         explainSetInteger(pItem->iSelectId, (u8)pParse->iNextSelectId);
  4657   4657         sqlite3Select(pParse, pSub, &dest);
  4658         -      pItem->pTab->nRowEst = (unsigned)pSub->nSelectRow;
         4658  +      pItem->pTab->nRowLogEst = sqlite3LogEst(pSub->nSelectRow);
  4659   4659         pItem->viaCoroutine = 1;
  4660   4660         pItem->regResult = dest.iSdst;
  4661   4661         sqlite3VdbeAddOp1(v, OP_EndCoroutine, pItem->regReturn);
  4662   4662         sqlite3VdbeJumpHere(v, addrTop-1);
  4663   4663         sqlite3ClearTempRegCache(pParse);
  4664   4664       }else{
  4665   4665         /* Generate a subroutine that will fill an ephemeral table with
................................................................................
  4682   4682           VdbeComment((v, "materialize \"%s\"", pItem->pTab->zName));
  4683   4683         }else{
  4684   4684           VdbeNoopComment((v, "materialize \"%s\"", pItem->pTab->zName));
  4685   4685         }
  4686   4686         sqlite3SelectDestInit(&dest, SRT_EphemTab, pItem->iCursor);
  4687   4687         explainSetInteger(pItem->iSelectId, (u8)pParse->iNextSelectId);
  4688   4688         sqlite3Select(pParse, pSub, &dest);
  4689         -      pItem->pTab->nRowEst = (unsigned)pSub->nSelectRow;
         4689  +      pItem->pTab->nRowLogEst = sqlite3LogEst(pSub->nSelectRow);
  4690   4690         if( onceAddr ) sqlite3VdbeJumpHere(v, onceAddr);
  4691   4691         retAddr = sqlite3VdbeAddOp1(v, OP_Return, pItem->regReturn);
  4692   4692         VdbeComment((v, "end %s", pItem->pTab->zName));
  4693   4693         sqlite3VdbeChangeP1(v, topAddr, retAddr);
  4694   4694         sqlite3ClearTempRegCache(pParse);
  4695   4695       }
  4696   4696       if( /*pParse->nErr ||*/ db->mallocFailed ){

Changes to src/sqliteInt.h.

   521    521   
   522    522   /*
   523    523   ** Estimated quantities used for query planning are stored as 16-bit
   524    524   ** logarithms.  For quantity X, the value stored is 10*log2(X).  This
   525    525   ** gives a possible range of values of approximately 1.0e986 to 1e-986.
   526    526   ** But the allowed values are "grainy".  Not every value is representable.
   527    527   ** For example, quantities 16 and 17 are both represented by a LogEst
   528         -** of 40.  However, since LogEst quantatites are suppose to be estimates,
          528  +** of 40.  However, since LogEst quantaties are suppose to be estimates,
   529    529   ** not exact values, this imprecision is not a problem.
   530    530   **
   531         -** "LogEst" is short for "Logarithimic Estimate".
          531  +** "LogEst" is short for "Logarithmic Estimate".
   532    532   **
   533    533   ** Examples:
   534    534   **      1 -> 0              20 -> 43          10000 -> 132
   535    535   **      2 -> 10             25 -> 46          25000 -> 146
   536    536   **      3 -> 16            100 -> 66        1000000 -> 199
   537    537   **      4 -> 20           1000 -> 99        1048576 -> 200
   538    538   **     10 -> 33           1024 -> 100    4294967296 -> 320
................................................................................
  1467   1467     Index *pIndex;       /* List of SQL indexes on this table. */
  1468   1468     Select *pSelect;     /* NULL for tables.  Points to definition if a view. */
  1469   1469     FKey *pFKey;         /* Linked list of all foreign keys in this table */
  1470   1470     char *zColAff;       /* String defining the affinity of each column */
  1471   1471   #ifndef SQLITE_OMIT_CHECK
  1472   1472     ExprList *pCheck;    /* All CHECK constraints */
  1473   1473   #endif
  1474         -  tRowcnt nRowEst;     /* Estimated rows in table - from sqlite_stat1 table */
         1474  +  LogEst nRowLogEst;   /* Estimated rows in table - from sqlite_stat1 table */
  1475   1475     int tnum;            /* Root BTree node for this table (see note above) */
  1476   1476     i16 iPKey;           /* If not negative, use aCol[iPKey] as the primary key */
  1477   1477     i16 nCol;            /* Number of columns in this table */
  1478   1478     u16 nRef;            /* Number of pointers to this Table */
  1479   1479     LogEst szTabRow;     /* Estimated size of each table row in bytes */
  1480   1480     u8 tabFlags;         /* Mask of TF_* values */
  1481   1481     u8 keyConf;          /* What to do in case of uniqueness conflict on iPKey */
................................................................................
  1676   1676   ** and the value of Index.onError indicate the which conflict resolution 
  1677   1677   ** algorithm to employ whenever an attempt is made to insert a non-unique
  1678   1678   ** element.
  1679   1679   */
  1680   1680   struct Index {
  1681   1681     char *zName;             /* Name of this index */
  1682   1682     i16 *aiColumn;           /* Which columns are used by this index.  1st is 0 */
  1683         -  tRowcnt *aiRowEst;       /* From ANALYZE: Est. rows selected by each column */
         1683  +  LogEst *aiRowLogEst;     /* From ANALYZE: Est. rows selected by each column */
  1684   1684     Table *pTable;           /* The SQL table being indexed */
  1685   1685     char *zColAff;           /* String defining the affinity of each column */
  1686   1686     Index *pNext;            /* The next index associated with the same table */
  1687   1687     Schema *pSchema;         /* Schema containing this index */
  1688   1688     u8 *aSortOrder;          /* for each column: True==DESC, False==ASC */
  1689   1689     char **azColl;           /* Array of collation sequence names for index */
  1690   1690     Expr *pPartIdxWhere;     /* WHERE clause for partial indices */

Changes to src/util.c.

  1242   1242       if( b>a+49 ) return b;
  1243   1243       if( b>a+31 ) return b+1;
  1244   1244       return b+x[b-a];
  1245   1245     }
  1246   1246   }
  1247   1247   
  1248   1248   /*
  1249         -** Convert an integer into a LogEst.  In other words, compute a
  1250         -** good approximatation for 10*log2(x).
         1249  +** Convert an integer into a LogEst.  In other words, compute an
         1250  +** approximation for 10*log2(x).
  1251   1251   */
  1252   1252   LogEst sqlite3LogEst(u64 x){
  1253   1253     static LogEst a[] = { 0, 2, 3, 5, 6, 7, 8, 9 };
  1254   1254     LogEst y = 40;
  1255   1255     if( x<8 ){
  1256   1256       if( x<2 ) return 0;
  1257   1257       while( x<8 ){  y -= 10; x <<= 1; }

Changes to src/where.c.

   223    223       }
   224    224       pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]);
   225    225     }
   226    226     pTerm = &pWC->a[idx = pWC->nTerm++];
   227    227     if( p && ExprHasProperty(p, EP_Unlikely) ){
   228    228       pTerm->truthProb = sqlite3LogEst(p->iTable) - 99;
   229    229     }else{
   230         -    pTerm->truthProb = -1;
          230  +    pTerm->truthProb = 1;
   231    231     }
   232    232     pTerm->pExpr = sqlite3ExprSkipCollate(p);
   233    233     pTerm->wtFlags = wtFlags;
   234    234     pTerm->pWC = pWC;
   235    235     pTerm->iParent = -1;
   236    236     return idx;
   237    237   }
................................................................................
  1952   1952       aStat[1] = aSample[i].anEq[iCol];
  1953   1953     }else{
  1954   1954       tRowcnt iLower, iUpper, iGap;
  1955   1955       if( i==0 ){
  1956   1956         iLower = 0;
  1957   1957         iUpper = aSample[0].anLt[iCol];
  1958   1958       }else{
  1959         -      iUpper = i>=pIdx->nSample ? pIdx->aiRowEst[0] : aSample[i].anLt[iCol];
         1959  +      i64 nRow0 = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]);
         1960  +      iUpper = i>=pIdx->nSample ? nRow0 : aSample[i].anLt[iCol];
  1960   1961         iLower = aSample[i-1].anEq[iCol] + aSample[i-1].anLt[iCol];
  1961   1962       }
  1962   1963       aStat[1] = (pIdx->nKeyCol>iCol ? pIdx->aAvgEq[iCol] : 1);
  1963   1964       if( iLower>=iUpper ){
  1964   1965         iGap = 0;
  1965   1966       }else{
  1966   1967         iGap = iUpper - iLower;
................................................................................
  1970   1971       }else{
  1971   1972         iGap = iGap/3;
  1972   1973       }
  1973   1974       aStat[0] = iLower + iGap;
  1974   1975     }
  1975   1976   }
  1976   1977   #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
         1978  +
         1979  +/*
         1980  +** If it is not NULL, pTerm is a term that provides an upper or lower
         1981  +** bound on a range scan. Without considering pTerm, it is estimated 
         1982  +** that the scan will visit nNew rows. This function returns the number
         1983  +** estimated to be visited after taking pTerm into account.
         1984  +**
         1985  +** If the user explicitly specified a likelihood() value for this term,
         1986  +** then the return value is the likelihood multiplied by the number of
         1987  +** input rows. Otherwise, this function assumes that an "IS NOT NULL" term
         1988  +** has a likelihood of 0.50, and any other term a likelihood of 0.25.
         1989  +*/
         1990  +static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){
         1991  +  LogEst nRet = nNew;
         1992  +  if( pTerm ){
         1993  +    if( pTerm->truthProb<=0 ){
         1994  +      nRet += pTerm->truthProb;
         1995  +    }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){
         1996  +      nRet -= 20;        assert( 20==sqlite3LogEst(4) );
         1997  +    }
         1998  +  }
         1999  +  return nRet;
         2000  +}
  1977   2001   
  1978   2002   /*
  1979   2003   ** This function is used to estimate the number of rows that will be visited
  1980   2004   ** by scanning an index for a range of values. The range may have an upper
  1981   2005   ** bound, a lower bound, or both. The WHERE clause terms that set the upper
  1982   2006   ** and lower bounds are represented by pLower and pUpper respectively. For
  1983   2007   ** example, assuming that index p is on t1(a):
................................................................................
  2063   2087         aff = SQLITE_AFF_INTEGER;
  2064   2088       }else{
  2065   2089         aff = p->pTable->aCol[p->aiColumn[nEq]].affinity;
  2066   2090       }
  2067   2091       /* Determine iLower and iUpper using ($P) only. */
  2068   2092       if( nEq==0 ){
  2069   2093         iLower = 0;
  2070         -      iUpper = p->aiRowEst[0];
         2094  +      iUpper = sqlite3LogEstToInt(p->aiRowLogEst[0]);
  2071   2095       }else{
  2072   2096         /* Note: this call could be optimized away - since the same values must 
  2073   2097         ** have been requested when testing key $P in whereEqualScanEst().  */
  2074   2098         whereKeyStats(pParse, p, pRec, 0, a);
  2075   2099         iLower = a[0];
  2076   2100         iUpper = a[0] + a[1];
  2077   2101       }
................................................................................
  2123   2147       }
  2124   2148     }
  2125   2149   #else
  2126   2150     UNUSED_PARAMETER(pParse);
  2127   2151     UNUSED_PARAMETER(pBuilder);
  2128   2152   #endif
  2129   2153     assert( pLower || pUpper );
  2130         -  /* TUNING:  Each inequality constraint reduces the search space 4-fold.
  2131         -  ** A BETWEEN operator, therefore, reduces the search space 16-fold */
  2132         -  nNew = nOut;
  2133         -  if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ){
  2134         -    nNew -= 20;        assert( 20==sqlite3LogEst(4) );
  2135         -    nOut--;
  2136         -  }
  2137         -  if( pUpper ){
  2138         -    nNew -= 20;        assert( 20==sqlite3LogEst(4) );
  2139         -    nOut--;
  2140         -  }
         2154  +  assert( pUpper==0 || (pUpper->wtFlags & TERM_VNULL)==0 );
         2155  +  nNew = whereRangeAdjust(pLower, nOut);
         2156  +  nNew = whereRangeAdjust(pUpper, nNew);
         2157  +
         2158  +  /* TUNING: If there is both an upper and lower limit, assume the range is
         2159  +  ** reduced by an additional 75%. This means that, by default, an open-ended
         2160  +  ** range query (e.g. col > ?) is assumed to match 1/4 of the rows in the
         2161  +  ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to
         2162  +  ** match 1/64 of the index. */ 
         2163  +  if( pLower && pUpper ) nNew -= 20;
         2164  +
         2165  +  nOut -= (pLower!=0) + (pUpper!=0);
  2141   2166     if( nNew<10 ) nNew = 10;
  2142   2167     if( nNew<nOut ) nOut = nNew;
  2143   2168     pLoop->nOut = (LogEst)nOut;
  2144   2169     return rc;
  2145   2170   }
  2146   2171   
  2147   2172   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
................................................................................
  2230   2255   static int whereInScanEst(
  2231   2256     Parse *pParse,       /* Parsing & code generating context */
  2232   2257     WhereLoopBuilder *pBuilder,
  2233   2258     ExprList *pList,     /* The value list on the RHS of "x IN (v1,v2,v3,...)" */
  2234   2259     tRowcnt *pnRow       /* Write the revised row estimate here */
  2235   2260   ){
  2236   2261     Index *p = pBuilder->pNew->u.btree.pIndex;
         2262  +  i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]);
  2237   2263     int nRecValid = pBuilder->nRecValid;
  2238   2264     int rc = SQLITE_OK;     /* Subfunction return code */
  2239   2265     tRowcnt nEst;           /* Number of rows for a single term */
  2240   2266     tRowcnt nRowEst = 0;    /* New estimate of the number of rows */
  2241   2267     int i;                  /* Loop counter */
  2242   2268   
  2243   2269     assert( p->aSample!=0 );
  2244   2270     for(i=0; rc==SQLITE_OK && i<pList->nExpr; i++){
  2245         -    nEst = p->aiRowEst[0];
         2271  +    nEst = nRow0;
  2246   2272       rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr, &nEst);
  2247   2273       nRowEst += nEst;
  2248   2274       pBuilder->nRecValid = nRecValid;
  2249   2275     }
  2250   2276   
  2251   2277     if( rc==SQLITE_OK ){
  2252         -    if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
         2278  +    if( nRowEst > nRow0 ) nRowEst = nRow0;
  2253   2279       *pnRow = nRowEst;
  2254   2280       WHERETRACE(0x10,("IN row estimate: est=%g\n", nRowEst));
  2255   2281     }
  2256   2282     assert( pBuilder->nRecValid==nRecValid );
  2257   2283     return rc;
  2258   2284   }
  2259   2285   #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
................................................................................
  3756   3782   **
  3757   3783   ** To say "WhereLoop X is a proper subset of Y" means that X uses fewer
  3758   3784   ** WHERE clause terms than Y and that every WHERE clause term used by X is
  3759   3785   ** also used by Y.
  3760   3786   */
  3761   3787   static void whereLoopAdjustCost(const WhereLoop *p, WhereLoop *pTemplate){
  3762   3788     if( (pTemplate->wsFlags & WHERE_INDEXED)==0 ) return;
         3789  +  if( (pTemplate->wsFlags & WHERE_SKIPSCAN)!=0 ) return;
  3763   3790     for(; p; p=p->pNextLoop){
  3764   3791       if( p->iTab!=pTemplate->iTab ) continue;
  3765   3792       if( (p->wsFlags & WHERE_INDEXED)==0 ) continue;
         3793  +    if( (p->wsFlags & WHERE_SKIPSCAN)!=0 ) continue;
  3766   3794       if( whereLoopCheaperProperSubset(p, pTemplate) ){
  3767   3795         /* Adjust pTemplate cost downward so that it is cheaper than its 
  3768   3796         ** subset p */
  3769   3797         pTemplate->rRun = p->rRun;
  3770   3798         pTemplate->nOut = p->nOut - 1;
  3771   3799       }else if( whereLoopCheaperProperSubset(pTemplate, p) ){
  3772   3800         /* Adjust pTemplate cost upward so that it is costlier than p since
................................................................................
  3983   4011       if( (pTerm->prereqAll & notAllowed)!=0 ) continue;
  3984   4012       for(j=pLoop->nLTerm-1; j>=0; j--){
  3985   4013         pX = pLoop->aLTerm[j];
  3986   4014         if( pX==0 ) continue;
  3987   4015         if( pX==pTerm ) break;
  3988   4016         if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break;
  3989   4017       }
  3990         -    if( j<0 ) pLoop->nOut += pTerm->truthProb;
         4018  +    if( j<0 ){
         4019  +      pLoop->nOut += (pTerm->truthProb<=0 ? pTerm->truthProb : -1);
         4020  +    }
  3991   4021     }
  3992   4022   }
  3993   4023   
  3994   4024   /*
  3995         -** We have so far matched pBuilder->pNew->u.btree.nEq terms of the index pIndex.
  3996         -** Try to match one more.
         4025  +** We have so far matched pBuilder->pNew->u.btree.nEq terms of the 
         4026  +** index pIndex. Try to match one more.
         4027  +**
         4028  +** When this function is called, pBuilder->pNew->nOut contains the 
         4029  +** number of rows expected to be visited by filtering using the nEq 
         4030  +** terms only. If it is modified, this value is restored before this 
         4031  +** function returns.
  3997   4032   **
  3998   4033   ** If pProbe->tnum==0, that means pIndex is a fake index used for the
  3999   4034   ** INTEGER PRIMARY KEY.
  4000   4035   */
  4001   4036   static int whereLoopAddBtreeIndex(
  4002   4037     WhereLoopBuilder *pBuilder,     /* The WhereLoop factory */
  4003   4038     struct SrcList_item *pSrc,      /* FROM clause term being analyzed */
................................................................................
  4015   4050     u16 saved_nLTerm;               /* Original value of pNew->nLTerm */
  4016   4051     u16 saved_nEq;                  /* Original value of pNew->u.btree.nEq */
  4017   4052     u16 saved_nSkip;                /* Original value of pNew->u.btree.nSkip */
  4018   4053     u32 saved_wsFlags;              /* Original value of pNew->wsFlags */
  4019   4054     LogEst saved_nOut;              /* Original value of pNew->nOut */
  4020   4055     int iCol;                       /* Index of the column in the table */
  4021   4056     int rc = SQLITE_OK;             /* Return code */
  4022         -  LogEst nRowEst;                 /* Estimated index selectivity */
  4023   4057     LogEst rLogSize;                /* Logarithm of table size */
  4024   4058     WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */
  4025   4059   
  4026   4060     pNew = pBuilder->pNew;
  4027   4061     if( db->mallocFailed ) return SQLITE_NOMEM;
  4028   4062   
  4029   4063     assert( (pNew->wsFlags & WHERE_VIRTUALTABLE)==0 );
................................................................................
  4036   4070       opMask = WO_EQ|WO_IN|WO_ISNULL|WO_GT|WO_GE|WO_LT|WO_LE;
  4037   4071     }
  4038   4072     if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE);
  4039   4073   
  4040   4074     assert( pNew->u.btree.nEq<=pProbe->nKeyCol );
  4041   4075     if( pNew->u.btree.nEq < pProbe->nKeyCol ){
  4042   4076       iCol = pProbe->aiColumn[pNew->u.btree.nEq];
  4043         -    nRowEst = sqlite3LogEst(pProbe->aiRowEst[pNew->u.btree.nEq+1]);
  4044         -    if( nRowEst==0 && pProbe->onError==OE_None ) nRowEst = 1;
  4045   4077     }else{
  4046   4078       iCol = -1;
  4047         -    nRowEst = 0;
  4048   4079     }
  4049   4080     pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, iCol,
  4050   4081                           opMask, pProbe);
  4051   4082     saved_nEq = pNew->u.btree.nEq;
  4052   4083     saved_nSkip = pNew->u.btree.nSkip;
  4053   4084     saved_nLTerm = pNew->nLTerm;
  4054   4085     saved_wsFlags = pNew->wsFlags;
  4055   4086     saved_prereq = pNew->prereq;
  4056   4087     saved_nOut = pNew->nOut;
  4057   4088     pNew->rSetup = 0;
  4058         -  rLogSize = estLog(sqlite3LogEst(pProbe->aiRowEst[0]));
         4089  +  rLogSize = estLog(pProbe->aiRowLogEst[0]);
  4059   4090   
  4060   4091     /* Consider using a skip-scan if there are no WHERE clause constraints
  4061   4092     ** available for the left-most terms of the index, and if the average
  4062         -  ** number of repeats in the left-most terms is at least 18.  The magic
  4063         -  ** number 18 was found by experimentation to be the payoff point where
  4064         -  ** skip-scan become faster than a full-scan.
  4065         -  */
         4093  +  ** number of repeats in the left-most terms is at least 18. 
         4094  +  **
         4095  +  ** The magic number 18 is selected on the basis that scanning 17 rows
         4096  +  ** is almost always quicker than an index seek (even though if the index
         4097  +  ** contains fewer than 2^17 rows we assume otherwise in other parts of
         4098  +  ** the code). And, even if it is not, it should not be too much slower. 
         4099  +  ** On the other hand, the extra seeks could end up being significantly
         4100  +  ** more expensive.  */
         4101  +  assert( 42==sqlite3LogEst(18) );
  4066   4102     if( pTerm==0
  4067   4103      && saved_nEq==saved_nSkip
  4068   4104      && saved_nEq+1<pProbe->nKeyCol
  4069         -   && pProbe->aiRowEst[saved_nEq+1]>=18  /* TUNING: Minimum for skip-scan */
         4105  +   && pProbe->aiRowLogEst[saved_nEq+1]>=42  /* TUNING: Minimum for skip-scan */
  4070   4106      && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
  4071   4107     ){
  4072   4108       LogEst nIter;
  4073   4109       pNew->u.btree.nEq++;
  4074   4110       pNew->u.btree.nSkip++;
  4075   4111       pNew->aLTerm[pNew->nLTerm++] = 0;
  4076   4112       pNew->wsFlags |= WHERE_SKIPSCAN;
  4077         -    nIter = sqlite3LogEst(pProbe->aiRowEst[0]/pProbe->aiRowEst[saved_nEq+1]);
  4078         -    pNew->rRun = rLogSize + nIter;
  4079         -    pNew->nOut += nIter;
  4080         -    whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter);
         4113  +    nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1];
         4114  +    pNew->nOut -= nIter;
         4115  +    whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul);
  4081   4116       pNew->nOut = saved_nOut;
  4082   4117     }
  4083   4118     for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){
         4119  +    u16 eOp = pTerm->eOperator;   /* Shorthand for pTerm->eOperator */
         4120  +    LogEst rCostIdx;
         4121  +    LogEst nOutUnadjusted;        /* nOut before IN() and WHERE adjustments */
  4084   4122       int nIn = 0;
  4085   4123   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  4086   4124       int nRecValid = pBuilder->nRecValid;
  4087   4125   #endif
  4088         -    if( (pTerm->eOperator==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0)
         4126  +    if( (eOp==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0)
  4089   4127        && (iCol<0 || pSrc->pTab->aCol[iCol].notNull)
  4090   4128       ){
  4091   4129         continue; /* ignore IS [NOT] NULL constraints on NOT NULL columns */
  4092   4130       }
  4093   4131       if( pTerm->prereqRight & pNew->maskSelf ) continue;
  4094   4132   
  4095         -    assert( pNew->nOut==saved_nOut );
  4096         -
  4097   4133       pNew->wsFlags = saved_wsFlags;
  4098   4134       pNew->u.btree.nEq = saved_nEq;
  4099   4135       pNew->nLTerm = saved_nLTerm;
  4100   4136       if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */
  4101   4137       pNew->aLTerm[pNew->nLTerm++] = pTerm;
  4102   4138       pNew->prereq = (saved_prereq | pTerm->prereqRight) & ~pNew->maskSelf;
  4103         -    pNew->rRun = rLogSize; /* Baseline cost is log2(N).  Adjustments below */
  4104         -    if( pTerm->eOperator & WO_IN ){
         4139  +
         4140  +    assert( nInMul==0
         4141  +        || (pNew->wsFlags & WHERE_COLUMN_NULL)!=0 
         4142  +        || (pNew->wsFlags & WHERE_COLUMN_IN)!=0 
         4143  +        || (pNew->wsFlags & WHERE_SKIPSCAN)!=0 
         4144  +    );
         4145  +
         4146  +    if( eOp & WO_IN ){
  4105   4147         Expr *pExpr = pTerm->pExpr;
  4106   4148         pNew->wsFlags |= WHERE_COLUMN_IN;
  4107   4149         if( ExprHasProperty(pExpr, EP_xIsSelect) ){
  4108   4150           /* "x IN (SELECT ...)":  TUNING: the SELECT returns 25 rows */
  4109   4151           nIn = 46;  assert( 46==sqlite3LogEst(25) );
  4110   4152         }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){
  4111   4153           /* "x IN (value, value, ...)" */
  4112   4154           nIn = sqlite3LogEst(pExpr->x.pList->nExpr);
  4113   4155         }
  4114   4156         assert( nIn>0 );  /* RHS always has 2 or more terms...  The parser
  4115   4157                           ** changes "x IN (?)" into "x=?". */
  4116         -      pNew->rRun += nIn;
  4117         -      pNew->u.btree.nEq++;
  4118         -      pNew->nOut = nRowEst + nInMul + nIn;
  4119         -    }else if( pTerm->eOperator & (WO_EQ) ){
  4120         -      assert(
  4121         -        (pNew->wsFlags & (WHERE_COLUMN_NULL|WHERE_COLUMN_IN|WHERE_SKIPSCAN))!=0
  4122         -        || nInMul==0
  4123         -      );
         4158  +
         4159  +    }else if( eOp & (WO_EQ) ){
  4124   4160         pNew->wsFlags |= WHERE_COLUMN_EQ;
  4125         -      if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1)){
  4126         -        assert( (pNew->wsFlags & WHERE_COLUMN_IN)==0 || iCol<0 );
         4161  +      if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1) ){
  4127   4162           if( iCol>=0 && pProbe->onError==OE_None ){
  4128   4163             pNew->wsFlags |= WHERE_UNQ_WANTED;
  4129   4164           }else{
  4130   4165             pNew->wsFlags |= WHERE_ONEROW;
  4131   4166           }
  4132   4167         }
  4133         -      pNew->u.btree.nEq++;
  4134         -      pNew->nOut = nRowEst + nInMul;
  4135         -    }else if( pTerm->eOperator & (WO_ISNULL) ){
         4168  +    }else if( eOp & WO_ISNULL ){
  4136   4169         pNew->wsFlags |= WHERE_COLUMN_NULL;
  4137         -      pNew->u.btree.nEq++;
  4138         -      /* TUNING: IS NULL selects 2 rows */
  4139         -      nIn = 10;  assert( 10==sqlite3LogEst(2) );
  4140         -      pNew->nOut = nRowEst + nInMul + nIn;
  4141         -    }else if( pTerm->eOperator & (WO_GT|WO_GE) ){
  4142         -      testcase( pTerm->eOperator & WO_GT );
  4143         -      testcase( pTerm->eOperator & WO_GE );
         4170  +    }else if( eOp & (WO_GT|WO_GE) ){
         4171  +      testcase( eOp & WO_GT );
         4172  +      testcase( eOp & WO_GE );
  4144   4173         pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_BTM_LIMIT;
  4145   4174         pBtm = pTerm;
  4146   4175         pTop = 0;
  4147   4176       }else{
  4148         -      assert( pTerm->eOperator & (WO_LT|WO_LE) );
  4149         -      testcase( pTerm->eOperator & WO_LT );
  4150         -      testcase( pTerm->eOperator & WO_LE );
         4177  +      assert( eOp & (WO_LT|WO_LE) );
         4178  +      testcase( eOp & WO_LT );
         4179  +      testcase( eOp & WO_LE );
  4151   4180         pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_TOP_LIMIT;
  4152   4181         pTop = pTerm;
  4153   4182         pBtm = (pNew->wsFlags & WHERE_BTM_LIMIT)!=0 ?
  4154   4183                        pNew->aLTerm[pNew->nLTerm-2] : 0;
  4155   4184       }
         4185  +
         4186  +    /* At this point pNew->nOut is set to the number of rows expected to
         4187  +    ** be visited by the index scan before considering term pTerm, or the
         4188  +    ** values of nIn and nInMul. In other words, assuming that all 
         4189  +    ** "x IN(...)" terms are replaced with "x = ?". This block updates
         4190  +    ** the value of pNew->nOut to account for pTerm (but not nIn/nInMul).  */
         4191  +    assert( pNew->nOut==saved_nOut );
  4156   4192       if( pNew->wsFlags & WHERE_COLUMN_RANGE ){
  4157         -      /* Adjust nOut and rRun for STAT3 range values */
  4158         -      assert( pNew->nOut==saved_nOut );
         4193  +      /* Adjust nOut using stat3/stat4 data. Or, if there is no stat3/stat4
         4194  +      ** data, using some other estimate.  */
  4159   4195         whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew);
  4160         -    }
         4196  +    }else{
         4197  +      int nEq = ++pNew->u.btree.nEq;
         4198  +      assert( eOp & (WO_ISNULL|WO_EQ|WO_IN) );
         4199  +
         4200  +      assert( pNew->nOut==saved_nOut );
         4201  +      if( pTerm->truthProb<=0 && iCol>=0 ){
         4202  +        assert( (eOp & WO_IN) || nIn==0 );
         4203  +        pNew->nOut += pTerm->truthProb;
         4204  +        pNew->nOut -= nIn;
         4205  +        pNew->wsFlags |= WHERE_LIKELIHOOD;
         4206  +      }else{
  4161   4207   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  4162         -    if( nInMul==0 
  4163         -     && pProbe->nSample 
  4164         -     && pNew->u.btree.nEq<=pProbe->nSampleCol
  4165         -     && OptimizationEnabled(db, SQLITE_Stat3) 
  4166         -    ){
  4167         -      Expr *pExpr = pTerm->pExpr;
  4168         -      tRowcnt nOut = 0;
  4169         -      if( (pTerm->eOperator & (WO_EQ|WO_ISNULL))!=0 ){
  4170         -        testcase( pTerm->eOperator & WO_EQ );
  4171         -        testcase( pTerm->eOperator & WO_ISNULL );
  4172         -        rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut);
  4173         -      }else if( (pTerm->eOperator & WO_IN)
  4174         -             &&  !ExprHasProperty(pExpr, EP_xIsSelect)  ){
  4175         -        rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut);
  4176         -      }
  4177         -      assert( nOut==0 || rc==SQLITE_OK );
  4178         -      if( nOut ){
  4179         -        pNew->nOut = sqlite3LogEst(nOut);
  4180         -        if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut;
  4181         -      }
  4182         -    }
  4183         -#endif
         4208  +        tRowcnt nOut = 0;
         4209  +        if( nInMul==0 
         4210  +         && pProbe->nSample 
         4211  +         && pNew->u.btree.nEq<=pProbe->nSampleCol
         4212  +         && OptimizationEnabled(db, SQLITE_Stat3) 
         4213  +         && ((eOp & WO_IN)==0 || !ExprHasProperty(pTerm->pExpr, EP_xIsSelect))
         4214  +         && (pNew->wsFlags & WHERE_LIKELIHOOD)==0
         4215  +        ){
         4216  +          Expr *pExpr = pTerm->pExpr;
         4217  +          if( (eOp & (WO_EQ|WO_ISNULL))!=0 ){
         4218  +            testcase( eOp & WO_EQ );
         4219  +            testcase( eOp & WO_ISNULL );
         4220  +            rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut);
         4221  +          }else{
         4222  +            rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut);
         4223  +          }
         4224  +          assert( rc!=SQLITE_OK || nOut>0 );
         4225  +          if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK;
         4226  +          if( rc!=SQLITE_OK ) break;          /* Jump out of the pTerm loop */
         4227  +          if( nOut ){
         4228  +            pNew->nOut = sqlite3LogEst(nOut);
         4229  +            if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut;
         4230  +            pNew->nOut -= nIn;
         4231  +          }
         4232  +        }
         4233  +        if( nOut==0 )
         4234  +#endif
         4235  +        {
         4236  +          pNew->nOut += (pProbe->aiRowLogEst[nEq] - pProbe->aiRowLogEst[nEq-1]);
         4237  +          if( eOp & WO_ISNULL ){
         4238  +            /* TUNING: If there is no likelihood() value, assume that a 
         4239  +            ** "col IS NULL" expression matches twice as many rows 
         4240  +            ** as (col=?). */
         4241  +            pNew->nOut += 10;
         4242  +          }
         4243  +        }
         4244  +      }
         4245  +    }
         4246  +
         4247  +    /* Set rCostIdx to the cost of visiting selected rows in index. Add
         4248  +    ** it to pNew->rRun, which is currently set to the cost of the index
         4249  +    ** seek only. Then, if this is a non-covering index, add the cost of
         4250  +    ** visiting the rows in the main table.  */
         4251  +    rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow;
         4252  +    pNew->rRun = sqlite3LogEstAdd(rLogSize, rCostIdx);
  4184   4253       if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){
  4185         -      /* Each row involves a step of the index, then a binary search of
  4186         -      ** the main table */
  4187         -      pNew->rRun =  sqlite3LogEstAdd(pNew->rRun,rLogSize>27 ? rLogSize-17 : 10);
         4254  +      pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16);
  4188   4255       }
  4189         -    /* Step cost for each output row */
  4190         -    pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut);
         4256  +
         4257  +    nOutUnadjusted = pNew->nOut;
         4258  +    pNew->rRun += nInMul + nIn;
         4259  +    pNew->nOut += nInMul + nIn;
  4191   4260       whereLoopOutputAdjust(pBuilder->pWC, pNew);
  4192   4261       rc = whereLoopInsert(pBuilder, pNew);
         4262  +
         4263  +    if( pNew->wsFlags & WHERE_COLUMN_RANGE ){
         4264  +      pNew->nOut = saved_nOut;
         4265  +    }else{
         4266  +      pNew->nOut = nOutUnadjusted;
         4267  +    }
         4268  +
  4193   4269       if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0
  4194   4270        && pNew->u.btree.nEq<(pProbe->nKeyCol + (pProbe->zName!=0))
  4195   4271       ){
  4196   4272         whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nInMul+nIn);
  4197   4273       }
  4198   4274       pNew->nOut = saved_nOut;
  4199   4275   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
................................................................................
  4269   4345     return 0;
  4270   4346   }
  4271   4347   
  4272   4348   /*
  4273   4349   ** Add all WhereLoop objects for a single table of the join where the table
  4274   4350   ** is idenfied by pBuilder->pNew->iTab.  That table is guaranteed to be
  4275   4351   ** a b-tree table, not a virtual table.
         4352  +**
         4353  +** The costs (WhereLoop.rRun) of the b-tree loops added by this function
         4354  +** are calculated as follows:
         4355  +**
         4356  +** For a full scan, assuming the table (or index) contains nRow rows:
         4357  +**
         4358  +**     cost = nRow * 3.0                    // full-table scan
         4359  +**     cost = nRow * K                      // scan of covering index
         4360  +**     cost = nRow * (K+3.0)                // scan of non-covering index
         4361  +**
         4362  +** where K is a value between 1.1 and 3.0 set based on the relative 
         4363  +** estimated average size of the index and table records.
         4364  +**
         4365  +** For an index scan, where nVisit is the number of index rows visited
         4366  +** by the scan, and nSeek is the number of seek operations required on 
         4367  +** the index b-tree:
         4368  +**
         4369  +**     cost = nSeek * (log(nRow) + K * nVisit)          // covering index
         4370  +**     cost = nSeek * (log(nRow) + (K+3.0) * nVisit)    // non-covering index
         4371  +**
         4372  +** Normally, nSeek is 1. nSeek values greater than 1 come about if the 
         4373  +** WHERE clause includes "x IN (....)" terms used in place of "x=?". Or when 
         4374  +** implicit "x IN (SELECT x FROM tbl)" terms are added for skip-scans.
  4276   4375   */
  4277   4376   static int whereLoopAddBtree(
  4278   4377     WhereLoopBuilder *pBuilder, /* WHERE clause information */
  4279   4378     Bitmask mExtra              /* Extra prerequesites for using this table */
  4280   4379   ){
  4281   4380     WhereInfo *pWInfo;          /* WHERE analysis context */
  4282   4381     Index *pProbe;              /* An index we are evaluating */
  4283   4382     Index sPk;                  /* A fake index object for the primary key */
  4284         -  tRowcnt aiRowEstPk[2];      /* The aiRowEst[] value for the sPk index */
         4383  +  LogEst aiRowEstPk[2];       /* The aiRowLogEst[] value for the sPk index */
  4285   4384     i16 aiColumnPk = -1;        /* The aColumn[] value for the sPk index */
  4286   4385     SrcList *pTabList;          /* The FROM clause */
  4287   4386     struct SrcList_item *pSrc;  /* The FROM clause btree term to add */
  4288   4387     WhereLoop *pNew;            /* Template WhereLoop object */
  4289   4388     int rc = SQLITE_OK;         /* Return code */
  4290   4389     int iSortIdx = 1;           /* Index number */
  4291   4390     int b;                      /* A boolean value */
................................................................................
  4312   4411       ** variable sPk to represent the rowid primary key index.  Make this
  4313   4412       ** fake index the first in a chain of Index objects with all of the real
  4314   4413       ** indices to follow */
  4315   4414       Index *pFirst;                  /* First of real indices on the table */
  4316   4415       memset(&sPk, 0, sizeof(Index));
  4317   4416       sPk.nKeyCol = 1;
  4318   4417       sPk.aiColumn = &aiColumnPk;
  4319         -    sPk.aiRowEst = aiRowEstPk;
         4418  +    sPk.aiRowLogEst = aiRowEstPk;
  4320   4419       sPk.onError = OE_Replace;
  4321   4420       sPk.pTable = pTab;
  4322         -    aiRowEstPk[0] = pTab->nRowEst;
  4323         -    aiRowEstPk[1] = 1;
         4421  +    sPk.szIdxRow = pTab->szTabRow;
         4422  +    aiRowEstPk[0] = pTab->nRowLogEst;
         4423  +    aiRowEstPk[1] = 0;
  4324   4424       pFirst = pSrc->pTab->pIndex;
  4325   4425       if( pSrc->notIndexed==0 ){
  4326   4426         /* The real indices of the table are only considered if the
  4327   4427         ** NOT INDEXED qualifier is omitted from the FROM clause */
  4328   4428         sPk.pNext = pFirst;
  4329   4429       }
  4330   4430       pProbe = &sPk;
  4331   4431     }
  4332         -  rSize = sqlite3LogEst(pTab->nRowEst);
         4432  +  rSize = pTab->nRowLogEst;
  4333   4433     rLogSize = estLog(rSize);
  4334   4434   
  4335   4435   #ifndef SQLITE_OMIT_AUTOMATIC_INDEX
  4336   4436     /* Automatic indexes */
  4337   4437     if( !pBuilder->pOrSet
  4338   4438      && (pWInfo->pParse->db->flags & SQLITE_AutoIndex)!=0
  4339   4439      && pSrc->pIndex==0
................................................................................
  4375   4475     /* Loop over all indices
  4376   4476     */
  4377   4477     for(; rc==SQLITE_OK && pProbe; pProbe=pProbe->pNext, iSortIdx++){
  4378   4478       if( pProbe->pPartIdxWhere!=0
  4379   4479        && !whereUsablePartialIndex(pNew->iTab, pWC, pProbe->pPartIdxWhere) ){
  4380   4480         continue;  /* Partial index inappropriate for this query */
  4381   4481       }
         4482  +    rSize = pProbe->aiRowLogEst[0];
  4382   4483       pNew->u.btree.nEq = 0;
  4383   4484       pNew->u.btree.nSkip = 0;
  4384   4485       pNew->nLTerm = 0;
  4385   4486       pNew->iSortIdx = 0;
  4386   4487       pNew->rSetup = 0;
  4387   4488       pNew->prereq = mExtra;
  4388   4489       pNew->nOut = rSize;
................................................................................
  4392   4493       assert( (pWInfo->wctrlFlags & WHERE_ONEPASS_DESIRED)==0 || b==0 );
  4393   4494       if( pProbe->tnum<=0 ){
  4394   4495         /* Integer primary key index */
  4395   4496         pNew->wsFlags = WHERE_IPK;
  4396   4497   
  4397   4498         /* Full table scan */
  4398   4499         pNew->iSortIdx = b ? iSortIdx : 0;
  4399         -      /* TUNING: Cost of full table scan is 3*(N + log2(N)).
  4400         -      **  +  The extra 3 factor is to encourage the use of indexed lookups
  4401         -      **     over full scans.  FIXME */
  4402         -      pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 16;
         4500  +      /* TUNING: Cost of full table scan is (N*3.0). */
         4501  +      pNew->rRun = rSize + 16;
  4403   4502         whereLoopOutputAdjust(pWC, pNew);
  4404   4503         rc = whereLoopInsert(pBuilder, pNew);
  4405   4504         pNew->nOut = rSize;
  4406   4505         if( rc ) break;
  4407   4506       }else{
  4408   4507         Bitmask m;
  4409   4508         if( pProbe->isCovering ){
................................................................................
  4422   4521            && (pProbe->szIdxRow<pTab->szTabRow)
  4423   4522            && (pWInfo->wctrlFlags & WHERE_ONEPASS_DESIRED)==0
  4424   4523            && sqlite3GlobalConfig.bUseCis
  4425   4524            && OptimizationEnabled(pWInfo->pParse->db, SQLITE_CoverIdxScan)
  4426   4525             )
  4427   4526         ){
  4428   4527           pNew->iSortIdx = b ? iSortIdx : 0;
  4429         -        /* TUNING:  The base cost of an index scan is N + log2(N).
  4430         -        ** The log2(N) is for the initial seek to the beginning and the N
  4431         -        ** is for the scan itself. */
  4432         -        pNew->rRun = sqlite3LogEstAdd(rSize, rLogSize);
  4433         -        if( m==0 ){
  4434         -          /* TUNING: Cost of a covering index scan is K*(N + log2(N)).
  4435         -          **  +  The extra factor K of between 1.1 and 3.0 that depends
  4436         -          **     on the relative sizes of the table and the index.  K
  4437         -          **     is smaller for smaller indices, thus favoring them.
  4438         -          **     The upper bound on K (3.0) matches the penalty factor
  4439         -          **     on a full table scan that tries to encourage the use of
  4440         -          **     indexed lookups over full scans.
  4441         -          */
  4442         -          pNew->rRun +=  1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
  4443         -        }else{
  4444         -          /* TUNING: The cost of scanning a non-covering index is multiplied
  4445         -          ** by log2(N) to account for the binary search of the main table
  4446         -          ** that must happen for each row of the index.
  4447         -          ** TODO: Should there be a multiplier here, analogous to the 3x
  4448         -          ** multiplier for a fulltable scan or covering index scan, to
  4449         -          ** further discourage the use of an index scan?  Or is the log2(N)
  4450         -          ** term sufficient discouragement?
  4451         -          ** TODO: What if some or all of the WHERE clause terms can be
  4452         -          ** computed without reference to the original table.  Then the
  4453         -          ** penality should reduce to logK where K is the number of output
  4454         -          ** rows.
  4455         -          */
  4456         -          pNew->rRun += rLogSize;
         4528  +
         4529  +        /* The cost of visiting the index rows is N*K, where K is
         4530  +        ** between 1.1 and 3.0, depending on the relative sizes of the
         4531  +        ** index and table rows. If this is a non-covering index scan,
         4532  +        ** also add the cost of visiting table rows (N*3.0).  */
         4533  +        pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
         4534  +        if( m!=0 ){
         4535  +          pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rSize+16);
  4457   4536           }
         4537  +
  4458   4538           whereLoopOutputAdjust(pWC, pNew);
  4459   4539           rc = whereLoopInsert(pBuilder, pNew);
  4460   4540           pNew->nOut = rSize;
  4461   4541           if( rc ) break;
  4462   4542         }
  4463   4543       }
  4464   4544   
................................................................................
  4728   4808         pNew->nLTerm = 1;
  4729   4809         pNew->aLTerm[0] = pTerm;
  4730   4810         pNew->wsFlags = WHERE_MULTI_OR;
  4731   4811         pNew->rSetup = 0;
  4732   4812         pNew->iSortIdx = 0;
  4733   4813         memset(&pNew->u, 0, sizeof(pNew->u));
  4734   4814         for(i=0; rc==SQLITE_OK && i<sSum.n; i++){
  4735         -        /* TUNING: Multiple by 3.5 for the secondary table lookup */
  4736         -        pNew->rRun = sSum.a[i].rRun + 18;
         4815  +        pNew->rRun = sSum.a[i].rRun;
  4737   4816           pNew->nOut = sSum.a[i].nOut;
  4738   4817           pNew->prereq = sSum.a[i].prereq;
  4739   4818           rc = whereLoopInsert(pBuilder, pNew);
  4740   4819         }
  4741   4820       }
  4742   4821     }
  4743   4822     return rc;
................................................................................
  5175   5254           nOut = pFrom->nRow + pWLoop->nOut;
  5176   5255           maskNew = pFrom->maskLoop | pWLoop->maskSelf;
  5177   5256           if( isOrdered<0 ){
  5178   5257             isOrdered = wherePathSatisfiesOrderBy(pWInfo,
  5179   5258                          pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags,
  5180   5259                          iLoop, pWLoop, &revMask);
  5181   5260             if( isOrdered>=0 && isOrdered<nOrderBy ){
  5182         -            /* TUNING: Estimated cost of sorting is N*log(N).
  5183         -            ** If the order-by clause has X terms but only the last Y terms
  5184         -            ** are out of order, then block-sorting will reduce the sorting
  5185         -            ** cost to N*log(N)*log(Y/X).  The log(Y/X) term is computed
  5186         -            ** by rScale.
  5187         -            ** TODO: Should the sorting cost get a small multiplier to help
  5188         -            ** discourage the use of sorting and encourage the use of index
  5189         -            ** scans instead?
  5190         -            */
         5261  +            /* TUNING: Estimated cost of a full external sort, where N is 
         5262  +            ** the number of rows to sort is:
         5263  +            **
         5264  +            **   cost = (3.0 * N * log(N)).
         5265  +            ** 
         5266  +            ** Or, if the order-by clause has X terms but only the last Y 
         5267  +            ** terms are out of order, then block-sorting will reduce the 
         5268  +            ** sorting cost to:
         5269  +            **
         5270  +            **   cost = (3.0 * N * log(N)) * (Y/X)
         5271  +            **
         5272  +            ** The (Y/X) term is implemented using stack variable rScale
         5273  +            ** below.  */
  5191   5274               LogEst rScale, rSortCost;
  5192         -            assert( nOrderBy>0 );
         5275  +            assert( nOrderBy>0 && 66==sqlite3LogEst(100) );
  5193   5276               rScale = sqlite3LogEst((nOrderBy-isOrdered)*100/nOrderBy) - 66;
  5194         -            rSortCost = nRowEst + estLog(nRowEst) + rScale;
         5277  +            rSortCost = nRowEst + estLog(nRowEst) + rScale + 16;
         5278  +
  5195   5279               /* TUNING: The cost of implementing DISTINCT using a B-TREE is
  5196         -            ** also N*log(N) but it has a larger constant of proportionality.
  5197         -            ** Multiply by 3.0. */
         5280  +            ** similar but with a larger constant of proportionality. 
         5281  +            ** Multiply by an additional factor of 3.0.  */
  5198   5282               if( pWInfo->wctrlFlags & WHERE_WANT_DISTINCT ){
  5199   5283                 rSortCost += 16;
  5200   5284               }
  5201   5285               WHERETRACE(0x002,
  5202   5286                  ("---- sort cost=%-3d (%d/%d) increases cost %3d to %-3d\n",
  5203   5287                   rSortCost, (nOrderBy-isOrdered), nOrderBy, rCost,
  5204   5288                   sqlite3LogEstAdd(rCost,rSortCost)));

Changes to src/whereInt.h.

   454    454   #define WHERE_VIRTUALTABLE 0x00000400  /* WhereLoop.u.vtab is valid */
   455    455   #define WHERE_IN_ABLE      0x00000800  /* Able to support an IN operator */
   456    456   #define WHERE_ONEROW       0x00001000  /* Selects no more than one row */
   457    457   #define WHERE_MULTI_OR     0x00002000  /* OR using multiple indices */
   458    458   #define WHERE_AUTO_INDEX   0x00004000  /* Uses an ephemeral index */
   459    459   #define WHERE_SKIPSCAN     0x00008000  /* Uses the skip-scan algorithm */
   460    460   #define WHERE_UNQ_WANTED   0x00010000  /* WHERE_ONEROW would have been helpful*/
          461  +#define WHERE_LIKELIHOOD   0x00020000  /* A likelihood() is affecting nOut */

Changes to test/analyze3.test.

    99     99     ifcapable stat4 {
   100    100       execsql { SELECT count(*)>0 FROM sqlite_stat4; }
   101    101     } else {
   102    102       execsql { SELECT count(*)>0 FROM sqlite_stat3; }
   103    103     }
   104    104   } {1}
   105    105   
          106  +do_execsql_test analyze3-1.1.x {
          107  +  SELECT count(*) FROM t1 WHERE x>200 AND x<300;
          108  +  SELECT count(*) FROM t1 WHERE x>0 AND x<1100;
          109  +} {99 1000}
          110  +
          111  +# The first of the following two SELECT statements visits 99 rows. So
          112  +# it is better to use the index. But the second visits every row in 
          113  +# the table (1000 in total) so it is better to do a full-table scan.
          114  +#
   106    115   do_eqp_test analyze3-1.1.2 {
   107    116     SELECT sum(y) FROM t1 WHERE x>200 AND x<300
   108    117   } {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x>? AND x<?)}}
   109    118   do_eqp_test analyze3-1.1.3 {
   110    119     SELECT sum(y) FROM t1 WHERE x>0 AND x<1100 
   111         -} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x>? AND x<?)}}
          120  +} {0 0 0 {SCAN TABLE t1}}
   112    121   
   113    122   do_test analyze3-1.1.4 {
   114    123     sf_execsql { SELECT sum(y) FROM t1 WHERE x>200 AND x<300 }
   115    124   } {199 0 14850}
   116    125   do_test analyze3-1.1.5 {
   117    126     set l [string range "200" 0 end]
   118    127     set u [string range "300" 0 end]
................................................................................
   121    130   do_test analyze3-1.1.6 {
   122    131     set l [expr int(200)]
   123    132     set u [expr int(300)]
   124    133     sf_execsql { SELECT sum(y) FROM t1 WHERE x>$l AND x<$u }
   125    134   } {199 0 14850}
   126    135   do_test analyze3-1.1.7 {
   127    136     sf_execsql { SELECT sum(y) FROM t1 WHERE x>0 AND x<1100 }
   128         -} {2000 0 499500}
          137  +} {999 999 499500}
   129    138   do_test analyze3-1.1.8 {
   130    139     set l [string range "0" 0 end]
   131    140     set u [string range "1100" 0 end]
   132    141     sf_execsql { SELECT sum(y) FROM t1 WHERE x>$l AND x<$u }
   133         -} {2000 0 499500}
          142  +} {999 999 499500}
   134    143   do_test analyze3-1.1.9 {
   135    144     set l [expr int(0)]
   136    145     set u [expr int(1100)]
   137    146     sf_execsql { SELECT sum(y) FROM t1 WHERE x>$l AND x<$u }
   138         -} {2000 0 499500}
          147  +} {999 999 499500}
   139    148   
   140    149   
   141    150   # The following tests are similar to the block above. The difference is
   142    151   # that the indexed column has TEXT affinity in this case. In the tests
   143    152   # above the affinity is INTEGER.
   144    153   #
   145    154   do_test analyze3-1.2.1 {
................................................................................
   148    157         CREATE TABLE t2(x TEXT, y);
   149    158         INSERT INTO t2 SELECT * FROM t1;
   150    159         CREATE INDEX i2 ON t2(x);
   151    160       COMMIT;
   152    161       ANALYZE;
   153    162     }
   154    163   } {}
          164  +do_execsql_test analyze3-2.1.x {
          165  +  SELECT count(*) FROM t2 WHERE x>1 AND x<2;
          166  +  SELECT count(*) FROM t2 WHERE x>0 AND x<99;
          167  +} {200 990}
   155    168   do_eqp_test analyze3-1.2.2 {
   156    169     SELECT sum(y) FROM t2 WHERE x>1 AND x<2
   157    170   } {0 0 0 {SEARCH TABLE t2 USING INDEX i2 (x>? AND x<?)}}
   158    171   do_eqp_test analyze3-1.2.3 {
   159    172     SELECT sum(y) FROM t2 WHERE x>0 AND x<99
   160         -} {0 0 0 {SEARCH TABLE t2 USING INDEX i2 (x>? AND x<?)}}
          173  +} {0 0 0 {SCAN TABLE t2}}
          174  +
   161    175   do_test analyze3-1.2.4 {
   162    176     sf_execsql { SELECT sum(y) FROM t2 WHERE x>12 AND x<20 }
   163    177   } {161 0 4760}
   164    178   do_test analyze3-1.2.5 {
   165    179     set l [string range "12" 0 end]
   166    180     set u [string range "20" 0 end]
   167    181     sf_execsql {SELECT typeof($l), typeof($u), sum(y) FROM t2 WHERE x>$l AND x<$u}
................................................................................
   169    183   do_test analyze3-1.2.6 {
   170    184     set l [expr int(12)]
   171    185     set u [expr int(20)]
   172    186     sf_execsql {SELECT typeof($l), typeof($u), sum(y) FROM t2 WHERE x>$l AND x<$u}
   173    187   } {161 0 integer integer 4760}
   174    188   do_test analyze3-1.2.7 {
   175    189     sf_execsql { SELECT sum(y) FROM t2 WHERE x>0 AND x<99 }
   176         -} {1981 0 490555}
          190  +} {999 999 490555}
   177    191   do_test analyze3-1.2.8 {
   178    192     set l [string range "0" 0 end]
   179    193     set u [string range "99" 0 end]
   180    194     sf_execsql {SELECT typeof($l), typeof($u), sum(y) FROM t2 WHERE x>$l AND x<$u}
   181         -} {1981 0 text text 490555}
          195  +} {999 999 text text 490555}
   182    196   do_test analyze3-1.2.9 {
   183    197     set l [expr int(0)]
   184    198     set u [expr int(99)]
   185    199     sf_execsql {SELECT typeof($l), typeof($u), sum(y) FROM t2 WHERE x>$l AND x<$u}
   186         -} {1981 0 integer integer 490555}
          200  +} {999 999 integer integer 490555}
   187    201   
   188    202   # Same tests a third time. This time, column x has INTEGER affinity and
   189    203   # is not the leftmost column of the table. This triggered a bug causing
   190    204   # SQLite to use sub-optimal query plans in 3.6.18 and earlier.
   191    205   #
   192    206   do_test analyze3-1.3.1 {
   193    207     execsql {
................................................................................
   195    209         CREATE TABLE t3(y TEXT, x INTEGER);
   196    210         INSERT INTO t3 SELECT y, x FROM t1;
   197    211         CREATE INDEX i3 ON t3(x);
   198    212       COMMIT;
   199    213       ANALYZE;
   200    214     }
   201    215   } {}
          216  +do_execsql_test analyze3-1.3.x {
          217  +  SELECT count(*) FROM t3 WHERE x>200 AND x<300;
          218  +  SELECT count(*) FROM t3 WHERE x>0 AND x<1100
          219  +} {99 1000}
   202    220   do_eqp_test analyze3-1.3.2 {
   203    221     SELECT sum(y) FROM t3 WHERE x>200 AND x<300
   204    222   } {0 0 0 {SEARCH TABLE t3 USING INDEX i3 (x>? AND x<?)}}
   205    223   do_eqp_test analyze3-1.3.3 {
   206    224     SELECT sum(y) FROM t3 WHERE x>0 AND x<1100
   207         -} {0 0 0 {SEARCH TABLE t3 USING INDEX i3 (x>? AND x<?)}}
          225  +} {0 0 0 {SCAN TABLE t3}}
   208    226   
   209    227   do_test analyze3-1.3.4 {
   210    228     sf_execsql { SELECT sum(y) FROM t3 WHERE x>200 AND x<300 }
   211    229   } {199 0 14850}
   212    230   do_test analyze3-1.3.5 {
   213    231     set l [string range "200" 0 end]
   214    232     set u [string range "300" 0 end]
................................................................................
   217    235   do_test analyze3-1.3.6 {
   218    236     set l [expr int(200)]
   219    237     set u [expr int(300)]
   220    238     sf_execsql { SELECT sum(y) FROM t3 WHERE x>$l AND x<$u }
   221    239   } {199 0 14850}
   222    240   do_test analyze3-1.3.7 {
   223    241     sf_execsql { SELECT sum(y) FROM t3 WHERE x>0 AND x<1100 }
   224         -} {2000 0 499500}
          242  +} {999 999 499500}
   225    243   do_test analyze3-1.3.8 {
   226    244     set l [string range "0" 0 end]
   227    245     set u [string range "1100" 0 end]
   228    246     sf_execsql { SELECT sum(y) FROM t3 WHERE x>$l AND x<$u }
   229         -} {2000 0 499500}
          247  +} {999 999 499500}
   230    248   do_test analyze3-1.3.9 {
   231    249     set l [expr int(0)]
   232    250     set u [expr int(1100)]
   233    251     sf_execsql { SELECT sum(y) FROM t3 WHERE x>$l AND x<$u }
   234         -} {2000 0 499500}
          252  +} {999 999 499500}
   235    253   
   236    254   #-------------------------------------------------------------------------
   237    255   # Test that the values of bound SQL variables may be used for the LIKE
   238    256   # optimization.
   239    257   #
   240    258   drop_all_tables
   241    259   do_test analyze3-2.1 {

Changes to test/analyze9.test.

   562    562   #-------------------------------------------------------------------------
   563    563   # Check that affinities are taken into account when using stat4 data to
   564    564   # estimate the number of rows scanned by a rowid constraint.
   565    565   #
   566    566   drop_all_tables
   567    567   do_test 13.1 {
   568    568     execsql {
   569         -    CREATE TABLE t1(a, b, c);
          569  +    CREATE TABLE t1(a, b, c, d);
   570    570       CREATE INDEX i1 ON t1(a);
   571    571       CREATE INDEX i2 ON t1(b, c);
   572    572     }
   573    573     for {set i 0} {$i<100} {incr i} {
   574    574       if {$i %2} {set a abc} else {set a def}
   575    575       execsql { INSERT INTO t1(rowid, a, b, c) VALUES($i, $a, $i, $i) }
   576    576     }
   577    577     execsql ANALYZE
   578    578   } {}
   579    579   do_eqp_test 13.2.1 {
   580         -  SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<20
          580  +  SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<12
   581    581   } {/SEARCH TABLE t1 USING INDEX i1/}
   582    582   do_eqp_test 13.2.2 {
   583         -  SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<20
          583  +  SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<12
   584    584   } {/SEARCH TABLE t1 USING INDEX i1/}
   585    585   do_eqp_test 13.3.1 {
   586         -  SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<20
          586  +  SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<12
   587    587   } {/SEARCH TABLE t1 USING INDEX i2/}
   588    588   do_eqp_test 13.3.2 {
   589         -  SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<20
          589  +  SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<12
   590    590   } {/SEARCH TABLE t1 USING INDEX i2/}
   591    591   
   592    592   #-------------------------------------------------------------------------
   593    593   # Check also that affinities are taken into account when using stat4 data 
   594    594   # to estimate the number of rows scanned by any other constraint on a 
   595    595   # column other than the leftmost.
   596    596   #

Changes to test/autoindex1.test.

    93     93     db status autoindex
    94     94   } {0}
    95     95   do_test autoindex1-210 {
    96     96     db eval {
    97     97       PRAGMA automatic_index=ON;
    98     98       ANALYZE;
    99     99       UPDATE sqlite_stat1 SET stat='10000' WHERE tbl='t1';
          100  +    -- Table t2 actually contains 8 rows.
          101  +    UPDATE sqlite_stat1 SET stat='16' WHERE tbl='t2';
   100    102       ANALYZE sqlite_master;
   101    103       SELECT b, (SELECT d FROM t2 WHERE c=a) FROM t1;
   102    104     }
   103    105   } {11 911 22 922 33 933 44 944 55 955 66 966 77 977 88 988}
   104    106   do_test autoindex1-211 {
   105    107     db status step
   106    108   } {7}

Added test/cost.test.

            1  +# 2014-04-26
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# 
           12  +
           13  +set testdir [file dirname $argv0]
           14  +source $testdir/tester.tcl
           15  +set testprefix cost
           16  +
           17  +
           18  +do_execsql_test 1.1 {
           19  +  CREATE TABLE t3(id INTEGER PRIMARY KEY, b NOT NULL);
           20  +  CREATE TABLE t4(c, d, e);
           21  +  CREATE UNIQUE INDEX i3 ON t3(b);
           22  +  CREATE UNIQUE INDEX i4 ON t4(c, d);
           23  +}
           24  +do_eqp_test 1.2 {
           25  +  SELECT e FROM t3, t4 WHERE b=c ORDER BY b, d;
           26  +} {
           27  +  0 0 0 {SCAN TABLE t3 USING COVERING INDEX i3} 
           28  +  0 1 1 {SEARCH TABLE t4 USING INDEX i4 (c=?)}
           29  +}
           30  +
           31  +
           32  +do_execsql_test 2.1 {
           33  +  CREATE TABLE t1(a, b);
           34  +  CREATE INDEX i1 ON t1(a);
           35  +}
           36  +
           37  +# It is better to use an index for ORDER BY than sort externally, even 
           38  +# if the index is a non-covering index.
           39  +do_eqp_test 2.2 {
           40  +  SELECT * FROM t1 ORDER BY a;
           41  +} {
           42  +  0 0 0 {SCAN TABLE t1 USING INDEX i1}
           43  +}
           44  +
           45  +do_execsql_test 3.1 {
           46  +  CREATE TABLE t5(a INTEGER PRIMARY KEY,b,c,d,e,f,g);
           47  +  CREATE INDEX t5b ON t5(b);
           48  +  CREATE INDEX t5c ON t5(c);
           49  +  CREATE INDEX t5d ON t5(d);
           50  +  CREATE INDEX t5e ON t5(e);
           51  +  CREATE INDEX t5f ON t5(f);
           52  +  CREATE INDEX t5g ON t5(g);
           53  +}
           54  +
           55  +do_eqp_test 3.2 {
           56  +  SELECT a FROM t5 
           57  +  WHERE b IS NULL OR c IS NULL OR d IS NULL 
           58  +  ORDER BY a;
           59  +} {
           60  +  0 0 0 {SEARCH TABLE t5 USING INDEX t5b (b=?)} 
           61  +  0 0 0 {SEARCH TABLE t5 USING INDEX t5c (c=?)} 
           62  +  0 0 0 {SEARCH TABLE t5 USING INDEX t5d (d=?)} 
           63  +  0 0 0 {USE TEMP B-TREE FOR ORDER BY}
           64  +}
           65  +
           66  +#-------------------------------------------------------------------------
           67  +# If there is no likelihood() or stat3 data, SQLite assumes that a closed
           68  +# range scan (e.g. one constrained by "col BETWEEN ? AND ?" constraint)
           69  +# visits 1/64 of the rows in a table.
           70  +#
           71  +# Note: 1/63 =~ 0.016
           72  +# Note: 1/65 =~ 0.015
           73  +#
           74  +reset_db
           75  +do_execsql_test 4.1 {
           76  +  CREATE TABLE t1(a, b);
           77  +  CREATE INDEX i1 ON t1(a);
           78  +  CREATE INDEX i2 ON t1(b);
           79  +}
           80  +do_eqp_test 4.2 {
           81  +  SELECT * FROM t1 WHERE likelihood(a=?, 0.014) AND b BETWEEN ? AND ?;
           82  +} {
           83  +  0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}
           84  +}
           85  +do_eqp_test 4.3 {
           86  +  SELECT * FROM t1 WHERE likelihood(a=?, 0.016) AND b BETWEEN ? AND ?;
           87  +} {
           88  +  0 0 0 {SEARCH TABLE t1 USING INDEX i2 (b>? AND b<?)}
           89  +}
           90  +
           91  +
           92  +#-------------------------------------------------------------------------
           93  +#
           94  +reset_db
           95  +do_execsql_test 5.1 {
           96  +  CREATE TABLE t2(x, y);
           97  +  CREATE INDEX t2i1 ON t2(x);
           98  +}
           99  +
          100  +do_eqp_test 5.2 {
          101  +  SELECT * FROM t2 ORDER BY x, y;
          102  +} {
          103  +  0 0 0 {SCAN TABLE t2 USING INDEX t2i1} 
          104  +  0 0 0 {USE TEMP B-TREE FOR RIGHT PART OF ORDER BY}
          105  +}
          106  +
          107  +do_eqp_test 5.3 {
          108  +  SELECT * FROM t2 WHERE x BETWEEN ? AND ? ORDER BY rowid;
          109  +} {
          110  +  0 0 0 {SEARCH TABLE t2 USING INDEX t2i1 (x>? AND x<?)} 
          111  +  0 0 0 {USE TEMP B-TREE FOR ORDER BY}
          112  +}
          113  +
          114  +# where7.test, where8.test:
          115  +#
          116  +do_execsql_test 6.1 {
          117  +  CREATE TABLE t3(a INTEGER PRIMARY KEY, b, c);
          118  +  CREATE INDEX t3i1 ON t3(b);
          119  +  CREATE INDEX t3i2 ON t3(c);
          120  +}
          121  +
          122  +do_eqp_test 6.2 {
          123  +  SELECT a FROM t3 WHERE (b BETWEEN 2 AND 4) OR c=100 ORDER BY a
          124  +} {
          125  +  0 0 0 {SEARCH TABLE t3 USING INDEX t3i1 (b>? AND b<?)} 
          126  +  0 0 0 {SEARCH TABLE t3 USING INDEX t3i2 (c=?)}
          127  +  0 0 0 {USE TEMP B-TREE FOR ORDER BY}
          128  +}
          129  +
          130  +#-------------------------------------------------------------------------
          131  +#
          132  +reset_db
          133  +do_execsql_test 7.1 {
          134  +  CREATE TABLE t1(a INTEGER PRIMARY KEY,b,c,d,e,f,g);
          135  +  CREATE INDEX t1b ON t1(b);
          136  +  CREATE INDEX t1c ON t1(c);
          137  +  CREATE INDEX t1d ON t1(d);
          138  +  CREATE INDEX t1e ON t1(e);
          139  +  CREATE INDEX t1f ON t1(f);
          140  +  CREATE INDEX t1g ON t1(g);
          141  +}
          142  +
          143  +do_eqp_test 7.2 {
          144  +  SELECT a FROM t1
          145  +     WHERE (b>=950 AND b<=1010) OR (b IS NULL AND c NOT NULL)
          146  +  ORDER BY a
          147  +} {
          148  +  0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b>? AND b<?)} 
          149  +  0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b=?)} 
          150  +  0 0 0 {USE TEMP B-TREE FOR ORDER BY}
          151  +}
          152  +
          153  +#set sqlite_where_trace 0xfff
          154  +do_eqp_test 7.3 {
          155  +  SELECT rowid FROM t1
          156  +  WHERE (+b IS NULL AND c NOT NULL AND d NOT NULL)
          157  +        OR (b NOT NULL AND c IS NULL AND d NOT NULL)
          158  +        OR (b NOT NULL AND c NOT NULL AND d IS NULL)
          159  +} {
          160  +  0 0 0 {SCAN TABLE t1}
          161  +}
          162  +
          163  +#-------------------------------------------------------------------------
          164  +#
          165  +reset_db
          166  +do_execsql_test 8.1 {
          167  +  CREATE TABLE composer(
          168  +    cid INTEGER PRIMARY KEY,
          169  +    cname TEXT
          170  +  );
          171  +  CREATE TABLE album(
          172  +    aid INTEGER PRIMARY KEY,
          173  +    aname TEXT
          174  +  );
          175  +  CREATE TABLE track(
          176  +    tid INTEGER PRIMARY KEY,
          177  +    cid INTEGER REFERENCES composer,
          178  +    aid INTEGER REFERENCES album,
          179  +    title TEXT
          180  +  );
          181  +  CREATE INDEX track_i1 ON track(cid);
          182  +  CREATE INDEX track_i2 ON track(aid);
          183  +}
          184  +
          185  +do_eqp_test 8.2 {
          186  +  SELECT DISTINCT aname
          187  +    FROM album, composer, track
          188  +   WHERE cname LIKE '%bach%'
          189  +     AND unlikely(composer.cid=track.cid)
          190  +     AND unlikely(album.aid=track.aid);
          191  +} {
          192  +  0 0 2 {SCAN TABLE track} 
          193  +  0 1 0 {SEARCH TABLE album USING INTEGER PRIMARY KEY (rowid=?)}
          194  +  0 2 1 {SEARCH TABLE composer USING INTEGER PRIMARY KEY (rowid=?)}
          195  +  0 0 0 {USE TEMP B-TREE FOR DISTINCT}
          196  +}
          197  +
          198  +#-------------------------------------------------------------------------
          199  +#
          200  +do_execsql_test 9.1 {
          201  +  CREATE TABLE t1(
          202  +    a,b,c,d,e, f,g,h,i,j,
          203  +    k,l,m,n,o, p,q,r,s,t
          204  +  );
          205  +  CREATE INDEX i1 ON t1(k,l,m,n,o,p,q,r,s,t);
          206  +}
          207  +do_test 9.2 {
          208  +  for {set i 0} {$i < 100} {incr i} {
          209  +    execsql { INSERT INTO t1 DEFAULT VALUES }
          210  +  }
          211  +  execsql {
          212  +    ANALYZE;
          213  +    CREATE INDEX i2 ON t1(a,b,c,d,e,f,g,h,i,j);
          214  +  }
          215  +} {}
          216  +
          217  +set L [list a=? b=? c=? d=? e=? f=? g=? h=? i=? j=?]
          218  +foreach {tn nTerm nRow} {
          219  +  1   1 10
          220  +  2   2  9
          221  +  3   3  8
          222  +  4   4  7
          223  +  5   5  6
          224  +  6   6  5
          225  +  7   7  5
          226  +  8   8  5
          227  +  9   9  5
          228  +  10 10  5
          229  +} {
          230  +  set w [join [lrange $L 0 [expr $nTerm-1]] " AND "]
          231  +  set p1 [expr ($nRow-1) / 100.0]
          232  +  set p2 [expr ($nRow+1) / 100.0]
          233  +
          234  +  set sql1 "SELECT * FROM t1 WHERE likelihood(k=?, $p1) AND $w"
          235  +  set sql2 "SELECT * FROM t1 WHERE likelihood(k=?, $p2) AND $w"
          236  +
          237  +  do_eqp_test 9.3.$tn.1 $sql1 {/INDEX i1/}
          238  +  do_eqp_test 9.3.$tn.2 $sql2 {/INDEX i2/}
          239  +}
          240  +
          241  +
          242  +
          243  +finish_test
          244  +
          245  +
          246  +

Changes to test/eqp.test.

   308    308     0 0 0 {COMPOUND SUBQUERIES 1 AND 2 (UNION ALL)} 
   309    309   }
   310    310   do_eqp_test 4.2.3 {
   311    311     SELECT * FROM t1 UNION SELECT * FROM t2 ORDER BY 1
   312    312   } {
   313    313     1 0 0 {SCAN TABLE t1} 
   314    314     1 0 0 {USE TEMP B-TREE FOR ORDER BY}
   315         -  2 0 0 {SCAN TABLE t2} 
   316         -  2 0 0 {USE TEMP B-TREE FOR ORDER BY}
          315  +  2 0 0 {SCAN TABLE t2 USING INDEX t2i1} 
          316  +  2 0 0 {USE TEMP B-TREE FOR RIGHT PART OF ORDER BY}
   317    317     0 0 0 {COMPOUND SUBQUERIES 1 AND 2 (UNION)} 
   318    318   }
   319    319   do_eqp_test 4.2.4 {
   320    320     SELECT * FROM t1 INTERSECT SELECT * FROM t2 ORDER BY 1
   321    321   } {
   322    322     1 0 0 {SCAN TABLE t1} 
   323    323     1 0 0 {USE TEMP B-TREE FOR ORDER BY}
   324         -  2 0 0 {SCAN TABLE t2} 
   325         -  2 0 0 {USE TEMP B-TREE FOR ORDER BY}
          324  +  2 0 0 {SCAN TABLE t2 USING INDEX t2i1} 
          325  +  2 0 0 {USE TEMP B-TREE FOR RIGHT PART OF ORDER BY}
   326    326     0 0 0 {COMPOUND SUBQUERIES 1 AND 2 (INTERSECT)} 
   327    327   }
   328    328   do_eqp_test 4.2.5 {
   329    329     SELECT * FROM t1 EXCEPT SELECT * FROM t2 ORDER BY 1
   330    330   } {
   331    331     1 0 0 {SCAN TABLE t1} 
   332    332     1 0 0 {USE TEMP B-TREE FOR ORDER BY}
   333         -  2 0 0 {SCAN TABLE t2} 
   334         -  2 0 0 {USE TEMP B-TREE FOR ORDER BY}
          333  +  2 0 0 {SCAN TABLE t2 USING INDEX t2i1} 
          334  +  2 0 0 {USE TEMP B-TREE FOR RIGHT PART OF ORDER BY}
   335    335     0 0 0 {COMPOUND SUBQUERIES 1 AND 2 (EXCEPT)} 
   336    336   }
   337    337   
   338    338   do_eqp_test 4.3.1 {
   339    339     SELECT x FROM t1 UNION SELECT x FROM t2
   340    340   } {
   341    341     1 0 0 {SCAN TABLE t1} 

Changes to test/index6.test.

   141    141   
   142    142   # Queries use partial indices as appropriate times.
   143    143   #
   144    144   do_test index6-2.1 {
   145    145     execsql {
   146    146       CREATE TABLE t2(a,b);
   147    147       INSERT INTO t2(a,b) SELECT value, value FROM nums WHERE value<1000;
   148         -    UPDATE t2 SET a=NULL WHERE b%5==0;
          148  +    UPDATE t2 SET a=NULL WHERE b%2==0;
   149    149       CREATE INDEX t2a1 ON t2(a) WHERE a IS NOT NULL;
   150    150       SELECT count(*) FROM t2 WHERE a IS NOT NULL;
   151    151     }
   152         -} {800}
          152  +} {500}
   153    153   do_test index6-2.2 {
   154    154     execsql {
   155    155       EXPLAIN QUERY PLAN
   156    156       SELECT * FROM t2 WHERE a=5;
   157    157     }
   158    158   } {/.* TABLE t2 USING INDEX t2a1 .*/}
   159    159   ifcapable stat4||stat3 {
          160  +  execsql ANALYZE
   160    161     do_test index6-2.3stat4 {
   161    162       execsql {
   162    163         EXPLAIN QUERY PLAN
   163    164         SELECT * FROM t2 WHERE a IS NOT NULL;
   164    165       }
   165    166     } {/.* TABLE t2 USING INDEX t2a1 .*/}
   166    167   } else {

Changes to test/orderby5.test.

    76     76     INSERT INTO sqlite_stat1 VALUES('t1','t1bc','1000000 10 9');
    77     77     INSERT INTO sqlite_stat1 VALUES('t2','t2bc','100 10 5');
    78     78     ANALYZE sqlite_master;
    79     79   
    80     80     EXPLAIN QUERY PLAN
    81     81     SELECT * FROM t2 WHERE a=0 ORDER BY a, b, c;
    82     82   } {~/B-TREE/}
           83  +
    83     84   do_execsql_test 2.1b {
    84     85     EXPLAIN QUERY PLAN
    85         -  SELECT * FROM t1 WHERE a=0 ORDER BY a, b, c;
           86  +  SELECT * FROM t1 WHERE likelihood(a=0, 0.05) ORDER BY a, b, c;
    86     87   } {/B-TREE/}
    87         -
    88     88   
    89     89   do_execsql_test 2.2 {
    90     90     EXPLAIN QUERY PLAN
    91     91     SELECT * FROM t1 WHERE +a=0 ORDER BY a, b, c;
    92     92   } {/B-TREE/}
    93     93   do_execsql_test 2.3 {
    94     94     EXPLAIN QUERY PLAN

Changes to test/skipscan2.test.

    70     70   #
    71     71   do_execsql_test skipscan2-1.4 {
    72     72     ANALYZE;
    73     73     -- We do not have enough people above to actually force the use
    74     74     -- of a skip-scan.  So make a manual adjustment to the stat1 table
    75     75     -- to make it seem like there are many more.
    76     76     UPDATE sqlite_stat1 SET stat='10000 5000 20' WHERE idx='people_idx1';
           77  +  UPDATE sqlite_stat1 SET stat='10000 1' WHERE idx='sqlite_autoindex_people_1';
    77     78     ANALYZE sqlite_master;
    78     79   }
    79     80   db cache flush
    80     81   do_execsql_test skipscan2-1.5 {
    81     82     SELECT name FROM people WHERE height>=180 ORDER BY +name;
    82     83   } {David Jack Patrick Quiana Xavier}
    83     84   do_execsql_test skipscan2-1.5eqp {

Changes to test/unordered.test.

    38     38     }
    39     39     db close
    40     40     sqlite3 db test.db
    41     41     foreach {tn sql r(ordered) r(unordered)} {
    42     42       1   "SELECT * FROM t1 ORDER BY a"
    43     43           {0 0 0 {SCAN TABLE t1 USING INDEX i1}}
    44     44           {0 0 0 {SCAN TABLE t1} 0 0 0 {USE TEMP B-TREE FOR ORDER BY}}
    45         -    2   "SELECT * FROM t1 WHERE a >?"
           45  +    2   "SELECT * FROM t1 WHERE a > 100"
    46     46           {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?)}}
    47     47           {0 0 0 {SCAN TABLE t1}}
    48     48       3   "SELECT * FROM t1 WHERE a = ? ORDER BY rowid"
    49     49           {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}}
    50     50           {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)} 
    51     51            0 0 0 {USE TEMP B-TREE FOR ORDER BY}}
    52     52       4   "SELECT max(a) FROM t1"

Changes to test/where3.test.

   227    227   # the planner into use a table for the outer loop that might be indexable
   228    228   # if held until an inner loop.
   229    229   # 
   230    230   do_execsql_test where3-3.0 {
   231    231     CREATE TABLE t301(a INTEGER PRIMARY KEY,b,c);
   232    232     CREATE INDEX t301c ON t301(c);
   233    233     INSERT INTO t301 VALUES(1,2,3);
          234  +  INSERT INTO t301 VALUES(2,2,3);
   234    235     CREATE TABLE t302(x, y);
   235    236     INSERT INTO t302 VALUES(4,5);
   236    237     ANALYZE;
   237    238     explain query plan SELECT * FROM t302, t301 WHERE t302.x=5 AND t301.a=t302.y;
   238    239   } {
   239    240     0 0 0 {SCAN TABLE t302} 
   240    241     0 1 1 {SEARCH TABLE t301 USING INTEGER PRIMARY KEY (rowid=?)}
................................................................................
   247    248     0 1 0 {SEARCH TABLE t301 USING INTEGER PRIMARY KEY (rowid=?)}
   248    249   }
   249    250   do_execsql_test where3-3.2 {
   250    251     SELECT * FROM t301 WHERE c=3 AND a IS NULL;
   251    252   } {}
   252    253   do_execsql_test where3-3.3 {
   253    254     SELECT * FROM t301 WHERE c=3 AND a IS NOT NULL;
   254         -} {1 2 3}
          255  +} {1 2 3 2 2 3}
   255    256   
   256    257   if 0 {  # Query planner no longer does this
   257    258   # Verify that when there are multiple tables in a join which must be
   258    259   # full table scans that the query planner attempts put the table with
   259    260   # the fewest number of output rows as the outer loop.
   260    261   #
   261    262   do_execsql_test where3-4.0 {

Changes to test/whereG.test.

    10     10   #***********************************************************************
    11     11   # 
    12     12   # Test cases for query planning decisions and the unlikely() and
    13     13   # likelihood() functions.
    14     14   
    15     15   set testdir [file dirname $argv0]
    16     16   source $testdir/tester.tcl
           17  +set testprefix whereG
    17     18   
    18     19   do_execsql_test whereG-1.0 {
    19     20     CREATE TABLE composer(
    20     21       cid INTEGER PRIMARY KEY,
    21     22       cname TEXT
    22     23     );
    23     24     CREATE TABLE album(
................................................................................
   175    176     INSERT INTO t4 VALUES('right'),('wrong');
   176    177     SELECT DISTINCT x
   177    178      FROM (SELECT x FROM t4 GROUP BY x)
   178    179      WHERE x='right'
   179    180      ORDER BY x;
   180    181   } {right}
   181    182   
          183  +#-------------------------------------------------------------------------
          184  +# Test that likelihood() specifications on indexed terms are taken into 
          185  +# account by various forms of loops.
          186  +#
          187  +#   5.1.*: open ended range scans
          188  +#   5.2.*: skip-scans
          189  +#
          190  +reset_db
          191  +
          192  +do_execsql_test 5.1 {
          193  +  CREATE TABLE t1(a, b, c);
          194  +  CREATE INDEX i1 ON t1(a, b);
          195  +}
          196  +do_eqp_test 5.1.2 {
          197  +  SELECT * FROM t1 WHERE a>?
          198  +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?)}}
          199  +do_eqp_test 5.1.3 {
          200  +  SELECT * FROM t1 WHERE likelihood(a>?, 0.9)
          201  +} {0 0 0 {SCAN TABLE t1}}
          202  +
          203  +do_test 5.2 {
          204  +  for {set i 0} {$i < 100} {incr i} {
          205  +    execsql { INSERT INTO t1 VALUES('abc', $i, $i); }
          206  +  }
          207  +  execsql { INSERT INTO t1 SELECT 'def', b, c FROM t1; }
          208  +  execsql { ANALYZE }
          209  +} {}
          210  +do_eqp_test 5.2.2 {
          211  +  SELECT * FROM t1 WHERE likelihood(b>?, 0.01)
          212  +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (ANY(a) AND b>?)}}
          213  +do_eqp_test 5.2.3 {
          214  +  SELECT * FROM t1 WHERE likelihood(b>?, 0.9)
          215  +} {0 0 0 {SCAN TABLE t1}}
          216  +
          217  +do_eqp_test 5.3.1 {
          218  +  SELECT * FROM t1 WHERE a=?
          219  +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}}
          220  +do_eqp_test 5.3.2 {
          221  +  SELECT * FROM t1 WHERE likelihood(a=?, 0.9)
          222  +} {0 0 0 {SCAN TABLE t1}}
   182    223   
   183    224   finish_test
          225  +

Changes to tool/logest.c.

    79     79     return (n+8)>>(3-x);
    80     80   }
    81     81   static LogEst logEstFromDouble(double x){
    82     82     sqlite3_uint64 a;
    83     83     LogEst e;
    84     84     assert( sizeof(x)==8 && sizeof(a)==8 );
    85     85     if( x<=0.0 ) return -32768;
    86         -  if( x<1.0 ) return -logEstFromDouble(1/x);
           86  +  if( x<0.01 ) return -logEstFromDouble(1.0/x);
           87  +  if( x<1.0 ) return logEstFromDouble(100.0*x) - 66;
    87     88     if( x<1024.0 ) return logEstFromInteger((sqlite3_uint64)(1024.0*x)) - 100;
    88     89     if( x<=2000000000.0 ) return logEstFromInteger((sqlite3_uint64)x);
    89     90     memcpy(&a, &x, 8);
    90     91     e = (a>>52) - 1022;
    91     92     return e*10;
    92     93   }
    93     94   
................................................................................
   152    153       }else if( isFloat(z) && z[0]!='-' ){
   153    154         a[n++] = logEstFromDouble(atof(z));
   154    155       }else{
   155    156         showHelp(argv[0]);
   156    157       }
   157    158     }
   158    159     for(i=n-1; i>=0; i--){
   159         -    if( a[i]<0 ){
          160  +    if( a[i]<-40 ){
   160    161         printf("%5d (%f)\n", a[i], 1.0/(double)logEstToInt(-a[i]));
          162  +    }else if( a[i]<10 ){
          163  +      printf("%5d (%f)\n", a[i], logEstToInt(a[i]+100)/1024.0);
   161    164       }else{
   162    165         sqlite3_uint64 x = logEstToInt(a[i]+100)*100/1024;
   163    166         printf("%5d (%lld.%02lld)\n", a[i], x/100, x%100);
   164    167       }
   165    168     }
   166    169     return 0;
   167    170   }