Index: src/analyze.c ================================================================== --- src/analyze.c +++ src/analyze.c @@ -1369,10 +1369,11 @@ */ static void decodeIntArray( char *zIntArray, /* String containing int array to decode */ int nOut, /* Number of slots in aOut[] */ tRowcnt *aOut, /* Store integers here */ + LogEst *aLog, /* Or, if aOut==0, here */ Index *pIndex /* Handle extra flags for this index, if not NULL */ ){ char *z = zIntArray; int c; int i; @@ -1387,11 +1388,15 @@ v = 0; while( (c=z[0])>='0' && c<='9' ){ v = v*10 + c - '0'; z++; } - aOut[i] = v; + if( aOut ){ + aOut[i] = v; + }else{ + aLog[i] = sqlite3LogEst(v); + } if( *z==' ' ) z++; } #ifndef SQLITE_ENABLE_STAT3_OR_STAT4 assert( pIndex!=0 ); #else @@ -1443,16 +1448,16 @@ pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase); } z = argv[2]; if( pIndex ){ - decodeIntArray((char*)z, pIndex->nKeyCol+1, pIndex->aiRowEst, pIndex); - if( pIndex->pPartIdxWhere==0 ) pTable->nRowEst = pIndex->aiRowEst[0]; + decodeIntArray((char*)z, pIndex->nKeyCol+1, 0, pIndex->aiRowLogEst, pIndex); + if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0]; }else{ Index fakeIdx; fakeIdx.szIdxRow = pTable->szTabRow; - decodeIntArray((char*)z, 1, &pTable->nRowEst, &fakeIdx); + decodeIntArray((char*)z, 1, 0, &pTable->nRowLogEst, &fakeIdx); pTable->szTabRow = fakeIdx.szIdxRow; } return 0; } @@ -1640,13 +1645,13 @@ if( pIdx!=pPrevIdx ){ initAvgEq(pPrevIdx); pPrevIdx = pIdx; } pSample = &pIdx->aSample[pIdx->nSample]; - decodeIntArray((char*)sqlite3_column_text(pStmt,1), nCol, pSample->anEq, 0); - decodeIntArray((char*)sqlite3_column_text(pStmt,2), nCol, pSample->anLt, 0); - decodeIntArray((char*)sqlite3_column_text(pStmt,3), nCol, pSample->anDLt,0); + decodeIntArray((char*)sqlite3_column_text(pStmt,1),nCol,pSample->anEq,0,0); + decodeIntArray((char*)sqlite3_column_text(pStmt,2),nCol,pSample->anLt,0,0); + decodeIntArray((char*)sqlite3_column_text(pStmt,3),nCol,pSample->anDLt,0,0); /* Take a copy of the sample. Add two 0x00 bytes the end of the buffer. ** This is in case the sample record is corrupted. In that case, the ** sqlite3VdbeRecordCompare() may read up to two varints past the ** end of the allocated buffer before it realizes it is dealing with Index: src/build.c ================================================================== --- src/build.c +++ src/build.c @@ -903,11 +903,11 @@ } pTable->zName = zName; pTable->iPKey = -1; pTable->pSchema = db->aDb[iDb].pSchema; pTable->nRef = 1; - pTable->nRowEst = 1048576; + pTable->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); assert( pParse->pNewTable==0 ); pParse->pNewTable = pTable; /* If this is the magic sqlite_sequence table used by autoincrement, ** then record a pointer to this table in the main database structure @@ -2728,19 +2728,19 @@ Index *p; /* Allocated index object */ int nByte; /* Bytes of space for Index object + arrays */ nByte = ROUND8(sizeof(Index)) + /* Index structure */ ROUND8(sizeof(char*)*nCol) + /* Index.azColl */ - ROUND8(sizeof(tRowcnt)*(nCol+1) + /* Index.aiRowEst */ + ROUND8(sizeof(LogEst)*(nCol+1) + /* Index.aiRowLogEst */ sizeof(i16)*nCol + /* Index.aiColumn */ sizeof(u8)*nCol); /* Index.aSortOrder */ p = sqlite3DbMallocZero(db, nByte + nExtra); if( p ){ char *pExtra = ((char*)p)+ROUND8(sizeof(Index)); - p->azColl = (char**)pExtra; pExtra += ROUND8(sizeof(char*)*nCol); - p->aiRowEst = (tRowcnt*)pExtra; pExtra += sizeof(tRowcnt)*(nCol+1); - p->aiColumn = (i16*)pExtra; pExtra += sizeof(i16)*nCol; + p->azColl = (char**)pExtra; pExtra += ROUND8(sizeof(char*)*nCol); + p->aiRowLogEst = (LogEst*)pExtra; pExtra += sizeof(LogEst)*(nCol+1); + p->aiColumn = (i16*)pExtra; pExtra += sizeof(i16)*nCol; p->aSortOrder = (u8*)pExtra; p->nColumn = nCol; p->nKeyCol = nCol - 1; *ppExtra = ((char*)p) + nByte; } @@ -2966,11 +2966,11 @@ pIndex = sqlite3AllocateIndexObject(db, pList->nExpr + nExtraCol, nName + nExtra + 1, &zExtra); if( db->mallocFailed ){ goto exit_create_index; } - assert( EIGHT_BYTE_ALIGNMENT(pIndex->aiRowEst) ); + assert( EIGHT_BYTE_ALIGNMENT(pIndex->aiRowLogEst) ); assert( EIGHT_BYTE_ALIGNMENT(pIndex->azColl) ); pIndex->zName = zExtra; zExtra += nName + 1; memcpy(pIndex->zName, zName, nName+1); pIndex->pTable = pTab; @@ -3247,11 +3247,11 @@ ** ** aiRowEst[0] is suppose to contain the number of elements in the index. ** Since we do not know, guess 1 million. aiRowEst[1] is an estimate of the ** number of rows in the table that match any particular value of the ** first column of the index. aiRowEst[2] is an estimate of the number -** of rows that match any particular combiniation of the first 2 columns +** of rows that match any particular combination of the first 2 columns ** of the index. And so forth. It must always be the case that * ** aiRowEst[N]<=aiRowEst[N-1] ** aiRowEst[N]>=1 ** @@ -3258,24 +3258,31 @@ ** Apart from that, we have little to go on besides intuition as to ** how aiRowEst[] should be initialized. The numbers generated here ** are based on typical values found in actual indices. */ void sqlite3DefaultRowEst(Index *pIdx){ - tRowcnt *a = pIdx->aiRowEst; + /* 10, 9, 8, 7, 6 */ + LogEst aVal[] = { 33, 32, 30, 28, 26 }; + LogEst *a = pIdx->aiRowLogEst; + int nCopy = MIN(ArraySize(aVal), pIdx->nKeyCol); int i; - tRowcnt n; - assert( a!=0 ); - a[0] = pIdx->pTable->nRowEst; - if( a[0]<10 ) a[0] = 10; - n = 10; - for(i=1; i<=pIdx->nKeyCol; i++){ - a[i] = n; - if( n>5 ) n--; - } - if( pIdx->onError!=OE_None ){ - a[pIdx->nKeyCol] = 1; - } + + /* Set the first entry (number of rows in the index) to the estimated + ** number of rows in the table. Or 10, if the estimated number of rows + ** in the table is less than that. */ + a[0] = pIdx->pTable->nRowLogEst; + if( a[0]<33 ) a[0] = 33; assert( 33==sqlite3LogEst(10) ); + + /* Estimate that a[1] is 10, a[2] is 9, a[3] is 8, a[4] is 7, a[5] is + ** 6 and each subsequent value (if any) is 5. */ + memcpy(&a[1], aVal, nCopy*sizeof(LogEst)); + for(i=nCopy+1; i<=pIdx->nKeyCol; i++){ + a[i] = 23; assert( 23==sqlite3LogEst(5) ); + } + + assert( 0==sqlite3LogEst(1) ); + if( pIdx->onError!=OE_None ) a[pIdx->nKeyCol] = 0; } /* ** This routine will drop an existing named index. This routine ** implements the DROP INDEX statement. Index: src/pragma.c ================================================================== --- src/pragma.c +++ src/pragma.c @@ -1486,17 +1486,19 @@ Table *pTab = sqliteHashData(i); sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, pTab->zName, 0); sqlite3VdbeAddOp2(v, OP_Null, 0, 2); sqlite3VdbeAddOp2(v, OP_Integer, (int)sqlite3LogEstToInt(pTab->szTabRow), 3); - sqlite3VdbeAddOp2(v, OP_Integer, (int)pTab->nRowEst, 4); + sqlite3VdbeAddOp2(v, OP_Integer, + (int)sqlite3LogEstToInt(pTab->nRowLogEst), 4); sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4); for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, pIdx->zName, 0); sqlite3VdbeAddOp2(v, OP_Integer, (int)sqlite3LogEstToInt(pIdx->szIdxRow), 3); - sqlite3VdbeAddOp2(v, OP_Integer, (int)pIdx->aiRowEst[0], 4); + sqlite3VdbeAddOp2(v, OP_Integer, + (int)sqlite3LogEstToInt(pIdx->aiRowLogEst[0]), 4); sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4); } } } break; Index: src/select.c ================================================================== --- src/select.c +++ src/select.c @@ -1688,11 +1688,11 @@ /* The sqlite3ResultSetOfSelect() is only used n contexts where lookaside ** is disabled */ assert( db->lookaside.bEnabled==0 ); pTab->nRef = 1; pTab->zName = 0; - pTab->nRowEst = 1048576; + pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); selectColumnsFromExprList(pParse, pSelect->pEList, &pTab->nCol, &pTab->aCol); selectAddColumnTypeAndCollation(pParse, pTab, pSelect); pTab->iPKey = -1; if( db->mallocFailed ){ sqlite3DeleteTable(db, pTab); @@ -3827,11 +3827,11 @@ pFrom->pTab = pTab = sqlite3DbMallocZero(db, sizeof(Table)); if( pTab==0 ) return WRC_Abort; pTab->nRef = 1; pTab->zName = sqlite3DbStrDup(db, pCte->zName); pTab->iPKey = -1; - pTab->nRowEst = 1048576; + pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); pTab->tabFlags |= TF_Ephemeral; pFrom->pSelect = sqlite3SelectDup(db, pCte->pSelect, 0); if( db->mallocFailed ) return SQLITE_NOMEM; assert( pFrom->pSelect ); @@ -4003,11 +4003,11 @@ pTab->nRef = 1; pTab->zName = sqlite3MPrintf(db, "sqlite_sq_%p", (void*)pTab); while( pSel->pPrior ){ pSel = pSel->pPrior; } selectColumnsFromExprList(pParse, pSel->pEList, &pTab->nCol, &pTab->aCol); pTab->iPKey = -1; - pTab->nRowEst = 1048576; + pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); pTab->tabFlags |= TF_Ephemeral; #endif }else{ /* An ordinary table or view name in the FROM clause */ assert( pFrom->pTab==0 ); @@ -4653,11 +4653,11 @@ VdbeComment((v, "%s", pItem->pTab->zName)); pItem->addrFillSub = addrTop; sqlite3SelectDestInit(&dest, SRT_Coroutine, pItem->regReturn); explainSetInteger(pItem->iSelectId, (u8)pParse->iNextSelectId); sqlite3Select(pParse, pSub, &dest); - pItem->pTab->nRowEst = (unsigned)pSub->nSelectRow; + pItem->pTab->nRowLogEst = sqlite3LogEst(pSub->nSelectRow); pItem->viaCoroutine = 1; pItem->regResult = dest.iSdst; sqlite3VdbeAddOp1(v, OP_EndCoroutine, pItem->regReturn); sqlite3VdbeJumpHere(v, addrTop-1); sqlite3ClearTempRegCache(pParse); @@ -4684,11 +4684,11 @@ VdbeNoopComment((v, "materialize \"%s\"", pItem->pTab->zName)); } sqlite3SelectDestInit(&dest, SRT_EphemTab, pItem->iCursor); explainSetInteger(pItem->iSelectId, (u8)pParse->iNextSelectId); sqlite3Select(pParse, pSub, &dest); - pItem->pTab->nRowEst = (unsigned)pSub->nSelectRow; + pItem->pTab->nRowLogEst = sqlite3LogEst(pSub->nSelectRow); if( onceAddr ) sqlite3VdbeJumpHere(v, onceAddr); retAddr = sqlite3VdbeAddOp1(v, OP_Return, pItem->regReturn); VdbeComment((v, "end %s", pItem->pTab->zName)); sqlite3VdbeChangeP1(v, topAddr, retAddr); sqlite3ClearTempRegCache(pParse); Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -523,14 +523,14 @@ ** Estimated quantities used for query planning are stored as 16-bit ** logarithms. For quantity X, the value stored is 10*log2(X). This ** gives a possible range of values of approximately 1.0e986 to 1e-986. ** But the allowed values are "grainy". Not every value is representable. ** For example, quantities 16 and 17 are both represented by a LogEst -** of 40. However, since LogEst quantatites are suppose to be estimates, +** of 40. However, since LogEst quantaties are suppose to be estimates, ** not exact values, this imprecision is not a problem. ** -** "LogEst" is short for "Logarithimic Estimate". +** "LogEst" is short for "Logarithmic Estimate". ** ** Examples: ** 1 -> 0 20 -> 43 10000 -> 132 ** 2 -> 10 25 -> 46 25000 -> 146 ** 3 -> 16 100 -> 66 1000000 -> 199 @@ -1469,11 +1469,11 @@ FKey *pFKey; /* Linked list of all foreign keys in this table */ char *zColAff; /* String defining the affinity of each column */ #ifndef SQLITE_OMIT_CHECK ExprList *pCheck; /* All CHECK constraints */ #endif - tRowcnt nRowEst; /* Estimated rows in table - from sqlite_stat1 table */ + LogEst nRowLogEst; /* Estimated rows in table - from sqlite_stat1 table */ int tnum; /* Root BTree node for this table (see note above) */ i16 iPKey; /* If not negative, use aCol[iPKey] as the primary key */ i16 nCol; /* Number of columns in this table */ u16 nRef; /* Number of pointers to this Table */ LogEst szTabRow; /* Estimated size of each table row in bytes */ @@ -1678,11 +1678,11 @@ ** element. */ struct Index { char *zName; /* Name of this index */ i16 *aiColumn; /* Which columns are used by this index. 1st is 0 */ - tRowcnt *aiRowEst; /* From ANALYZE: Est. rows selected by each column */ + LogEst *aiRowLogEst; /* From ANALYZE: Est. rows selected by each column */ Table *pTable; /* The SQL table being indexed */ char *zColAff; /* String defining the affinity of each column */ Index *pNext; /* The next index associated with the same table */ Schema *pSchema; /* Schema containing this index */ u8 *aSortOrder; /* for each column: True==DESC, False==ASC */ Index: src/util.c ================================================================== --- src/util.c +++ src/util.c @@ -1244,12 +1244,12 @@ return b+x[b-a]; } } /* -** Convert an integer into a LogEst. In other words, compute a -** good approximatation for 10*log2(x). +** Convert an integer into a LogEst. In other words, compute an +** approximation for 10*log2(x). */ LogEst sqlite3LogEst(u64 x){ static LogEst a[] = { 0, 2, 3, 5, 6, 7, 8, 9 }; LogEst y = 40; if( x<8 ){ Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -225,11 +225,11 @@ } pTerm = &pWC->a[idx = pWC->nTerm++]; if( p && ExprHasProperty(p, EP_Unlikely) ){ pTerm->truthProb = sqlite3LogEst(p->iTable) - 99; }else{ - pTerm->truthProb = -1; + pTerm->truthProb = 1; } pTerm->pExpr = sqlite3ExprSkipCollate(p); pTerm->wtFlags = wtFlags; pTerm->pWC = pWC; pTerm->iParent = -1; @@ -1954,11 +1954,12 @@ tRowcnt iLower, iUpper, iGap; if( i==0 ){ iLower = 0; iUpper = aSample[0].anLt[iCol]; }else{ - iUpper = i>=pIdx->nSample ? pIdx->aiRowEst[0] : aSample[i].anLt[iCol]; + i64 nRow0 = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); + iUpper = i>=pIdx->nSample ? nRow0 : aSample[i].anLt[iCol]; iLower = aSample[i-1].anEq[iCol] + aSample[i-1].anLt[iCol]; } aStat[1] = (pIdx->nKeyCol>iCol ? pIdx->aAvgEq[iCol] : 1); if( iLower>=iUpper ){ iGap = 0; @@ -1972,10 +1973,33 @@ } aStat[0] = iLower + iGap; } } #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ + +/* +** If it is not NULL, pTerm is a term that provides an upper or lower +** bound on a range scan. Without considering pTerm, it is estimated +** that the scan will visit nNew rows. This function returns the number +** estimated to be visited after taking pTerm into account. +** +** If the user explicitly specified a likelihood() value for this term, +** then the return value is the likelihood multiplied by the number of +** input rows. Otherwise, this function assumes that an "IS NOT NULL" term +** has a likelihood of 0.50, and any other term a likelihood of 0.25. +*/ +static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){ + LogEst nRet = nNew; + if( pTerm ){ + if( pTerm->truthProb<=0 ){ + nRet += pTerm->truthProb; + }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){ + nRet -= 20; assert( 20==sqlite3LogEst(4) ); + } + } + return nRet; +} /* ** This function is used to estimate the number of rows that will be visited ** by scanning an index for a range of values. The range may have an upper ** bound, a lower bound, or both. The WHERE clause terms that set the upper @@ -2065,11 +2089,11 @@ aff = p->pTable->aCol[p->aiColumn[nEq]].affinity; } /* Determine iLower and iUpper using ($P) only. */ if( nEq==0 ){ iLower = 0; - iUpper = p->aiRowEst[0]; + iUpper = sqlite3LogEstToInt(p->aiRowLogEst[0]); }else{ /* Note: this call could be optimized away - since the same values must ** have been requested when testing key $P in whereEqualScanEst(). */ whereKeyStats(pParse, p, pRec, 0, a); iLower = a[0]; @@ -2125,21 +2149,22 @@ #else UNUSED_PARAMETER(pParse); UNUSED_PARAMETER(pBuilder); #endif assert( pLower || pUpper ); - /* TUNING: Each inequality constraint reduces the search space 4-fold. - ** A BETWEEN operator, therefore, reduces the search space 16-fold */ - nNew = nOut; - if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ){ - nNew -= 20; assert( 20==sqlite3LogEst(4) ); - nOut--; - } - if( pUpper ){ - nNew -= 20; assert( 20==sqlite3LogEst(4) ); - nOut--; - } + assert( pUpper==0 || (pUpper->wtFlags & TERM_VNULL)==0 ); + nNew = whereRangeAdjust(pLower, nOut); + nNew = whereRangeAdjust(pUpper, nNew); + + /* TUNING: If there is both an upper and lower limit, assume the range is + ** reduced by an additional 75%. This means that, by default, an open-ended + ** range query (e.g. col > ?) is assumed to match 1/4 of the rows in the + ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to + ** match 1/64 of the index. */ + if( pLower && pUpper ) nNew -= 20; + + nOut -= (pLower!=0) + (pUpper!=0); if( nNew<10 ) nNew = 10; if( nNewnOut = (LogEst)nOut; return rc; } @@ -2232,26 +2257,27 @@ WhereLoopBuilder *pBuilder, ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ tRowcnt *pnRow /* Write the revised row estimate here */ ){ Index *p = pBuilder->pNew->u.btree.pIndex; + i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]); int nRecValid = pBuilder->nRecValid; int rc = SQLITE_OK; /* Subfunction return code */ tRowcnt nEst; /* Number of rows for a single term */ tRowcnt nRowEst = 0; /* New estimate of the number of rows */ int i; /* Loop counter */ assert( p->aSample!=0 ); for(i=0; rc==SQLITE_OK && inExpr; i++){ - nEst = p->aiRowEst[0]; + nEst = nRow0; rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr, &nEst); nRowEst += nEst; pBuilder->nRecValid = nRecValid; } if( rc==SQLITE_OK ){ - if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; + if( nRowEst > nRow0 ) nRowEst = nRow0; *pnRow = nRowEst; WHERETRACE(0x10,("IN row estimate: est=%g\n", nRowEst)); } assert( pBuilder->nRecValid==nRecValid ); return rc; @@ -3758,13 +3784,15 @@ ** WHERE clause terms than Y and that every WHERE clause term used by X is ** also used by Y. */ static void whereLoopAdjustCost(const WhereLoop *p, WhereLoop *pTemplate){ if( (pTemplate->wsFlags & WHERE_INDEXED)==0 ) return; + if( (pTemplate->wsFlags & WHERE_SKIPSCAN)!=0 ) return; for(; p; p=p->pNextLoop){ if( p->iTab!=pTemplate->iTab ) continue; if( (p->wsFlags & WHERE_INDEXED)==0 ) continue; + if( (p->wsFlags & WHERE_SKIPSCAN)!=0 ) continue; if( whereLoopCheaperProperSubset(p, pTemplate) ){ /* Adjust pTemplate cost downward so that it is cheaper than its ** subset p */ pTemplate->rRun = p->rRun; pTemplate->nOut = p->nOut - 1; @@ -3985,17 +4013,24 @@ pX = pLoop->aLTerm[j]; if( pX==0 ) continue; if( pX==pTerm ) break; if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break; } - if( j<0 ) pLoop->nOut += pTerm->truthProb; + if( j<0 ){ + pLoop->nOut += (pTerm->truthProb<=0 ? pTerm->truthProb : -1); + } } } /* -** We have so far matched pBuilder->pNew->u.btree.nEq terms of the index pIndex. -** Try to match one more. +** We have so far matched pBuilder->pNew->u.btree.nEq terms of the +** index pIndex. Try to match one more. +** +** When this function is called, pBuilder->pNew->nOut contains the +** number of rows expected to be visited by filtering using the nEq +** terms only. If it is modified, this value is restored before this +** function returns. ** ** If pProbe->tnum==0, that means pIndex is a fake index used for the ** INTEGER PRIMARY KEY. */ static int whereLoopAddBtreeIndex( @@ -4017,11 +4052,10 @@ u16 saved_nSkip; /* Original value of pNew->u.btree.nSkip */ u32 saved_wsFlags; /* Original value of pNew->wsFlags */ LogEst saved_nOut; /* Original value of pNew->nOut */ int iCol; /* Index of the column in the table */ int rc = SQLITE_OK; /* Return code */ - LogEst nRowEst; /* Estimated index selectivity */ LogEst rLogSize; /* Logarithm of table size */ WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */ pNew = pBuilder->pNew; if( db->mallocFailed ) return SQLITE_NOMEM; @@ -4038,15 +4072,12 @@ if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE); assert( pNew->u.btree.nEq<=pProbe->nKeyCol ); if( pNew->u.btree.nEq < pProbe->nKeyCol ){ iCol = pProbe->aiColumn[pNew->u.btree.nEq]; - nRowEst = sqlite3LogEst(pProbe->aiRowEst[pNew->u.btree.nEq+1]); - if( nRowEst==0 && pProbe->onError==OE_None ) nRowEst = 1; }else{ iCol = -1; - nRowEst = 0; } pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, iCol, opMask, pProbe); saved_nEq = pNew->u.btree.nEq; saved_nSkip = pNew->u.btree.nSkip; @@ -4053,57 +4084,68 @@ saved_nLTerm = pNew->nLTerm; saved_wsFlags = pNew->wsFlags; saved_prereq = pNew->prereq; saved_nOut = pNew->nOut; pNew->rSetup = 0; - rLogSize = estLog(sqlite3LogEst(pProbe->aiRowEst[0])); + rLogSize = estLog(pProbe->aiRowLogEst[0]); /* Consider using a skip-scan if there are no WHERE clause constraints ** available for the left-most terms of the index, and if the average - ** number of repeats in the left-most terms is at least 18. The magic - ** number 18 was found by experimentation to be the payoff point where - ** skip-scan become faster than a full-scan. - */ + ** number of repeats in the left-most terms is at least 18. + ** + ** The magic number 18 is selected on the basis that scanning 17 rows + ** is almost always quicker than an index seek (even though if the index + ** contains fewer than 2^17 rows we assume otherwise in other parts of + ** the code). And, even if it is not, it should not be too much slower. + ** On the other hand, the extra seeks could end up being significantly + ** more expensive. */ + assert( 42==sqlite3LogEst(18) ); if( pTerm==0 && saved_nEq==saved_nSkip && saved_nEq+1nKeyCol - && pProbe->aiRowEst[saved_nEq+1]>=18 /* TUNING: Minimum for skip-scan */ + && pProbe->aiRowLogEst[saved_nEq+1]>=42 /* TUNING: Minimum for skip-scan */ && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK ){ LogEst nIter; pNew->u.btree.nEq++; pNew->u.btree.nSkip++; pNew->aLTerm[pNew->nLTerm++] = 0; pNew->wsFlags |= WHERE_SKIPSCAN; - nIter = sqlite3LogEst(pProbe->aiRowEst[0]/pProbe->aiRowEst[saved_nEq+1]); - pNew->rRun = rLogSize + nIter; - pNew->nOut += nIter; - whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter); + nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1]; + pNew->nOut -= nIter; + whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul); pNew->nOut = saved_nOut; } for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){ + u16 eOp = pTerm->eOperator; /* Shorthand for pTerm->eOperator */ + LogEst rCostIdx; + LogEst nOutUnadjusted; /* nOut before IN() and WHERE adjustments */ int nIn = 0; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 int nRecValid = pBuilder->nRecValid; #endif - if( (pTerm->eOperator==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0) + if( (eOp==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0) && (iCol<0 || pSrc->pTab->aCol[iCol].notNull) ){ continue; /* ignore IS [NOT] NULL constraints on NOT NULL columns */ } if( pTerm->prereqRight & pNew->maskSelf ) continue; - assert( pNew->nOut==saved_nOut ); - pNew->wsFlags = saved_wsFlags; pNew->u.btree.nEq = saved_nEq; pNew->nLTerm = saved_nLTerm; if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */ pNew->aLTerm[pNew->nLTerm++] = pTerm; pNew->prereq = (saved_prereq | pTerm->prereqRight) & ~pNew->maskSelf; - pNew->rRun = rLogSize; /* Baseline cost is log2(N). Adjustments below */ - if( pTerm->eOperator & WO_IN ){ + + assert( nInMul==0 + || (pNew->wsFlags & WHERE_COLUMN_NULL)!=0 + || (pNew->wsFlags & WHERE_COLUMN_IN)!=0 + || (pNew->wsFlags & WHERE_SKIPSCAN)!=0 + ); + + if( eOp & WO_IN ){ Expr *pExpr = pTerm->pExpr; pNew->wsFlags |= WHERE_COLUMN_IN; if( ExprHasProperty(pExpr, EP_xIsSelect) ){ /* "x IN (SELECT ...)": TUNING: the SELECT returns 25 rows */ nIn = 46; assert( 46==sqlite3LogEst(25) ); @@ -4111,87 +4153,121 @@ /* "x IN (value, value, ...)" */ nIn = sqlite3LogEst(pExpr->x.pList->nExpr); } assert( nIn>0 ); /* RHS always has 2 or more terms... The parser ** changes "x IN (?)" into "x=?". */ - pNew->rRun += nIn; - pNew->u.btree.nEq++; - pNew->nOut = nRowEst + nInMul + nIn; - }else if( pTerm->eOperator & (WO_EQ) ){ - assert( - (pNew->wsFlags & (WHERE_COLUMN_NULL|WHERE_COLUMN_IN|WHERE_SKIPSCAN))!=0 - || nInMul==0 - ); + + }else if( eOp & (WO_EQ) ){ pNew->wsFlags |= WHERE_COLUMN_EQ; - if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1)){ - assert( (pNew->wsFlags & WHERE_COLUMN_IN)==0 || iCol<0 ); + if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1) ){ if( iCol>=0 && pProbe->onError==OE_None ){ pNew->wsFlags |= WHERE_UNQ_WANTED; }else{ pNew->wsFlags |= WHERE_ONEROW; } } - pNew->u.btree.nEq++; - pNew->nOut = nRowEst + nInMul; - }else if( pTerm->eOperator & (WO_ISNULL) ){ + }else if( eOp & WO_ISNULL ){ pNew->wsFlags |= WHERE_COLUMN_NULL; - pNew->u.btree.nEq++; - /* TUNING: IS NULL selects 2 rows */ - nIn = 10; assert( 10==sqlite3LogEst(2) ); - pNew->nOut = nRowEst + nInMul + nIn; - }else if( pTerm->eOperator & (WO_GT|WO_GE) ){ - testcase( pTerm->eOperator & WO_GT ); - testcase( pTerm->eOperator & WO_GE ); + }else if( eOp & (WO_GT|WO_GE) ){ + testcase( eOp & WO_GT ); + testcase( eOp & WO_GE ); pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_BTM_LIMIT; pBtm = pTerm; pTop = 0; }else{ - assert( pTerm->eOperator & (WO_LT|WO_LE) ); - testcase( pTerm->eOperator & WO_LT ); - testcase( pTerm->eOperator & WO_LE ); + assert( eOp & (WO_LT|WO_LE) ); + testcase( eOp & WO_LT ); + testcase( eOp & WO_LE ); pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_TOP_LIMIT; pTop = pTerm; pBtm = (pNew->wsFlags & WHERE_BTM_LIMIT)!=0 ? pNew->aLTerm[pNew->nLTerm-2] : 0; } + + /* At this point pNew->nOut is set to the number of rows expected to + ** be visited by the index scan before considering term pTerm, or the + ** values of nIn and nInMul. In other words, assuming that all + ** "x IN(...)" terms are replaced with "x = ?". This block updates + ** the value of pNew->nOut to account for pTerm (but not nIn/nInMul). */ + assert( pNew->nOut==saved_nOut ); if( pNew->wsFlags & WHERE_COLUMN_RANGE ){ - /* Adjust nOut and rRun for STAT3 range values */ - assert( pNew->nOut==saved_nOut ); + /* Adjust nOut using stat3/stat4 data. Or, if there is no stat3/stat4 + ** data, using some other estimate. */ whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew); - } + }else{ + int nEq = ++pNew->u.btree.nEq; + assert( eOp & (WO_ISNULL|WO_EQ|WO_IN) ); + + assert( pNew->nOut==saved_nOut ); + if( pTerm->truthProb<=0 && iCol>=0 ){ + assert( (eOp & WO_IN) || nIn==0 ); + pNew->nOut += pTerm->truthProb; + pNew->nOut -= nIn; + pNew->wsFlags |= WHERE_LIKELIHOOD; + }else{ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 - if( nInMul==0 - && pProbe->nSample - && pNew->u.btree.nEq<=pProbe->nSampleCol - && OptimizationEnabled(db, SQLITE_Stat3) - ){ - Expr *pExpr = pTerm->pExpr; - tRowcnt nOut = 0; - if( (pTerm->eOperator & (WO_EQ|WO_ISNULL))!=0 ){ - testcase( pTerm->eOperator & WO_EQ ); - testcase( pTerm->eOperator & WO_ISNULL ); - rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut); - }else if( (pTerm->eOperator & WO_IN) - && !ExprHasProperty(pExpr, EP_xIsSelect) ){ - rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut); - } - assert( nOut==0 || rc==SQLITE_OK ); - if( nOut ){ - pNew->nOut = sqlite3LogEst(nOut); - if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut; - } - } + tRowcnt nOut = 0; + if( nInMul==0 + && pProbe->nSample + && pNew->u.btree.nEq<=pProbe->nSampleCol + && OptimizationEnabled(db, SQLITE_Stat3) + && ((eOp & WO_IN)==0 || !ExprHasProperty(pTerm->pExpr, EP_xIsSelect)) + && (pNew->wsFlags & WHERE_LIKELIHOOD)==0 + ){ + Expr *pExpr = pTerm->pExpr; + if( (eOp & (WO_EQ|WO_ISNULL))!=0 ){ + testcase( eOp & WO_EQ ); + testcase( eOp & WO_ISNULL ); + rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut); + }else{ + rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut); + } + assert( rc!=SQLITE_OK || nOut>0 ); + if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; + if( rc!=SQLITE_OK ) break; /* Jump out of the pTerm loop */ + if( nOut ){ + pNew->nOut = sqlite3LogEst(nOut); + if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut; + pNew->nOut -= nIn; + } + } + if( nOut==0 ) #endif + { + pNew->nOut += (pProbe->aiRowLogEst[nEq] - pProbe->aiRowLogEst[nEq-1]); + if( eOp & WO_ISNULL ){ + /* TUNING: If there is no likelihood() value, assume that a + ** "col IS NULL" expression matches twice as many rows + ** as (col=?). */ + pNew->nOut += 10; + } + } + } + } + + /* Set rCostIdx to the cost of visiting selected rows in index. Add + ** it to pNew->rRun, which is currently set to the cost of the index + ** seek only. Then, if this is a non-covering index, add the cost of + ** visiting the rows in the main table. */ + rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow; + pNew->rRun = sqlite3LogEstAdd(rLogSize, rCostIdx); if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){ - /* Each row involves a step of the index, then a binary search of - ** the main table */ - pNew->rRun = sqlite3LogEstAdd(pNew->rRun,rLogSize>27 ? rLogSize-17 : 10); + pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16); } - /* Step cost for each output row */ - pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut); + + nOutUnadjusted = pNew->nOut; + pNew->rRun += nInMul + nIn; + pNew->nOut += nInMul + nIn; whereLoopOutputAdjust(pBuilder->pWC, pNew); rc = whereLoopInsert(pBuilder, pNew); + + if( pNew->wsFlags & WHERE_COLUMN_RANGE ){ + pNew->nOut = saved_nOut; + }else{ + pNew->nOut = nOutUnadjusted; + } + if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 && pNew->u.btree.nEq<(pProbe->nKeyCol + (pProbe->zName!=0)) ){ whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nInMul+nIn); } @@ -4271,19 +4347,42 @@ /* ** Add all WhereLoop objects for a single table of the join where the table ** is idenfied by pBuilder->pNew->iTab. That table is guaranteed to be ** a b-tree table, not a virtual table. +** +** The costs (WhereLoop.rRun) of the b-tree loops added by this function +** are calculated as follows: +** +** For a full scan, assuming the table (or index) contains nRow rows: +** +** cost = nRow * 3.0 // full-table scan +** cost = nRow * K // scan of covering index +** cost = nRow * (K+3.0) // scan of non-covering index +** +** where K is a value between 1.1 and 3.0 set based on the relative +** estimated average size of the index and table records. +** +** For an index scan, where nVisit is the number of index rows visited +** by the scan, and nSeek is the number of seek operations required on +** the index b-tree: +** +** cost = nSeek * (log(nRow) + K * nVisit) // covering index +** cost = nSeek * (log(nRow) + (K+3.0) * nVisit) // non-covering index +** +** Normally, nSeek is 1. nSeek values greater than 1 come about if the +** WHERE clause includes "x IN (....)" terms used in place of "x=?". Or when +** implicit "x IN (SELECT x FROM tbl)" terms are added for skip-scans. */ static int whereLoopAddBtree( WhereLoopBuilder *pBuilder, /* WHERE clause information */ Bitmask mExtra /* Extra prerequesites for using this table */ ){ WhereInfo *pWInfo; /* WHERE analysis context */ Index *pProbe; /* An index we are evaluating */ Index sPk; /* A fake index object for the primary key */ - tRowcnt aiRowEstPk[2]; /* The aiRowEst[] value for the sPk index */ + LogEst aiRowEstPk[2]; /* The aiRowLogEst[] value for the sPk index */ i16 aiColumnPk = -1; /* The aColumn[] value for the sPk index */ SrcList *pTabList; /* The FROM clause */ struct SrcList_item *pSrc; /* The FROM clause btree term to add */ WhereLoop *pNew; /* Template WhereLoop object */ int rc = SQLITE_OK; /* Return code */ @@ -4314,24 +4413,25 @@ ** indices to follow */ Index *pFirst; /* First of real indices on the table */ memset(&sPk, 0, sizeof(Index)); sPk.nKeyCol = 1; sPk.aiColumn = &aiColumnPk; - sPk.aiRowEst = aiRowEstPk; + sPk.aiRowLogEst = aiRowEstPk; sPk.onError = OE_Replace; sPk.pTable = pTab; - aiRowEstPk[0] = pTab->nRowEst; - aiRowEstPk[1] = 1; + sPk.szIdxRow = pTab->szTabRow; + aiRowEstPk[0] = pTab->nRowLogEst; + aiRowEstPk[1] = 0; pFirst = pSrc->pTab->pIndex; if( pSrc->notIndexed==0 ){ /* The real indices of the table are only considered if the ** NOT INDEXED qualifier is omitted from the FROM clause */ sPk.pNext = pFirst; } pProbe = &sPk; } - rSize = sqlite3LogEst(pTab->nRowEst); + rSize = pTab->nRowLogEst; rLogSize = estLog(rSize); #ifndef SQLITE_OMIT_AUTOMATIC_INDEX /* Automatic indexes */ if( !pBuilder->pOrSet @@ -4377,10 +4477,11 @@ for(; rc==SQLITE_OK && pProbe; pProbe=pProbe->pNext, iSortIdx++){ if( pProbe->pPartIdxWhere!=0 && !whereUsablePartialIndex(pNew->iTab, pWC, pProbe->pPartIdxWhere) ){ continue; /* Partial index inappropriate for this query */ } + rSize = pProbe->aiRowLogEst[0]; pNew->u.btree.nEq = 0; pNew->u.btree.nSkip = 0; pNew->nLTerm = 0; pNew->iSortIdx = 0; pNew->rSetup = 0; @@ -4394,14 +4495,12 @@ /* Integer primary key index */ pNew->wsFlags = WHERE_IPK; /* Full table scan */ pNew->iSortIdx = b ? iSortIdx : 0; - /* TUNING: Cost of full table scan is 3*(N + log2(N)). - ** + The extra 3 factor is to encourage the use of indexed lookups - ** over full scans. FIXME */ - pNew->rRun = sqlite3LogEstAdd(rSize,rLogSize) + 16; + /* TUNING: Cost of full table scan is (N*3.0). */ + pNew->rRun = rSize + 16; whereLoopOutputAdjust(pWC, pNew); rc = whereLoopInsert(pBuilder, pNew); pNew->nOut = rSize; if( rc ) break; }else{ @@ -4424,39 +4523,20 @@ && sqlite3GlobalConfig.bUseCis && OptimizationEnabled(pWInfo->pParse->db, SQLITE_CoverIdxScan) ) ){ pNew->iSortIdx = b ? iSortIdx : 0; - /* TUNING: The base cost of an index scan is N + log2(N). - ** The log2(N) is for the initial seek to the beginning and the N - ** is for the scan itself. */ - pNew->rRun = sqlite3LogEstAdd(rSize, rLogSize); - if( m==0 ){ - /* TUNING: Cost of a covering index scan is K*(N + log2(N)). - ** + The extra factor K of between 1.1 and 3.0 that depends - ** on the relative sizes of the table and the index. K - ** is smaller for smaller indices, thus favoring them. - ** The upper bound on K (3.0) matches the penalty factor - ** on a full table scan that tries to encourage the use of - ** indexed lookups over full scans. - */ - pNew->rRun += 1 + (15*pProbe->szIdxRow)/pTab->szTabRow; - }else{ - /* TUNING: The cost of scanning a non-covering index is multiplied - ** by log2(N) to account for the binary search of the main table - ** that must happen for each row of the index. - ** TODO: Should there be a multiplier here, analogous to the 3x - ** multiplier for a fulltable scan or covering index scan, to - ** further discourage the use of an index scan? Or is the log2(N) - ** term sufficient discouragement? - ** TODO: What if some or all of the WHERE clause terms can be - ** computed without reference to the original table. Then the - ** penality should reduce to logK where K is the number of output - ** rows. - */ - pNew->rRun += rLogSize; - } + + /* The cost of visiting the index rows is N*K, where K is + ** between 1.1 and 3.0, depending on the relative sizes of the + ** index and table rows. If this is a non-covering index scan, + ** also add the cost of visiting table rows (N*3.0). */ + pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow; + if( m!=0 ){ + pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rSize+16); + } + whereLoopOutputAdjust(pWC, pNew); rc = whereLoopInsert(pBuilder, pNew); pNew->nOut = rSize; if( rc ) break; } @@ -4730,12 +4810,11 @@ pNew->wsFlags = WHERE_MULTI_OR; pNew->rSetup = 0; pNew->iSortIdx = 0; memset(&pNew->u, 0, sizeof(pNew->u)); for(i=0; rc==SQLITE_OK && irRun = sSum.a[i].rRun + 18; + pNew->rRun = sSum.a[i].rRun; pNew->nOut = sSum.a[i].nOut; pNew->prereq = sSum.a[i].prereq; rc = whereLoopInsert(pBuilder, pNew); } } @@ -5177,26 +5256,31 @@ if( isOrdered<0 ){ isOrdered = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags, iLoop, pWLoop, &revMask); if( isOrdered>=0 && isOrdered0 ); + assert( nOrderBy>0 && 66==sqlite3LogEst(100) ); rScale = sqlite3LogEst((nOrderBy-isOrdered)*100/nOrderBy) - 66; - rSortCost = nRowEst + estLog(nRowEst) + rScale; + rSortCost = nRowEst + estLog(nRowEst) + rScale + 16; + /* TUNING: The cost of implementing DISTINCT using a B-TREE is - ** also N*log(N) but it has a larger constant of proportionality. - ** Multiply by 3.0. */ + ** similar but with a larger constant of proportionality. + ** Multiply by an additional factor of 3.0. */ if( pWInfo->wctrlFlags & WHERE_WANT_DISTINCT ){ rSortCost += 16; } WHERETRACE(0x002, ("---- sort cost=%-3d (%d/%d) increases cost %3d to %-3d\n", Index: src/whereInt.h ================================================================== --- src/whereInt.h +++ src/whereInt.h @@ -456,5 +456,6 @@ #define WHERE_ONEROW 0x00001000 /* Selects no more than one row */ #define WHERE_MULTI_OR 0x00002000 /* OR using multiple indices */ #define WHERE_AUTO_INDEX 0x00004000 /* Uses an ephemeral index */ #define WHERE_SKIPSCAN 0x00008000 /* Uses the skip-scan algorithm */ #define WHERE_UNQ_WANTED 0x00010000 /* WHERE_ONEROW would have been helpful*/ +#define WHERE_LIKELIHOOD 0x00020000 /* A likelihood() is affecting nOut */ Index: test/analyze3.test ================================================================== --- test/analyze3.test +++ test/analyze3.test @@ -101,16 +101,25 @@ } else { execsql { SELECT count(*)>0 FROM sqlite_stat3; } } } {1} +do_execsql_test analyze3-1.1.x { + SELECT count(*) FROM t1 WHERE x>200 AND x<300; + SELECT count(*) FROM t1 WHERE x>0 AND x<1100; +} {99 1000} + +# The first of the following two SELECT statements visits 99 rows. So +# it is better to use the index. But the second visits every row in +# the table (1000 in total) so it is better to do a full-table scan. +# do_eqp_test analyze3-1.1.2 { SELECT sum(y) FROM t1 WHERE x>200 AND x<300 } {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x>? AND x0 AND x<1100 -} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x>? AND x200 AND x<300 } } {199 0 14850} do_test analyze3-1.1.5 { @@ -123,21 +132,21 @@ set u [expr int(300)] sf_execsql { SELECT sum(y) FROM t1 WHERE x>$l AND x<$u } } {199 0 14850} do_test analyze3-1.1.7 { sf_execsql { SELECT sum(y) FROM t1 WHERE x>0 AND x<1100 } -} {2000 0 499500} +} {999 999 499500} do_test analyze3-1.1.8 { set l [string range "0" 0 end] set u [string range "1100" 0 end] sf_execsql { SELECT sum(y) FROM t1 WHERE x>$l AND x<$u } -} {2000 0 499500} +} {999 999 499500} do_test analyze3-1.1.9 { set l [expr int(0)] set u [expr int(1100)] sf_execsql { SELECT sum(y) FROM t1 WHERE x>$l AND x<$u } -} {2000 0 499500} +} {999 999 499500} # The following tests are similar to the block above. The difference is # that the indexed column has TEXT affinity in this case. In the tests # above the affinity is INTEGER. @@ -150,16 +159,21 @@ CREATE INDEX i2 ON t2(x); COMMIT; ANALYZE; } } {} +do_execsql_test analyze3-2.1.x { + SELECT count(*) FROM t2 WHERE x>1 AND x<2; + SELECT count(*) FROM t2 WHERE x>0 AND x<99; +} {200 990} do_eqp_test analyze3-1.2.2 { SELECT sum(y) FROM t2 WHERE x>1 AND x<2 } {0 0 0 {SEARCH TABLE t2 USING INDEX i2 (x>? AND x0 AND x<99 -} {0 0 0 {SEARCH TABLE t2 USING INDEX i2 (x>? AND x12 AND x<20 } } {161 0 4760} do_test analyze3-1.2.5 { set l [string range "12" 0 end] @@ -171,21 +185,21 @@ set u [expr int(20)] sf_execsql {SELECT typeof($l), typeof($u), sum(y) FROM t2 WHERE x>$l AND x<$u} } {161 0 integer integer 4760} do_test analyze3-1.2.7 { sf_execsql { SELECT sum(y) FROM t2 WHERE x>0 AND x<99 } -} {1981 0 490555} +} {999 999 490555} do_test analyze3-1.2.8 { set l [string range "0" 0 end] set u [string range "99" 0 end] sf_execsql {SELECT typeof($l), typeof($u), sum(y) FROM t2 WHERE x>$l AND x<$u} -} {1981 0 text text 490555} +} {999 999 text text 490555} do_test analyze3-1.2.9 { set l [expr int(0)] set u [expr int(99)] sf_execsql {SELECT typeof($l), typeof($u), sum(y) FROM t2 WHERE x>$l AND x<$u} -} {1981 0 integer integer 490555} +} {999 999 integer integer 490555} # Same tests a third time. This time, column x has INTEGER affinity and # is not the leftmost column of the table. This triggered a bug causing # SQLite to use sub-optimal query plans in 3.6.18 and earlier. # @@ -197,16 +211,20 @@ CREATE INDEX i3 ON t3(x); COMMIT; ANALYZE; } } {} +do_execsql_test analyze3-1.3.x { + SELECT count(*) FROM t3 WHERE x>200 AND x<300; + SELECT count(*) FROM t3 WHERE x>0 AND x<1100 +} {99 1000} do_eqp_test analyze3-1.3.2 { SELECT sum(y) FROM t3 WHERE x>200 AND x<300 } {0 0 0 {SEARCH TABLE t3 USING INDEX i3 (x>? AND x0 AND x<1100 -} {0 0 0 {SEARCH TABLE t3 USING INDEX i3 (x>? AND x200 AND x<300 } } {199 0 14850} do_test analyze3-1.3.5 { @@ -219,21 +237,21 @@ set u [expr int(300)] sf_execsql { SELECT sum(y) FROM t3 WHERE x>$l AND x<$u } } {199 0 14850} do_test analyze3-1.3.7 { sf_execsql { SELECT sum(y) FROM t3 WHERE x>0 AND x<1100 } -} {2000 0 499500} +} {999 999 499500} do_test analyze3-1.3.8 { set l [string range "0" 0 end] set u [string range "1100" 0 end] sf_execsql { SELECT sum(y) FROM t3 WHERE x>$l AND x<$u } -} {2000 0 499500} +} {999 999 499500} do_test analyze3-1.3.9 { set l [expr int(0)] set u [expr int(1100)] sf_execsql { SELECT sum(y) FROM t3 WHERE x>$l AND x<$u } -} {2000 0 499500} +} {999 999 499500} #------------------------------------------------------------------------- # Test that the values of bound SQL variables may be used for the LIKE # optimization. # Index: test/analyze9.test ================================================================== --- test/analyze9.test +++ test/analyze9.test @@ -564,11 +564,11 @@ # estimate the number of rows scanned by a rowid constraint. # drop_all_tables do_test 13.1 { execsql { - CREATE TABLE t1(a, b, c); + CREATE TABLE t1(a, b, c, d); CREATE INDEX i1 ON t1(a); CREATE INDEX i2 ON t1(b, c); } for {set i 0} {$i<100} {incr i} { if {$i %2} {set a abc} else {set a def} @@ -575,20 +575,20 @@ execsql { INSERT INTO t1(rowid, a, b, c) VALUES($i, $a, $i, $i) } } execsql ANALYZE } {} do_eqp_test 13.2.1 { - SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<20 + SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<12 } {/SEARCH TABLE t1 USING INDEX i1/} do_eqp_test 13.2.2 { - SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<20 + SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<12 } {/SEARCH TABLE t1 USING INDEX i1/} do_eqp_test 13.3.1 { - SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<20 + SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<12 } {/SEARCH TABLE t1 USING INDEX i2/} do_eqp_test 13.3.2 { - SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<20 + SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<12 } {/SEARCH TABLE t1 USING INDEX i2/} #------------------------------------------------------------------------- # Check also that affinities are taken into account when using stat4 data # to estimate the number of rows scanned by any other constraint on a Index: test/autoindex1.test ================================================================== --- test/autoindex1.test +++ test/autoindex1.test @@ -95,10 +95,12 @@ do_test autoindex1-210 { db eval { PRAGMA automatic_index=ON; ANALYZE; UPDATE sqlite_stat1 SET stat='10000' WHERE tbl='t1'; + -- Table t2 actually contains 8 rows. + UPDATE sqlite_stat1 SET stat='16' WHERE tbl='t2'; ANALYZE sqlite_master; SELECT b, (SELECT d FROM t2 WHERE c=a) FROM t1; } } {11 911 22 922 33 933 44 944 55 955 66 966 77 977 88 988} do_test autoindex1-211 { ADDED test/cost.test Index: test/cost.test ================================================================== --- /dev/null +++ test/cost.test @@ -0,0 +1,246 @@ +# 2014-04-26 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix cost + + +do_execsql_test 1.1 { + CREATE TABLE t3(id INTEGER PRIMARY KEY, b NOT NULL); + CREATE TABLE t4(c, d, e); + CREATE UNIQUE INDEX i3 ON t3(b); + CREATE UNIQUE INDEX i4 ON t4(c, d); +} +do_eqp_test 1.2 { + SELECT e FROM t3, t4 WHERE b=c ORDER BY b, d; +} { + 0 0 0 {SCAN TABLE t3 USING COVERING INDEX i3} + 0 1 1 {SEARCH TABLE t4 USING INDEX i4 (c=?)} +} + + +do_execsql_test 2.1 { + CREATE TABLE t1(a, b); + CREATE INDEX i1 ON t1(a); +} + +# It is better to use an index for ORDER BY than sort externally, even +# if the index is a non-covering index. +do_eqp_test 2.2 { + SELECT * FROM t1 ORDER BY a; +} { + 0 0 0 {SCAN TABLE t1 USING INDEX i1} +} + +do_execsql_test 3.1 { + CREATE TABLE t5(a INTEGER PRIMARY KEY,b,c,d,e,f,g); + CREATE INDEX t5b ON t5(b); + CREATE INDEX t5c ON t5(c); + CREATE INDEX t5d ON t5(d); + CREATE INDEX t5e ON t5(e); + CREATE INDEX t5f ON t5(f); + CREATE INDEX t5g ON t5(g); +} + +do_eqp_test 3.2 { + SELECT a FROM t5 + WHERE b IS NULL OR c IS NULL OR d IS NULL + ORDER BY a; +} { + 0 0 0 {SEARCH TABLE t5 USING INDEX t5b (b=?)} + 0 0 0 {SEARCH TABLE t5 USING INDEX t5c (c=?)} + 0 0 0 {SEARCH TABLE t5 USING INDEX t5d (d=?)} + 0 0 0 {USE TEMP B-TREE FOR ORDER BY} +} + +#------------------------------------------------------------------------- +# If there is no likelihood() or stat3 data, SQLite assumes that a closed +# range scan (e.g. one constrained by "col BETWEEN ? AND ?" constraint) +# visits 1/64 of the rows in a table. +# +# Note: 1/63 =~ 0.016 +# Note: 1/65 =~ 0.015 +# +reset_db +do_execsql_test 4.1 { + CREATE TABLE t1(a, b); + CREATE INDEX i1 ON t1(a); + CREATE INDEX i2 ON t1(b); +} +do_eqp_test 4.2 { + SELECT * FROM t1 WHERE likelihood(a=?, 0.014) AND b BETWEEN ? AND ?; +} { + 0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)} +} +do_eqp_test 4.3 { + SELECT * FROM t1 WHERE likelihood(a=?, 0.016) AND b BETWEEN ? AND ?; +} { + 0 0 0 {SEARCH TABLE t1 USING INDEX i2 (b>? AND b? AND x? AND b=950 AND b<=1010) OR (b IS NULL AND c NOT NULL) + ORDER BY a +} { + 0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b>? AND b=180 ORDER BY +name; Index: test/unordered.test ================================================================== --- test/unordered.test +++ test/unordered.test @@ -40,11 +40,11 @@ sqlite3 db test.db foreach {tn sql r(ordered) r(unordered)} { 1 "SELECT * FROM t1 ORDER BY a" {0 0 0 {SCAN TABLE t1 USING INDEX i1}} {0 0 0 {SCAN TABLE t1} 0 0 0 {USE TEMP B-TREE FOR ORDER BY}} - 2 "SELECT * FROM t1 WHERE a >?" + 2 "SELECT * FROM t1 WHERE a > 100" {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?)}} {0 0 0 {SCAN TABLE t1}} 3 "SELECT * FROM t1 WHERE a = ? ORDER BY rowid" {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)} Index: test/where3.test ================================================================== --- test/where3.test +++ test/where3.test @@ -229,10 +229,11 @@ # do_execsql_test where3-3.0 { CREATE TABLE t301(a INTEGER PRIMARY KEY,b,c); CREATE INDEX t301c ON t301(c); INSERT INTO t301 VALUES(1,2,3); + INSERT INTO t301 VALUES(2,2,3); CREATE TABLE t302(x, y); INSERT INTO t302 VALUES(4,5); ANALYZE; explain query plan SELECT * FROM t302, t301 WHERE t302.x=5 AND t301.a=t302.y; } { @@ -249,11 +250,11 @@ do_execsql_test where3-3.2 { SELECT * FROM t301 WHERE c=3 AND a IS NULL; } {} do_execsql_test where3-3.3 { SELECT * FROM t301 WHERE c=3 AND a IS NOT NULL; -} {1 2 3} +} {1 2 3 2 2 3} if 0 { # Query planner no longer does this # Verify that when there are multiple tables in a join which must be # full table scans that the query planner attempts put the table with # the fewest number of output rows as the outer loop. Index: test/whereG.test ================================================================== --- test/whereG.test +++ test/whereG.test @@ -12,10 +12,11 @@ # Test cases for query planning decisions and the unlikely() and # likelihood() functions. set testdir [file dirname $argv0] source $testdir/tester.tcl +set testprefix whereG do_execsql_test whereG-1.0 { CREATE TABLE composer( cid INTEGER PRIMARY KEY, cname TEXT @@ -177,7 +178,48 @@ FROM (SELECT x FROM t4 GROUP BY x) WHERE x='right' ORDER BY x; } {right} +#------------------------------------------------------------------------- +# Test that likelihood() specifications on indexed terms are taken into +# account by various forms of loops. +# +# 5.1.*: open ended range scans +# 5.2.*: skip-scans +# +reset_db + +do_execsql_test 5.1 { + CREATE TABLE t1(a, b, c); + CREATE INDEX i1 ON t1(a, b); +} +do_eqp_test 5.1.2 { + SELECT * FROM t1 WHERE a>? +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?)}} +do_eqp_test 5.1.3 { + SELECT * FROM t1 WHERE likelihood(a>?, 0.9) +} {0 0 0 {SCAN TABLE t1}} + +do_test 5.2 { + for {set i 0} {$i < 100} {incr i} { + execsql { INSERT INTO t1 VALUES('abc', $i, $i); } + } + execsql { INSERT INTO t1 SELECT 'def', b, c FROM t1; } + execsql { ANALYZE } +} {} +do_eqp_test 5.2.2 { + SELECT * FROM t1 WHERE likelihood(b>?, 0.01) +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (ANY(a) AND b>?)}} +do_eqp_test 5.2.3 { + SELECT * FROM t1 WHERE likelihood(b>?, 0.9) +} {0 0 0 {SCAN TABLE t1}} + +do_eqp_test 5.3.1 { + SELECT * FROM t1 WHERE a=? +} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a=?)}} +do_eqp_test 5.3.2 { + SELECT * FROM t1 WHERE likelihood(a=?, 0.9) +} {0 0 0 {SCAN TABLE t1}} finish_test + Index: tool/logest.c ================================================================== --- tool/logest.c +++ tool/logest.c @@ -81,11 +81,12 @@ static LogEst logEstFromDouble(double x){ sqlite3_uint64 a; LogEst e; assert( sizeof(x)==8 && sizeof(a)==8 ); if( x<=0.0 ) return -32768; - if( x<1.0 ) return -logEstFromDouble(1/x); + if( x<0.01 ) return -logEstFromDouble(1.0/x); + if( x<1.0 ) return logEstFromDouble(100.0*x) - 66; if( x<1024.0 ) return logEstFromInteger((sqlite3_uint64)(1024.0*x)) - 100; if( x<=2000000000.0 ) return logEstFromInteger((sqlite3_uint64)x); memcpy(&a, &x, 8); e = (a>>52) - 1022; return e*10; @@ -154,14 +155,16 @@ }else{ showHelp(argv[0]); } } for(i=n-1; i>=0; i--){ - if( a[i]<0 ){ + if( a[i]<-40 ){ printf("%5d (%f)\n", a[i], 1.0/(double)logEstToInt(-a[i])); + }else if( a[i]<10 ){ + printf("%5d (%f)\n", a[i], logEstToInt(a[i]+100)/1024.0); }else{ sqlite3_uint64 x = logEstToInt(a[i]+100)*100/1024; printf("%5d (%lld.%02lld)\n", a[i], x/100, x%100); } } return 0; }