Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix a performance regression (relative to version 3.6.23.1) caused by the query planner taking into account non-indexable WHERE clause terms to select the outermost join loops when it should be selecting tables for the outermost loop that do not benefit from being in an inner loop. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
ece641eb8951c6314cedbdb3243f91cb |
User & Date: | drh 2010-10-04 23:55:51.000 |
Context
2010-10-05
| ||
08:13 | Prevent backcompat.test from mistaking directories for binary executables. (check-in: 717a1e50f0 user: dan tags: trunk) | |
2010-10-04
| ||
23:55 | Fix a performance regression (relative to version 3.6.23.1) caused by the query planner taking into account non-indexable WHERE clause terms to select the outermost join loops when it should be selecting tables for the outermost loop that do not benefit from being in an inner loop. (check-in: ece641eb89 user: drh tags: trunk) | |
16:06 | Fix a couple of test script problems. (check-in: dd10690140 user: dan tags: trunk) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
1552 1553 1554 1555 1556 1557 1558 | #define TRACE_IDX_OUTPUTS(A) #endif /* ** Required because bestIndex() is called by bestOrClauseIndex() */ static void bestIndex( | | > | > | 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 | #define TRACE_IDX_OUTPUTS(A) #endif /* ** Required because bestIndex() is called by bestOrClauseIndex() */ static void bestIndex( Parse*, WhereClause*, struct SrcList_item*, Bitmask, Bitmask, ExprList*, WhereCost*); /* ** This routine attempts to find an scanning strategy that can be used ** to optimize an 'OR' expression that is part of a WHERE clause. ** ** The table associated with FROM clause term pSrc may be either a ** regular B-Tree table or a virtual table. */ static void bestOrClauseIndex( Parse *pParse, /* The parsing context */ WhereClause *pWC, /* The WHERE clause */ struct SrcList_item *pSrc, /* The FROM clause term to search */ Bitmask notReady, /* Mask of cursors not available for indexing */ Bitmask notValid, /* Cursors not available for any purpose */ ExprList *pOrderBy, /* The ORDER BY clause */ WhereCost *pCost /* Lowest cost query plan */ ){ #ifndef SQLITE_OMIT_OR_OPTIMIZATION const int iCur = pSrc->iCursor; /* The cursor of the table to be accessed */ const Bitmask maskSrc = getMask(pWC->pMaskSet, iCur); /* Bitmask for pSrc */ WhereTerm * const pWCEnd = &pWC->a[pWC->nTerm]; /* End of pWC->a[] */ |
︙ | ︙ | |||
1601 1602 1603 1604 1605 1606 1607 | for(pOrTerm=pOrWC->a; pOrTerm<pOrWCEnd; pOrTerm++){ WhereCost sTermCost; WHERETRACE(("... Multi-index OR testing for term %d of %d....\n", (pOrTerm - pOrWC->a), (pTerm - pWC->a) )); if( pOrTerm->eOperator==WO_AND ){ WhereClause *pAndWC = &pOrTerm->u.pAndInfo->wc; | | | | 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 | for(pOrTerm=pOrWC->a; pOrTerm<pOrWCEnd; pOrTerm++){ WhereCost sTermCost; WHERETRACE(("... Multi-index OR testing for term %d of %d....\n", (pOrTerm - pOrWC->a), (pTerm - pWC->a) )); if( pOrTerm->eOperator==WO_AND ){ WhereClause *pAndWC = &pOrTerm->u.pAndInfo->wc; bestIndex(pParse, pAndWC, pSrc, notReady, notValid, 0, &sTermCost); }else if( pOrTerm->leftCursor==iCur ){ WhereClause tempWC; tempWC.pParse = pWC->pParse; tempWC.pMaskSet = pWC->pMaskSet; tempWC.op = TK_AND; tempWC.a = pOrTerm; tempWC.nTerm = 1; bestIndex(pParse, &tempWC, pSrc, notReady, notValid, 0, &sTermCost); }else{ continue; } rTotal += sTermCost.rCost; nRow += sTermCost.nRow; used |= sTermCost.used; if( rTotal>=pCost->rCost ) break; |
︙ | ︙ | |||
2056 2057 2058 2059 2060 2061 2062 | ** routine takes care of freeing the sqlite3_index_info structure after ** everybody has finished with it. */ static void bestVirtualIndex( Parse *pParse, /* The parsing context */ WhereClause *pWC, /* The WHERE clause */ struct SrcList_item *pSrc, /* The FROM clause term to search */ | | > | 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 | ** routine takes care of freeing the sqlite3_index_info structure after ** everybody has finished with it. */ static void bestVirtualIndex( Parse *pParse, /* The parsing context */ WhereClause *pWC, /* The WHERE clause */ struct SrcList_item *pSrc, /* The FROM clause term to search */ Bitmask notReady, /* Mask of cursors not available for index */ Bitmask notValid, /* Cursors not valid for any purpose */ ExprList *pOrderBy, /* The order by clause */ WhereCost *pCost, /* Lowest cost query plan */ sqlite3_index_info **ppIdxInfo /* Index information passed to xBestIndex */ ){ Table *pTab = pSrc->pTab; sqlite3_index_info *pIdxInfo; struct sqlite3_index_constraint *pIdxCons; |
︙ | ︙ | |||
2186 2187 2188 2189 2190 2191 2192 | } pCost->plan.nEq = 0; pIdxInfo->nOrderBy = nOrderBy; /* Try to find a more efficient access pattern by using multiple indexes ** to optimize an OR expression within the WHERE clause. */ | | | 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 | } pCost->plan.nEq = 0; pIdxInfo->nOrderBy = nOrderBy; /* Try to find a more efficient access pattern by using multiple indexes ** to optimize an OR expression within the WHERE clause. */ bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost); } #endif /* SQLITE_OMIT_VIRTUALTABLE */ /* ** Argument pIdx is a pointer to an index structure that has an array of ** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column ** stored in Index.aSample. The domain of values stored in said column |
︙ | ︙ | |||
2467 2468 2469 2470 2471 2472 2473 | ** selected plan may still take advantage of the tables built-in rowid ** index. */ static void bestBtreeIndex( Parse *pParse, /* The parsing context */ WhereClause *pWC, /* The WHERE clause */ struct SrcList_item *pSrc, /* The FROM clause term to search */ | | > | 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 | ** selected plan may still take advantage of the tables built-in rowid ** index. */ static void bestBtreeIndex( Parse *pParse, /* The parsing context */ WhereClause *pWC, /* The WHERE clause */ struct SrcList_item *pSrc, /* The FROM clause term to search */ Bitmask notReady, /* Mask of cursors not available for indexing */ Bitmask notValid, /* Cursors not available for any purpose */ ExprList *pOrderBy, /* The ORDER BY clause */ WhereCost *pCost /* Lowest cost query plan */ ){ int iCur = pSrc->iCursor; /* The cursor of the table to be accessed */ Index *pProbe; /* An index we are evaluating */ Index *pIdx; /* Copy of pProbe, or zero for IPK index */ int eqTermMask; /* Current mask of valid equality operators */ |
︙ | ︙ | |||
2729 2730 2731 2732 2733 2734 2735 | /* If there are additional constraints on this table that cannot ** be used with the current index, but which might lower the number ** of output rows, adjust the nRow value accordingly. This only ** matters if the current index is the least costly, so do not bother ** with this step if we already know this index will not be chosen. ** Also, never reduce the output row count below 2 using this step. ** | | < < | < > | > > | | | | | 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 | /* If there are additional constraints on this table that cannot ** be used with the current index, but which might lower the number ** of output rows, adjust the nRow value accordingly. This only ** matters if the current index is the least costly, so do not bother ** with this step if we already know this index will not be chosen. ** Also, never reduce the output row count below 2 using this step. ** ** It is critical that the notValid mask be used here instead of ** the notReady mask. When computing an "optimal" index, the notReady ** mask will only have one bit set - the bit for the current table. ** The notValid mask, on the other hand, always has all bits set for ** tables that are not in outer loops. If notReady is used here instead ** of notValid, then a optimal index that depends on inner joins loops ** might be selected even when there exists an optimal index that has ** no such dependency. */ if( nRow>2 && cost<=pCost->rCost ){ int k; /* Loop counter */ int nSkipEq = nEq; /* Number of == constraints to skip */ int nSkipRange = nBound; /* Number of < constraints to skip */ Bitmask thisTab; /* Bitmap for pSrc */ thisTab = getMask(pWC->pMaskSet, iCur); for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){ if( pTerm->wtFlags & TERM_VIRTUAL ) continue; if( (pTerm->prereqAll & notValid)!=thisTab ) continue; if( pTerm->eOperator & (WO_EQ|WO_IN|WO_ISNULL) ){ if( nSkipEq ){ /* Ignore the first nEq equality matches since the index ** has already accounted for these */ nSkipEq--; }else{ /* Assume each additional equality match reduces the result |
︙ | ︙ | |||
2829 2830 2831 2832 2833 2834 2835 | ); WHERETRACE(("best index is: %s\n", ((pCost->plan.wsFlags & WHERE_NOT_FULLSCAN)==0 ? "none" : pCost->plan.u.pIdx ? pCost->plan.u.pIdx->zName : "ipk") )); | | | > | | | 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 | ); WHERETRACE(("best index is: %s\n", ((pCost->plan.wsFlags & WHERE_NOT_FULLSCAN)==0 ? "none" : pCost->plan.u.pIdx ? pCost->plan.u.pIdx->zName : "ipk") )); bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost); bestAutomaticIndex(pParse, pWC, pSrc, notReady, pCost); pCost->plan.wsFlags |= eqTermMask; } /* ** Find the query plan for accessing table pSrc->pTab. Write the ** best query plan and its cost into the WhereCost object supplied ** as the last parameter. This function may calculate the cost of ** both real and virtual table scans. */ static void bestIndex( Parse *pParse, /* The parsing context */ WhereClause *pWC, /* The WHERE clause */ struct SrcList_item *pSrc, /* The FROM clause term to search */ Bitmask notReady, /* Mask of cursors not available for indexing */ Bitmask notValid, /* Cursors not available for any purpose */ ExprList *pOrderBy, /* The ORDER BY clause */ WhereCost *pCost /* Lowest cost query plan */ ){ #ifndef SQLITE_OMIT_VIRTUALTABLE if( IsVirtual(pSrc->pTab) ){ sqlite3_index_info *p = 0; bestVirtualIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost,&p); if( p->needToFreeIdxStr ){ sqlite3_free(p->idxStr); } sqlite3DbFree(pParse->db, p); }else #endif { bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost); } } /* ** Disable a term in the WHERE clause. Except, do not disable the term ** if it controls a LEFT OUTER JOIN and it did not originate in the ON ** or USING clause of that join. |
︙ | ︙ | |||
4074 4075 4076 4077 4078 4079 4080 4081 4082 | ** were used as the innermost nested loop. In other words, a table ** is chosen such that the cost of running that table cannot be reduced ** by waiting for other tables to run first. This "optimal" test works ** by first assuming that the FROM clause is on the inner loop and finding ** its query plan, then checking to see if that query plan uses any ** other FROM clause terms that are notReady. If no notReady terms are ** used then the "optimal" query plan works. ** ** The second loop iteration is only performed if no optimal scan | > > > > > > | | | | 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 | ** were used as the innermost nested loop. In other words, a table ** is chosen such that the cost of running that table cannot be reduced ** by waiting for other tables to run first. This "optimal" test works ** by first assuming that the FROM clause is on the inner loop and finding ** its query plan, then checking to see if that query plan uses any ** other FROM clause terms that are notReady. If no notReady terms are ** used then the "optimal" query plan works. ** ** Note that the WhereCost.nRow parameter for an optimal scan might ** not be as small as it would be if the table really were the innermost ** join. The nRow value can be reduced by WHERE clause constraints ** that do not use indices. But this nRow reduction only happens if the ** table really is the innermost join. ** ** The second loop iteration is only performed if no optimal scan ** strategies were found by the first iteration. This second iteration ** is used to search for the lowest cost scan overall. ** ** Previous versions of SQLite performed only the second iteration - ** the next outermost loop was always that with the lowest overall ** cost. However, this meant that SQLite could select the wrong plan ** for scripts such as the following: ** ** CREATE TABLE t1(a, b); ** CREATE TABLE t2(c, d); ** SELECT * FROM t2, t1 WHERE t2.rowid = t1.a; ** ** The best strategy is to iterate through table t1 first. However it ** is not possible to determine this with a simple greedy algorithm. ** Since the cost of a linear scan through table t2 is the same ** as the cost of a linear scan through table t1, a simple greedy ** algorithm may choose to use t2 for the outer loop, which is a much ** costlier approach. */ nUnconstrained = 0; notIndexed = 0; for(isOptimal=(iFrom<nTabList-1); isOptimal>=0 && bestJ<0; isOptimal--){ Bitmask mask; /* Mask of tables not yet ready */ for(j=iFrom, pTabItem=&pTabList->a[j]; j<nTabList; j++, pTabItem++){ int doNotReorder; /* True if this table should not be reordered */ WhereCost sCost; /* Cost information from best[Virtual]Index() */ ExprList *pOrderBy; /* ORDER BY clause for index to optimize */ doNotReorder = (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0; |
︙ | ︙ | |||
4119 4120 4121 4122 4123 4124 4125 | pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0); if( pTabItem->pIndex==0 ) nUnconstrained++; assert( pTabItem->pTab ); #ifndef SQLITE_OMIT_VIRTUALTABLE if( IsVirtual(pTabItem->pTab) ){ sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo; | | > | > | 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 | pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0); if( pTabItem->pIndex==0 ) nUnconstrained++; assert( pTabItem->pTab ); #ifndef SQLITE_OMIT_VIRTUALTABLE if( IsVirtual(pTabItem->pTab) ){ sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo; bestVirtualIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy, &sCost, pp); }else #endif { bestBtreeIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy, &sCost); } assert( isOptimal || (sCost.used¬Ready)==0 ); /* If an INDEXED BY clause is present, then the plan must use that ** index if it uses any index at all */ assert( pTabItem->pIndex==0 || (sCost.plan.wsFlags & WHERE_NOT_FULLSCAN)==0 |
︙ | ︙ |
Changes to test/where3.test.
︙ | ︙ | |||
256 257 258 259 260 261 262 263 264 | } {0 1 {TABLE t401} 1 0 {TABLE t400} 2 2 {TABLE t402}} do_test where3-4.2 { execsql { EXPLAIN QUERY PLAN SELECT * FROM t400, t401, t402 WHERE t400.c GLOB 'abc*'; } } {0 0 {TABLE t400} 1 1 {TABLE t401} 2 2 {TABLE t402}} finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 | } {0 1 {TABLE t401} 1 0 {TABLE t400} 2 2 {TABLE t402}} do_test where3-4.2 { execsql { EXPLAIN QUERY PLAN SELECT * FROM t400, t401, t402 WHERE t400.c GLOB 'abc*'; } } {0 0 {TABLE t400} 1 1 {TABLE t401} 2 2 {TABLE t402}} # Verify that a performance regression encountered by firefox # has been fixed. # do_test where3-5.0 { execsql { CREATE TABLE aaa (id INTEGER PRIMARY KEY, type INTEGER, fk INTEGER DEFAULT NULL, parent INTEGER, position INTEGER, title LONGVARCHAR, keyword_id INTEGER, folder_type TEXT, dateAdded INTEGER, lastModified INTEGER); CREATE INDEX aaa_111 ON aaa (fk, type); CREATE INDEX aaa_222 ON aaa (parent, position); CREATE INDEX aaa_333 ON aaa (fk, lastModified); CREATE TABLE bbb (id INTEGER PRIMARY KEY, type INTEGER, fk INTEGER DEFAULT NULL, parent INTEGER, position INTEGER, title LONGVARCHAR, keyword_id INTEGER, folder_type TEXT, dateAdded INTEGER, lastModified INTEGER); CREATE INDEX bbb_111 ON bbb (fk, type); CREATE INDEX bbb_222 ON bbb (parent, position); CREATE INDEX bbb_333 ON bbb (fk, lastModified); } execsql { EXPLAIN QUERY PLAN SELECT bbb.title AS tag_title FROM aaa JOIN bbb ON bbb.id = aaa.parent WHERE aaa.fk = 'constant' AND LENGTH(bbb.title) > 0 AND bbb.parent = 4 ORDER BY bbb.title COLLATE NOCASE ASC; } } {0 0 {TABLE aaa WITH INDEX aaa_333} 1 1 {TABLE bbb USING PRIMARY KEY}} do_test where3-5.1 { execsql { EXPLAIN QUERY PLAN SELECT bbb.title AS tag_title FROM aaa JOIN aaa AS bbb ON bbb.id = aaa.parent WHERE aaa.fk = 'constant' AND LENGTH(bbb.title) > 0 AND bbb.parent = 4 ORDER BY bbb.title COLLATE NOCASE ASC; } } {0 0 {TABLE aaa WITH INDEX aaa_333} 1 1 {TABLE aaa AS bbb USING PRIMARY KEY}} do_test where3-5.2 { execsql { EXPLAIN QUERY PLAN SELECT bbb.title AS tag_title FROM bbb JOIN aaa ON bbb.id = aaa.parent WHERE aaa.fk = 'constant' AND LENGTH(bbb.title) > 0 AND bbb.parent = 4 ORDER BY bbb.title COLLATE NOCASE ASC; } } {0 1 {TABLE aaa WITH INDEX aaa_333} 1 0 {TABLE bbb USING PRIMARY KEY}} do_test where3-5.3 { execsql { EXPLAIN QUERY PLAN SELECT bbb.title AS tag_title FROM aaa AS bbb JOIN aaa ON bbb.id = aaa.parent WHERE aaa.fk = 'constant' AND LENGTH(bbb.title) > 0 AND bbb.parent = 4 ORDER BY bbb.title COLLATE NOCASE ASC; } } {0 1 {TABLE aaa WITH INDEX aaa_333} 1 0 {TABLE aaa AS bbb USING PRIMARY KEY}} finish_test |