Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Adjustments to the result row estimator for the IN operator so that it gives the same estimates as the equivalent OR operator. Test cases for the same. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | stat2-enhancement |
Files: | files | file ages | folders |
SHA1: |
c82cb9c028b3ba5463ae50c30196dbf1 |
User & Date: | drh 2011-01-21 18:18:13.960 |
Context
2011-01-22
| ||
00:10 | Add the ability to use indices for constraints of the form "x IS NOT NULL" when sqlite_stat2 is available and most entries for column x are NULL. (check-in: 5d5bddd290 user: drh tags: stat2-enhancement) | |
2011-01-21
| ||
18:18 | Adjustments to the result row estimator for the IN operator so that it gives the same estimates as the equivalent OR operator. Test cases for the same. (check-in: c82cb9c028 user: drh tags: stat2-enhancement) | |
16:27 | Make use of histogram data to make better estimates for the number of rows that will be returned from "x IN (v1,v2,v3,...)" constraints. (check-in: fd3977a27a user: drh tags: stat2-enhancement) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
113 114 115 116 117 118 119 120 121 122 123 124 125 126 | #define TERM_DYNAMIC 0x01 /* Need to call sqlite3ExprDelete(db, pExpr) */ #define TERM_VIRTUAL 0x02 /* Added by the optimizer. Do not code */ #define TERM_CODED 0x04 /* This term is already coded */ #define TERM_COPIED 0x08 /* Has a child */ #define TERM_ORINFO 0x10 /* Need to free the WhereTerm.u.pOrInfo object */ #define TERM_ANDINFO 0x20 /* Need to free the WhereTerm.u.pAndInfo obj */ #define TERM_OR_OK 0x40 /* Used during OR-clause processing */ /* ** An instance of the following structure holds all information about a ** WHERE clause. Mostly this is a container for one or more WhereTerms. */ struct WhereClause { Parse *pParse; /* The parser context */ | > | 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 | #define TERM_DYNAMIC 0x01 /* Need to call sqlite3ExprDelete(db, pExpr) */ #define TERM_VIRTUAL 0x02 /* Added by the optimizer. Do not code */ #define TERM_CODED 0x04 /* This term is already coded */ #define TERM_COPIED 0x08 /* Has a child */ #define TERM_ORINFO 0x10 /* Need to free the WhereTerm.u.pOrInfo object */ #define TERM_ANDINFO 0x20 /* Need to free the WhereTerm.u.pAndInfo obj */ #define TERM_OR_OK 0x40 /* Used during OR-clause processing */ #define TERM_NOHELP 0x80 /* This term does not reduce the search space */ /* ** An instance of the following structure holds all information about a ** WHERE clause. Mostly this is a container for one or more WhereTerms. */ struct WhereClause { Parse *pParse; /* The parser context */ |
︙ | ︙ | |||
1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 | exprAnalyze(pSrc, pWC, idxNew); pTerm = &pWC->a[idxTerm]; pWC->a[idxNew].iParent = idxTerm; pTerm->nChild = 1; }else{ sqlite3ExprListDelete(db, pList); } pTerm->eOperator = 0; /* case 1 trumps case 2 */ } } } #endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */ | > | 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 | exprAnalyze(pSrc, pWC, idxNew); pTerm = &pWC->a[idxTerm]; pWC->a[idxNew].iParent = idxTerm; pTerm->nChild = 1; }else{ sqlite3ExprListDelete(db, pList); } pTerm->wtFlags |= TERM_NOHELP; pTerm->eOperator = 0; /* case 1 trumps case 2 */ } } } #endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */ |
︙ | ︙ | |||
2519 2520 2521 2522 2523 2524 2525 | return rc; } #endif /* defined(SQLITE_ENABLE_STAT2) */ #ifdef SQLITE_ENABLE_STAT2 /* ** Estimate the number of rows that will be returned based on | | | > > | | | | > | | > | | | | | | > | > > > > > > | | | | 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 | return rc; } #endif /* defined(SQLITE_ENABLE_STAT2) */ #ifdef SQLITE_ENABLE_STAT2 /* ** Estimate the number of rows that will be returned based on ** an IN constraint where the right-hand side of the IN operator ** is a list of values. Example: ** ** WHERE x IN (1,2,3,4) ** ** Write the estimated row count into *pnRow and return SQLITE_OK. ** If unable to make an estimate, leave *pnRow unchanged and return ** non-zero. ** ** This routine can fail if it is unable to load a collating sequence ** required for string comparison, or if unable to allocate memory ** for a UTF conversion required for comparison. The error is stored ** in the pParse structure. */ int whereInScanEst( Parse *pParse, /* Parsing & code generating context */ Index *p, /* The index whose left-most column is pTerm */ ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ double *pnRow /* Write the revised row estimate here */ ){ sqlite3_value *pVal = 0; /* One value from list */ int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ int rc = SQLITE_OK; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ int nSpan = 0; /* Number of histogram regions spanned */ int nSingle = 0; /* Histogram regions hit by a single value */ int nNotFound = 0; /* Count of values that are not constants */ int i; /* Loop counter */ u8 aSpan[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */ u8 aSingle[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions hit once */ assert( p->aSample!=0 ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; memset(aSpan, 0, sizeof(aSpan)); memset(aSingle, 0, sizeof(aSingle)); for(i=0; i<pList->nExpr; i++){ sqlite3ValueFree(pVal); rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal); if( rc ) break; if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){ nNotFound++; continue; } rc = whereRangeRegion(pParse, p, pVal, 0, &iLower); if( rc ) break; rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper); if( rc ) break; if( iLower>=iUpper ){ aSingle[iLower] = 1; }else{ assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES ); while( iLower<iUpper ) aSpan[iLower++] = 1; } } if( rc==SQLITE_OK ){ for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){ if( aSpan[i] ){ nSpan++; }else if( aSingle[i] ){ nSingle++; } } nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES) + nNotFound*p->aiRowEst[1]; if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; *pnRow = nRowEst; WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n", nSpan, nSingle, nNotFound, nRowEst)); } sqlite3ValueFree(pVal); return rc; } #endif /* defined(SQLITE_ENABLE_STAT2) */ |
︙ | ︙ | |||
2919 2920 2921 2922 2923 2924 2925 | int k; /* Loop counter */ int nSkipEq = nEq; /* Number of == constraints to skip */ int nSkipRange = nBound; /* Number of < constraints to skip */ Bitmask thisTab; /* Bitmap for pSrc */ thisTab = getMask(pWC->pMaskSet, iCur); for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){ | | | | 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 | int k; /* Loop counter */ int nSkipEq = nEq; /* Number of == constraints to skip */ int nSkipRange = nBound; /* Number of < constraints to skip */ Bitmask thisTab; /* Bitmap for pSrc */ thisTab = getMask(pWC->pMaskSet, iCur); for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){ if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_NOHELP) ) continue; if( (pTerm->prereqAll & notValid)!=thisTab ) continue; if( pTerm->eOperator & (WO_EQ|WO_IN|WO_ISNULL) ){ if( nSkipEq ){ /* Ignore the first nEq equality matches since the index ** has already accounted for these */ nSkipEq--; }else{ /* Assume each additional equality match reduces the result ** set size by a factor of 10 */ nRow /= 10; } }else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){ if( nSkipRange ){ /* Ignore the first nSkipRange range constraints since the index ** has already accounted for these */ nSkipRange--; }else{ /* Assume each additional range constraint reduces the result ** set size by a factor of 3 */ nRow /= 3; } |
︙ | ︙ |
Changes to test/analyze5.test.
︙ | ︙ | |||
114 115 116 117 118 119 120 | } { do_test analyze5-1.$testid { eqp "SELECT * FROM t1 WHERE $where" } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? AND z<?) (~%d rows)}} \ $rows] } foreach {testid where rows} { | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 | } { do_test analyze5-1.$testid { eqp "SELECT * FROM t1 WHERE $where" } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? AND z<?) (~%d rows)}} \ $rows] } foreach {testid where rows} { 101 {z=-1} 50 102 {z=0} 400 103 {z=1} 300 104 {z=2} 200 105 {z=3} 100 106 {z=4} 50 107 {z=-10.0} 50 108 {z=0.0} 400 109 {z=1.0} 300 110 {z=2.0} 200 111 {z=3.0} 100 112 {z=4.0} 50 113 {z=1.5} 50 114 {z=2.5} 50 } { do_test analyze5-1.$testid { eqp "SELECT * FROM t1 WHERE $where" } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)}} $rows] } # for the next sequence of tests a value of rows<=0 means a full-table scan # is used. # #set sqlite_where_trace 1 foreach {testid where rows} { 201 {z IN (-1)} 50 202 {z IN (0)} 400 203 {z IN (1)} 300 204 {z IN (2)} 200 205 {z IN (3)} 100 206 {z IN (4)} 50 207 {z IN (0.5)} 50 208 {z IN (0,1)} 700 209 {z IN (0,1,2)} 900 210 {z IN (0,1,2,3)} 0 211 {z IN (0,1,2,3,4,5)} 0 212 {z IN (1,2)} 500 213 {z IN (2,3)} 300 214 {z=3 OR z=2} 300 215 {z IN (-1,3)} 150 216 {z=-1 OR z=3} 150 } { if {$rows<=0} { set ans {SCAN TABLE t1 (~100 rows)} } else { set ans [format {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)} $rows] } do_test analyze5-1.$testid { lindex [eqp "SELECT * FROM t1 WHERE $where"] 3 } $ans } # For the t1.y column, most entries are known to be zero. So do a # full table scan for y=0 but use the index for any other constraint on # y. # do_test analyze5-201 { eqp {SELECT * FROM t1 WHERE y=0} |
︙ | ︙ |