Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Make use of histogram data to make better estimates for the number of rows that will be returned from "x IN (v1,v2,v3,...)" constraints. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | stat2-enhancement |
Files: | files | file ages | folders |
SHA1: |
fd3977a27ae68e694df12a4713e55515 |
User & Date: | drh 2011-01-21 16:27:18.621 |
Context
2011-01-21
| ||
18:18 | Adjustments to the result row estimator for the IN operator so that it gives the same estimates as the equivalent OR operator. Test cases for the same. (check-in: c82cb9c028 user: drh tags: stat2-enhancement) | |
16:27 | Make use of histogram data to make better estimates for the number of rows that will be returned from "x IN (v1,v2,v3,...)" constraints. (check-in: fd3977a27a user: drh tags: stat2-enhancement) | |
14:37 | Add the ability to use indices when a range contraint is bounded on the lower end by NULL. (check-in: f73a167b43 user: drh tags: stat2-enhancement) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
2471 2472 2473 2474 2475 2476 2477 | /* ** Estimate the number of rows that will be returned based on ** an equality constraint x=VALUE and where that VALUE occurs in ** the histogram data. This only works when x is the left-most ** column of an index and sqlite_stat2 histogram data is available ** for that index. ** | | | > | | < | | > | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 | /* ** Estimate the number of rows that will be returned based on ** an equality constraint x=VALUE and where that VALUE occurs in ** the histogram data. This only works when x is the left-most ** column of an index and sqlite_stat2 histogram data is available ** for that index. ** ** Write the estimated row count into *pnRow and return SQLITE_OK. ** If unable to make an estimate, leave *pnRow unchanged and return ** non-zero. ** ** This routine can fail if it is unable to load a collating sequence ** required for string comparison, or if unable to allocate memory ** for a UTF conversion required for comparison. The error is stored ** in the pParse structure. */ int whereEqualScanEst( Parse *pParse, /* Parsing & code generating context */ Index *p, /* The index whose left-most column is pTerm */ Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */ double *pnRow /* Write the revised row estimate here */ ){ sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */ int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ int rc; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ assert( p->aSample!=0 ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; rc = valueFromExpr(pParse, pExpr, aff, &pRhs); if( rc ) goto whereEqualScanEst_cancel; if( pRhs==0 ) return SQLITE_NOTFOUND; rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower); if( rc ) goto whereEqualScanEst_cancel; rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); if( rc ) goto whereEqualScanEst_cancel; WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper)); if( iLower>=iUpper ){ nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2); if( nRowEst<*pnRow ) *pnRow = nRowEst; }else{ nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES; *pnRow = nRowEst; } whereEqualScanEst_cancel: sqlite3ValueFree(pRhs); return rc; } #endif /* defined(SQLITE_ENABLE_STAT2) */ #ifdef SQLITE_ENABLE_STAT2 /* ** Estimate the number of rows that will be returned based on ** an IN constraint "x IN (V1,V2,V3,...)" where the right-hand side ** of the IN operator is a list of values. ** ** Write the estimated row count into *pnRow and return SQLITE_OK. ** If unable to make an estimate, leave *pnRow unchanged and return ** non-zero. ** ** This routine can fail if it is unable to load a collating sequence ** required for string comparison, or if unable to allocate memory ** for a UTF conversion required for comparison. The error is stored ** in the pParse structure. */ int whereInScanEst( Parse *pParse, /* Parsing & code generating context */ Index *p, /* The index whose left-most column is pTerm */ ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ double *pnRow /* Write the revised row estimate here */ ){ sqlite3_value *pVal = 0; /* One value from list */ int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ int rc; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ int nRegion = 0; /* Number of histogram regions spanned */ int nSingle = 0; /* Count of values contained within one region */ int nNotFound = 0; /* Count of values that are not constants */ int i; /* Loop counter */ u8 aHit[SQLITE_INDEX_SAMPLES+1]; /* Histogram regions that are spanned */ assert( p->aSample!=0 ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; memset(aHit, 0, sizeof(aHit)); for(i=0; i<pList->nExpr; i++){ sqlite3ValueFree(pVal); rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal); if( rc ) break; if( pVal==0 ){ nNotFound++; continue; } rc = whereRangeRegion(pParse, p, pVal, 0, &iLower); if( rc ) break; rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper); if( rc ) break; if( iLower>=iUpper ){ nSingle++; } assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES ); while( iLower<=iUpper ) aHit[iLower++] = 1; } if( rc==SQLITE_OK ){ for(i=nRegion=0; i<ArraySize(aHit); i++) nRegion += aHit[i]; nRowEst = nRegion*p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES+1) + nNotFound*p->aiRowEst[1]; if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; *pnRow = nRowEst; WHERETRACE(("IN row estimate: nRegion=%d, nSingle=%d, nNotFound=%d\n", nRegion, nSingle, nNotFound)); } sqlite3ValueFree(pVal); return rc; } #endif /* defined(SQLITE_ENABLE_STAT2) */ /* ** Find the query plan for accessing a particular table. Write the ** best query plan and its cost into the WhereCost object supplied as the |
︙ | ︙ | |||
2682 2683 2684 2685 2686 2687 2688 | int nInMul = 1; int estBound = 100; int nBound = 0; /* Number of range constraints seen */ int bSort = 0; int bLookup = 0; WhereTerm *pTerm; /* A single term of the WHERE clause */ #ifdef SQLITE_ENABLE_STAT2 | | | 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 | int nInMul = 1; int estBound = 100; int nBound = 0; /* Number of range constraints seen */ int bSort = 0; int bLookup = 0; WhereTerm *pTerm; /* A single term of the WHERE clause */ #ifdef SQLITE_ENABLE_STAT2 WhereTerm *pFirstTerm = 0; /* First term matching the index */ #endif /* Determine the values of nEq and nInMul */ for(nEq=0; nEq<pProbe->nColumn; nEq++){ int j = pProbe->aiColumn[nEq]; pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pIdx); if( pTerm==0 ) break; |
︙ | ︙ | |||
2706 2707 2708 2709 2710 2711 2712 | /* "x IN (value, value, ...)" */ nInMul *= pExpr->x.pList->nExpr + 1; } }else if( pTerm->eOperator & WO_ISNULL ){ wsFlags |= WHERE_COLUMN_NULL; } #ifdef SQLITE_ENABLE_STAT2 | | < < | 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 | /* "x IN (value, value, ...)" */ nInMul *= pExpr->x.pList->nExpr + 1; } }else if( pTerm->eOperator & WO_ISNULL ){ wsFlags |= WHERE_COLUMN_NULL; } #ifdef SQLITE_ENABLE_STAT2 if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm; #endif used |= pTerm->prereqRight; } /* Determine the value of estBound. */ if( nEq<pProbe->nColumn ){ int j = pProbe->aiColumn[nEq]; |
︙ | ︙ | |||
2792 2793 2794 2795 2796 2797 2798 | #ifdef SQLITE_ENABLE_STAT2 /* If the constraint is of the form x=VALUE and histogram ** data is available for column x, then it might be possible ** to get a better estimate on the number of rows based on ** VALUE and how common that value is according to the histogram. */ | | > > > | > | 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 | #ifdef SQLITE_ENABLE_STAT2 /* If the constraint is of the form x=VALUE and histogram ** data is available for column x, then it might be possible ** to get a better estimate on the number of rows based on ** VALUE and how common that value is according to the histogram. */ if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){ if( pFirstTerm->eOperator==WO_EQ ){ whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow); }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){ whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow); } } #endif /* SQLITE_ENABLE_STAT2 */ /* Assume constant cost to access a row and logarithmic cost to ** do a binary search. Hence, the initial cost is the number of output ** rows plus log2(table-size) times the number of binary searches. */ |
︙ | ︙ |