Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Use histogram data to improve the row-count estimates on equality constraints. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | stat2-enhancement |
Files: | files | file ages | folders |
SHA1: |
6bfc5c69eb22938972bbf4e60179952d |
User & Date: | drh 2011-01-20 16:52:09.439 |
Context
2011-01-20
| ||
20:36 | Update ANALYZE test cases to check out the use of histograms for equality constraints. (check-in: c7b59afaf0 user: drh tags: stat2-enhancement) | |
16:52 | Use histogram data to improve the row-count estimates on equality constraints. (check-in: 6bfc5c69eb user: drh tags: stat2-enhancement) | |
02:56 | The first of a planned series of enhancements to the query planner that enable it to make better use of sqlite_stat2 histograms when the table has many repeated values. (check-in: 2cd374cd23 user: drh tags: stat2-enhancement) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 | *piEst = 11; }else{ *piEst = 33; } return rc; } /* ** Find the query plan for accessing a particular table. Write the ** best query plan and its cost into the WhereCost object supplied as the ** last parameter. ** ** The lowest cost plan wins. The cost is an estimate of the amount of | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 | *piEst = 11; }else{ *piEst = 33; } return rc; } #ifdef SQLITE_ENABLE_STAT2 /* ** Estimate the number of rows that will be returned based on ** an equality constraint x=VALUE and where that VALUE occurs in ** the histogram data. This only works when x is the left-most ** column of an index and sqlite_stat2 histogram data is available ** for that index. ** ** Write the estimated row count into *pnRow. If unable to make ** an estimate, leave *pnRow unchanged. */ void whereEqScanEst( Parse *pParse, /* Parsing & code generating context */ Index *p, /* The index whose left-most column is pTerm */ WhereTerm *pTerm, /* The x=VALUE constraint */ double *pnRow /* Write the revised row estimate here */ ){ sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */ int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ int rc; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ assert( p->aSample!=0 ); assert( pTerm->eOperator==WO_EQ ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; rc = valueFromExpr(pParse, pTerm->pExpr->pRight, aff, &pRhs); if( rc ) goto whereEqScanEst_cancel; rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower); if( rc ) goto whereEqScanEst_cancel; rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); if( rc ) goto whereEqScanEst_cancel; if( iLower>=iUpper ){ nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2); if( nRowEst<*pnRow ) *pnRow = nRowEst; }else{ nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES; *pnRow = nRowEst; } whereEqScanEst_cancel: sqlite3ValueFree(pRhs); } #endif /* defined(SQLITE_ENABLE_STAT2) */ /* ** Find the query plan for accessing a particular table. Write the ** best query plan and its cost into the WhereCost object supplied as the ** last parameter. ** ** The lowest cost plan wins. The cost is an estimate of the amount of |
︙ | ︙ | |||
2620 2621 2622 2623 2624 2625 2626 | ** SELECT a, b FROM tbl WHERE a = 1; ** SELECT a, b, c FROM tbl WHERE a = 1; */ int nEq; int bInEst = 0; int nInMul = 1; int estBound = 100; | | | > > > > > > > > | 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 | ** SELECT a, b FROM tbl WHERE a = 1; ** SELECT a, b, c FROM tbl WHERE a = 1; */ int nEq; int bInEst = 0; int nInMul = 1; int estBound = 100; int nBound = 0; /* Number of range constraints seen */ int bSort = 0; int bLookup = 0; WhereTerm *pTerm; /* A single term of the WHERE clause */ #ifdef SQLITE_ENABLE_STAT2 WhereTerm *pFirstEqTerm = 0; /* First WO_EQ term */ #endif /* Determine the values of nEq and nInMul */ for(nEq=0; nEq<pProbe->nColumn; nEq++){ int j = pProbe->aiColumn[nEq]; pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pIdx); if( pTerm==0 ) break; wsFlags |= (WHERE_COLUMN_EQ|WHERE_ROWID_EQ); if( pTerm->eOperator & WO_IN ){ Expr *pExpr = pTerm->pExpr; wsFlags |= WHERE_COLUMN_IN; if( ExprHasProperty(pExpr, EP_xIsSelect) ){ nInMul *= 25; bInEst = 1; }else if( ALWAYS(pExpr->x.pList) ){ nInMul *= pExpr->x.pList->nExpr + 1; } }else if( pTerm->eOperator & WO_ISNULL ){ wsFlags |= WHERE_COLUMN_NULL; } #ifdef SQLITE_ENABLE_STAT2 else if( nEq==0 && pProbe->aSample ){ pFirstEqTerm = pTerm; } #endif used |= pTerm->prereqRight; } /* Determine the value of estBound. */ if( nEq<pProbe->nColumn ){ int j = pProbe->aiColumn[nEq]; if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pIdx) ){ |
︙ | ︙ | |||
2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 | */ nRow = (double)(aiRowEst[nEq] * nInMul); if( bInEst && nRow*2>aiRowEst[0] ){ nRow = aiRowEst[0]/2; nInMul = (int)(nRow / aiRowEst[nEq]); } /* Assume constant cost to access a row and logarithmic cost to ** do a binary search. Hence, the initial cost is the number of output ** rows plus log2(table-size) times the number of binary searches. */ cost = nRow + nInMul*estLog(aiRowEst[0]); /* Adjust the number of rows and the cost downward to reflect rows | > > > > > > > > > > > | 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 | */ nRow = (double)(aiRowEst[nEq] * nInMul); if( bInEst && nRow*2>aiRowEst[0] ){ nRow = aiRowEst[0]/2; nInMul = (int)(nRow / aiRowEst[nEq]); } #ifdef SQLITE_ENABLE_STAT2 /* If the constraint is of the form x=VALUE and histogram ** data is available for column x, then it might be possible ** to get a better estimate on the number of rows based on ** VALUE and how common that value is according to the histogram. */ if( nRow>(double)1 && nEq==1 && pFirstEqTerm!=0 ){ whereEqScanEst(pParse, pProbe, pFirstEqTerm, &nRow); } #endif /* SQLITE_ENABLE_STAT2 */ /* Assume constant cost to access a row and logarithmic cost to ** do a binary search. Hence, the initial cost is the number of output ** rows plus log2(table-size) times the number of binary searches. */ cost = nRow + nInMul*estLog(aiRowEst[0]); /* Adjust the number of rows and the cost downward to reflect rows |
︙ | ︙ |
Changes to test/analyze5.test.
︙ | ︙ | |||
210 211 212 213 214 215 216 217 218 219 220 221 222 223 | 16 {y>=0 AND y<=''} 50 17 {y>=0 AND y<='alpha'} 400 18 {y>=0 AND y<'alpha'} 50 19 {y>=0 AND y<='bravo'} 700 20 {y>=0 AND y<'charlie'} 700 21 {y>=0 AND y<='charlie'} 900 22 {y>=0 AND y<'delta'} 900 } { do_test analyze5-3.$testid { eqp "SELECT * FROM t1 WHERE $where" } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y>? AND y<?) (~%d rows)}} \ $rows] } | > > > > > > > > | 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 | 16 {y>=0 AND y<=''} 50 17 {y>=0 AND y<='alpha'} 400 18 {y>=0 AND y<'alpha'} 50 19 {y>=0 AND y<='bravo'} 700 20 {y>=0 AND y<'charlie'} 700 21 {y>=0 AND y<='charlie'} 900 22 {y>=0 AND y<'delta'} 900 23 {y>'alpha' AND y<x'00'} 600 24 {y>='bravo' AND y<x'00'} 600 25 {y>'bravo' AND y<x'00'} 300 26 {y>='charlie' AND y<x'00'} 300 27 {y>'charlie' AND y<x'00'} 100 28 {y>='delta' AND y<x'00'} 100 29 {y>'delta' AND y<x'00'} 50 30 {y>='echo' AND y<x'00'} 50 } { do_test analyze5-3.$testid { eqp "SELECT * FROM t1 WHERE $where" } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y>? AND y<?) (~%d rows)}} \ $rows] } |
︙ | ︙ |