Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Continued refactoring of the ORDER BY optimization logic. This check-in is close to working, but it still has issues. A few test cases fail. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | qp-enhancements |
Files: | files | file ages | folders |
SHA1: |
adbdc663f3d22ff03f21040a811d585c |
User & Date: | drh 2012-10-08 18:23:51.612 |
Context
2012-10-08
| ||
19:41 | Bug fixes in the ORDER BY optimizer. (check-in: 301bbee404 user: drh tags: qp-enhancements) | |
18:23 | Continued refactoring of the ORDER BY optimization logic. This check-in is close to working, but it still has issues. A few test cases fail. (check-in: adbdc663f3 user: drh tags: qp-enhancements) | |
2012-10-04
| ||
12:10 | Yet another refactoring of ORDER BY logic in the query planner. This particular check-in works mostly, but still has a few minor issues. (check-in: 8f4487450b user: drh tags: qp-enhancements) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
2708 2709 2710 2711 2712 2713 2714 | } return rc; } #endif /* defined(SQLITE_ENABLE_STAT3) */ /* ** Check to see if column iCol of the table with cursor iTab will appear | | | > < | | > > | > > > > | 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 | } return rc; } #endif /* defined(SQLITE_ENABLE_STAT3) */ /* ** Check to see if column iCol of the table with cursor iTab will appear ** in sorted order according to the current query plan. ** ** Return values: ** ** 0 iCol is not ordered ** 1 iCol has only a single value ** 2 iCol is in ASC order ** 3 iCol is in DESC order */ static int isOrderedColumn( WhereBestIdx *p, int iTab, int iCol ){ int i, j; WhereLevel *pLevel = &p->aLevel[p->i-1]; Index *pIdx; u8 sortOrder; for(i=p->i-1; i>=0; i--, pLevel--){ if( pLevel->iTabCur!=iTab ) continue; if( (pLevel->plan.wsFlags & WHERE_ALL_UNIQUE)!=0 ){ |
︙ | ︙ | |||
2751 2752 2753 2754 2755 2756 2757 | testcase( (pLevel->plan.wsFlags & WHERE_REVERSE)!=0 ); } if( (pLevel->plan.wsFlags & WHERE_REVERSE)!=0 ){ assert( sortOrder==0 || sortOrder==1 ); testcase( sortOrder==1 ); sortOrder = 1 - sortOrder; } | < < | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 | testcase( (pLevel->plan.wsFlags & WHERE_REVERSE)!=0 ); } if( (pLevel->plan.wsFlags & WHERE_REVERSE)!=0 ){ assert( sortOrder==0 || sortOrder==1 ); testcase( sortOrder==1 ); sortOrder = 1 - sortOrder; } return sortOrder+2; } return 0; } /* ** This routine decides if pIdx can be used to satisfy the ORDER BY ** clause, either in whole or in part. The return value is the ** cumulative number of terms in the ORDER BY clause that are satisfied ** by the index pIdx and other indices in outer loops. ** ** The table being queried has a cursor number of "base". pIdx is the |
︙ | ︙ | |||
2835 2836 2837 2838 2839 2840 2841 | int base, /* Cursor number for the table to be sorted */ int *pbRev /* Set to 1 for reverse-order scan of pIdx */ ){ int i; /* Number of pIdx terms used */ int j; /* Number of ORDER BY terms satisfied */ int sortOrder = 2; /* 0: forward. 1: backward. 2: unknown */ int nTerm; /* Number of ORDER BY terms */ | | | > | | | | | | | > | > > > > | | < < < < < | > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < < < < < | | < < < < | < < < < > < | > | < < | | < < < | | 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 | int base, /* Cursor number for the table to be sorted */ int *pbRev /* Set to 1 for reverse-order scan of pIdx */ ){ int i; /* Number of pIdx terms used */ int j; /* Number of ORDER BY terms satisfied */ int sortOrder = 2; /* 0: forward. 1: backward. 2: unknown */ int nTerm; /* Number of ORDER BY terms */ struct ExprList_item *pOBItem;/* A term of the ORDER BY clause */ Table *pTab = pIdx->pTable; /* Table that owns index pIdx */ ExprList *pOrderBy; /* The ORDER BY clause */ Parse *pParse = p->pParse; /* Parser context */ sqlite3 *db = pParse->db; /* Database connection */ int nPriorSat; /* ORDER BY terms satisfied by outer loops */ int seenRowid = 0; /* True if an ORDER BY rowid term is seen */ int uniqueNotNull = 1; /* pIdx is UNIQUE with all terms are NOT NULL */ if( p->i==0 ){ nPriorSat = 0; }else{ nPriorSat = p->aLevel[p->i-1].plan.nOBSat; if( OptimizationDisabled(db, SQLITE_OrderByIdxJoin) ) return nPriorSat; } pOrderBy = p->pOrderBy; assert( pOrderBy!=0 ); if( pIdx->bUnordered ) return nPriorSat; nTerm = pOrderBy->nExpr; uniqueNotNull = pIdx->onError!=OE_None; assert( nTerm>0 ); /* Argument pIdx must either point to a 'real' named index structure, ** or an index structure allocated on the stack by bestBtreeIndex() to ** represent the rowid index that is part of every table. */ assert( pIdx->zName || (pIdx->nColumn==1 && pIdx->aiColumn[0]==-1) ); /* Match terms of the ORDER BY clause against columns of ** the index. ** ** Note that indices have pIdx->nColumn regular columns plus ** one additional column containing the rowid. The rowid column ** of the index is also allowed to match against the ORDER BY ** clause. */ j = nPriorSat; for(i=0,pOBItem=&pOrderBy->a[j]; j<nTerm && i<=pIdx->nColumn; i++){ Expr *pOBExpr; /* The expression of the ORDER BY pOBItem */ CollSeq *pColl; /* The collating sequence of pOBExpr */ int termSortOrder; /* Sort order for this term */ int iColumn; /* The i-th column of the index. -1 for rowid */ int iSortOrder; /* 1 for DESC, 0 for ASC on the i-th index term */ int isEq; /* Subject to an == or IS NULL constraint */ int isMatch; /* ORDER BY term matches the index term */ const char *zColl; /* Name of collating sequence for i-th index term */ WhereTerm *pConstraint; /* A constraint in the WHERE clause */ /* If the next term of the ORDER BY clause refers to anything other than ** a column in the "base" table, then this index will not be of any ** further use in handling the ORDER BY. */ pOBExpr = pOBItem->pExpr; if( pOBExpr->op!=TK_COLUMN || pOBExpr->iTable!=base ){ break; } /* Find column number and collating sequence for the next entry ** in the index */ if( pIdx->zName && i<pIdx->nColumn ){ iColumn = pIdx->aiColumn[i]; if( iColumn==pIdx->pTable->iPKey ){ iColumn = -1; } iSortOrder = pIdx->aSortOrder[i]; zColl = pIdx->azColl[i]; assert( zColl!=0 ); }else{ iColumn = -1; iSortOrder = 0; zColl = 0; } /* Check to see if the column number and collating sequence of the ** index match the column number and collating sequence of the ORDER BY ** clause entry. Set isMatch to 1 if they both match. */ if( pOBExpr->iColumn==iColumn ){ if( zColl ){ pColl = sqlite3ExprCollSeq(pParse, pOBExpr); if( !pColl ) pColl = db->pDfltColl; isMatch = sqlite3StrICmp(pColl->zName, zColl)==0; }else{ isMatch = 1; } }else{ isMatch = 0; } /* termSortOrder is 0 or 1 for whether or not the access loop should ** run forward or backwards (respectively) in order to satisfy this ** term of the ORDER BY clause. */ termSortOrder = iSortOrder ^ pOBItem->sortOrder; /* If X is the column in the index and ORDER BY clause, check to see ** if there are any X= or X IS NULL constraints in the WHERE clause. */ pConstraint = findTerm(p->pWC, base, iColumn, p->notReady, WO_EQ|WO_ISNULL|WO_IN, pIdx); if( pConstraint==0 ){ isEq = 0; }else if( pConstraint->eOperator==WO_IN ){ break; }else if( pConstraint->prereqRight==0 ){ isEq = 1; }else if( pConstraint->eOperator==WO_ISNULL ){ uniqueNotNull = 0; isEq = 1; }else{ Expr *pRight = pConstraint->pExpr->pRight; if( pRight->op==TK_COLUMN ){ WHERETRACE((" .. isOrderedColumn(tab=%d,col=%d)", pRight->iTable, pRight->iColumn)); isEq = isOrderedColumn(p, pRight->iTable, pRight->iColumn); WHERETRACE((" -> isEq=%d\n", isEq)); if( isEq>=2 && isEq!=pOBItem->sortOrder+2 ){ break; } }else{ isEq = 0; } } assert( pOBItem->sortOrder==0 || pOBItem->sortOrder==1 ); assert( iSortOrder==0 || iSortOrder==1 ); if( !isMatch ){ if( isEq==0 ){ break; }else{ continue; } }else if( sortOrder==2 ){ sortOrder = termSortOrder; }else if( termSortOrder!=sortOrder ){ break; } j++; pOBItem++; if( iColumn<0 ){ seenRowid = 1; break; }else if( pTab->aCol[iColumn].notNull==0 ){ uniqueNotNull = 0; } } |
︙ | ︙ | |||
3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 | /* Loop over all indices looking for the best one to use */ for(; pProbe; pIdx=pProbe=pProbe->pNext){ const tRowcnt * const aiRowEst = pProbe->aiRowEst; WhereCost pc; /* Cost of using pProbe */ double log10N = (double)1; /* base-10 logarithm of nRow (inexact) */ int bRev = 2; /* 0=forward scan. 1=reverse. 2=undecided */ /* The following variables are populated based on the properties of ** index being evaluated. They are then used to determine the expected ** cost and number of rows returned. ** ** pc.plan.nEq: ** Number of equality terms that can be implemented using the index. | > > > > > | 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 | /* Loop over all indices looking for the best one to use */ for(; pProbe; pIdx=pProbe=pProbe->pNext){ const tRowcnt * const aiRowEst = pProbe->aiRowEst; WhereCost pc; /* Cost of using pProbe */ double log10N = (double)1; /* base-10 logarithm of nRow (inexact) */ int bRev = 2; /* 0=forward scan. 1=reverse. 2=undecided */ WHERETRACE(( " %s(%s):\n", pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk") )); /* The following variables are populated based on the properties of ** index being evaluated. They are then used to determine the expected ** cost and number of rows returned. ** ** pc.plan.nEq: ** Number of equality terms that can be implemented using the index. |
︙ | ︙ | |||
3221 3222 3223 3224 3225 3226 3227 | /* If there is an ORDER BY clause and the index being considered will ** naturally scan rows in the required order, set the appropriate flags ** in pc.plan.wsFlags. Otherwise, if there is an ORDER BY clause but ** the index will scan rows in a different order, set the bSort ** variable. */ assert( bRev>=0 && bRev<=2 ); | | | < < | | > | 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 | /* If there is an ORDER BY clause and the index being considered will ** naturally scan rows in the required order, set the appropriate flags ** in pc.plan.wsFlags. Otherwise, if there is an ORDER BY clause but ** the index will scan rows in a different order, set the bSort ** variable. */ assert( bRev>=0 && bRev<=2 ); if( bSort && (pSrc->jointype & JT_LEFT)==0 ){ int bRev = 2; WHERETRACE((" --> before isSortingIndex: nPriorSat=%d\n",nPriorSat)); pc.plan.nOBSat = isSortingIndex(p, pProbe, iCur, &bRev); WHERETRACE((" --> after isSortingIndex: bRev=%d nOBSat=%d\n", bRev, pc.plan.nOBSat)); if( nPriorSat<pc.plan.nOBSat || (pc.plan.wsFlags & WHERE_UNIQUE)!=0 ){ pc.plan.wsFlags |= WHERE_ORDERED; } if( nOrderBy==pc.plan.nOBSat ){ bSort = 0; pc.plan.wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE; } |
︙ | ︙ | |||
3345 3346 3347 3348 3349 3350 3351 | ** the cost function to err on the side of choosing an index over ** choosing a full scan. This 4x full-scan penalty is an arguable ** decision and one which we expect to revisit in the future. But ** it seems to be working well enough at the moment. */ pc.rCost = aiRowEst[0]*4; pc.plan.wsFlags &= ~WHERE_IDX_ONLY; | > | > > | 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 | ** the cost function to err on the side of choosing an index over ** choosing a full scan. This 4x full-scan penalty is an arguable ** decision and one which we expect to revisit in the future. But ** it seems to be working well enough at the moment. */ pc.rCost = aiRowEst[0]*4; pc.plan.wsFlags &= ~WHERE_IDX_ONLY; if( pIdx ){ pc.plan.wsFlags &= ~WHERE_ORDERED; pc.plan.nOBSat = nPriorSat; } }else{ log10N = estLog(aiRowEst[0]); pc.rCost = pc.plan.nRow; if( pIdx ){ if( bLookup ){ /* For an index lookup followed by a table lookup: ** nInMul index searches to find the start of each index range |
︙ | ︙ | |||
3450 3451 3452 3453 3454 3455 3456 | } } if( pc.plan.nRow<2 ) pc.plan.nRow = 2; } WHERETRACE(( | < | | | < | 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 | } } if( pc.plan.nRow<2 ) pc.plan.nRow = 2; } WHERETRACE(( " nEq=%d nInMul=%d rangeDiv=%d bSort=%d bLookup=%d wsFlags=0x%08x\n" " notReady=0x%llx log10N=%.1f nRow=%.1f cost=%.1f\n" " used=0x%llx nOBSat=%d\n", pc.plan.nEq, nInMul, (int)rangeDiv, bSort, bLookup, pc.plan.wsFlags, p->notReady, log10N, pc.plan.nRow, pc.rCost, pc.used, pc.plan.nOBSat )); /* If this index is the best we have seen so far, then record this ** index and its cost in the p->cost structure. |
︙ | ︙ | |||
3494 3495 3496 3497 3498 3499 3500 | assert( p->pOrderBy || (p->cost.plan.wsFlags&WHERE_ORDERED)==0 ); assert( p->cost.plan.u.pIdx==0 || (p->cost.plan.wsFlags&WHERE_ROWID_EQ)==0 ); assert( pSrc->pIndex==0 || p->cost.plan.u.pIdx==0 || p->cost.plan.u.pIdx==pSrc->pIndex ); | | | 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 | assert( p->pOrderBy || (p->cost.plan.wsFlags&WHERE_ORDERED)==0 ); assert( p->cost.plan.u.pIdx==0 || (p->cost.plan.wsFlags&WHERE_ROWID_EQ)==0 ); assert( pSrc->pIndex==0 || p->cost.plan.u.pIdx==0 || p->cost.plan.u.pIdx==pSrc->pIndex ); WHERETRACE((" best index is: %s\n", p->cost.plan.u.pIdx ? p->cost.plan.u.pIdx->zName : "ipk")); bestOrClauseIndex(p); bestAutomaticIndex(p); p->cost.plan.wsFlags |= eqTermMask; } |
︙ | ︙ | |||
5044 5045 5046 5047 5048 5049 5050 | if( (m & sWBI.notValid)==0 ){ if( j==iFrom ) iFrom++; continue; } sWBI.notReady = (isOptimal ? m : sWBI.notValid); if( sWBI.pSrc->pIndex==0 ) nUnconstrained++; | | | 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 | if( (m & sWBI.notValid)==0 ){ if( j==iFrom ) iFrom++; continue; } sWBI.notReady = (isOptimal ? m : sWBI.notValid); if( sWBI.pSrc->pIndex==0 ) nUnconstrained++; WHERETRACE((" === trying table %d (%s) with isOptimal=%d ===\n", j, sWBI.pSrc->pTab->zName, isOptimal)); assert( sWBI.pSrc->pTab ); #ifndef SQLITE_OMIT_VIRTUALTABLE if( IsVirtual(sWBI.pSrc->pTab) ){ sWBI.ppIdxInfo = &pWInfo->a[j].pIdxInfo; bestVirtualIndex(&sWBI); }else |
︙ | ︙ | |||
5097 5098 5099 5100 5101 5102 5103 | && (bestJ<0 || (notIndexed&m)!=0 /* (2) */ || (bestPlan.plan.wsFlags & WHERE_NOT_FULLSCAN)==0 || (sWBI.cost.plan.wsFlags & WHERE_NOT_FULLSCAN)!=0) && (nUnconstrained==0 || sWBI.pSrc->pIndex==0 /* (3) */ || NEVER((sWBI.cost.plan.wsFlags & WHERE_NOT_FULLSCAN)!=0)) && (bestJ<0 || compareCost(&sWBI.cost, &bestPlan)) /* (4) */ ){ | | | | 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 | && (bestJ<0 || (notIndexed&m)!=0 /* (2) */ || (bestPlan.plan.wsFlags & WHERE_NOT_FULLSCAN)==0 || (sWBI.cost.plan.wsFlags & WHERE_NOT_FULLSCAN)!=0) && (nUnconstrained==0 || sWBI.pSrc->pIndex==0 /* (3) */ || NEVER((sWBI.cost.plan.wsFlags & WHERE_NOT_FULLSCAN)!=0)) && (bestJ<0 || compareCost(&sWBI.cost, &bestPlan)) /* (4) */ ){ WHERETRACE((" === table %d (%s) is best so far\n" " cost=%.1f, nRow=%.1f, nOBSat=%d, wsFlags=%08x\n", j, sWBI.pSrc->pTab->zName, sWBI.cost.rCost, sWBI.cost.plan.nRow, sWBI.cost.plan.nOBSat, sWBI.cost.plan.wsFlags)); bestPlan = sWBI.cost; bestJ = j; } if( doNotReorder ) break; |
︙ | ︙ |