Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Enhanced analysis of OR terms in a WHERE clause. Another step toward being able to use indices with OR-connected WHERE clause terms. (CVS 6045) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
c3f7aa019c613e18457f090685725dd8 |
User & Date: | drh 2008-12-20 02:06:14.000 |
Context
2008-12-20
| ||
02:14 | Specify type "void" in the parameter list of functions that take no parameters. Tickets #3545 and #3546. (CVS 6046) (check-in: c2228bd14a user: drh tags: trunk) | |
02:06 | Enhanced analysis of OR terms in a WHERE clause. Another step toward being able to use indices with OR-connected WHERE clause terms. (CVS 6045) (check-in: c3f7aa019c user: drh tags: trunk) | |
2008-12-19
| ||
18:45 | Add extra crash test cases that stress the savepoint mechanism to savepoint4.test. Currently, these tests are causing database corruption which (obviously) needs to be fixed. (CVS 6044) (check-in: e06a968aa5 user: danielk1977 tags: trunk) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
12 13 14 15 16 17 18 | ** This module contains C code that generates VDBE code used to process ** the WHERE clause of SQL statements. This module is responsible for ** generating the code that loops through a table looking for applicable ** rows. Indices are selected and used to speed the search when doing ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". ** | | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | ** This module contains C code that generates VDBE code used to process ** the WHERE clause of SQL statements. This module is responsible for ** generating the code that loops through a table looking for applicable ** rows. Indices are selected and used to speed the search when doing ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". ** ** $Id: where.c,v 1.339 2008/12/20 02:06:14 drh Exp $ */ #include "sqliteInt.h" /* ** Trace output macros */ #if defined(SQLITE_TEST) || defined(SQLITE_DEBUG) |
︙ | ︙ | |||
136 137 138 139 140 141 142 | /* ** A WhereTerm with eOperator==WO_OR has its u.pOrInfo pointer set to ** a dynamically allocated instance of the following structure. */ struct WhereOrInfo { WhereClause wc; /* The OR subexpression broken out */ | | | 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | /* ** A WhereTerm with eOperator==WO_OR has its u.pOrInfo pointer set to ** a dynamically allocated instance of the following structure. */ struct WhereOrInfo { WhereClause wc; /* The OR subexpression broken out */ Bitmask indexable; /* Bitmask of all indexable tables in the clause */ }; /* ** A WhereTerm with eOperator==WO_AND has its u.pAndInfo pointer set to ** a dynamically allocated instance of the following structure. */ struct WhereAndInfo { |
︙ | ︙ | |||
198 199 200 201 202 203 204 205 206 207 208 209 210 211 | #define WO_GE (WO_EQ<<(TK_GE-TK_EQ)) #define WO_MATCH 0x040 #define WO_ISNULL 0x080 #define WO_OR 0x100 /* Two or more OR-connected terms */ #define WO_AND 0x200 /* Two or more AND-connected terms */ #define WO_ALL 0xfff /* Mask of all possible WO_* values */ /* ** Value for wsFlags returned by bestIndex() and stored in ** WhereLevel.wsFlags. These flags determine which search ** strategies are appropriate. ** ** The least significant 12 bits is reserved as a mask for WO_ values above. | > | 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 | #define WO_GE (WO_EQ<<(TK_GE-TK_EQ)) #define WO_MATCH 0x040 #define WO_ISNULL 0x080 #define WO_OR 0x100 /* Two or more OR-connected terms */ #define WO_AND 0x200 /* Two or more AND-connected terms */ #define WO_ALL 0xfff /* Mask of all possible WO_* values */ #define WO_SINGLE 0x0ff /* Mask of all non-compound WO_* values */ /* ** Value for wsFlags returned by bestIndex() and stored in ** WhereLevel.wsFlags. These flags determine which search ** strategies are appropriate. ** ** The least significant 12 bits is reserved as a mask for WO_ values above. |
︙ | ︙ | |||
250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 | /* ** Deallocate all memory associated with a WhereOrInfo object. */ static void whereOrInfoDelete(sqlite3 *db, WhereOrInfo *p){ if( p ){ whereClauseClear(&p->wc); } } /* ** Deallocate all memory associated with a WhereAndInfo object. */ static void whereAndInfoDelete(sqlite3 *db, WhereAndInfo *p){ if( p ){ whereClauseClear(&p->wc); } } /* ** Deallocate a WhereClause structure. The WhereClause structure ** itself is not freed. This routine is the inverse of whereClauseInit(). */ | > > | 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 | /* ** Deallocate all memory associated with a WhereOrInfo object. */ static void whereOrInfoDelete(sqlite3 *db, WhereOrInfo *p){ if( p ){ whereClauseClear(&p->wc); sqlite3DbFree(db, p); } } /* ** Deallocate all memory associated with a WhereAndInfo object. */ static void whereAndInfoDelete(sqlite3 *db, WhereAndInfo *p){ if( p ){ whereClauseClear(&p->wc); sqlite3DbFree(db, p); } } /* ** Deallocate a WhereClause structure. The WhereClause structure ** itself is not freed. This routine is the inverse of whereClauseInit(). */ |
︙ | ︙ | |||
701 702 703 704 705 706 707 | static void transferJoinMarkings(Expr *pDerived, Expr *pBase){ pDerived->flags |= pBase->flags & EP_FromJoin; pDerived->iRightJoinTable = pBase->iRightJoinTable; } #if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) /* | | | > > > > > > | > > > | > > | > > > > > < > > > > > > | | | | > | | > | | | > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > < < | > > > | < < > > > > > > > | < < > | | < | > > > > > > | < > | < > > > > > | > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < < < | | | | | < | > | > > > | < < < | < | | < > > | | < > > > > > | > > | > > > > > > > > > > > > > > > > > > | < < > > > > > > > > > | > | | < > > > > > > > > > > | > > > > > > > | > > | < < | > > > | > | | | > > > > > | | | | | | | | | 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 | static void transferJoinMarkings(Expr *pDerived, Expr *pBase){ pDerived->flags |= pBase->flags & EP_FromJoin; pDerived->iRightJoinTable = pBase->iRightJoinTable; } #if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) /* ** Analyze a term that consists of two or more OR-connected ** subterms. So in: ** ** ... WHERE (a=5) AND (b=7 OR c=9 OR d=13) AND (d=13) ** ^^^^^^^^^^^^^^^^^^^^ ** ** This routine analyzes terms such as the middle term in the above example. ** A WhereOrTerm object is computed and attached to the term under ** analysis, regardless of the outcome of the analysis. Hence: ** ** WhereTerm.wtFlags |= TERM_ORINFO ** WhereTerm.u.pOrInfo = a dynamically allocated WhereOrTerm object ** ** The term being analyzed must have two or more of OR-connected subterms. ** A single subterms might be a set of AND-connected sub-subterms. ** Examples of terms under analysis: ** ** (A) t1.x=t2.y OR t1.x=t2.z OR t1.y=15 OR t1.z=t3.a+5 ** (B) x=expr1 OR expr2=x OR x=expr3 ** (C) t1.x=t2.y OR (t1.x=t2.z AND t1.y=15) ** (D) x=expr1 OR (y>11 AND y<22 AND z LIKE '*hello*') ** (E) (p.a=1 AND q.b=2 AND r.c=3) OR (p.x=4 AND q.y=5 AND r.z=6) ** ** CASE 1: ** ** If all subterms are of the form T.C=expr for some single column of C ** a single table T (as shown in example B above) then create a new virtual ** term that is an equivalent IN expression. In other words, if the term ** being analyzed is: ** ** x = expr1 OR expr2 = x OR x = expr3 ** ** then create a new virtual term like this: ** ** x IN (expr1,expr2,expr3) ** ** CASE 2: ** ** If all subterms are indexable by a single table T, then set ** ** WhereTerm.eOperator = WO_OR ** WhereTerm.u.pOrInfo->indexable |= the cursor number for table T ** ** A subterm is "indexable" if it is of the form ** "T.C <op> <expr>" where C is any column of table T and ** <op> is one of "=", "<", "<=", ">", ">=", "IS NULL", or "IN". ** A subterm is also indexable if it is an AND of two or more ** subsubterms at least one of which is indexable. Indexable AND ** subterms have their eOperator set to WO_AND and they have ** u.pAndInfo set to a dynamically allocated WhereAndTerm object. ** ** From another point of view, "indexable" means that the subterm could ** potentially be used with an index if an appropriate index exists. ** This analysis does not consider whether or not the index exists; that ** is something the bestIndex() routine will determine. This analysis ** only looks at whether subterms appropriate for indexing exist. ** ** All examples A through E above all satisfy case 2. But if a term ** also statisfies case 1 (such as B) we know that the optimizer will ** always prefer case 1, so in that case we pretend that case 2 is not ** satisfied. ** ** It might be the case that multiple tables are indexable. For example, ** (E) above is indexable on tables P, Q, and R. ** ** Terms that satisfy case 2 are candidates for lookup by using ** separate indices to find rowids for each subterm and composing ** the union of all rowids using a RowSet object. This is similar ** to "bitmap indices" in other database engines. ** ** OTHERWISE: ** ** If neither case 1 nor case 2 apply, then leave the eOperator set to ** zero. This term is not useful for search. */ static void exprAnalyzeOrTerm( SrcList *pSrc, /* the FROM clause */ WhereClause *pWC, /* the complete WHERE clause */ int idxTerm /* Index of the OR-term to be analyzed */ ){ Parse *pParse = pWC->pParse; /* Parser context */ sqlite3 *db = pParse->db; /* Database connection */ WhereTerm *pTerm = &pWC->a[idxTerm]; /* The term to be analyzed */ Expr *pExpr = pTerm->pExpr; /* The expression of the term */ ExprMaskSet *pMaskSet = pWC->pMaskSet; /* Table use masks */ int i; /* Loop counters */ WhereClause *pOrWc; /* Breakup of pTerm into subterms */ WhereTerm *pOrTerm; /* A Sub-term within the pOrWc */ WhereOrInfo *pOrInfo; /* Additional information associated with pTerm */ Bitmask chngToIN; /* Tables that might satisfy case 1 */ Bitmask indexable; /* Tables that are indexable, satisfying case 2 */ /* ** Break the OR clause into its separate subterms. The subterms are ** stored in a WhereClause structure containing within the WhereOrInfo ** object that is attached to the original OR clause term. */ assert( (pTerm->wtFlags & (TERM_DYNAMIC|TERM_ORINFO|TERM_ANDINFO))==0 ); assert( pExpr->op==TK_OR ); pTerm->u.pOrInfo = pOrInfo = sqlite3DbMallocRaw(db, sizeof(*pOrInfo)); if( pOrInfo==0 ) return; pTerm->wtFlags |= TERM_ORINFO; pOrWc = &pOrInfo->wc; whereClauseInit(pOrWc, pWC->pParse, pMaskSet); whereSplit(pOrWc, pExpr, TK_OR); exprAnalyzeAll(pSrc, pOrWc); if( db->mallocFailed ) return; assert( pOrWc->nTerm>=2 ); /* ** Compute the set of tables that might satisfy cases 1 or 2. */ indexable = chngToIN = ~(Bitmask)0; for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0 && indexable; i--, pOrTerm++){ if( (pOrTerm->eOperator & WO_SINGLE)==0 ){ chngToIN = 0; indexable = 0; /***** FIX ME. Some AND clauses are indexable. */ }else if( pOrTerm->wtFlags & TERM_COPIED ){ /* Skip this term for now. We revisit it when we process the ** corresponding TERM_VIRTUAL term */ }else{ Bitmask b; b = getMask(pMaskSet, pOrTerm->leftCursor); if( pOrTerm->wtFlags & TERM_VIRTUAL ){ WhereTerm *pOther = &pOrWc->a[pOrTerm->iParent]; b |= getMask(pMaskSet, pOther->leftCursor); } indexable &= b; if( pOrTerm->eOperator!=WO_EQ ){ chngToIN = 0; }else{ chngToIN &= b; } } } /* ** Record the set of tables that satisfy case 2. The set might be ** empty, but that is OK. */ pOrInfo->indexable = indexable; pTerm->eOperator = WO_OR; /* ** chngToIN holds a set of tables that *might* satisfy case 1. But ** we have to do some additional checking to see if case 1 really ** is satisfied. */ if( chngToIN ){ int okToChngToIN = 0; /* True if the conversion to IN is valid */ int iColumn = -1; /* Column index on lhs of IN operator */ int iCursor; /* Table cursor common to all terms */ int j = 0; /* Loop counter */ /* Search for a table and column that appears on one side or the ** other of the == operator in every subterm. That table and column ** will be recorded in iCursor and iColumn. There might not be any ** such table and column. Set okToChngToIN if an appropriate table ** and column is found but leave okToChngToIN false if not found. */ for(j=0; j<2 && !okToChngToIN; j++){ pOrTerm = pOrWc->a; for(i=pOrWc->nTerm-1; i>=0; i--, pOrTerm++){ assert( pOrTerm->eOperator==WO_EQ ); pOrTerm->wtFlags &= ~TERM_OR_OK; if( pOrTerm->leftCursor==iColumn ) continue; if( (chngToIN & getMask(pMaskSet, pOrTerm->leftCursor))==0 ) continue; iColumn = pOrTerm->u.leftColumn; iCursor = pOrTerm->leftCursor; break; } if( i<0 ){ assert( j==1 ); assert( (chngToIN&(chngToIN-1))==0 ); assert( chngToIN==getMask(pMaskSet, iColumn) ); break; } okToChngToIN = 1; for(; i>=0 && okToChngToIN; i--, pOrTerm++){ assert( pOrTerm->eOperator==WO_EQ ); if( pOrTerm->leftCursor!=iCursor ){ pOrTerm->wtFlags &= ~TERM_OR_OK; }else if( pOrTerm->u.leftColumn!=iColumn ){ okToChngToIN = 0; }else{ int affLeft, affRight; /* If the right-hand side is also a column, then the affinities ** of both right and left sides must be such that no type ** conversions are required on the right. (Ticket #2249) */ affRight = sqlite3ExprAffinity(pOrTerm->pExpr->pRight); affLeft = sqlite3ExprAffinity(pOrTerm->pExpr->pLeft); if( affRight!=0 && affRight!=affLeft ){ okToChngToIN = 0; }else{ pOrTerm->wtFlags |= TERM_OR_OK; } } } } /* At this point, okToChngToIN is true if original pTerm satisfies ** case 1. In that case, construct a new virtual term that is ** pTerm converted into an IN operator. */ if( okToChngToIN ){ Expr *pDup; /* A transient duplicate expression */ ExprList *pList = 0; /* The RHS of the IN operator */ Expr *pLeft = 0; /* The LHS of the IN operator */ Expr *pNew; /* The complete IN operator */ for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0; i--, pOrTerm++){ if( (pOrTerm->wtFlags & TERM_OR_OK)==0 ) continue; assert( pOrTerm->eOperator==WO_EQ ); assert( pOrTerm->leftCursor==iCursor ); assert( pOrTerm->u.leftColumn==iColumn ); pDup = sqlite3ExprDup(db, pOrTerm->pExpr->pRight); pList = sqlite3ExprListAppend(pWC->pParse, pList, pDup, 0); pLeft = pOrTerm->pExpr->pLeft; } assert( pLeft!=0 ); pDup = sqlite3ExprDup(db, pLeft); pNew = sqlite3Expr(db, TK_IN, pDup, 0, 0); if( pNew ){ int idxNew; transferJoinMarkings(pNew, pExpr); pNew->pList = pList; idxNew = whereClauseInsert(pWC, pNew, TERM_VIRTUAL|TERM_DYNAMIC); testcase( idxNew==0 ); exprAnalyze(pSrc, pWC, idxNew); pTerm = &pWC->a[idxTerm]; pWC->a[idxNew].iParent = idxTerm; pTerm->nChild = 1; }else{ sqlite3ExprListDelete(db, pList); } pTerm->eOperator = 0; /* case 1 trumps case 2 */ } } } #endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */ /* ** The input to this routine is an WhereTerm structure with only the ** "pExpr" field filled in. The job of this routine is to analyze the ** subexpression and populate all the other fields of the WhereTerm ** structure. ** ** If the expression is of the form "<expr> <op> X" it gets commuted ** to the standard form of "X <op> <expr>". ** ** If the expression is of the form "X <op> Y" where both X and Y are ** columns, then the original expression is unchanged and a new virtual ** term of the form "Y <op> X" is added to the WHERE clause and ** analyzed separately. The original term is marked with TERM_COPIED ** and the new term is marked with TERM_DYNAMIC (because it's pExpr ** needs to be freed with the WhereClause) and TERM_VIRTUAL (because it ** is a commuted copy of a prior term.) The original term has nChild=1 ** and the copy has idxParent set to the index of the original term. */ static void exprAnalyze( SrcList *pSrc, /* the FROM clause */ WhereClause *pWC, /* the WHERE clause */ int idxTerm /* Index of the term to be analyzed */ ){ WhereTerm *pTerm; /* The term to be analyzed */ ExprMaskSet *pMaskSet; /* Set of table index masks */ Expr *pExpr; /* The expression to be analyzed */ Bitmask prereqLeft; /* Prerequesites of the pExpr->pLeft */ Bitmask prereqAll; /* Prerequesites of pExpr */ Bitmask extraRight = 0; int nPattern; int isComplete; int noCase; int op; /* Top-level operator. pExpr->op */ Parse *pParse = pWC->pParse; /* Parsing context */ sqlite3 *db = pParse->db; /* Database connection */ if( db->mallocFailed ){ return; } pTerm = &pWC->a[idxTerm]; pMaskSet = pWC->pMaskSet; pExpr = pTerm->pExpr; |
︙ | ︙ | |||
885 886 887 888 889 890 891 | pNew->prereqAll = prereqAll; pNew->eOperator = operatorMask(pDup->op); } } #ifndef SQLITE_OMIT_BETWEEN_OPTIMIZATION /* If a term is the BETWEEN operator, create two new virtual terms | | > > > > > > > > > > > > | 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 | pNew->prereqAll = prereqAll; pNew->eOperator = operatorMask(pDup->op); } } #ifndef SQLITE_OMIT_BETWEEN_OPTIMIZATION /* If a term is the BETWEEN operator, create two new virtual terms ** that define the range that the BETWEEN implements. For example: ** ** a BETWEEN b AND c ** ** is converted into: ** ** (a BETWEEN b AND c) AND (a>=b) AND (a<=c) ** ** The two new terms are added onto the end of the WhereClause object. ** The new terms are "dynamic" and are children of the original BETWEEN ** term. That means that if the BETWEEN term is coded, the children are ** skipped. Or, if the children are satisfied by an index, the original ** BETWEEN term is skipped. */ else if( pExpr->op==TK_BETWEEN ){ ExprList *pList = pExpr->pList; int i; static const u8 ops[] = {TK_GE, TK_LE}; assert( pList!=0 ); assert( pList->nExpr==2 ); |
︙ | ︙ | |||
909 910 911 912 913 914 915 | pWC->a[idxNew].iParent = idxTerm; } pTerm->nChild = 2; } #endif /* SQLITE_OMIT_BETWEEN_OPTIMIZATION */ #if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) | < < < < < | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < < < < < < < < < | 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 | pWC->a[idxNew].iParent = idxTerm; } pTerm->nChild = 2; } #endif /* SQLITE_OMIT_BETWEEN_OPTIMIZATION */ #if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) /* Analyze a term that is composed of two or more subterms connected by ** an OR operator. */ else if( pExpr->op==TK_OR ){ exprAnalyzeOrTerm(pSrc, pWC, idxTerm); } #endif /* SQLITE_OMIT_OR_OPTIMIZATION */ #ifndef SQLITE_OMIT_LIKE_OPTIMIZATION /* Add constraints to reduce the search space on a LIKE or GLOB ** operator. ** |
︙ | ︙ |