Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Improvements and tests for detection of redundant DISTINCT qualifiers. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | experimental |
Files: | files | file ages | folders |
SHA1: |
7337293c87fb563604dd6ad284f2d1e3 |
User & Date: | dan 2011-07-01 14:21:38.743 |
Context
2011-07-01
| ||
18:26 | Improve use of indexes to optimize DISTINCT queries. (check-in: 6c202ea024 user: dan tags: experimental) | |
14:21 | Improvements and tests for detection of redundant DISTINCT qualifiers. (check-in: 7337293c87 user: dan tags: experimental) | |
2011-06-30
| ||
20:17 | Experimental changes to improve optimization of DISTINCT queries. (check-in: f7ba0219ef user: dan tags: experimental) | |
Changes
Changes to src/select.c.
︙ | ︙ | |||
3844 3845 3846 3847 3848 3849 3850 | } rc = multiSelect(pParse, p, pDest); explainSetInteger(pParse->iSelectId, iRestoreSelectId); return rc; } #endif | < < < < < < < < < < < < > > > > > > > > > > > > > > > > > > > > > > > > | 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 | } rc = multiSelect(pParse, p, pDest); explainSetInteger(pParse->iSelectId, iRestoreSelectId); return rc; } #endif /* If there is both a GROUP BY and an ORDER BY clause and they are ** identical, then disable the ORDER BY clause since the GROUP BY ** will cause elements to come out in the correct order. This is ** an optimization - the correct answer should result regardless. ** Use the SQLITE_GroupByOrder flag with SQLITE_TESTCTRL_OPTIMIZER ** to disable this optimization for testing purposes. */ if( sqlite3ExprListCompare(p->pGroupBy, pOrderBy)==0 && (db->flags & SQLITE_GroupByOrder)==0 ){ pOrderBy = 0; } /* If the query is DISTINCT with an ORDER BY but is not an aggregate, and ** if the select-list is the same as the ORDER BY list, then this query ** can be rewritten as a GROUP BY. In other words, this: ** ** SELECT DISTINCT xyz FROM ... ORDER BY xyz ** ** is transformed to: ** ** SELECT xyz FROM ... GROUP BY xyz ** ** The second form is preferred as a single index (or temp-table) may be ** used for both the ORDER BY and DISTINCT processing. As originally ** written the query must use a temp-table for at least one of the ORDER ** BY and DISTINCT, and an index or separate temp-table for the other. */ if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct && sqlite3ExprListCompare(pOrderBy, p->pEList)==0 ){ p->selFlags &= ~SF_Distinct; p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0); pGroupBy = p->pGroupBy; pOrderBy = 0; } /* If there is an ORDER BY clause, then this sorting ** index might end up being unused if the data can be ** extracted in pre-sorted order. If that is the case, then the ** OP_OpenEphemeral instruction will be changed to an OP_Noop once ** we figure out that the sorting index is not needed. The addrSortIndex ** variable is used to facilitate that change. |
︙ | ︙ | |||
3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 | */ if( addrSortIndex>=0 && pOrderBy==0 ){ sqlite3VdbeChangeToNoop(v, addrSortIndex, 1); p->addrOpenEphm[2] = -1; } if( pWInfo->eDistinct ){ assert( isDistinct ); assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE ); distinct = -1; if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED ){ int iJump; int iExpr; int iFlag = ++pParse->nMem; int iBase = pParse->nMem+1; int iBase2 = iBase + pEList->nExpr; pParse->nMem += (pEList->nExpr*2); | > > > > < < > > | 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 | */ if( addrSortIndex>=0 && pOrderBy==0 ){ sqlite3VdbeChangeToNoop(v, addrSortIndex, 1); p->addrOpenEphm[2] = -1; } if( pWInfo->eDistinct ){ VdbeOp *pOp; /* No longer required OpenEphemeral instr. */ pOp = sqlite3VdbeGetOp(v, addrDistinctIndex); assert( isDistinct ); assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE ); distinct = -1; if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED ){ int iJump; int iExpr; int iFlag = ++pParse->nMem; int iBase = pParse->nMem+1; int iBase2 = iBase + pEList->nExpr; pParse->nMem += (pEList->nExpr*2); /* Change the OP_OpenEphemeral coded earlier to an OP_Integer. The ** OP_Integer initializes the "first row" flag. */ pOp->opcode = OP_Integer; pOp->p1 = 1; pOp->p2 = iFlag; sqlite3ExprCodeExprList(pParse, pEList, iBase, 1); iJump = sqlite3VdbeCurrentAddr(v) + 1 + pEList->nExpr + 1 + 1; sqlite3VdbeAddOp2(v, OP_If, iFlag, iJump-1); for(iExpr=0; iExpr<pEList->nExpr; iExpr++){ CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[iExpr].pExpr); sqlite3VdbeAddOp3(v, OP_Ne, iBase+iExpr, iJump, iBase2+iExpr); sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ); sqlite3VdbeChangeP5(v, SQLITE_NULLEQ); } sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iContinue); sqlite3VdbeAddOp2(v, OP_Integer, 0, iFlag); assert( sqlite3VdbeCurrentAddr(v)==iJump ); sqlite3VdbeAddOp3(v, OP_Move, iBase, iBase2, pEList->nExpr); }else{ pOp->opcode = OP_Noop; } } /* Use the standard inner loop. */ selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, distinct, pDest, pWInfo->iContinue, pWInfo->iBreak); |
︙ | ︙ |
Changes to src/where.c.
︙ | ︙ | |||
4298 4299 4300 4301 4302 4303 4304 | Parse *pParse, SrcList *pTabList, WhereClause *pWC, ExprList *pDistinct ){ Table *pTab; Index *pIdx; | | < < < | < | | | 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 | Parse *pParse, SrcList *pTabList, WhereClause *pWC, ExprList *pDistinct ){ Table *pTab; Index *pIdx; int i; int iBase; /* If there is more than one table or sub-select in the FROM clause of ** this query, then it will not be possible to show that the DISTINCT ** clause is redundant. */ if( pTabList->nSrc!=1 ) return 0; iBase = pTabList->a[0].iCursor; pTab = pTabList->a[0].pTab; /* If any of the expressions is an IPK column, then return true. */ for(i=0; i<pDistinct->nExpr; i++){ Expr *p = pDistinct->a[i].pExpr; assert( p->op!=TK_COLUMN || p->iTable==iBase ); if( p->op==TK_COLUMN && p->iColumn<0 ) return 1; } /* Loop through all indices on the table, checking each to see if it makes ** the DISTINCT qualifier redundant. It does so if: ** ** 1. The index is itself UNIQUE, and ** |
︙ | ︙ | |||
4339 4340 4341 4342 4343 4344 4345 | int iCol = pIdx->aiColumn[i]; const char *zColl = pIdx->azColl[i]; if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){ int j; for(j=0; j<pDistinct->nExpr; j++){ Expr *p = pDistinct->a[j].pExpr; | > | < | | 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 | int iCol = pIdx->aiColumn[i]; const char *zColl = pIdx->azColl[i]; if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){ int j; for(j=0; j<pDistinct->nExpr; j++){ Expr *p = pDistinct->a[j].pExpr; assert( p->op!=TK_COLUMN || p->iTable==iBase ); if( p->op==TK_COLUMN && p->iColumn==iCol ){ CollSeq *pColl = sqlite3ExprCollSeq(pParse, p); if( pColl && 0==sqlite3StrICmp(zColl, pColl->zName) ) break; } } if( j==pDistinct->nExpr ) break; } } if( i==pIdx->nColumn ){ /* This index implies that the DISTINCT qualifier is redundant. */ |
︙ | ︙ |
Added test/distinct.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | # 2011 July 1 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # This file implements regression tests for SQLite library. The # focus of this script is the DISTINCT modifier. # set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix distinct proc is_distinct_noop {sql} { set sql1 $sql set sql2 [string map {DISTINCT ""} $sql] set program1 [list] set program2 [list] db eval "EXPLAIN $sql1" { if {$opcode != "Noop"} { lappend program1 $opcode } } db eval "EXPLAIN $sql2" { if {$opcode != "Noop"} { lappend program2 $opcode } } return [expr {$program1==$program2}] } proc do_distinct_noop_test {tn sql} { uplevel [list do_test $tn [list is_distinct_noop $sql] 1] } proc do_distinct_not_noop_test {tn sql} { uplevel [list do_test $tn [list is_distinct_noop $sql] 0] } #------------------------------------------------------------------------- # The following tests - distinct-1.* - check that the planner correctly # detects cases where a UNIQUE index means that a DISTINCT clause is # redundant. Currently the planner only detects such cases when there # is a single table in the FROM clause. # do_execsql_test 1.0 { CREATE TABLE t1(a, b, c, d); CREATE UNIQUE INDEX i1 ON t1(b, c); CREATE UNIQUE INDEX i2 ON t1(d COLLATE nocase); CREATE TABLE t2(x INTEGER PRIMARY KEY, y); CREATE TABLE t3(c1 PRIMARY KEY, c2); CREATE INDEX i3 ON t3(c2); } foreach {tn noop sql} { 1 1 "SELECT DISTINCT b, c FROM t1" 2 1 "SELECT DISTINCT c FROM t1 WHERE b = ?" 3 1 "SELECT DISTINCT rowid FROM t1" 4 1 "SELECT DISTINCT rowid, a FROM t1" 5 1 "SELECT DISTINCT x FROM t2" 6 1 "SELECT DISTINCT * FROM t2" 7 1 "SELECT DISTINCT * FROM (SELECT * FROM t2)" 8 1 "SELECT DISTINCT * FROM t1" 8 0 "SELECT DISTINCT a, b FROM t1" 9 0 "SELECT DISTINCT c FROM t1 WHERE b IN (1,2)" 10 0 "SELECT DISTINCT c FROM t1" 11 0 "SELECT DISTINCT b FROM t1" 12 0 "SELECT DISTINCT a, d FROM t1" 13 0 "SELECT DISTINCT a, b, c COLLATE nocase FROM t1" 14 1 "SELECT DISTINCT a, d COLLATE nocase FROM t1" 15 0 "SELECT DISTINCT a, d COLLATE binary FROM t1" 16 1 "SELECT DISTINCT a, b, c COLLATE binary FROM t1" 16 0 "SELECT DISTINCT t1.rowid FROM t1, t2" 17 0 { /* Technically, it would be possible to detect that DISTINCT ** is a no-op in cases like the following. But SQLite does not ** do so. */ SELECT DISTINCT t1.rowid FROM t1, t2 WHERE t1.rowid=t2.rowid } 18 1 "SELECT DISTINCT c1, c2 FROM t3" 19 1 "SELECT DISTINCT c1 FROM t3" 20 1 "SELECT DISTINCT * FROM t3" 21 0 "SELECT DISTINCT c2 FROM t3" 22 0 "SELECT DISTINCT * FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)" 23 1 "SELECT DISTINCT rowid FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)" 24 0 "SELECT DISTINCT rowid/2 FROM t1" 25 1 "SELECT DISTINCT rowid/2, rowid FROM t1" 26 1 "SELECT DISTINCT rowid/2, b FROM t1 WHERE c = ?" } { if {$noop} { do_distinct_noop_test 1.$tn $sql } else { do_distinct_not_noop_test 1.$tn $sql } } finish_test |