Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Improve use of indexes to optimize DISTINCT queries. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | experimental |
Files: | files | file ages | folders |
SHA1: |
6c202ea0247ff509f696eee3839975a8 |
User & Date: | dan 2011-07-01 18:26:40.769 |
Context
2011-07-01
| ||
18:43 | Merge latest trunk changes with experimental branch. (check-in: e56be74eab user: dan tags: experimental) | |
18:26 | Improve use of indexes to optimize DISTINCT queries. (check-in: 6c202ea024 user: dan tags: experimental) | |
14:21 | Improvements and tests for detection of redundant DISTINCT qualifiers. (check-in: 7337293c87 user: dan tags: experimental) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 | if( (exprTableUsage(pMaskSet, pList->a[iFirst++].pExpr)&allowed)!=0 ){ return 1; } } return 0; } /* ** This routine decides if pIdx can be used to satisfy the ORDER BY ** clause. If it can, it returns 1. If pIdx cannot satisfy the ** ORDER BY clause, this routine returns 0. ** ** pOrderBy is an ORDER BY clause from a SELECT statement. pTab is the | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 | if( (exprTableUsage(pMaskSet, pList->a[iFirst++].pExpr)&allowed)!=0 ){ return 1; } } return 0; } /* ** This function searches the expression list passed as the second argument ** for an expression of type TK_COLUMN that refers to the same column and ** uses the same collation sequence as the iCol'th column of index pIdx. ** Argument iBase is the cursor number used for the table that pIdx refers ** to. ** ** If such an expression is found, its index in pList->a[] is returned. If ** no expression is found, -1 is returned. */ static int findIndexCol( Parse *pParse, /* Parse context */ ExprList *pList, /* Expression list to search */ int iBase, /* Cursor for table associated with pIdx */ Index *pIdx, /* Index to match column of */ int iCol /* Column of index to match */ ){ int i; const char *zColl = pIdx->azColl[iCol]; for(i=0; i<pList->nExpr; i++){ Expr *p = pList->a[i].pExpr; if( pIdx->aiColumn[iCol]==p->iColumn && iBase==p->iTable ){ CollSeq *pColl = sqlite3ExprCollSeq(pParse, p); if( pColl && 0==sqlite3StrICmp(pColl->zName, zColl) ){ return i; } } } return -1; } /* ** This routine determines if pIdx can be used to assist in processing a ** DISTINCT qualifier. In other words, it tests whether or not using this ** index for the outer loop guarantees that rows with equal values for ** all expressions in the pDistinct list are delivered grouped together. ** ** For example, the query ** ** SELECT DISTINCT a, b, c FROM tbl WHERE a = ? ** ** can benefit from any index on columns "b" and "c". */ static int isDistinctIndex( Parse *pParse, /* Parsing context */ WhereClause *pWC, /* The WHERE clause */ Index *pIdx, /* The index being considered */ int base, /* Cursor number for the table pIdx is on */ ExprList *pDistinct, /* The DISTINCT expressions */ int nEqCol /* Number of index columns with == */ ){ Bitmask mask = 0; /* Mask of unaccounted for pDistinct exprs */ int i; /* Iterator variable */ if( pIdx->zName==0 || pDistinct==0 || pDistinct->nExpr>=BMS ) return 0; /* Loop through all the expressions in the distinct list. If any of them ** are not simple column references, return early. Otherwise, test if the ** WHERE clause contains a "col=X" clause. If it does, the expression ** can be ignored. If it does not, and the column does not belong to the ** same table as index pIdx, return early. Finally, if there is no ** matching "col=X" expression and the column is on the same table as pIdx, ** set the corresponding bit in variable mask. */ for(i=0; i<pDistinct->nExpr; i++){ WhereTerm *pTerm; Expr *p = pDistinct->a[i].pExpr; if( p->op!=TK_COLUMN ) return 0; pTerm = findTerm(pWC, p->iTable, p->iColumn, ~(Bitmask)0, WO_EQ, 0); if( pTerm ){ Expr *pX = pTerm->pExpr; CollSeq *p1 = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight); CollSeq *p2 = sqlite3ExprCollSeq(pParse, p); if( p1==p2 ) continue; } if( p->iTable!=base ) return 0; mask |= (((Bitmask)1) << i); } for(i=nEqCol; mask && i<pIdx->nColumn; i++){ int iExpr = findIndexCol(pParse, pDistinct, base, pIdx, i); if( iExpr<0 ) break; mask &= ~(((Bitmask)1) << iExpr); } return (mask==0); } /* ** Return true if the DISTINCT expression-list passed as the third argument ** is redundant. A DISTINCT list is redundant if the database contains a ** UNIQUE index that guarantees that the result of the query will be distinct ** anyway. */ static int isDistinctRedundant( Parse *pParse, SrcList *pTabList, WhereClause *pWC, ExprList *pDistinct ){ Table *pTab; Index *pIdx; int i; int iBase; /* If there is more than one table or sub-select in the FROM clause of ** this query, then it will not be possible to show that the DISTINCT ** clause is redundant. */ if( pTabList->nSrc!=1 ) return 0; iBase = pTabList->a[0].iCursor; pTab = pTabList->a[0].pTab; /* If any of the expressions is an IPK column, then return true. */ for(i=0; i<pDistinct->nExpr; i++){ Expr *p = pDistinct->a[i].pExpr; assert( p->op!=TK_COLUMN || p->iTable==iBase ); if( p->op==TK_COLUMN && p->iColumn<0 ) return 1; } /* Loop through all indices on the table, checking each to see if it makes ** the DISTINCT qualifier redundant. It does so if: ** ** 1. The index is itself UNIQUE, and ** ** 2. All of the columns in the index are either part of the pDistinct ** list, or else the WHERE clause contains a term of the form "col=X", ** where X is a constant value. The collation sequences of the ** comparison and select-list expressions must match those of the index. */ for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ if( pIdx->onError==OE_None ) continue; for(i=0; i<pIdx->nColumn; i++){ int iCol = pIdx->aiColumn[i]; if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) && 0>findIndexCol(pParse, pDistinct, iBase, pIdx, i) ){ break; } } if( i==pIdx->nColumn ){ /* This index implies that the DISTINCT qualifier is redundant. */ return 1; } } return 0; } /* ** This routine decides if pIdx can be used to satisfy the ORDER BY ** clause. If it can, it returns 1. If pIdx cannot satisfy the ** ORDER BY clause, this routine returns 0. ** ** pOrderBy is an ORDER BY clause from a SELECT statement. pTab is the |
︙ | ︙ | |||
2906 2907 2908 2909 2910 2911 2912 | wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY; wsFlags |= (rev ? WHERE_REVERSE : 0); } /* If there is a DISTINCT qualifier and this index will scan rows in ** order of the DISTINCT expressions, clear bDist and set the appropriate ** flags in wsFlags. */ | < | < | 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 | wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY; wsFlags |= (rev ? WHERE_REVERSE : 0); } /* If there is a DISTINCT qualifier and this index will scan rows in ** order of the DISTINCT expressions, clear bDist and set the appropriate ** flags in wsFlags. */ if( isDistinctIndex(pParse, pWC, pProbe, iCur, pDistinct, nEq) ){ bDist = 0; wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT; } /* If currently calculating the cost of using an index (not the IPK ** index), determine if all required column data may be obtained without ** using the main table (i.e. if the index is a covering |
︙ | ︙ | |||
4284 4285 4286 4287 4288 4289 4290 | } } whereClauseClear(pWInfo->pWC); sqlite3DbFree(db, pWInfo); } } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 | } } whereClauseClear(pWInfo->pWC); sqlite3DbFree(db, pWInfo); } } /* ** Generate the beginning of the loop used for WHERE clause processing. ** The return value is a pointer to an opaque structure that contains ** information needed to terminate the loop. Later, the calling routine ** should invoke sqlite3WhereEnd() with the return value of this function ** in order to complete the WHERE clause processing. |
︙ | ︙ | |||
4579 4580 4581 4582 4583 4584 4585 | goto whereBeginError; } /* Check if the DISTINCT qualifier, if there is one, is redundant. ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT. */ | | | 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 | goto whereBeginError; } /* Check if the DISTINCT qualifier, if there is one, is redundant. ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT. */ if( pDistinct && isDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){ pDistinct = 0; pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; } /* Chose the best index to use for each table in the FROM clause. ** ** This loop fills in the following fields: |
︙ | ︙ |
Changes to test/distinct.test.
︙ | ︙ | |||
37 38 39 40 41 42 43 44 45 46 47 48 49 50 | proc do_distinct_noop_test {tn sql} { uplevel [list do_test $tn [list is_distinct_noop $sql] 1] } proc do_distinct_not_noop_test {tn sql} { uplevel [list do_test $tn [list is_distinct_noop $sql] 0] } #------------------------------------------------------------------------- # The following tests - distinct-1.* - check that the planner correctly # detects cases where a UNIQUE index means that a DISTINCT clause is # redundant. Currently the planner only detects such cases when there # is a single table in the FROM clause. # | > > > > > > > > > > > > > > > > > | 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | proc do_distinct_noop_test {tn sql} { uplevel [list do_test $tn [list is_distinct_noop $sql] 1] } proc do_distinct_not_noop_test {tn sql} { uplevel [list do_test $tn [list is_distinct_noop $sql] 0] } proc do_temptables_test {tn sql temptables} { uplevel [list do_test $tn [subst -novar { set ret "" db eval "EXPLAIN [set sql]" { if {$opcode == "OpenEphemeral"} { if {$p5 != "10" && $p5!="00"} { error "p5 = $p5" } if {$p5 == "10"} { lappend ret hash } else { lappend ret btree } } } set ret }] $temptables] } #------------------------------------------------------------------------- # The following tests - distinct-1.* - check that the planner correctly # detects cases where a UNIQUE index means that a DISTINCT clause is # redundant. Currently the planner only detects such cases when there # is a single table in the FROM clause. # |
︙ | ︙ | |||
103 104 105 106 107 108 109 110 111 112 113 114 115 | if {$noop} { do_distinct_noop_test 1.$tn $sql } else { do_distinct_not_noop_test 1.$tn $sql } } finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 | if {$noop} { do_distinct_noop_test 1.$tn $sql } else { do_distinct_not_noop_test 1.$tn $sql } } #------------------------------------------------------------------------- # The following tests - distinct-2.* - test cases where an index is # used to deliver results in order of the DISTINCT expressions. # drop_all_tables do_execsql_test 2.0 { CREATE TABLE t1(a, b, c); CREATE INDEX i1 ON t1(a, b); CREATE INDEX i2 ON t1(b COLLATE nocase, c COLLATE nocase); INSERT INTO t1 VALUES('a', 'b', 'c'); INSERT INTO t1 VALUES('A', 'B', 'C'); INSERT INTO t1 VALUES('a', 'b', 'c'); INSERT INTO t1 VALUES('A', 'B', 'C'); } foreach {tn sql temptables res} { 1 "a, b FROM t1" {} {A B a b} 2 "b, a FROM t1" {} {B A b a} 3 "a, b, c FROM t1" {hash} {a b c A B C} 4 "a, b, c FROM t1 ORDER BY a, b, c" {btree} {A B C a b c} 5 "b FROM t1 WHERE a = 'a'" {} {b} 6 "b FROM t1" {hash} {b B} 7 "a FROM t1" {} {A a} 8 "b COLLATE nocase FROM t1" {} {b} 9 "b COLLATE nocase FROM t1 ORDER BY b COLLATE nocase" {} {B} } { do_execsql_test 2.$tn.1 "SELECT DISTINCT $sql" $res do_temptables_test 2.$tn.2 "SELECT DISTINCT $sql" $temptables } finish_test |
Changes to test/tester.tcl.
︙ | ︙ | |||
16 17 18 19 20 21 22 | #------------------------------------------------------------------------- # The commands provided by the code in this file to help with creating # test cases are as follows: # # Commands to manipulate the db and the file-system at a high level: # # copy_file FROM TO | | | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | #------------------------------------------------------------------------- # The commands provided by the code in this file to help with creating # test cases are as follows: # # Commands to manipulate the db and the file-system at a high level: # # copy_file FROM TO # drop_all_tables ?DB? # forcedelete FILENAME # # Test the capability of the SQLite version built into the interpreter to # determine if a specific test can be run: # # ifcapable EXPR # |
︙ | ︙ |