Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Test cases and tuning of the new optimizer code. (CVS 2567) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
4b02703dec71aa78e5f8d8cab5b95096 |
User & Date: | drh 2005-07-28 20:51:19.000 |
Context
2005-07-28
| ||
23:12 | The BETWEEN operator in a WHERE clause is now able to use indices. (CVS 2568) (check-in: cdf8c9584b user: drh tags: trunk) | |
20:51 | Test cases and tuning of the new optimizer code. (CVS 2567) (check-in: 4b02703dec user: drh tags: trunk) | |
16:51 | The new optimizer now passes all regression tests. (CVS 2566) (check-in: a212128433 user: drh tags: trunk) | |
Changes
Changes to src/vdbe.c.
︙ | ︙ | |||
39 40 41 42 43 44 45 | ** ** Various scripts scan this source file in order to generate HTML ** documentation, headers files, or other derived files. The formatting ** of the code in this file is, therefore, important. See other comments ** in this file for details. If in doubt, do not deviate from existing ** commenting and indentation practices when changing or adding code. ** | | | 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | ** ** Various scripts scan this source file in order to generate HTML ** documentation, headers files, or other derived files. The formatting ** of the code in this file is, therefore, important. See other comments ** in this file for details. If in doubt, do not deviate from existing ** commenting and indentation practices when changing or adding code. ** ** $Id: vdbe.c,v 1.478 2005/07/28 20:51:19 drh Exp $ */ #include "sqliteInt.h" #include "os.h" #include <ctype.h> #include "vdbeInt.h" /* |
︙ | ︙ | |||
450 451 452 453 454 455 456 | Vdbe *p /* The VDBE */ ){ int pc; /* The program counter */ Op *pOp; /* Current operation */ int rc = SQLITE_OK; /* Value to return */ sqlite3 *db = p->db; /* The database */ Mem *pTos; /* Top entry in the operand stack */ | < | 450 451 452 453 454 455 456 457 458 459 460 461 462 463 | Vdbe *p /* The VDBE */ ){ int pc; /* The program counter */ Op *pOp; /* Current operation */ int rc = SQLITE_OK; /* Value to return */ sqlite3 *db = p->db; /* The database */ Mem *pTos; /* Top entry in the operand stack */ #ifdef VDBE_PROFILE unsigned long long start; /* CPU clock count at start of opcode */ int origPc; /* Program counter at start of opcode */ #endif #ifndef SQLITE_OMIT_PROGRESS_CALLBACK int nProgressOps = 0; /* Opcodes executed since progress callback. */ #endif |
︙ | ︙ | |||
2535 2536 2537 2538 2539 2540 2541 | wrFlag = pOp->opcode==OP_OpenWrite; if( p2<=0 ){ assert( pTos>=p->aStack ); Integerify(pTos); p2 = pTos->i; assert( (pTos->flags & MEM_Dyn)==0 ); pTos--; | | < < < < | 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 | wrFlag = pOp->opcode==OP_OpenWrite; if( p2<=0 ){ assert( pTos>=p->aStack ); Integerify(pTos); p2 = pTos->i; assert( (pTos->flags & MEM_Dyn)==0 ); pTos--; assert( p2>=2 ); } assert( i>=0 ); pCur = allocateCursor(p, i); if( pCur==0 ) goto no_mem; pCur->nullRow = 1; if( pX==0 ) break; /* We always provide a key comparison function. If the table being |
︙ | ︙ | |||
4601 4602 4603 4604 4605 4606 4607 | break; } /* An other opcode is illegal... */ default: { | | < < | 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 | break; } /* An other opcode is illegal... */ default: { assert( 0 ); break; } /***************************************************************************** ** The cases of the switch statement above this line should all be indented ** by 6 spaces. But the left-most 6 spaces have been removed to improve the ** readability. From this point on down, the normal indentation rules are |
︙ | ︙ | |||
4640 4641 4642 4643 4644 4645 4646 | ** the evaluator loop. So we can leave it out when NDEBUG is defined. */ #ifndef NDEBUG /* Sanity checking on the top element of the stack */ if( pTos>=p->aStack ){ sqlite3VdbeMemSanity(pTos, db->enc); } | | < < < | 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 | ** the evaluator loop. So we can leave it out when NDEBUG is defined. */ #ifndef NDEBUG /* Sanity checking on the top element of the stack */ if( pTos>=p->aStack ){ sqlite3VdbeMemSanity(pTos, db->enc); } assert( pc>=-1 && pc<p->nOp ); #ifdef SQLITE_DEBUG /* Code for tracing the vdbe stack. */ if( p->trace && pTos>=p->aStack ){ int i; fprintf(p->trace, "Stack:"); for(i=0; i>-5 && &pTos[i]>=p->aStack; i--){ if( pTos[i].flags & MEM_Null ){ |
︙ | ︙ |
Changes to src/where.c.
︙ | ︙ | |||
12 13 14 15 16 17 18 | ** This module contains C code that generates VDBE code used to process ** the WHERE clause of SQL statements. This module is reponsible for ** generating the code that loops through a table looking for applicable ** rows. Indices are selected and used to speed the search when doing ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". ** | | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | ** This module contains C code that generates VDBE code used to process ** the WHERE clause of SQL statements. This module is reponsible for ** generating the code that loops through a table looking for applicable ** rows. Indices are selected and used to speed the search when doing ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". ** ** $Id: where.c,v 1.155 2005/07/28 20:51:19 drh Exp $ */ #include "sqliteInt.h" /* ** The number of bits in a Bitmask. "BMS" means "BitMask Size". */ #define BMS (sizeof(Bitmask)*8) |
︙ | ︙ | |||
145 146 147 148 149 150 151 | /* ** Bitmasks for the operators that indices are able to exploit. An ** OR-ed combination of these values can be used when searching for ** terms in the where clause. */ #define WO_IN 1 | < < | | 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | /* ** Bitmasks for the operators that indices are able to exploit. An ** OR-ed combination of these values can be used when searching for ** terms in the where clause. */ #define WO_IN 1 #define WO_EQ 2 #define WO_LT (WO_EQ<<(TK_LT-TK_EQ)) #define WO_LE (WO_EQ<<(TK_LE-TK_EQ)) #define WO_GT (WO_EQ<<(TK_GT-TK_EQ)) #define WO_GE (WO_EQ<<(TK_GE-TK_EQ)) /* ** Value for flags returned by bestIndex() |
︙ | ︙ | |||
466 467 468 469 470 471 472 | if( allowedOp(pExpr->op) && (pTerm->prereqRight & prereqLeft)==0 ){ Expr *pLeft = pExpr->pLeft; Expr *pRight = pExpr->pRight; if( pLeft->op==TK_COLUMN ){ pTerm->leftCursor = pLeft->iTable; pTerm->leftColumn = pLeft->iColumn; pTerm->operator = operatorMask(pExpr->op); | < < < < < < < | 464 465 466 467 468 469 470 471 472 473 474 475 476 477 | if( allowedOp(pExpr->op) && (pTerm->prereqRight & prereqLeft)==0 ){ Expr *pLeft = pExpr->pLeft; Expr *pRight = pExpr->pRight; if( pLeft->op==TK_COLUMN ){ pTerm->leftCursor = pLeft->iTable; pTerm->leftColumn = pLeft->iColumn; pTerm->operator = operatorMask(pExpr->op); } if( pRight && pRight->op==TK_COLUMN ){ WhereTerm *pNew; Expr *pDup; if( pTerm->leftCursor>=0 ){ pDup = sqlite3ExprDup(pExpr); pNew = whereClauseInsert(pTerm->pWC, pDup, TERM_VIRTUAL|TERM_DYNAMIC); |
︙ | ︙ | |||
645 646 647 648 649 650 651 | } return logN; } /* ** Find the best index for accessing a particular table. Return a pointer ** to the index, flags that describe how the index should be used, the | | | 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 | } return logN; } /* ** Find the best index for accessing a particular table. Return a pointer ** to the index, flags that describe how the index should be used, the ** number of equality constraints, and the "cost" for this index. ** ** The lowest cost index wins. The cost is an estimate of the amount of ** CPU and disk I/O need to process the request using the selected index. ** Factors that influence cost include: ** ** * The estimated number of rows that will be retrieved. (The ** fewer the better.) |
︙ | ︙ | |||
688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 | TRACE(("bestIndex: tbl=%s notReady=%x\n", pSrc->pTab->zName, notReady)); /* Check for a rowid=EXPR or rowid IN (...) constraints */ pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ|WO_IN, 0); if( pTerm ){ *ppIndex = 0; bestFlags = WHERE_ROWID_EQ; if( pTerm->operator & WO_EQ ){ /* Rowid== is always the best pick. Look no further. Because only ** a single row is generated, output is always in sorted order */ *pFlags = WHERE_ROWID_EQ; *pnEq = 1; if( pOrderBy ) *pFlags |= WHERE_ORDERBY; TRACE(("... best is rowid\n")); return 0.0; | > | | | 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 | TRACE(("bestIndex: tbl=%s notReady=%x\n", pSrc->pTab->zName, notReady)); /* Check for a rowid=EXPR or rowid IN (...) constraints */ pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ|WO_IN, 0); if( pTerm ){ Expr *pExpr; *ppIndex = 0; bestFlags = WHERE_ROWID_EQ; if( pTerm->operator & WO_EQ ){ /* Rowid== is always the best pick. Look no further. Because only ** a single row is generated, output is always in sorted order */ *pFlags = WHERE_ROWID_EQ; *pnEq = 1; if( pOrderBy ) *pFlags |= WHERE_ORDERBY; TRACE(("... best is rowid\n")); return 0.0; }else if( (pExpr = pTerm->pExpr)->pList!=0 ){ /* Rowid IN (LIST): cost is NlogN where N is the number of list ** elements. */ lowestCost = pExpr->pList->nExpr; lowestCost *= estLog(lowestCost); }else{ /* Rowid IN (SELECT): cost is NlogN where N is the number of rows ** in the result of the inner select. We have no way to estimate ** that value so make a wild guess. */ lowestCost = 200.0; } |
︙ | ︙ | |||
773 774 775 776 777 778 779 780 | flags = 0; for(i=0; i<pProbe->nColumn; i++){ int j = pProbe->aiColumn[i]; pTerm = findTerm(pWC, iCur, j, notReady, WO_EQ|WO_IN, pProbe); if( pTerm==0 ) break; flags |= WHERE_COLUMN_EQ; if( pTerm->operator & WO_IN ){ flags |= WHERE_COLUMN_IN; | > | | | | | 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 | flags = 0; for(i=0; i<pProbe->nColumn; i++){ int j = pProbe->aiColumn[i]; pTerm = findTerm(pWC, iCur, j, notReady, WO_EQ|WO_IN, pProbe); if( pTerm==0 ) break; flags |= WHERE_COLUMN_EQ; if( pTerm->operator & WO_IN ){ Expr *pExpr = pTerm->pExpr; flags |= WHERE_COLUMN_IN; if( pExpr->pSelect!=0 ){ inMultiplier *= 100.0; }else if( pExpr->pList!=0 ){ inMultiplier *= pExpr->pList->nExpr + 1.0; } } } cost = pProbe->aiRowEst[i] * inMultiplier * estLog(inMultiplier); nEq = i; TRACE(("...... nEq=%d inMult=%.9g cost=%.9g\n", nEq, inMultiplier, cost)); /* Look for range constraints */ if( nEq<pProbe->nColumn ){ int j = pProbe->aiColumn[nEq]; pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe); if( pTerm ){ flags |= WHERE_COLUMN_RANGE; if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){ flags |= WHERE_TOP_LIMIT; cost *= 0.333; } if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){ flags |= WHERE_BTM_LIMIT; cost *= 0.333; |
︙ | ︙ | |||
847 848 849 850 851 852 853 | } /* If this index has achieved the lowest cost so far, then use it. */ if( cost < lowestCost ){ bestIdx = pProbe; lowestCost = cost; | | < < | 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 | } /* If this index has achieved the lowest cost so far, then use it. */ if( cost < lowestCost ){ bestIdx = pProbe; lowestCost = cost; assert( flags!=0 ); bestFlags = flags; bestNEq = nEq; } } /* Report the best result */ |
︙ | ︙ | |||
959 960 961 962 963 964 965 966 967 968 969 970 971 972 | pLevel->aInLoop = aIn = sqliteRealloc(pLevel->aInLoop, sizeof(pLevel->aInLoop[0])*3*pLevel->nIn); if( aIn ){ aIn += pLevel->nIn*3 - 3; aIn[0] = OP_Next; aIn[1] = iTab; aIn[2] = sqlite3VdbeAddOp(v, OP_Column, iTab, 0); } #endif } disableTerm(pLevel, pTerm); } /* | > > | 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 | pLevel->aInLoop = aIn = sqliteRealloc(pLevel->aInLoop, sizeof(pLevel->aInLoop[0])*3*pLevel->nIn); if( aIn ){ aIn += pLevel->nIn*3 - 3; aIn[0] = OP_Next; aIn[1] = iTab; aIn[2] = sqlite3VdbeAddOp(v, OP_Column, iTab, 0); }else{ pLevel->nIn = 0; } #endif } disableTerm(pLevel, pTerm); } /* |
︙ | ︙ | |||
1366 1367 1368 1369 1370 1371 1372 | /* Case 2: We have an inequality comparison against the ROWID field. */ int testOp = OP_Noop; int start; WhereTerm *pStart, *pEnd; assert( omitTable==0 ); | < | < < < < < | < < < < | 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 | /* Case 2: We have an inequality comparison against the ROWID field. */ int testOp = OP_Noop; int start; WhereTerm *pStart, *pEnd; assert( omitTable==0 ); pStart = findTerm(&wc, iCur, -1, notReady, WO_GT|WO_GE, 0); pEnd = findTerm(&wc, iCur, -1, notReady, WO_LT|WO_LE, 0); if( bRev ){ pTerm = pStart; pStart = pEnd; pEnd = pTerm; } if( pStart ){ Expr *pX; |
︙ | ︙ | |||
1599 1600 1601 1602 1603 1604 1605 | } pLevel->p1 = iIdxCur; pLevel->p2 = start; }else{ /* Case 5: There is no usable index. We must do a complete ** scan of the entire table. */ | < < | < < < < | < | | 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 | } pLevel->p1 = iIdxCur; pLevel->p2 = start; }else{ /* Case 5: There is no usable index. We must do a complete ** scan of the entire table. */ assert( omitTable==0 ); assert( bRev==0 ); pLevel->op = OP_Next; pLevel->p1 = iCur; pLevel->p2 = 1 + sqlite3VdbeAddOp(v, OP_Rewind, iCur, brk); } notReady &= ~getMask(&maskSet, iCur); /* Insert code to test every subexpression that can be completely ** computed using the current set of tables. */ for(pTerm=wc.a, j=wc.nTerm; j>0; j--, pTerm++){ |
︙ | ︙ |
Added test/where2.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | # 2005 July 28 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # This file implements regression tests for SQLite library. The # focus of this file is testing the use of indices in WHERE clauses # based on recent changes to the optimizer. # # $Id: where2.test,v 1.1 2005/07/28 20:51:19 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl # Build some test data # do_test where2-1.0 { execsql { BEGIN; CREATE TABLE t1(w int, x int, y int, z int); } for {set i 1} {$i<=100} {incr i} { set w $i set x [expr {int(log($i)/log(2))}] set y [expr {$i*$i + 2*$i + 1}] set z [expr {$x+$y}] execsql {INSERT INTO t1 VALUES($::w,$::x,$::y,$::z)} } execsql { CREATE UNIQUE INDEX i1w ON t1(w); CREATE INDEX i1xy ON t1(x,y); CREATE INDEX i1zyx ON t1(z,y,x); COMMIT; } } {} # Do an SQL statement. Append the search count to the end of the result. # proc count sql { set ::sqlite_search_count 0 return [concat [execsql $sql] $::sqlite_search_count] } # This procedure executes the SQL. Then it checks to see if the OP_Sort # opcode was executed. If an OP_Sort did occur, then "sort" is appended # to the result. If no OP_Sort happened, then "nosort" is appended. # # This procedure is used to check to make sure sorting is or is not # occurring as expected. # proc cksort {sql} { set ::sqlite_sort_count 0 set data [execsql $sql] if {$::sqlite_sort_count} {set x sort} {set x nosort} lappend data $x return $data } # This procedure executes the SQL. Then it appends to the result the # "sort" or "nosort" keyword (as in the cksort procedure above) then # it appends the ::sqlite_query_plan variable. # proc queryplan {sql} { set ::sqlite_sort_count 0 set data [execsql $sql] if {$::sqlite_sort_count} {set x sort} {set x nosort} lappend data $x return [concat $data $::sqlite_query_plan] } # Prefer a UNIQUE index over another index. # do_test where2-1.1 { queryplan { SELECT * FROM t1 WHERE w=85 AND x=6 AND y=7396 } } {85 6 7396 7402 nosort t1 i1w} # Always prefer a rowid== constraint over any other index. # do_test where2-1.3 { queryplan { SELECT * FROM t1 WHERE w=85 AND x=6 AND y=7396 AND rowid=85 } } {85 6 7396 7402 nosort t1 *} # When constrained by a UNIQUE index, the ORDER BY clause is always ignored. # do_test where2-2.1 { queryplan { SELECT * FROM t1 WHERE w=85 ORDER BY random(5); } } {85 6 7396 7402 nosort t1 i1w} do_test where2-2.2 { queryplan { SELECT * FROM t1 WHERE x=6 AND y=7396 ORDER BY random(5); } } {85 6 7396 7402 sort t1 i1xy} do_test where2-2.3 { queryplan { SELECT * FROM t1 WHERE rowid=85 AND x=6 AND y=7396 ORDER BY random(5); } } {85 6 7396 7402 nosort t1 *} # Efficient handling of forward and reverse table scans. # do_test where2-3.1 { queryplan { SELECT * FROM t1 ORDER BY rowid LIMIT 2 } } {1 0 4 4 2 1 9 10 nosort t1 *} do_test where2-3.2 { queryplan { SELECT * FROM t1 ORDER BY rowid DESC LIMIT 2 } } {100 6 10201 10207 99 6 10000 10006 nosort t1 *} # The IN operator can be used by indices at multiple layers # do_test where2-4.1 { queryplan { SELECT * FROM t1 WHERE z IN (10207,10006) AND y IN (10000,10201) AND x>0 AND x<10 ORDER BY w } } {99 6 10000 10006 100 6 10201 10207 sort t1 i1zyx} do_test where2-4.2 { queryplan { SELECT * FROM t1 WHERE z IN (10207,10006) AND y=10000 AND x>0 AND x<10 ORDER BY w } } {99 6 10000 10006 sort t1 i1zyx} do_test where2-4.3 { queryplan { SELECT * FROM t1 WHERE z=10006 AND y IN (10000,10201) AND x>0 AND x<10 ORDER BY w } } {99 6 10000 10006 sort t1 i1zyx} do_test where2-4.4 { queryplan { SELECT * FROM t1 WHERE z IN (SELECT 10207 UNION SELECT 10006) AND y IN (10000,10201) AND x>0 AND x<10 ORDER BY w } } {99 6 10000 10006 100 6 10201 10207 sort t1 i1zyx} do_test where2-4.5 { queryplan { SELECT * FROM t1 WHERE z IN (SELECT 10207 UNION SELECT 10006) AND y IN (SELECT 10000 UNION SELECT 10201) AND x>0 AND x<10 ORDER BY w } } {99 6 10000 10006 100 6 10201 10207 sort t1 i1zyx} do_test where2-4.6 { queryplan { SELECT * FROM t1 WHERE x IN (1,2,3,4,5,6,7,8) AND y IN (10000,10001,10002,10003,10004,10005) ORDER BY 2 } } {99 6 10000 10006 sort t1 i1xy} # Duplicate entires on the RHS of an IN operator do not cause duplicate # output rows. # do_test where2-4.6 { queryplan { SELECT * FROM t1 WHERE z IN (10207,10006,10006,10207) ORDER BY w } } {99 6 10000 10006 100 6 10201 10207 sort t1 i1zyx} do_test where2-4.7 { queryplan { SELECT * FROM t1 WHERE z IN ( SELECT 10207 UNION ALL SELECT 10006 UNION ALL SELECT 10006 UNION ALL SELECT 10207) ORDER BY w } } {99 6 10000 10006 100 6 10201 10207 sort t1 i1zyx} # The use of an IN operator disables the index as a sorter. # do_test where2-5.1 { queryplan { SELECT * FROM t1 WHERE w=99 ORDER BY w } } {99 6 10000 10006 nosort t1 i1w} do_test where2-5.2 { queryplan { SELECT * FROM t1 WHERE w IN (99) ORDER BY w } } {99 6 10000 10006 sort t1 i1w} integrity_check {where2-99.0} finish_test |