SQLite: Check-in [4b02703dec]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

Comment:	Test cases and tuning of the new optimizer code. (CVS 2567)
Downloads:	Tarball \| ZIP archive
Timelines:	family \| ancestors \| descendants \| both \| trunk
Files:	files \| file ages \| folders
SHA1:	4b02703dec71aa78e5f8d8cab5b950966a4c6abc
User & Date:	drh 2005-07-28 20:51:19.000

Context

2005-07-28
23:12		The BETWEEN operator in a WHERE clause is now able to use indices. (CVS 2568) (check-in: cdf8c9584b user: drh tags: trunk)
20:51		Test cases and tuning of the new optimizer code. (CVS 2567) (check-in: 4b02703dec user: drh tags: trunk)
16:51		The new optimizer now passes all regression tests. (CVS 2566) (check-in: a212128433 user: drh tags: trunk)

Changes

Changes to src/vdbe.c.

Changes to src/where.c.

Added test/where2.test.

︙			︙
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53	Various scripts scan this source file in order to generate HTML documentation, headers files, or other derived files. The formatting of the code in this file is, therefore, important. See other comments in this file for details. If in doubt, do not deviate from existing commenting and indentation practices when changing or adding code. $Id: vdbe.c,v 1.477 2005/07/2~~3 03~~:1~~8:40~~ drh Exp $ / #include "sqliteInt.h" #include "os.h" #include <ctype.h> #include "vdbeInt.h" /	\|	39 40 41 42 43 44 45 46 47 48 49 50 51 52 53	Various scripts scan this source file in order to generate HTML documentation, headers files, or other derived files. The formatting of the code in this file is, therefore, important. See other comments in this file for details. If in doubt, do not deviate from existing commenting and indentation practices when changing or adding code. $Id: vdbe.c,v 1.478 2005/07/28 20:51:19 drh Exp $ / #include "sqliteInt.h" #include "os.h" #include <ctype.h> #include "vdbeInt.h" /
︙			︙
450 451 452 453 454 455 456 ~~457~~ 458 459 460 461 462 463 464	Vdbe p / The VDBE / ){ int pc; / The program counter / Op pOp; /* Current operation / int rc = SQLITE_OK; / Value to return / sqlite3 db = p->db; /* The database / Mem pTos; /* Top entry in the operand stack / ~~char zBuf[100]; / Space to sprintf() an integer /~~ #ifdef VDBE_PROFILE unsigned long long start; / CPU clock count at start of opcode / int origPc; / Program counter at start of opcode / #endif #ifndef SQLITE_OMIT_PROGRESS_CALLBACK int nProgressOps = 0; / Opcodes executed since progress callback. */ #endif	<	450 451 452 453 454 455 456 457 458 459 460 461 462 463	Vdbe p / The VDBE / ){ int pc; / The program counter / Op pOp; /* Current operation / int rc = SQLITE_OK; / Value to return / sqlite3 db = p->db; /* The database / Mem pTos; /* Top entry in the operand stack / #ifdef VDBE_PROFILE unsigned long long start; / CPU clock count at start of opcode / int origPc; / Program counter at start of opcode / #endif #ifndef SQLITE_OMIT_PROGRESS_CALLBACK int nProgressOps = 0; / Opcodes executed since progress callback. */ #endif
︙			︙
2535 2536 2537 2538 2539 2540 2541 ~~2542 2543 2544 2545 2546~~ 2547 2548 2549 2550 2551 2552 2553	wrFlag = pOp->opcode==OP_OpenWrite; if( p2<=0 ){ assert( pTos>=p->aStack ); Integerify(pTos); p2 = pTos->i; assert( (pTos->flags & MEM_Dyn)==0 ); pTos--; ~~if( p2~~<2 ){~~ ~~sqlite3SetString(&p->zErrMsg, "root page number less than 2", (char)0);~~ ~~rc = SQLITE_INTERNAL;~~ ~~break;~~ }~~ } assert( i>=0 ); pCur = allocateCursor(p, i); if( pCur==0 ) goto no_mem; pCur->nullRow = 1; if( pX==0 ) break; / We always provide a key comparison function. If the table being	\| < < < <	2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548	wrFlag = pOp->opcode==OP_OpenWrite; if( p2<=0 ){ assert( pTos>=p->aStack ); Integerify(pTos); p2 = pTos->i; assert( (pTos->flags & MEM_Dyn)==0 ); pTos--; assert( p2>=2 ); } assert( i>=0 ); pCur = allocateCursor(p, i); if( pCur==0 ) goto no_mem; pCur->nullRow = 1; if( pX==0 ) break; /* We always provide a key comparison function. If the table being
︙			︙
4601 4602 4603 4604 4605 4606 4607 ~~4608 4609 4610~~ 4611 4612 4613 4614 4615 4616 4617	break; } /* An other opcode is illegal... / default: { ~~s~~qli~~t~~e3_snprintf(sizeof(zBuf),zBuf,"%d",pOp->opcode~~); ~~sqlite3SetString(&p->zErrMsg, "unknown opcode ", zBuf, (char)0);~~ ~~rc = SQLITE_INTERNAL;~~~~ break; } /*************************************************************************** The cases of the switch statement above this line should all be indented by 6 spaces. But the left-most 6 spaces have been removed to improve the readability. From this point on down, the normal indentation rules are	\| < <	4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610	break; } /* An other opcode is illegal... / default: { assert( 0 ); break; } /************************************************************************** The cases of the switch statement above this line should all be indented by 6 spaces. But the left-most 6 spaces have been removed to improve the readability. From this point on down, the normal indentation rules are
︙			︙
4640 4641 4642 4643 4644 4645 4646 ~~4647 4648 4649 4650~~ 4651 4652 4653 4654 4655 4656 4657	** the evaluator loop. So we can leave it out when NDEBUG is defined. / #ifndef NDEBUG / Sanity checking on the top element of the stack / if( pTos>=p->aStack ){ sqlite3VdbeMemSanity(pTos, db->enc); } ~~if( ~~pc<-1 \|\|~~ pc>=p->nOp ){ ~~sqlite3SetString(&p->zErrMsg, "jump destination out of range", (char)0);~~ ~~rc = SQLITE_INTERNAL;~~ }~~ #ifdef SQLITE_DEBUG /* Code for tracing the vdbe stack. */ if( p->trace && pTos>=p->aStack ){ int i; fprintf(p->trace, "Stack:"); for(i=0; i>-5 && &pTos[i]>=p->aStack; i--){ if( pTos[i].flags & MEM_Null ){	\| < < <	4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647	** the evaluator loop. So we can leave it out when NDEBUG is defined. / #ifndef NDEBUG / Sanity checking on the top element of the stack / if( pTos>=p->aStack ){ sqlite3VdbeMemSanity(pTos, db->enc); } assert( pc>=-1 && pc<p->nOp ); #ifdef SQLITE_DEBUG / Code for tracing the vdbe stack. */ if( p->trace && pTos>=p->aStack ){ int i; fprintf(p->trace, "Stack:"); for(i=0; i>-5 && &pTos[i]>=p->aStack; i--){ if( pTos[i].flags & MEM_Null ){
︙			︙

︙			︙
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26	This module contains C code that generates VDBE code used to process the WHERE clause of SQL statements. This module is reponsible for generating the code that loops through a table looking for applicable rows. Indices are selected and used to speed the search when doing so is applicable. Because this module is responsible for selecting indices, you might also think of this module as the "query optimizer". $Id: where.c,v 1.154 2005/07/28 16:51:51 drh Exp $ */ #include "sqliteInt.h" /* ** The number of bits in a Bitmask. "BMS" means "BitMask Size". */ #define BMS (sizeof(Bitmask)*8)	\|	12 13 14 15 16 17 18 19 20 21 22 23 24 25 26	This module contains C code that generates VDBE code used to process the WHERE clause of SQL statements. This module is reponsible for generating the code that loops through a table looking for applicable rows. Indices are selected and used to speed the search when doing so is applicable. Because this module is responsible for selecting indices, you might also think of this module as the "query optimizer". $Id: where.c,v 1.155 2005/07/28 20:51:19 drh Exp $ */ #include "sqliteInt.h" /* ** The number of bits in a Bitmask. "BMS" means "BitMask Size". */ #define BMS (sizeof(Bitmask)*8)
︙			︙
145 146 147 148 149 150 151 ~~152 153 154~~ 155 156 157 158 159 160 161	/* Bitmasks for the operators that indices are able to exploit. An OR-ed combination of these values can be used when searching for ** terms in the where clause. / #define WO_IN 1 ~~~~#define WO_LIST 2~~ ~~#define WO_SELECT 4~~ #define WO_EQ 8~~ #define WO_LT (WO_EQ<<(TK_LT-TK_EQ)) #define WO_LE (WO_EQ<<(TK_LE-TK_EQ)) #define WO_GT (WO_EQ<<(TK_GT-TK_EQ)) #define WO_GE (WO_EQ<<(TK_GE-TK_EQ)) / ** Value for flags returned by bestIndex()	< < \|	145 146 147 148 149 150 151 152 153 154 155 156 157 158 159	/* Bitmasks for the operators that indices are able to exploit. An OR-ed combination of these values can be used when searching for ** terms in the where clause. / #define WO_IN 1 #define WO_EQ 2 #define WO_LT (WO_EQ<<(TK_LT-TK_EQ)) #define WO_LE (WO_EQ<<(TK_LE-TK_EQ)) #define WO_GT (WO_EQ<<(TK_GT-TK_EQ)) #define WO_GE (WO_EQ<<(TK_GE-TK_EQ)) / ** Value for flags returned by bestIndex()
︙			︙
466 467 468 469 470 471 472 ~~473 474 475 476 477 478 479~~ 480 481 482 483 484 485 486	if( allowedOp(pExpr->op) && (pTerm->prereqRight & prereqLeft)==0 ){ Expr pLeft = pExpr->pLeft; Expr pRight = pExpr->pRight; if( pLeft->op==TK_COLUMN ){ pTerm->leftCursor = pLeft->iTable; pTerm->leftColumn = pLeft->iColumn; pTerm->operator = operatorMask(pExpr->op); ~~if( pTerm->operator==WO_IN ){~~ ~~if( pExpr->pSelect ){~~ ~~pTerm->operator \|= WO_SELECT;~~ ~~}else if( pExpr->pList ){~~ ~~pTerm->operator \|= WO_LIST;~~ } } } if( pRight && pRight->op==TK_COLUMN ){ WhereTerm pNew; Expr pDup; if( pTerm->leftCursor>=0 ){ pDup = sqlite3ExprDup(pExpr); pNew = whereClauseInsert(pTerm->pWC, pDup, TERM_VIRTUAL\|TERM_DYNAMIC);	< < < < < < <	464 465 466 467 468 469 470 471 472 473 474 475 476 477	if( allowedOp(pExpr->op) && (pTerm->prereqRight & prereqLeft)==0 ){ Expr pLeft = pExpr->pLeft; Expr pRight = pExpr->pRight; if( pLeft->op==TK_COLUMN ){ pTerm->leftCursor = pLeft->iTable; pTerm->leftColumn = pLeft->iColumn; pTerm->operator = operatorMask(pExpr->op); } if( pRight && pRight->op==TK_COLUMN ){ WhereTerm pNew; Expr pDup; if( pTerm->leftCursor>=0 ){ pDup = sqlite3ExprDup(pExpr); pNew = whereClauseInsert(pTerm->pWC, pDup, TERM_VIRTUAL\|TERM_DYNAMIC);
︙			︙
645 646 647 648 649 650 651 ~~652~~ 653 654 655 656 657 658 659	} return logN; } /* Find the best index for accessing a particular table. Return a pointer to the index, flags that describe how the index should be used, the number of equality constraints and the "cost" for this index. The lowest cost index wins. The cost is an estimate of the amount of CPU and disk I/O need to process the request using the selected index. Factors that influence cost include: ** * The estimated number of rows that will be retrieved. (The ** fewer the better.)	\|	636 637 638 639 640 641 642 643 644 645 646 647 648 649 650	} return logN; } /* Find the best index for accessing a particular table. Return a pointer to the index, flags that describe how the index should be used, the number of equality constraints, and the "cost" for this index. The lowest cost index wins. The cost is an estimate of the amount of CPU and disk I/O need to process the request using the selected index. Factors that influence cost include: ** * The estimated number of rows that will be retrieved. (The ** fewer the better.)
︙			︙
688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 ~~705~~ 706 707 ~~708~~ 709 710 711 712 713 714 715	TRACE(("bestIndex: tbl=%s notReady=%x\n", pSrc->pTab->zName, notReady)); /* Check for a rowid=EXPR or rowid IN (...) constraints / pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ\|WO_IN, 0); if( pTerm ){ ppIndex = 0; bestFlags = WHERE_ROWID_EQ; if( pTerm->operator & WO_EQ ){ /* Rowid== is always the best pick. Look no further. Because only ** a single row is generated, output is always in sorted order / pFlags = WHERE_ROWID_EQ; pnEq = 1; if( pOrderBy ) pFlags \|= WHERE_ORDERBY; TRACE(("... best is rowid\n")); return 0.0; ~~}else if( pTerm->operat~~or & WO_LIST~~ ){~~ /* Rowid IN (LIST): cost is NlogN where N is the number of list ** elements. / ~~lowestCost = ~~pTerm->~~pExpr->pList->nExpr;~~ lowestCost = estLog(lowestCost); }else{ /* Rowid IN (SELECT): cost is NlogN where N is the number of rows in the result of the inner select. We have no way to estimate that value so make a wild guess. */ lowestCost = 200.0; }	> \| \|	679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707	TRACE(("bestIndex: tbl=%s notReady=%x\n", pSrc->pTab->zName, notReady)); /* Check for a rowid=EXPR or rowid IN (...) constraints / pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ\|WO_IN, 0); if( pTerm ){ Expr pExpr; ppIndex = 0; bestFlags = WHERE_ROWID_EQ; if( pTerm->operator & WO_EQ ){ / Rowid== is always the best pick. Look no further. Because only ** a single row is generated, output is always in sorted order / pFlags = WHERE_ROWID_EQ; pnEq = 1; if( pOrderBy ) pFlags \|= WHERE_ORDERBY; TRACE(("... best is rowid\n")); return 0.0; }else if( (pExpr = pTerm->pExpr)->pList!=0 ){ /* Rowid IN (LIST): cost is NlogN where N is the number of list ** elements. 
*/ lowestCost = pExpr->pList->nExpr; lowestCost = estLog(lowestCost); }else{ /* Rowid IN (SELECT): cost is NlogN where N is the number of rows in the result of the inner select. We have no way to estimate that value so make a wild guess. */ lowestCost = 200.0; }
︙			︙
773 774 775 776 777 778 779 780 ~~781~~ 782 ~~783 784~~ 785 786 787 788 789 790 791 792 793 794 795 796 797 ~~798~~ 799 800 801 802 803 804 805	flags = 0; for(i=0; i<pProbe->nColumn; i++){ int j = pProbe->aiColumn[i]; pTerm = findTerm(pWC, iCur, j, notReady, WO_EQ\|WO_IN, pProbe); if( pTerm==0 ) break; flags \|= WHERE_COLUMN_EQ; if( pTerm->operator & WO_IN ){ flags \|= WHERE_COLUMN_IN; ~~if( p~~Term~~->operat~~or & WO_SELECT~~ ){~~ inMultiplier = 100.0; ~~}else if( p~~Term~~->op~~era~~t~~or & WO_LIST~~ ){ inMultiplier = ~~pTerm->~~pExpr->pList->nExpr + 1.0;~~ } } } cost = pProbe->aiRowEst[i] * inMultiplier * estLog(inMultiplier); nEq = i; TRACE(("...... nEq=%d inMult=%.9g cost=%.9g\n", nEq, inMultiplier, cost)); /* Look for range constraints */ if( nEq<pProbe->nColumn ){ int j = pProbe->aiColumn[nEq]; pTerm = findTerm(pWC, iCur, j, notReady, WO_LT\|WO_LE\|WO_GT\|WO_GE, pProbe); if( pTerm ){ ~~flags = WHERE_COLUMN_RANGE;~~ if( findTerm(pWC, iCur, j, notReady, WO_LT\|WO_LE, pProbe) ){ flags \|= WHERE_TOP_LIMIT; cost *= 0.333; } if( findTerm(pWC, iCur, j, notReady, WO_GT\|WO_GE, pProbe) ){ flags \|= WHERE_BTM_LIMIT; cost *= 0.333;	> \| \| \| \|	765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798	flags = 0; for(i=0; i<pProbe->nColumn; i++){ int j = pProbe->aiColumn[i]; pTerm = findTerm(pWC, iCur, j, notReady, WO_EQ\|WO_IN, pProbe); if( pTerm==0 ) break; flags \|= WHERE_COLUMN_EQ; if( pTerm->operator & WO_IN ){ Expr pExpr = pTerm->pExpr; flags \|= WHERE_COLUMN_IN; if( pExpr->pSelect!=0 ){ inMultiplier = 100.0; }else if( pExpr->pList!=0 ){ inMultiplier = pExpr->pList->nExpr + 1.0; } } } cost = pProbe->aiRowEst[i] * inMultiplier * estLog(inMultiplier); nEq = i; TRACE(("...... 
nEq=%d inMult=%.9g cost=%.9g\n", nEq, inMultiplier, cost)); /* Look for range constraints */ if( nEq<pProbe->nColumn ){ int j = pProbe->aiColumn[nEq]; pTerm = findTerm(pWC, iCur, j, notReady, WO_LT\|WO_LE\|WO_GT\|WO_GE, pProbe); if( pTerm ){ flags \|= WHERE_COLUMN_RANGE; if( findTerm(pWC, iCur, j, notReady, WO_LT\|WO_LE, pProbe) ){ flags \|= WHERE_TOP_LIMIT; cost *= 0.333; } if( findTerm(pWC, iCur, j, notReady, WO_GT\|WO_GE, pProbe) ){ flags \|= WHERE_BTM_LIMIT; cost *= 0.333;
︙			︙
847 848 849 850 851 852 853 ~~854 855 856~~ 857 858 859 860 861 862 863	} /* If this index has achieved the lowest cost so far, then use it. / if( cost < lowestCost ){ bestIdx = pProbe; lowestCost = cost; ~~if( flags==0 ){ ~~flags = WHERE_COLUMN_RANGE;~~ }~~ bestFlags = flags; bestNEq = nEq; } } / Report the best result */	\| < <	840 841 842 843 844 845 846 847 848 849 850 851 852 853 854	} /* If this index has achieved the lowest cost so far, then use it. / if( cost < lowestCost ){ bestIdx = pProbe; lowestCost = cost; assert( flags!=0 ); bestFlags = flags; bestNEq = nEq; } } / Report the best result */
︙			︙
959 960 961 962 963 964 965 966 967 968 969 970 971 972	pLevel->aInLoop = aIn = sqliteRealloc(pLevel->aInLoop, sizeof(pLevel->aInLoop[0])*3*pLevel->nIn); if( aIn ){ aIn += pLevel->nIn*3 - 3; aIn[0] = OP_Next; aIn[1] = iTab; aIn[2] = sqlite3VdbeAddOp(v, OP_Column, iTab, 0); } #endif } disableTerm(pLevel, pTerm); } /*	> >	950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965	pLevel->aInLoop = aIn = sqliteRealloc(pLevel->aInLoop, sizeof(pLevel->aInLoop[0])*3*pLevel->nIn); if( aIn ){ aIn += pLevel->nIn*3 - 3; aIn[0] = OP_Next; aIn[1] = iTab; aIn[2] = sqlite3VdbeAddOp(v, OP_Column, iTab, 0); }else{ pLevel->nIn = 0; } #endif } disableTerm(pLevel, pTerm); } /*
︙			︙
1366 1367 1368 1369 1370 1371 1372 ~~1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384~~ 1385 1386 1387 1388 1389 1390 1391	/* Case 2: We have an inequality comparison against the ROWID field. / int testOp = OP_Noop; int start; WhereTerm pStart, pEnd; assert( omitTable==0 ); ~~if( pLevel->flags & WHERE_BTM_LIMIT ){~~ pStart = findTerm(&wc, iCur, -1, notReady, WO_GT\|WO_GE, 0); ~~assert( pStart!=0 );~~ ~~}else{~~ ~~pStart = 0;~~ } ~~if( pLevel->flags & WHERE_TOP_LIMIT ){~~ pEnd = findTerm(&wc, iCur, -1, notReady, WO_LT\|WO_LE, 0); ~~assert( pEnd!=0 );~~ ~~}else{~~ ~~pEnd = 0;~~ } if( bRev ){ pTerm = pStart; pStart = pEnd; pEnd = pTerm; } if( pStart ){ Expr pX;	< \| < < < < < \| < < < <	1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374	/* Case 2: We have an inequality comparison against the ROWID field. / int testOp = OP_Noop; int start; WhereTerm pStart, pEnd; assert( omitTable==0 ); pStart = findTerm(&wc, iCur, -1, notReady, WO_GT\|WO_GE, 0); pEnd = findTerm(&wc, iCur, -1, notReady, WO_LT\|WO_LE, 0); if( bRev ){ pTerm = pStart; pStart = pEnd; pEnd = pTerm; } if( pStart ){ Expr pX;
︙			︙
1599 1600 1601 1602 1603 1604 1605 ~~1606 1607~~ 1608 ~~1609 1610 1611 1612 1613 1614 1615~~ 1616 ~~1617~~ 1618 1619 1620 1621 1622 1623 1624	} pLevel->p1 = iIdxCur; pLevel->p2 = start; }else{ /* Case 5: There is no usable index. We must do a complete ** scan of the entire table. / ~~int opRewind;~~ assert( omitTable==0 ); ~~if( bRev ){ ~~opRewind = OP_Last;~~ ~~pLevel->op = OP_Prev;~~ ~~}else{~~ ~~opRewind = OP_Rewind;~~ pLevel->op = OP_Next; }~~ pLevel->p1 = iCur; ~~pLevel->p2 = 1 + sqlite3VdbeAddOp(v, opRewind, iCur, brk);~~ } notReady &= ~getMask(&maskSet, iCur); / Insert code to test every subexpression that can be completely ** computed using the current set of tables. */ for(pTerm=wc.a, j=wc.nTerm; j>0; j--, pTerm++){	< < \| < < < < \| < \|	1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600	} pLevel->p1 = iIdxCur; pLevel->p2 = start; }else{ /* Case 5: There is no usable index. We must do a complete ** scan of the entire table. / assert( omitTable==0 ); assert( bRev==0 ); pLevel->op = OP_Next; pLevel->p1 = iCur; pLevel->p2 = 1 + sqlite3VdbeAddOp(v, OP_Rewind, iCur, brk); } notReady &= ~getMask(&maskSet, iCur); / Insert code to test every subexpression that can be completely ** computed using the current set of tables. */ for(pTerm=wc.a, j=wc.nTerm; j>0; j--, pTerm++){
︙			︙