Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | More work on the new optimizer. Fewer tests fail now. (CVS 2565) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
ee3a08e353f563c36e904479393fcb56 |
User & Date: | drh 2005-07-27 20:41:44.000 |
Context
2005-07-28
| ||
16:51 | The new optimizer now passes all regression tests. (CVS 2566) (check-in: a212128433 user: drh tags: trunk) | |
2005-07-27
| ||
20:41 | More work on the new optimizer. Fewer tests fail now. (CVS 2565) (check-in: ee3a08e353 user: drh tags: trunk) | |
2005-07-23
| ||
22:59 | A new optimizer that breaks a lot of tests. But none of them critically, I think. Nevertheless, there is a lot of work ahead to stabilize the code. (CVS 2564) (check-in: 86ce56ccea user: drh tags: trunk) | |
Changes
Changes to src/build.c.
︙ | ︙ | |||
18 19 20 21 22 23 24 | ** CREATE INDEX ** DROP INDEX ** creating ID lists ** BEGIN TRANSACTION ** COMMIT ** ROLLBACK ** | | | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | ** CREATE INDEX ** DROP INDEX ** creating ID lists ** BEGIN TRANSACTION ** COMMIT ** ROLLBACK ** ** $Id: build.c,v 1.337 2005/07/27 20:41:44 drh Exp $ */ #include "sqliteInt.h" #include <ctype.h> /* ** This routine is called when a new SQL statement is beginning to ** be parsed. Initialize the pParse structure as needed. |
︙ | ︙ | |||
2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 | sqliteFree(zName); return; } /* ** Fill the Index.aiRowEst[] array with default information - information ** to be used when we have no ANALYZE command to run. */ void sqlite3DefaultRowEst(Index *pIdx){ int i; | > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > | > > > | < < | | 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 | sqliteFree(zName); return; } /* ** Fill the Index.aiRowEst[] array with default information - information ** to be used when we have no ANALYZE command to run. ** ** aiRowEst[0] is suppose to contain the number of elements in the index. ** Since we do not know, guess 1 million. aiRowEst[1] is an estimate of the ** number of rows in the table that match any particular value of the ** first column of the index. aiRowEst[2] is an estimate of the number ** of rows that match any particular combiniation of the first 2 columns ** of the index. And so forth. It must always be the case that * ** aiRowEst[N]<=aiRowEst[N-1] ** aiRowEst[N]>=1 ** ** Apart from that, we have little to go on besides intuition as to ** how aiRowEst[] should be initialized. The numbers generated here ** are based on typical values found in actual indices. */ void sqlite3DefaultRowEst(Index *pIdx){ int *a = pIdx->aiRowEst; int i; assert( a!=0 ); a[0] = 1000000; switch( pIdx->nColumn ){ case 1: { a[1] = 20; break; } case 2: { a[1] = 350; a[2] = 20; break; } default: { a[1] = 1250; a[2] = 350; a[3] = 20; for(i=pIdx->nColumn; i>=4; i--){ a[i] = 10; } } } if( pIdx->onError!=OE_None ){ a[pIdx->nColumn] = 1; } } /* ** This routine will drop an existing named index. This routine ** implements the DROP INDEX statement. */ |
︙ | ︙ |
Changes to src/where.c.
︙ | ︙ | |||
12 13 14 15 16 17 18 | ** This module contains C code that generates VDBE code used to process ** the WHERE clause of SQL statements. This module is reponsible for ** generating the code that loops through a table looking for applicable ** rows. Indices are selected and used to speed the search when doing ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". ** | | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | ** This module contains C code that generates VDBE code used to process ** the WHERE clause of SQL statements. This module is reponsible for ** generating the code that loops through a table looking for applicable ** rows. Indices are selected and used to speed the search when doing ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". ** ** $Id: where.c,v 1.153 2005/07/27 20:41:44 drh Exp $ */ #include "sqliteInt.h" /* ** The number of bits in a Bitmask. "BMS" means "BitMask Size". */ #define BMS (sizeof(Bitmask)*8) |
︙ | ︙ | |||
539 540 541 542 543 544 545 546 547 548 549 550 551 552 | int nTerm; /* Number of ORDER BY terms */ struct ExprList_item *pTerm; /* A term of the ORDER BY clause */ sqlite3 *db = pParse->db; assert( pOrderBy!=0 ); nTerm = pOrderBy->nExpr; assert( nTerm>0 ); /* Match terms of the ORDER BY clause against columns of ** the index. */ for(i=j=0, pTerm=pOrderBy->a; j<nTerm && i<pIdx->nColumn; i++){ Expr *pExpr; /* The expression of the ORDER BY pTerm */ CollSeq *pColl; /* The collating sequence of pExpr */ | > > > > > > > > | 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 | int nTerm; /* Number of ORDER BY terms */ struct ExprList_item *pTerm; /* A term of the ORDER BY clause */ sqlite3 *db = pParse->db; assert( pOrderBy!=0 ); nTerm = pOrderBy->nExpr; assert( nTerm>0 ); /* A UNIQUE index that is fully specified is always a sorting ** index. */ if( pIdx->onError!=OE_None && nEqCol==pIdx->nColumn ){ *pbRev = 0; return 1; } /* Match terms of the ORDER BY clause against columns of ** the index. */ for(i=j=0, pTerm=pOrderBy->a; j<nTerm && i<pIdx->nColumn; i++){ Expr *pExpr; /* The expression of the ORDER BY pTerm */ CollSeq *pColl; /* The collating sequence of pExpr */ |
︙ | ︙ | |||
614 615 616 617 618 619 620 621 622 623 624 625 626 627 | p = pOrderBy->a[0].pExpr; if( p->op==TK_COLUMN && p->iTable==base && p->iColumn==-1 ){ *pbRev = pOrderBy->a[0].sortOrder; return 1; } return 0; } /* ** Find the best index for accessing a particular table. Return a pointer ** to the index, flags that describe how the index should be used, the ** number of equality constraints and the "cost" for this index. ** ** The lowest cost index wins. The cost is an estimate of the amount of | > > > > > > > > > > > > > > > > > > > | 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 | p = pOrderBy->a[0].pExpr; if( p->op==TK_COLUMN && p->iTable==base && p->iColumn==-1 ){ *pbRev = pOrderBy->a[0].sortOrder; return 1; } return 0; } /* ** Prepare a crude estimate of the logorithm of the input value. ** The results need not be exact. This is only used for estimating ** the total cost of performing operatings with O(logN) or O(NlogN) ** complexity. Because N is just a guess, it is no great tragedy if ** logN is a little off. ** ** We can assume N>=1.0; */ static double estLog(double N){ double logN = 1.0; double x = 10.0; while( N>x ){ logN = logN+1.0; x *= 10; } return logN; } /* ** Find the best index for accessing a particular table. Return a pointer ** to the index, flags that describe how the index should be used, the ** number of equality constraints and the "cost" for this index. ** ** The lowest cost index wins. The cost is an estimate of the amount of |
︙ | ︙ | |||
664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 | /* Check for a rowid=EXPR or rowid IN (...) constraints */ pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ|WO_IN, 0); if( pTerm ){ *ppIndex = 0; bestFlags = WHERE_ROWID_EQ; if( pTerm->operator & WO_EQ ){ *pFlags = WHERE_ROWID_EQ; *pnEq = 1; if( pOrderBy ) *pFlags |= WHERE_ORDERBY; TRACE(("... best is rowid\n")); return 0.0; }else if( pTerm->operator & WO_LIST ){ lowestCost = pTerm->pExpr->pList->nExpr; }else{ | > > > > > > > > | | > | | > > > > < | | | > > > > | | < | | | > > | > | | 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 | /* Check for a rowid=EXPR or rowid IN (...) constraints */ pTerm = findTerm(pWC, iCur, -1, notReady, WO_EQ|WO_IN, 0); if( pTerm ){ *ppIndex = 0; bestFlags = WHERE_ROWID_EQ; if( pTerm->operator & WO_EQ ){ /* Rowid== is always the best pick. Look no further. Because only ** a single row is generated, output is always in sorted order */ *pFlags = WHERE_ROWID_EQ; *pnEq = 1; if( pOrderBy ) *pFlags |= WHERE_ORDERBY; TRACE(("... best is rowid\n")); return 0.0; }else if( pTerm->operator & WO_LIST ){ /* Rowid IN (LIST): cost is NlogN where N is the number of list ** elements. */ lowestCost = pTerm->pExpr->pList->nExpr; lowestCost *= estLog(lowestCost); }else{ /* Rowid IN (SELECT): cost is NlogN where N is the number of rows ** in the result of the inner select. We have no way to estimate ** that value so make a wild guess. */ lowestCost = 200.0; } TRACE(("... rowid IN cost: %g\n", lowestCost)); } /* Estimate the cost of a table scan. If we do not know how many ** entries are in the table, use 1 million as a guess. */ pProbe = pSrc->pTab->pIndex; cost = pProbe ? pProbe->aiRowEst[0] : 1000000.0; TRACE(("... table scan base cost: %g\n", cost)); flags = WHERE_ROWID_RANGE; /* Check for constraints on a range of rowids in a table scan. */ pTerm = findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE|WO_GT|WO_GE, 0); if( pTerm ){ if( findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE, 0) ){ flags |= WHERE_TOP_LIMIT; cost *= 0.333; /* Guess that rowid<EXPR eliminates two-thirds or rows */ } if( findTerm(pWC, iCur, -1, notReady, WO_GT|WO_GE, 0) ){ flags |= WHERE_BTM_LIMIT; cost *= 0.333; /* Guess that rowid>EXPR eliminates two-thirds of rows */ } TRACE(("... rowid range reduces cost to %g\n", cost)); }else{ flags = 0; } /* If the table scan does not satisfy the ORDER BY clause, increase ** the cost by NlogN to cover the expense of sorting. */ if( pOrderBy ){ if( sortableByRowid(iCur, pOrderBy, &rev) ){ flags |= WHERE_ORDERBY|WHERE_ROWID_RANGE; if( rev ){ flags |= WHERE_REVERSE; } }else{ cost += cost*estLog(cost); TRACE(("... sorting increases cost to %g\n", cost)); } } if( cost<lowestCost ){ lowestCost = cost; bestFlags = flags; } /* Look at each index. */ for(; pProbe; pProbe=pProbe->pNext){ int i; /* Loop counter */ double inMultiplier = 1.0; TRACE(("... index %s:\n", pProbe->zName)); /* Count the number of columns in the index that are satisfied ** by x=EXPR constraints or x IN (...) constraints. */ flags = 0; |
︙ | ︙ | |||
736 737 738 739 740 741 742 | if( pTerm->operator & WO_SELECT ){ inMultiplier *= 100.0; }else if( pTerm->operator & WO_LIST ){ inMultiplier *= pTerm->pExpr->pList->nExpr + 1.0; } } } | | | | < | > | | | | | < | | | > > | > | 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 | if( pTerm->operator & WO_SELECT ){ inMultiplier *= 100.0; }else if( pTerm->operator & WO_LIST ){ inMultiplier *= pTerm->pExpr->pList->nExpr + 1.0; } } } cost = pProbe->aiRowEst[i] * inMultiplier * estLog(inMultiplier); nEq = i; TRACE(("...... nEq=%d inMult=%g cost=%g\n", nEq, inMultiplier, cost)); /* Look for range constraints */ if( nEq<pProbe->nColumn ){ int j = pProbe->aiColumn[nEq]; pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe); if( pTerm ){ flags = WHERE_COLUMN_RANGE; if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){ flags |= WHERE_TOP_LIMIT; cost *= 0.333; } if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){ flags |= WHERE_BTM_LIMIT; cost *= 0.333; } TRACE(("...... range reduces cost to %g\n", cost)); } } /* Add the additional cost of sorting if that is a factor. */ if( pOrderBy ){ if( (flags & WHERE_COLUMN_IN)==0 && isSortingIndex(pParse, pProbe, pSrc->pTab, iCur, pOrderBy, nEq, &rev) ){ if( flags==0 ){ flags = WHERE_COLUMN_RANGE; } flags |= WHERE_ORDERBY; if( rev ){ flags |= WHERE_REVERSE; } }else{ cost += cost*estLog(cost); TRACE(("...... orderby reduces cost to %g\n", cost)); } } /* Check to see if we can get away with using just the index without ** ever reading the table. If that is the case, then halve the ** cost of this index. */ if( flags && pSrc->colUsed < (((Bitmask)1)<<(BMS-1)) ){ |
︙ | ︙ |