Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Improvements to cost estimation for evaluating the IN operator. Ticket #3757. (CVS 6403) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
0c438e813c411e8f9e92d6c7405fccb7 |
User & Date: | drh 2009-03-29 00:13:03.000 |
Context
2009-03-29
| ||
00:15 | Turn off the debugging macros in where.c - left on by mistake in the previous check-in. (CVS 6404) (check-in: b601a57582 user: drh tags: trunk) | |
00:13 | Improvements to cost estimation for evaluating the IN operator. Ticket #3757. (CVS 6403) (check-in: 0c438e813c user: drh tags: trunk) | |
2009-03-28
| ||
23:47 | Previous commit ((6401)) did not quite fix the problem. This should work better. (CVS 6402) (check-in: 2e7d3cc9f0 user: drh tags: trunk) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
12 13 14 15 16 17 18 | ** This module contains C code that generates VDBE code used to process ** the WHERE clause of SQL statements. This module is responsible for ** generating the code that loops through a table looking for applicable ** rows. Indices are selected and used to speed the search when doing ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". ** | | | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | ** This module contains C code that generates VDBE code used to process ** the WHERE clause of SQL statements. This module is responsible for ** generating the code that loops through a table looking for applicable ** rows. Indices are selected and used to speed the search when doing ** so is applicable. Because this module is responsible for selecting ** indices, you might also think of this module as the "query optimizer". ** ** $Id: where.c,v 1.378 2009/03/29 00:13:03 drh Exp $ */ #include "sqliteInt.h" /* ** Trace output macros */ #if defined(SQLITE_TEST) || defined(SQLITE_DEBUG) int sqlite3WhereTrace = 0; #endif #if 1 # define WHERETRACE(X) if(sqlite3WhereTrace) sqlite3DebugPrintf X #else # define WHERETRACE(X) #endif /* Forward reference */ |
︙ | ︙ | |||
1922 1923 1924 1925 1926 1927 1928 | /* Look at each index. */ if( pSrc->pIndex ){ pProbe = pSrc->pIndex; } for(; pProbe; pProbe=(pSrc->pIndex ? 0 : pProbe->pNext)){ | | > | > > > > > > > > > > > | > > | > | > | > | 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 | /* Look at each index. */ if( pSrc->pIndex ){ pProbe = pSrc->pIndex; } for(; pProbe; pProbe=(pSrc->pIndex ? 0 : pProbe->pNext)){ double inMultiplier = 1; /* Number of equality look-ups needed */ int inMultIsEst = 0; /* True if inMultiplier is an estimate */ WHERETRACE(("... index %s:\n", pProbe->zName)); /* Count the number of columns in the index that are satisfied ** by x=EXPR constraints or x IN (...) constraints. For a term ** of the form x=EXPR we only have to do a single binary search. ** But for x IN (...) we have to do a number of binary searched ** equal to the number of entries on the RHS of the IN operator. ** The inMultipler variable with try to estimate the number of ** binary searches needed. */ wsFlags = 0; for(i=0; i<pProbe->nColumn; i++){ int j = pProbe->aiColumn[i]; pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pProbe); if( pTerm==0 ) break; wsFlags |= WHERE_COLUMN_EQ; if( pTerm->eOperator & WO_IN ){ Expr *pExpr = pTerm->pExpr; wsFlags |= WHERE_COLUMN_IN; if( ExprHasProperty(pExpr, EP_xIsSelect) ){ inMultiplier *= 25; inMultIsEst = 1; }else if( pExpr->x.pList ){ inMultiplier *= pExpr->x.pList->nExpr + 1; } } } nRow = pProbe->aiRowEst[i] * inMultiplier; /* If inMultiplier is an estimate and that estimate results in an ** nRow it that is more than half number of rows in the table, ** then reduce inMultipler */ if( inMultIsEst && nRow*2 > pProbe->aiRowEst[0] ){ nRow = pProbe->aiRowEst[0]/2; inMultiplier = nRow/pProbe->aiRowEst[i]; } cost = nRow + inMultiplier*estLog(pProbe->aiRowEst[0]); nEq = i; if( pProbe->onError!=OE_None && (wsFlags & WHERE_COLUMN_IN)==0 && nEq==pProbe->nColumn ){ wsFlags |= WHERE_UNIQUE; } WHERETRACE(("...... nEq=%d inMult=%.9g nRow=%.9g cost=%.9g\n", nEq, inMultiplier, nRow, cost)); /* Look for range constraints. Assume that each range constraint ** makes the search space 1/3rd smaller. */ if( nEq<pProbe->nColumn ){ int j = pProbe->aiColumn[nEq]; pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe); if( pTerm ){ wsFlags |= WHERE_COLUMN_RANGE; if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){ wsFlags |= WHERE_TOP_LIMIT; cost /= 3; nRow /= 3; } if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){ wsFlags |= WHERE_BTM_LIMIT; cost /= 3; nRow /= 3; } WHERETRACE(("...... range reduces nRow to %.9g and cost to %.9g\n", nRow, cost)); } } /* Add the additional cost of sorting if that is a factor. */ if( pOrderBy ){ if( (wsFlags & WHERE_COLUMN_IN)==0 && |
︙ | ︙ |
Added test/tkt3757.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | # 2009 March 28 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Ticket #3757: The cost functions on the query optimizer for the # IN operator can be improved. # # $Id: tkt3757.test,v 1.1 2009/03/29 00:13:04 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl # Evaluate SQL. Return the result set followed by the # and the number of full-scan steps. # proc count_steps {sql} { set r [db eval $sql] lappend r scan [db status step] sort [db status sort] } # Construct tables # do_test tkt3757-1.1 { db eval { CREATE TABLE t1(x INTEGER, y INTEGER, z TEXT); CREATE INDEX t1i1 ON t1(y,z); INSERT INTO t1 VALUES(1,2,'three'); CREATE TABLE t2(a INTEGER, b TEXT); INSERT INTO t2 VALUES(2, 'two'); ANALYZE; SELECT * FROM sqlite_stat1; } } {t1 t1i1 {1 1 1}} # Modify statistics in order to make the optimizer then that: # # (1) Table T1 has about 250K entries # (2) There are only about 5 distinct values of T1. # # Then run a query with "t1.y IN (SELECT ..)" in the WHERE clause. # Make sure the index is used. # do_test tkt3757-1.2 { db eval { DELETE FROM sqlite_stat1; INSERT INTO sqlite_stat1 VALUES('t1','t1i1','250000 50000 30'); } count_steps { SELECT * FROM t1 WHERE y IN (SELECT a FROM t2) } } {1 2 three scan 0 sort 0} finish_test |