/ Check-in [0c438e81]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements to cost estimation for evaluating the IN operator. Ticket #3757. (CVS 6403)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:0c438e813c411e8f9e92d6c7405fccb7a36e110a
User & Date: drh 2009-03-29 00:13:03
Context
2009-03-29
00:15
Turn off the debugging macros in where.c - left on by mistake in the previous check-in. (CVS 6404) check-in: b601a575 user: drh tags: trunk
00:13
Improvements to cost estimation for evaluating the IN operator. Ticket #3757. (CVS 6403) check-in: 0c438e81 user: drh tags: trunk
2009-03-28
23:47
Previous commit ((6401)) did not quite fix the problem. This should work better. (CVS 6402) check-in: 2e7d3cc9 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/where.c.

    12     12   ** This module contains C code that generates VDBE code used to process
    13     13   ** the WHERE clause of SQL statements.  This module is responsible for
    14     14   ** generating the code that loops through a table looking for applicable
    15     15   ** rows.  Indices are selected and used to speed the search when doing
    16     16   ** so is applicable.  Because this module is responsible for selecting
    17     17   ** indices, you might also think of this module as the "query optimizer".
    18     18   **
    19         -** $Id: where.c,v 1.377 2009/03/25 16:51:43 drh Exp $
           19  +** $Id: where.c,v 1.378 2009/03/29 00:13:03 drh Exp $
    20     20   */
    21     21   #include "sqliteInt.h"
    22     22   
    23     23   /*
    24     24   ** Trace output macros
    25     25   */
    26     26   #if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)
    27     27   int sqlite3WhereTrace = 0;
    28     28   #endif
    29         -#if 0
           29  +#if 1
    30     30   # define WHERETRACE(X)  if(sqlite3WhereTrace) sqlite3DebugPrintf X
    31     31   #else
    32     32   # define WHERETRACE(X)
    33     33   #endif
    34     34   
    35     35   /* Forward reference
    36     36   */
................................................................................
  1922   1922   
  1923   1923     /* Look at each index.
  1924   1924     */
  1925   1925     if( pSrc->pIndex ){
  1926   1926       pProbe = pSrc->pIndex;
  1927   1927     }
  1928   1928     for(; pProbe; pProbe=(pSrc->pIndex ? 0 : pProbe->pNext)){
  1929         -    double inMultiplier = 1;
         1929  +    double inMultiplier = 1;  /* Number of equality look-ups needed */
         1930  +    int inMultIsEst = 0;      /* True if inMultiplier is an estimate */
  1930   1931   
  1931   1932       WHERETRACE(("... index %s:\n", pProbe->zName));
  1932   1933   
  1933   1934       /* Count the number of columns in the index that are satisfied
  1934         -    ** by x=EXPR constraints or x IN (...) constraints.
         1935  +    ** by x=EXPR constraints or x IN (...) constraints.  For a term
         1936  +    ** of the form x=EXPR we only have to do a single binary search.
         1937  +    ** But for x IN (...) we have to do a number of binary searched
         1938  +    ** equal to the number of entries on the RHS of the IN operator.
         1939  +    ** The inMultipler variable with try to estimate the number of
         1940  +    ** binary searches needed.
  1935   1941       */
  1936   1942       wsFlags = 0;
  1937   1943       for(i=0; i<pProbe->nColumn; i++){
  1938   1944         int j = pProbe->aiColumn[i];
  1939   1945         pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pProbe);
  1940   1946         if( pTerm==0 ) break;
  1941   1947         wsFlags |= WHERE_COLUMN_EQ;
  1942   1948         if( pTerm->eOperator & WO_IN ){
  1943   1949           Expr *pExpr = pTerm->pExpr;
  1944   1950           wsFlags |= WHERE_COLUMN_IN;
  1945   1951           if( ExprHasProperty(pExpr, EP_xIsSelect) ){
  1946   1952             inMultiplier *= 25;
         1953  +          inMultIsEst = 1;
  1947   1954           }else if( pExpr->x.pList ){
  1948   1955             inMultiplier *= pExpr->x.pList->nExpr + 1;
  1949   1956           }
  1950   1957         }
  1951   1958       }
  1952   1959       nRow = pProbe->aiRowEst[i] * inMultiplier;
  1953         -    cost = nRow * estLog(inMultiplier);
         1960  +    /* If inMultiplier is an estimate and that estimate results in an
         1961  +    ** nRow it that is more than half number of rows in the table,
         1962  +    ** then reduce inMultipler */
         1963  +    if( inMultIsEst && nRow*2 > pProbe->aiRowEst[0] ){
         1964  +      nRow = pProbe->aiRowEst[0]/2;
         1965  +      inMultiplier = nRow/pProbe->aiRowEst[i];
         1966  +    }
         1967  +    cost = nRow + inMultiplier*estLog(pProbe->aiRowEst[0]);
  1954   1968       nEq = i;
  1955   1969       if( pProbe->onError!=OE_None && (wsFlags & WHERE_COLUMN_IN)==0
  1956   1970            && nEq==pProbe->nColumn ){
  1957   1971         wsFlags |= WHERE_UNIQUE;
  1958   1972       }
  1959         -    WHERETRACE(("...... nEq=%d inMult=%.9g cost=%.9g\n",nEq,inMultiplier,cost));
         1973  +    WHERETRACE(("...... nEq=%d inMult=%.9g nRow=%.9g cost=%.9g\n",
         1974  +                nEq, inMultiplier, nRow, cost));
  1960   1975   
  1961         -    /* Look for range constraints
         1976  +    /* Look for range constraints.  Assume that each range constraint
         1977  +    ** makes the search space 1/3rd smaller.
  1962   1978       */
  1963   1979       if( nEq<pProbe->nColumn ){
  1964   1980         int j = pProbe->aiColumn[nEq];
  1965   1981         pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe);
  1966   1982         if( pTerm ){
  1967   1983           wsFlags |= WHERE_COLUMN_RANGE;
  1968   1984           if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){
................................................................................
  1971   1987             nRow /= 3;
  1972   1988           }
  1973   1989           if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){
  1974   1990             wsFlags |= WHERE_BTM_LIMIT;
  1975   1991             cost /= 3;
  1976   1992             nRow /= 3;
  1977   1993           }
  1978         -        WHERETRACE(("...... range reduces cost to %.9g\n", cost));
         1994  +        WHERETRACE(("...... range reduces nRow to %.9g and cost to %.9g\n",
         1995  +                    nRow, cost));
  1979   1996         }
  1980   1997       }
  1981   1998   
  1982   1999       /* Add the additional cost of sorting if that is a factor.
  1983   2000       */
  1984   2001       if( pOrderBy ){
  1985   2002         if( (wsFlags & WHERE_COLUMN_IN)==0 &&

Added test/tkt3757.test.

            1  +# 2009 March 28
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +#
           12  +# Ticket #3757:  The cost functions on the query optimizer for the
           13  +# IN operator can be improved.
           14  +#
           15  +# $Id: tkt3757.test,v 1.1 2009/03/29 00:13:04 drh Exp $
           16  +
           17  +set testdir [file dirname $argv0]
           18  +source $testdir/tester.tcl
           19  +
           20  +# Evaluate SQL.  Return the result set followed by the
           21  +# and the number of full-scan steps.
           22  +#
           23  +proc count_steps {sql} {
           24  +  set r [db eval $sql]
           25  +  lappend r scan [db status step] sort [db status sort]
           26  +}
           27  +
           28  +# Construct tables
           29  +#
           30  +do_test tkt3757-1.1 {
           31  +  db eval {
           32  +     CREATE TABLE t1(x INTEGER, y INTEGER, z TEXT);
           33  +     CREATE INDEX t1i1 ON t1(y,z);
           34  +     INSERT INTO t1 VALUES(1,2,'three');
           35  +     CREATE TABLE t2(a INTEGER, b TEXT);
           36  +     INSERT INTO t2 VALUES(2, 'two');
           37  +     ANALYZE;
           38  +     SELECT * FROM sqlite_stat1;
           39  +  }
           40  +} {t1 t1i1 {1 1 1}}
           41  +
           42  +# Modify statistics in order to make the optimizer then that:
           43  +#
           44  +#   (1)  Table T1 has about 250K entries
           45  +#   (2)  There are only about 5 distinct values of T1.
           46  +#
           47  +# Then run a query with "t1.y IN (SELECT ..)" in the WHERE clause.
           48  +# Make sure the index is used.
           49  +#
           50  +do_test tkt3757-1.2 {
           51  +  db eval {
           52  +    DELETE FROM sqlite_stat1;
           53  +    INSERT INTO sqlite_stat1 VALUES('t1','t1i1','250000 50000 30');
           54  +  }
           55  +  count_steps {
           56  +    SELECT * FROM t1 WHERE y IN (SELECT a FROM t2)
           57  +  }
           58  +} {1 2 three scan 0 sort 0}
           59  +
           60  +finish_test