/ Check-in [2cbbabdf]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:When the query planner has the opportunity to use an IN operater constraint on a term of an index other than the left-most term, use the estimated number of elements on the right-hand side of the IN operator to determine if makes sense to use the IN operator with index lookups, or to just do a scan over the range of the table identified by the index terms to the left. Only do this if sqlite_stat1 measurements are available as otherwise the performance estimates will not be accurate enough to discern the best plan. Bias the decision slightly in favor of using index lookups on each element of the IN operator.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:2cbbabdf5ef624d809fbb40d2d312a29e0b5f02756fc0dbf6985fc8b0c8d1ade
User & Date: drh 2018-06-08 23:23:53
Original Comment: When the query planner has the opportunity to use an IN operater constraint on a term of an index other than the left-most term, use the estimated number of elements on the right-hand side of the IN operator to determine if makes sense to use the IN operator with index looks, or to just do a scan over the range of the table identified by the index terms to the left. Only do this if sqlite_stat1 measurements are available as otherwise the performance estimates will not be accurate enough to discern the best plan. Bias the decision slightly in favor of using index lookups on each element of the IN operator.
Context
2018-06-09
00:09
Avoid invoking the whereLoopAddOr() routine in the query planner if there are no OR operators in the WHERE clause, thus speeding up query planning slightly. check-in: 292724ff user: drh tags: trunk
2018-06-08
23:23
When the query planner has the opportunity to use an IN operater constraint on a term of an index other than the left-most term, use the estimated number of elements on the right-hand side of the IN operator to determine if makes sense to use the IN operator with index lookups, or to just do a scan over the range of the table identified by the index terms to the left. Only do this if sqlite_stat1 measurements are available as otherwise the performance estimates will not be accurate enough to discern the best plan. Bias the decision slightly in favor of using index lookups on each element of the IN operator. check-in: 2cbbabdf user: drh tags: trunk
21:21
Only choose to scan an IN operator rather than use an index if we have real STAT1 data to suggest it is advantageous. Closed-Leaf check-in: 30e87466 user: drh tags: in-scan-vs-index
19:13
Fix an assert() that can be false for a corrupt database and a strange query that uses a recursive SQL function to delete content from a corrupt database file while it is being queried. check-in: 99057383 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/where.c.

2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
....
2467
2468
2469
2470
2471
2472
2473


































2474
2475
2476
2477
2478
2479
2480
        || (pNew->wsFlags & WHERE_COLUMN_NULL)!=0 
        || (pNew->wsFlags & WHERE_COLUMN_IN)!=0 
        || (pNew->wsFlags & WHERE_SKIPSCAN)!=0 
    );

    if( eOp & WO_IN ){
      Expr *pExpr = pTerm->pExpr;
      pNew->wsFlags |= WHERE_COLUMN_IN;
      if( ExprHasProperty(pExpr, EP_xIsSelect) ){
        /* "x IN (SELECT ...)":  TUNING: the SELECT returns 25 rows */
        int i;
        nIn = 46;  assert( 46==sqlite3LogEst(25) );

        /* The expression may actually be of the form (x, y) IN (SELECT...).
        ** In this case there is a separate term for each of (x) and (y).
................................................................................
        }
      }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){
        /* "x IN (value, value, ...)" */
        nIn = sqlite3LogEst(pExpr->x.pList->nExpr);
        assert( nIn>0 );  /* RHS always has 2 or more terms...  The parser
                          ** changes "x IN (?)" into "x=?". */
      }


































    }else if( eOp & (WO_EQ|WO_IS) ){
      int iCol = pProbe->aiColumn[saved_nEq];
      pNew->wsFlags |= WHERE_COLUMN_EQ;
      assert( saved_nEq==pNew->u.btree.nEq );
      if( iCol==XN_ROWID 
       || (iCol>=0 && nInMul==0 && saved_nEq==pProbe->nKeyCol-1)
      ){







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
....
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
        || (pNew->wsFlags & WHERE_COLUMN_NULL)!=0 
        || (pNew->wsFlags & WHERE_COLUMN_IN)!=0 
        || (pNew->wsFlags & WHERE_SKIPSCAN)!=0 
    );

    if( eOp & WO_IN ){
      Expr *pExpr = pTerm->pExpr;
      LogEst M, logK;
      if( ExprHasProperty(pExpr, EP_xIsSelect) ){
        /* "x IN (SELECT ...)":  TUNING: the SELECT returns 25 rows */
        int i;
        nIn = 46;  assert( 46==sqlite3LogEst(25) );

        /* The expression may actually be of the form (x, y) IN (SELECT...).
        ** In this case there is a separate term for each of (x) and (y).
................................................................................
        }
      }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){
        /* "x IN (value, value, ...)" */
        nIn = sqlite3LogEst(pExpr->x.pList->nExpr);
        assert( nIn>0 );  /* RHS always has 2 or more terms...  The parser
                          ** changes "x IN (?)" into "x=?". */
      }
      if( pProbe->hasStat1 ){
        /* Let:
        **   N = the total number of rows in the table
        **   K = the number of entries on the RHS of the IN operator
        **   M = the number of rows in the table that match terms to the 
        **       to the left in the same index.  If the IN operator is on
        **       the left-most index column, M==N.
        **
        ** Given the definitions above, it is better to omit the IN operator
        ** from the index lookup and instead do a scan of the M elements,
        ** testing each scanned row against the IN operator separately, if:
        **
        **        M*log(K) < K*log(N)
        **
        ** Our estimates for M, K, and N might be inaccurate, so we build in
        ** a safety margin of 2 (LogEst: 10) that favors using the IN operator
        ** with the index, as using an index has better worst-case behavior.
        ** If we do not have real sqlite_stat1 data, always prefer to use
        ** the index.
        */
        M = pProbe->aiRowLogEst[saved_nEq];
        logK = estLog(nIn);
        if( M + logK + 10 < nIn + rLogSize ){
          WHERETRACE(0x40,
            ("Scan preferred over IN operator on column %d of \"%s\" (%d<%d)\n",
             saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize));
          continue;
        }else{
          WHERETRACE(0x40,
            ("IN operator preferred on column %d of \"%s\" (%d>=%d)\n",
             saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize));
        }
      }
      pNew->wsFlags |= WHERE_COLUMN_IN;
    }else if( eOp & (WO_EQ|WO_IS) ){
      int iCol = pProbe->aiColumn[saved_nEq];
      pNew->wsFlags |= WHERE_COLUMN_EQ;
      assert( saved_nEq==pNew->u.btree.nEq );
      if( iCol==XN_ROWID 
       || (iCol>=0 && nInMul==0 && saved_nEq==pProbe->nKeyCol-1)
      ){

Changes to test/in6.test.

24
25
26
27
28
29
30



31
32
33
34
35
36
37
do_test in6-1.1 {
  db eval {
    CREATE TABLE t1(a,b,c,d);
    WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM c WHERE x<100)
      INSERT INTO t1(a,b,c,d)
        SELECT 100, 200+x/2, 300+x/5, x FROM c;
    CREATE INDEX t1abc ON t1(a,b,c);



  }
  set ::sqlite_search_count 0
  db eval {
    SELECT d FROM t1
     WHERE a=99
       AND b IN (200,205,201,204)
       AND c IN (304,302,309,308);







>
>
>







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
do_test in6-1.1 {
  db eval {
    CREATE TABLE t1(a,b,c,d);
    WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM c WHERE x<100)
      INSERT INTO t1(a,b,c,d)
        SELECT 100, 200+x/2, 300+x/5, x FROM c;
    CREATE INDEX t1abc ON t1(a,b,c);
    ANALYZE;
    UPDATE sqlite_stat1 SET stat='1000000 500000 500 50';
    ANALYZE sqlite_master;
  }
  set ::sqlite_search_count 0
  db eval {
    SELECT d FROM t1
     WHERE a=99
       AND b IN (200,205,201,204)
       AND c IN (304,302,309,308);

Changes to test/rowvalue4.test.

220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
  CREATE TABLE d2(a, b, c);
  CREATE INDEX d2ab ON d2(a, b);
  CREATE INDEX d2c ON d2(c);

  WITH i(i) AS (
    VALUES(1) UNION ALL SELECT i+1 FROM i WHERE i<1000
  )
  INSERT INTO d2 SELECT i/3, i%3, i/3 FROM i;
  ANALYZE;
}

do_eqp_test 5.1 {
  SELECT * FROM d2 WHERE 
    (a, b) IN (SELECT x, y FROM d1) AND
    (c) IN (SELECT y FROM d1)







|







220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
  CREATE TABLE d2(a, b, c);
  CREATE INDEX d2ab ON d2(a, b);
  CREATE INDEX d2c ON d2(c);

  WITH i(i) AS (
    VALUES(1) UNION ALL SELECT i+1 FROM i WHERE i<1000
  )
  INSERT INTO d2 SELECT i/100, i%100, i/100 FROM i;
  ANALYZE;
}

do_eqp_test 5.1 {
  SELECT * FROM d2 WHERE 
    (a, b) IN (SELECT x, y FROM d1) AND
    (c) IN (SELECT y FROM d1)