Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Only choose to scan an IN operator rather than use an index if we have real STAT1 data to suggest it is advantageous. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | in-scan-vs-index |
Files: | files | file ages | folders |
SHA3-256: |
30e874661dcc1a2ecb40df2ef7458215 |
User & Date: | drh 2018-06-08 21:21:01.417 |
Context
2018-06-08
| ||
23:23 | When the query planner has the opportunity to use an IN operater constraint on a term of an index other than the left-most term, use the estimated number of elements on the right-hand side of the IN operator to determine if makes sense to use the IN operator with index lookups, or to just do a scan over the range of the table identified by the index terms to the left. Only do this if sqlite_stat1 measurements are available as otherwise the performance estimates will not be accurate enough to discern the best plan. Bias the decision slightly in favor of using index lookups on each element of the IN operator. (check-in: 2cbbabdf5e user: drh tags: trunk) | |
21:21 | Only choose to scan an IN operator rather than use an index if we have real STAT1 data to suggest it is advantageous. (Closed-Leaf check-in: 30e874661d user: drh tags: in-scan-vs-index) | |
19:54 | Merge the btreeNext() assertion bug fix from trunk. (check-in: 11bd66e090 user: drh tags: in-scan-vs-index) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
2467 2468 2469 2470 2471 2472 2473 | } }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){ /* "x IN (value, value, ...)" */ nIn = sqlite3LogEst(pExpr->x.pList->nExpr); assert( nIn>0 ); /* RHS always has 2 or more terms... The parser ** changes "x IN (?)" into "x=?". */ } | > | | | | | | | | | | | | | | | | > > | | | | | | | | | | | | > | 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 | } }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){ /* "x IN (value, value, ...)" */ nIn = sqlite3LogEst(pExpr->x.pList->nExpr); assert( nIn>0 ); /* RHS always has 2 or more terms... The parser ** changes "x IN (?)" into "x=?". */ } if( pProbe->hasStat1 ){ /* Let: ** N = the total number of rows in the table ** K = the number of entries on the RHS of the IN operator ** M = the number of rows in the table that match terms to the ** to the left in the same index. If the IN operator is on ** the left-most index column, M==N. ** ** Given the definitions above, it is better to omit the IN operator ** from the index lookup and instead do a scan of the M elements, ** testing each scanned row against the IN operator separately, if: ** ** M*log(K) < K*log(N) ** ** Our estimates for M, K, and N might be inaccurate, so we build in ** a safety margin of 2 (LogEst: 10) that favors using the IN operator ** with the index, as using an index has better worst-case behavior. ** If we do not have real sqlite_stat1 data, always prefer to use ** the index. */ M = pProbe->aiRowLogEst[saved_nEq]; logK = estLog(nIn); if( M + logK + 10 < nIn + rLogSize ){ WHERETRACE(0x40, ("Scan preferred over IN operator on column %d of \"%s\" (%d<%d)\n", saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize)); continue; }else{ WHERETRACE(0x40, ("IN operator preferred on column %d of \"%s\" (%d>=%d)\n", saved_nEq, pProbe->zName, M+logK+10, nIn+rLogSize)); } } pNew->wsFlags |= WHERE_COLUMN_IN; }else if( eOp & (WO_EQ|WO_IS) ){ int iCol = pProbe->aiColumn[saved_nEq]; pNew->wsFlags |= WHERE_COLUMN_EQ; assert( saved_nEq==pNew->u.btree.nEq ); if( iCol==XN_ROWID |
︙ | ︙ |
Changes to test/in6.test.
︙ | ︙ | |||
24 25 26 27 28 29 30 31 32 33 34 35 36 37 | do_test in6-1.1 { db eval { CREATE TABLE t1(a,b,c,d); WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM c WHERE x<100) INSERT INTO t1(a,b,c,d) SELECT 100, 200+x/2, 300+x/5, x FROM c; CREATE INDEX t1abc ON t1(a,b,c); } set ::sqlite_search_count 0 db eval { SELECT d FROM t1 WHERE a=99 AND b IN (200,205,201,204) AND c IN (304,302,309,308); | > > > | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | do_test in6-1.1 { db eval { CREATE TABLE t1(a,b,c,d); WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM c WHERE x<100) INSERT INTO t1(a,b,c,d) SELECT 100, 200+x/2, 300+x/5, x FROM c; CREATE INDEX t1abc ON t1(a,b,c); ANALYZE; UPDATE sqlite_stat1 SET stat='1000000 500000 500 50'; ANALYZE sqlite_master; } set ::sqlite_search_count 0 db eval { SELECT d FROM t1 WHERE a=99 AND b IN (200,205,201,204) AND c IN (304,302,309,308); |
︙ | ︙ |
Changes to test/rowvalue4.test.
︙ | ︙ | |||
220 221 222 223 224 225 226 | CREATE TABLE d2(a, b, c); CREATE INDEX d2ab ON d2(a, b); CREATE INDEX d2c ON d2(c); WITH i(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM i WHERE i<1000 ) | | | 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | CREATE TABLE d2(a, b, c); CREATE INDEX d2ab ON d2(a, b); CREATE INDEX d2c ON d2(c); WITH i(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM i WHERE i<1000 ) INSERT INTO d2 SELECT i/100, i%100, i/100 FROM i; ANALYZE; } do_eqp_test 5.1 { SELECT * FROM d2 WHERE (a, b) IN (SELECT x, y FROM d1) AND (c) IN (SELECT y FROM d1) |
︙ | ︙ |