/ Check-in [f489b5bb]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:When doing a DISTINCT query using an index, try to use the index to skip ahead to the next distinct element, rather than doing a full scan of the index. (This is the "skip-ahead-distinct" optimization.)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:f489b5bb6b35665befdd411c2c55df5258e83cba265d8c4427ba22529cf882a4
User & Date: drh 2017-04-14 17:30:43
References
2017-11-21
15:19 New ticket [ef931875] Incorrect result due to the skip-ahead-distinct optimization. artifact: 00f210e1 user: drh
Context
2017-04-14
19:44
Fix some left-over K&R-C constructs in lemon.c. No changes to the core. check-in: a5379905 user: drh tags: trunk
17:30
When doing a DISTINCT query using an index, try to use the index to skip ahead to the next distinct element, rather than doing a full scan of the index. (This is the "skip-ahead-distinct" optimization.) check-in: f489b5bb user: drh tags: trunk
14:50
Make USE_FULLWARN=1 the default for MSVC and fix harmless compiler warnings. check-in: 6bf67376 user: mistachkin tags: trunk
00:45
Fix a couple of unreachable branches. check-in: 1aa0ea8d user: drh tags: skip-ahead-distinct
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/where.c.

  3433   3433     }
  3434   3434   
  3435   3435     whereLoopClear(db, pNew);
  3436   3436     return rc;
  3437   3437   }
  3438   3438   
  3439   3439   /*
  3440         -** Examine a WherePath (with the addition of the extra WhereLoop of the 5th
         3440  +** Examine a WherePath (with the addition of the extra WhereLoop of the 6th
  3441   3441   ** parameters) to see if it outputs rows in the requested ORDER BY
  3442   3442   ** (or GROUP BY) without requiring a separate sort operation.  Return N:
  3443   3443   ** 
  3444   3444   **   N>0:   N terms of the ORDER BY clause are satisfied
  3445   3445   **   N==0:  No terms of the ORDER BY clause are satisfied
  3446   3446   **   N<0:   Unknown yet how many terms of ORDER BY might be satisfied.   
  3447   3447   **
................................................................................
  3528   3528         if( wctrlFlags & WHERE_ORDERBY_LIMIT ) continue;
  3529   3529       }else{
  3530   3530         pLoop = pLast;
  3531   3531       }
  3532   3532       if( pLoop->wsFlags & WHERE_VIRTUALTABLE ){
  3533   3533         if( pLoop->u.vtab.isOrdered ) obSat = obDone;
  3534   3534         break;
         3535  +    }else{
         3536  +      pLoop->u.btree.nIdxCol = 0;
  3535   3537       }
  3536   3538       iCur = pWInfo->pTabList->a[pLoop->iTab].iCursor;
  3537   3539   
  3538   3540       /* Mark off any ORDER BY term X that is a column in the table of
  3539   3541       ** the current loop for which there is term in the WHERE
  3540   3542       ** clause of the form X IS NULL or X=? that reference only outer
  3541   3543       ** loops.
................................................................................
  3673   3675               }
  3674   3676             }
  3675   3677             if( iColumn>=0 ){
  3676   3678               pColl = sqlite3ExprCollSeq(pWInfo->pParse, pOrderBy->a[i].pExpr);
  3677   3679               if( !pColl ) pColl = db->pDfltColl;
  3678   3680               if( sqlite3StrICmp(pColl->zName, pIndex->azColl[j])!=0 ) continue;
  3679   3681             }
         3682  +          pLoop->u.btree.nIdxCol = j+1;
  3680   3683             isMatch = 1;
  3681   3684             break;
  3682   3685           }
  3683   3686           if( isMatch && (wctrlFlags & WHERE_GROUPBY)==0 ){
  3684   3687             /* Make sure the sort order is compatible in an ORDER BY clause.
  3685   3688             ** Sort order is irrelevant for a GROUP BY clause. */
  3686   3689             if( revSet ){
................................................................................
  4754   4757         assert( iIndexCur>=0 );
  4755   4758         if( op ){
  4756   4759           sqlite3VdbeAddOp3(v, op, iIndexCur, pIx->tnum, iDb);
  4757   4760           sqlite3VdbeSetP4KeyInfo(pParse, pIx);
  4758   4761           if( (pLoop->wsFlags & WHERE_CONSTRAINT)!=0
  4759   4762            && (pLoop->wsFlags & (WHERE_COLUMN_RANGE|WHERE_SKIPSCAN))==0
  4760   4763            && (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0
         4764  +         && pWInfo->eDistinct!=WHERE_DISTINCT_ORDERED
  4761   4765           ){
  4762   4766             sqlite3VdbeChangeP5(v, OPFLAG_SEEKEQ); /* Hint to COMDB2 */
  4763   4767           }
  4764   4768           VdbeComment((v, "%s", pIx->zName));
  4765   4769   #ifdef SQLITE_ENABLE_COLUMN_USED_MASK
  4766   4770           {
  4767   4771             u64 colUsed = 0;
................................................................................
  4842   4846     */
  4843   4847     VdbeModuleComment((v, "End WHERE-core"));
  4844   4848     sqlite3ExprCacheClear(pParse);
  4845   4849     for(i=pWInfo->nLevel-1; i>=0; i--){
  4846   4850       int addr;
  4847   4851       pLevel = &pWInfo->a[i];
  4848   4852       pLoop = pLevel->pWLoop;
  4849         -    sqlite3VdbeResolveLabel(v, pLevel->addrCont);
  4850   4853       if( pLevel->op!=OP_Noop ){
         4854  +#ifndef SQLITE_DISABLE_SKIPAHEAD_DISTINCT
         4855  +      int addrSeek = 0;
         4856  +      Index *pIdx;
         4857  +      int n;
         4858  +      if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED
         4859  +       && (pLoop->wsFlags & WHERE_INDEXED)!=0
         4860  +       && (pIdx = pLoop->u.btree.pIndex)->hasStat1
         4861  +       && pIdx->aiRowLogEst[(n = pLoop->u.btree.nIdxCol)-1]>=36
         4862  +      ){
         4863  +        int r1 = pParse->nMem+1;
         4864  +        int j, op;
         4865  +        for(j=0; j<n; j++){
         4866  +          sqlite3VdbeAddOp3(v, OP_Column, pLevel->iIdxCur, j, r1+j);
         4867  +        }
         4868  +        pParse->nMem += n+1;
         4869  +        op = pLevel->op==OP_Prev ? OP_SeekLT : OP_SeekGT;
         4870  +        addrSeek = sqlite3VdbeAddOp4Int(v, op, pLevel->iIdxCur, 0, r1, n);
         4871  +        VdbeCoverageIf(v, op==OP_SeekLT);
         4872  +        VdbeCoverageIf(v, op==OP_SeekGT);
         4873  +        sqlite3VdbeAddOp2(v, OP_Goto, 1, pLevel->p2);
         4874  +      }
         4875  +#endif /* SQLITE_DISABLE_SKIPAHEAD_DISTINCT */
         4876  +      /* The common case: Advance to the next row */
         4877  +      sqlite3VdbeResolveLabel(v, pLevel->addrCont);
  4851   4878         sqlite3VdbeAddOp3(v, pLevel->op, pLevel->p1, pLevel->p2, pLevel->p3);
  4852   4879         sqlite3VdbeChangeP5(v, pLevel->p5);
  4853   4880         VdbeCoverage(v);
  4854   4881         VdbeCoverageIf(v, pLevel->op==OP_Next);
  4855   4882         VdbeCoverageIf(v, pLevel->op==OP_Prev);
  4856   4883         VdbeCoverageIf(v, pLevel->op==OP_VNext);
         4884  +#ifndef SQLITE_DISABLE_SKIPAHEAD_DISTINCT
         4885  +      if( addrSeek ) sqlite3VdbeJumpHere(v, addrSeek);
         4886  +#endif
         4887  +    }else{
         4888  +      sqlite3VdbeResolveLabel(v, pLevel->addrCont);
  4857   4889       }
  4858   4890       if( pLoop->wsFlags & WHERE_IN_ABLE && pLevel->u.in.nIn>0 ){
  4859   4891         struct InLoop *pIn;
  4860   4892         int j;
  4861   4893         sqlite3VdbeResolveLabel(v, pLevel->addrNxt);
  4862   4894         for(j=pLevel->u.in.nIn, pIn=&pLevel->u.in.aInLoop[j-1]; j>0; j--, pIn--){
  4863   4895           sqlite3VdbeJumpHere(v, pIn->addrInTop+1);

Changes to src/whereInt.h.

   120    120     LogEst rRun;          /* Cost of running each loop */
   121    121     LogEst nOut;          /* Estimated number of output rows */
   122    122     union {
   123    123       struct {               /* Information for internal btree tables */
   124    124         u16 nEq;               /* Number of equality constraints */
   125    125         u16 nBtm;              /* Size of BTM vector */
   126    126         u16 nTop;              /* Size of TOP vector */
          127  +      u16 nIdxCol;           /* Index column used for ORDER BY */
   127    128         Index *pIndex;         /* Index used, or NULL */
   128    129       } btree;
   129    130       struct {               /* Information for virtual tables */
   130    131         int idxNum;            /* Index number */
   131    132         u8 needFree;           /* True if sqlite3_free(idxStr) is needed */
   132    133         i8 isOrdered;          /* True if satisfies ORDER BY */
   133    134         u16 omitMask;          /* Terms that may be omitted */

Added test/distinct2.test.

            1  +# 2016-04-15
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this script is DISTINCT queries using the skip-ahead 
           13  +# optimization.
           14  +#
           15  +
           16  +set testdir [file dirname $argv0]
           17  +source $testdir/tester.tcl
           18  +
           19  +set testprefix distinct2
           20  +
           21  +do_execsql_test 100 {
           22  +  CREATE TABLE t1(x INTEGER PRIMARY KEY);
           23  +  INSERT INTO t1 VALUES(0),(1),(2);
           24  +  CREATE TABLE t2 AS
           25  +     SELECT DISTINCT a.x AS aa, b.x AS bb
           26  +      FROM t1 a, t1 b;
           27  +  SELECT *, '|' FROM t2 ORDER BY aa, bb;
           28  +} {0 0 | 0 1 | 0 2 | 1 0 | 1 1 | 1 2 | 2 0 | 2 1 | 2 2 |}
           29  +do_execsql_test 110 {
           30  +  DROP TABLE t2;
           31  +  CREATE TABLE t2 AS
           32  +     SELECT DISTINCT a.x AS aa, b.x AS bb
           33  +       FROM t1 a, t1 b
           34  +      WHERE a.x IN t1 AND b.x IN t1;
           35  +  SELECT *, '|' FROM t2 ORDER BY aa, bb;
           36  +} {0 0 | 0 1 | 0 2 | 1 0 | 1 1 | 1 2 | 2 0 | 2 1 | 2 2 |}
           37  +do_execsql_test 120 {
           38  +  CREATE TABLE t102 (i0 TEXT UNIQUE NOT NULL);
           39  +  INSERT INTO t102 VALUES ('0'),('1'),('2');
           40  +  DROP TABLE t2;
           41  +  CREATE TABLE t2 AS
           42  +    SELECT DISTINCT * 
           43  +    FROM t102 AS t0 
           44  +    JOIN t102 AS t4 ON (t2.i0 IN t102)
           45  +    NATURAL JOIN t102 AS t3
           46  +    JOIN t102 AS t1 ON (t0.i0 IN t102)
           47  +    JOIN t102 AS t2 ON (t2.i0=+t0.i0 OR (t0.i0<>500 AND t2.i0=t1.i0));
           48  +  SELECT *, '|' FROM t2 ORDER BY 1, 2, 3, 4, 5;
           49  +} {0 0 0 0 | 0 0 1 0 | 0 0 1 1 | 0 0 2 0 | 0 0 2 2 | 0 1 0 0 | 0 1 1 0 | 0 1 1 1 | 0 1 2 0 | 0 1 2 2 | 0 2 0 0 | 0 2 1 0 | 0 2 1 1 | 0 2 2 0 | 0 2 2 2 | 1 0 0 0 | 1 0 0 1 | 1 0 1 1 | 1 0 2 1 | 1 0 2 2 | 1 1 0 0 | 1 1 0 1 | 1 1 1 1 | 1 1 2 1 | 1 1 2 2 | 1 2 0 0 | 1 2 0 1 | 1 2 1 1 | 1 2 2 1 | 1 2 2 2 | 2 0 0 0 | 2 0 0 2 | 2 0 1 1 | 2 0 1 2 | 2 0 2 2 | 2 1 0 0 | 2 1 0 2 | 2 1 1 1 | 2 1 1 2 | 2 1 2 2 | 2 2 0 0 | 2 2 0 2 | 2 2 1 1 | 2 2 1 2 | 2 2 2 2 |}
           50  +
           51  +do_execsql_test 400 {
           52  +  CREATE TABLE t4(a,b,c,d,e,f,g,h,i,j);
           53  +  INSERT INTO t4 VALUES(0,1,2,3,4,5,6,7,8,9);
           54  +  INSERT INTO t4 SELECT * FROM t4;
           55  +  INSERT INTO t4 SELECT * FROM t4;
           56  +  CREATE INDEX t4x ON t4(c,d,e);
           57  +  SELECT DISTINCT a,b,c FROM t4 WHERE a=0 AND b=1;
           58  +} {0 1 2}
           59  +do_execsql_test 410 {
           60  +  SELECT DISTINCT a,b,c,d FROM t4 WHERE a=0 AND b=1;
           61  +} {0 1 2 3}
           62  +do_execsql_test 411 {
           63  +  SELECT DISTINCT d,a,b,c FROM t4 WHERE a=0 AND b=1;
           64  +} {3 0 1 2}
           65  +do_execsql_test 420 {
           66  +  SELECT DISTINCT a,b,c,d,e FROM t4 WHERE a=0 AND b=1;
           67  +} {0 1 2 3 4}
           68  +do_execsql_test 430 {
           69  +  SELECT DISTINCT a,b,c,d,e,f FROM t4 WHERE a=0 AND b=1;
           70  +} {0 1 2 3 4 5}
           71  +
           72  +do_execsql_test 500 {
           73  +  CREATE TABLE t5(a INT, b INT);
           74  +  CREATE UNIQUE INDEX t5x ON t5(a+b);
           75  +  INSERT INTO t5(a,b) VALUES(0,0),(1,0),(1,1),(0,3);
           76  +  CREATE TEMP TABLE out AS SELECT DISTINCT a+b FROM t5;
           77  +  SELECT * FROM out ORDER BY 1;
           78  +} {0 1 2 3}
           79  +
           80  +do_execsql_test 600 {
           81  +  CREATE TABLE t6a(x INTEGER PRIMARY KEY);
           82  +  INSERT INTO t6a VALUES(1);
           83  +  CREATE TABLE t6b(y INTEGER PRIMARY KEY);
           84  +  INSERT INTO t6b VALUES(2),(3);
           85  +  SELECT DISTINCT x, x FROM t6a, t6b;
           86  +} {1 1}
           87  +
           88  +do_execsql_test 700 {
           89  +  CREATE TABLE t7(a, b, c);
           90  +  WITH s(i) AS (
           91  +    SELECT 1 UNION ALL SELECT i+1 FROM s WHERE (i+1)<200
           92  +  )
           93  +  INSERT INTO t7 SELECT i/100, i/50, i FROM s;
           94  +}
           95  +do_execsql_test 710 {
           96  +  SELECT DISTINCT a, b FROM t7;
           97  +} {
           98  +  0 0    0 1
           99  +  1 2    1 3
          100  +}
          101  +do_execsql_test 720 {
          102  +  SELECT DISTINCT a, b+1 FROM t7;
          103  +} {
          104  +  0 1    0 2
          105  +  1 3    1 4
          106  +}
          107  +do_execsql_test 730 {
          108  +  CREATE INDEX i7 ON t7(a, b+1);
          109  +  ANALYZE;
          110  +  SELECT DISTINCT a, b+1 FROM t7;
          111  +} {
          112  +  0 1    0 2
          113  +  1 3    1 4
          114  +}
          115  +
          116  +do_execsql_test 800 {
          117  +  CREATE TABLE t8(a, b, c);
          118  +  WITH s(i) AS (
          119  +    SELECT 1 UNION ALL SELECT i+1 FROM s WHERE (i+1)<100
          120  +  )
          121  +  INSERT INTO t8 SELECT i/40, i/20, i/40 FROM s;
          122  +}
          123  +
          124  +do_execsql_test 820 {
          125  +  SELECT DISTINCT a, b, c FROM t8;
          126  +} {
          127  +  0 0 0    0 1 0
          128  +  1 2 1    1 3 1
          129  +  2 4 2
          130  +}
          131  +
          132  +do_execsql_test 820 {
          133  +  SELECT DISTINCT a, b, c FROM t8 WHERE b=3;
          134  +} {1 3 1}
          135  +
          136  +do_execsql_test 830 {
          137  +  CREATE INDEX i8 ON t8(a, c);
          138  +  ANALYZE;
          139  +  SELECT DISTINCT a, b, c FROM t8 WHERE b=3;
          140  +} {1 3 1}
          141  +
          142  +do_execsql_test 900 {
          143  +  CREATE TABLE t9(v);
          144  +  INSERT INTO t9 VALUES 
          145  +    ('abcd'), ('Abcd'), ('aBcd'), ('ABcd'), ('abCd'), ('AbCd'), ('aBCd'), 
          146  +    ('ABCd'), ('abcD'), ('AbcD'), ('aBcD'), ('ABcD'), ('abCD'), ('AbCD'), 
          147  +    ('aBCD'), ('ABCD'),
          148  +    ('wxyz'), ('Wxyz'), ('wXyz'), ('WXyz'), ('wxYz'), ('WxYz'), ('wXYz'), 
          149  +    ('WXYz'), ('wxyZ'), ('WxyZ'), ('wXyZ'), ('WXyZ'), ('wxYZ'), ('WxYZ'), 
          150  +    ('wXYZ'), ('WXYZ');
          151  +}
          152  +
          153  +do_execsql_test 910 {
          154  +  SELECT DISTINCT v COLLATE NOCASE, v FROM t9 ORDER BY +v;
          155  +} {
          156  +  ABCD ABCD ABCd ABCd ABcD ABcD ABcd ABcd AbCD
          157  +  AbCD AbCd AbCd AbcD AbcD Abcd Abcd
          158  +  WXYZ WXYZ WXYz WXYz WXyZ WXyZ WXyz WXyz WxYZ
          159  +  WxYZ WxYz WxYz WxyZ WxyZ Wxyz Wxyz
          160  +  aBCD aBCD aBCd aBCd aBcD aBcD aBcd aBcd abCD
          161  +  abCD abCd abCd abcD abcD abcd abcd
          162  +  wXYZ wXYZ wXYz wXYz wXyZ wXyZ wXyz wXyz wxYZ
          163  +  wxYZ wxYz wxYz wxyZ wxyZ wxyz wxyz
          164  +}
          165  +
          166  +do_execsql_test 920 {
          167  +  CREATE INDEX i9 ON t9(v COLLATE NOCASE, v);
          168  +  ANALYZE;
          169  +
          170  +  SELECT DISTINCT v COLLATE NOCASE, v FROM t9 ORDER BY +v;
          171  +} {
          172  +  ABCD ABCD ABCd ABCd ABcD ABcD ABcd ABcd AbCD
          173  +  AbCD AbCd AbCd AbcD AbcD Abcd Abcd
          174  +  WXYZ WXYZ WXYz WXYz WXyZ WXyZ WXyz WXyz WxYZ
          175  +  WxYZ WxYz WxYz WxyZ WxyZ Wxyz Wxyz
          176  +  aBCD aBCD aBCd aBCd aBcD aBcD aBcd aBcd abCD
          177  +  abCD abCd abCd abcD abcD abcd abcd
          178  +  wXYZ wXYZ wXYz wXYz wXyZ wXyZ wXyz wXyz wxYZ
          179  +  wxYZ wxYz wxYz wxyZ wxyZ wxyz wxyz
          180  +}
          181  +
          182  +
          183  +finish_test