SQLite

Check-in [ce1e2b8877]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix the skip-ahead-distinct optimization so that it works with indexes that have repeated columns with different collating sequences.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | skip-ahead-distinct
Files: files | file ages | folders
SHA3-256: ce1e2b88777e00a82c04abe5ba35eec81b5f324e462f099cd00b21054f369688
User & Date: drh 2017-04-13 21:29:02.792
Context
2017-04-14
00:45
Fix a couple of unreachable branches. (check-in: 1aa0ea8db7 user: drh tags: skip-ahead-distinct)
2017-04-13
21:29
Fix the skip-ahead-distinct optimization so that it works with indexes that have repeated columns with different collating sequences. (check-in: ce1e2b8877 user: drh tags: skip-ahead-distinct)
19:48
Simplification of the skip-ahead-distinct logic. There is still an issue with handling COLLATE. (check-in: 57c5173b63 user: drh tags: skip-ahead-distinct)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/where.c.
3528
3529
3530
3531
3532
3533
3534


3535
3536
3537
3538
3539
3540
3541
      if( wctrlFlags & WHERE_ORDERBY_LIMIT ) continue;
    }else{
      pLoop = pLast;
    }
    if( pLoop->wsFlags & WHERE_VIRTUALTABLE ){
      if( pLoop->u.vtab.isOrdered ) obSat = obDone;
      break;


    }
    iCur = pWInfo->pTabList->a[pLoop->iTab].iCursor;

    /* Mark off any ORDER BY term X that is a column in the table of
    ** the current loop for which there is term in the WHERE
    ** clause of the form X IS NULL or X=? that reference only outer
    ** loops.







>
>







3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
      if( wctrlFlags & WHERE_ORDERBY_LIMIT ) continue;
    }else{
      pLoop = pLast;
    }
    if( pLoop->wsFlags & WHERE_VIRTUALTABLE ){
      if( pLoop->u.vtab.isOrdered ) obSat = obDone;
      break;
    }else{
      pLoop->u.btree.nIdxCol = 0;
    }
    iCur = pWInfo->pTabList->a[pLoop->iTab].iCursor;

    /* Mark off any ORDER BY term X that is a column in the table of
    ** the current loop for which there is term in the WHERE
    ** clause of the form X IS NULL or X=? that reference only outer
    ** loops.
3673
3674
3675
3676
3677
3678
3679

3680
3681
3682
3683
3684
3685
3686
            }
          }
          if( iColumn>=0 ){
            pColl = sqlite3ExprCollSeq(pWInfo->pParse, pOrderBy->a[i].pExpr);
            if( !pColl ) pColl = db->pDfltColl;
            if( sqlite3StrICmp(pColl->zName, pIndex->azColl[j])!=0 ) continue;
          }

          isMatch = 1;
          break;
        }
        if( isMatch && (wctrlFlags & WHERE_GROUPBY)==0 ){
          /* Make sure the sort order is compatible in an ORDER BY clause.
          ** Sort order is irrelevant for a GROUP BY clause. */
          if( revSet ){







>







3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
            }
          }
          if( iColumn>=0 ){
            pColl = sqlite3ExprCollSeq(pWInfo->pParse, pOrderBy->a[i].pExpr);
            if( !pColl ) pColl = db->pDfltColl;
            if( sqlite3StrICmp(pColl->zName, pIndex->azColl[j])!=0 ) continue;
          }
          if( pLoop->u.btree.nIdxCol<=j ) pLoop->u.btree.nIdxCol = j+1;
          isMatch = 1;
          break;
        }
        if( isMatch && (wctrlFlags & WHERE_GROUPBY)==0 ){
          /* Make sure the sort order is compatible in an ORDER BY clause.
          ** Sort order is irrelevant for a GROUP BY clause. */
          if( revSet ){
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856

4857
4858
4859
4860

4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
  for(i=pWInfo->nLevel-1; i>=0; i--){
    int addr;
    pLevel = &pWInfo->a[i];
    pLoop = pLevel->pWLoop;
    if( pLevel->op!=OP_Noop ){
#ifndef SQLITE_DISABLE_SKIPAHEAD_DISTINCT
      int addrSeek = 0;
      int n = -1;
      int j, k, op;
      int r1 = pParse->nMem+1;
      Index *pIdx;

      if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED
       && (pLoop->wsFlags & WHERE_INDEXED)!=0
       && OptimizationEnabled(db, SQLITE_SkipAhead)
       && (pIdx = pLoop->u.btree.pIndex)->hasStat1

      ){
        /* This is the Skip-ahead optimization.  When doing a DISTINCT query
        ** that has WHERE_DISTINCT_ORDERED, use OP_SkipGT/OP_SkipLT to skip
        ** over all duplicate entries, rather than visiting all duplicates
        ** using OP_Next/OP_Prev. */
        ExprList *pX = pWInfo->pResultSet;
        for(j=0; j<pX->nExpr; j++){
          Expr *pE = sqlite3ExprSkipCollate(pX->a[j].pExpr);
          if( pE->op==TK_COLUMN ){
            if( pE->iTable==pLevel->iIdxCur ){
              k = pE->iColumn+1;
            }else{
              if( pE->iTable!=pLevel->iTabCur ) continue;
              k = 1+sqlite3ColumnOfIndex(pIdx, pE->iColumn);
            }
            if( k>n ) n = k;
          }
#ifdef SQLITE_DEBUG
          /* Any expressions in the result-set that match columns of the
          ** index should have already been transformed to TK_COLUMN 
          ** operations by whereIndexExprTrans().  */
          else if( pIdx->aColExpr ){
            for(k=0; k<pIdx->nKeyCol; k++){
              Expr *pI = pIdx->aColExpr->a[k].pExpr;
              assert( pI==0 || sqlite3ExprCompare(pE, pI, pE->iTable)!=0 );
            }
          }
#endif
        }
      }
      /* TUNING: Only try to skip ahead using OP_Seek if we expect to
      ** skip over 11 or more rows.  Otherwise, OP_Next is just as fast.
      */
      assert( 36==sqlite3LogEst(12) );
      if( n>0 && pIdx->aiRowLogEst[n]>=36 ){
        for(j=0; j<n; j++){
          sqlite3VdbeAddOp3(v, OP_Column, pLevel->iIdxCur, j, r1+j);
        }
        pParse->nMem += n;
        op = pLevel->op==OP_Prev ? OP_SeekLT : OP_SeekGT;
        addrSeek = sqlite3VdbeAddOp4Int(v, op, pLevel->iIdxCur, 0, r1, n);
        VdbeCoverageIf(v, op==OP_SeekLT);
        VdbeCoverageIf(v, op==OP_SeekGT);
        sqlite3VdbeAddOp2(v, OP_Goto, 1, pLevel->p2);
      }
#endif /* SQLITE_DISABLE_SKIPAHEAD_DISTINCT */







<
<
<

>




>

<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<



|







4849
4850
4851
4852
4853
4854
4855



4856
4857
4858
4859
4860
4861
4862
4863









4864
















4865







4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
  for(i=pWInfo->nLevel-1; i>=0; i--){
    int addr;
    pLevel = &pWInfo->a[i];
    pLoop = pLevel->pWLoop;
    if( pLevel->op!=OP_Noop ){
#ifndef SQLITE_DISABLE_SKIPAHEAD_DISTINCT
      int addrSeek = 0;



      Index *pIdx;
      int n;
      if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED
       && (pLoop->wsFlags & WHERE_INDEXED)!=0
       && OptimizationEnabled(db, SQLITE_SkipAhead)
       && (pIdx = pLoop->u.btree.pIndex)->hasStat1
       && pIdx->aiRowLogEst[(n = pLoop->u.btree.nIdxCol)-1]>=36
      ){









        int r1 = pParse->nMem+1;
















        int j, op;







        for(j=0; j<n; j++){
          sqlite3VdbeAddOp3(v, OP_Column, pLevel->iIdxCur, j, r1+j);
        }
        pParse->nMem += n+1;
        op = pLevel->op==OP_Prev ? OP_SeekLT : OP_SeekGT;
        addrSeek = sqlite3VdbeAddOp4Int(v, op, pLevel->iIdxCur, 0, r1, n);
        VdbeCoverageIf(v, op==OP_SeekLT);
        VdbeCoverageIf(v, op==OP_SeekGT);
        sqlite3VdbeAddOp2(v, OP_Goto, 1, pLevel->p2);
      }
#endif /* SQLITE_DISABLE_SKIPAHEAD_DISTINCT */
Changes to src/whereInt.h.
120
121
122
123
124
125
126

127
128
129
130
131
132
133
  LogEst rRun;          /* Cost of running each loop */
  LogEst nOut;          /* Estimated number of output rows */
  union {
    struct {               /* Information for internal btree tables */
      u16 nEq;               /* Number of equality constraints */
      u16 nBtm;              /* Size of BTM vector */
      u16 nTop;              /* Size of TOP vector */

      Index *pIndex;         /* Index used, or NULL */
    } btree;
    struct {               /* Information for virtual tables */
      int idxNum;            /* Index number */
      u8 needFree;           /* True if sqlite3_free(idxStr) is needed */
      i8 isOrdered;          /* True if satisfies ORDER BY */
      u16 omitMask;          /* Terms that may be omitted */







>







120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
  LogEst rRun;          /* Cost of running each loop */
  LogEst nOut;          /* Estimated number of output rows */
  union {
    struct {               /* Information for internal btree tables */
      u16 nEq;               /* Number of equality constraints */
      u16 nBtm;              /* Size of BTM vector */
      u16 nTop;              /* Size of TOP vector */
      u16 nIdxCol;           /* Index column used for ORDER BY */
      Index *pIndex;         /* Index used, or NULL */
    } btree;
    struct {               /* Information for virtual tables */
      int idxNum;            /* Index number */
      u8 needFree;           /* True if sqlite3_free(idxStr) is needed */
      i8 isOrdered;          /* True if satisfies ORDER BY */
      u16 omitMask;          /* Terms that may be omitted */
Changes to test/distinct2.test.
135
136
137
138
139
140
141









142
143











144




















do_execsql_test 830 {
  CREATE INDEX i8 ON t8(a, c);
  ANALYZE;
  SELECT DISTINCT a, b, c FROM t8 WHERE b=3;
} {1 3 1}











finish_test






































>
>
>
>
>
>
>
>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183

do_execsql_test 830 {
  CREATE INDEX i8 ON t8(a, c);
  ANALYZE;
  SELECT DISTINCT a, b, c FROM t8 WHERE b=3;
} {1 3 1}

do_execsql_test 900 {
  CREATE TABLE t9(v);
  INSERT INTO t9 VALUES 
    ('abcd'), ('Abcd'), ('aBcd'), ('ABcd'), ('abCd'), ('AbCd'), ('aBCd'), 
    ('ABCd'), ('abcD'), ('AbcD'), ('aBcD'), ('ABcD'), ('abCD'), ('AbCD'), 
    ('aBCD'), ('ABCD'),
    ('wxyz'), ('Wxyz'), ('wXyz'), ('WXyz'), ('wxYz'), ('WxYz'), ('wXYz'), 
    ('WXYz'), ('wxyZ'), ('WxyZ'), ('wXyZ'), ('WXyZ'), ('wxYZ'), ('WxYZ'), 
    ('wXYZ'), ('WXYZ');
}

do_execsql_test 910 {
  SELECT DISTINCT v COLLATE NOCASE, v FROM t9 ORDER BY +v;
} {
  ABCD ABCD ABCd ABCd ABcD ABcD ABcd ABcd AbCD
  AbCD AbCd AbCd AbcD AbcD Abcd Abcd
  WXYZ WXYZ WXYz WXYz WXyZ WXyZ WXyz WXyz WxYZ
  WxYZ WxYz WxYz WxyZ WxyZ Wxyz Wxyz
  aBCD aBCD aBCd aBCd aBcD aBcD aBcd aBcd abCD
  abCD abCd abCd abcD abcD abcd abcd
  wXYZ wXYZ wXYz wXYz wXyZ wXyZ wXyz wXyz wxYZ
  wxYZ wxYz wxYz wxyZ wxyZ wxyz wxyz
}

do_execsql_test 920 {
  CREATE INDEX i9 ON t9(v COLLATE NOCASE, v);
  ANALYZE;

  SELECT DISTINCT v COLLATE NOCASE, v FROM t9 ORDER BY +v;
} {
  ABCD ABCD ABCd ABCd ABcD ABcD ABcd ABcd AbCD
  AbCD AbCd AbCd AbcD AbcD Abcd Abcd
  WXYZ WXYZ WXYz WXYz WXyZ WXyZ WXyz WXyz WxYZ
  WxYZ WxYz WxYz WxyZ WxyZ Wxyz Wxyz
  aBCD aBCD aBCd aBCd aBcD aBcD aBcd aBcd abCD
  abCD abCd abCd abcD abcD abcd abcd
  wXYZ wXYZ wXYz wXYz wXyZ wXyZ wXyz wXyz wxYZ
  wxYZ wxYz wxYz wxyZ wxyZ wxyz wxyz
}


finish_test