SQLite

Check-in [d23c8bf81e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimizer now converts OR-connected WHERE-clause terms into an IN operator so that they can be used with indices. There are known problems with the ORDER BY optimization in this and in several prior check-ins. This check-in is not recommended for production use. (CVS 2569)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: d23c8bf81e508722e92ff1b9c8bc98dc026a31f2
User & Date: drh 2005-07-29 15:10:18.000
Context
2005-07-29
15:36
Fix authentication so that it works with AS aliases. Ticket #1338. (CVS 2570) (check-in: cc7ae73ed0 user: drh tags: trunk)
15:10
Optimizer now converts OR-connected WHERE-clause terms into an IN operator so that they can be used with indices. There are known problems with the ORDER BY optimization in this and in several prior check-ins. This check-in is not recommended for production use. (CVS 2569) (check-in: d23c8bf81e user: drh tags: trunk)
2005-07-28
23:12
The BETWEEN operator in a WHERE clause is now able to use indices. (CVS 2568) (check-in: cdf8c9584b user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/expr.c.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used for analyzing expressions and
** for generating VDBE code that evaluates expressions in SQLite.
**
** $Id: expr.c,v 1.213 2005/07/21 18:23:20 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** Return the 'affinity' of the expression pExpr if any.
**







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used for analyzing expressions and
** for generating VDBE code that evaluates expressions in SQLite.
**
** $Id: expr.c,v 1.214 2005/07/29 15:10:18 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** Return the 'affinity' of the expression pExpr if any.
**
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
          Expr *pE2 = pItem->pExpr;

          /* If the expression is not constant then we will need to
          ** disable the test that was generated above that makes sure
          ** this code only executes once.  Because for a non-constant
          ** expression we need to rerun this code each time.
          */
          if( testAddr>=0 && !sqlite3ExprIsConstant(pE2) ){
            VdbeOp *aOp = sqlite3VdbeGetOp(v, testAddr-1);
            int i;
            for(i=0; i<4; i++){
              aOp[i].opcode = OP_Noop;
            }
            testAddr = 0;
          }







|







1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
          Expr *pE2 = pItem->pExpr;

          /* If the expression is not constant then we will need to
          ** disable the test that was generated above that makes sure
          ** this code only executes once.  Because for a non-constant
          ** expression we need to rerun this code each time.
          */
          if( testAddr>0 && !sqlite3ExprIsConstant(pE2) ){
            VdbeOp *aOp = sqlite3VdbeGetOp(v, testAddr-1);
            int i;
            for(i=0; i<4; i++){
              aOp[i].opcode = OP_Noop;
            }
            testAddr = 0;
          }
Changes to src/where.c.
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  This module is reponsible for
** generating the code that loops through a table looking for applicable
** rows.  Indices are selected and used to speed the search when doing
** so is applicable.  Because this module is responsible for selecting
** indices, you might also think of this module as the "query optimizer".
**
** $Id: where.c,v 1.156 2005/07/28 23:12:08 drh Exp $
*/
#include "sqliteInt.h"

/*
** The number of bits in a Bitmask.  "BMS" means "BitMask Size".
*/
#define BMS  (sizeof(Bitmask)*8)







|







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  This module is reponsible for
** generating the code that loops through a table looking for applicable
** rows.  Indices are selected and used to speed the search when doing
** so is applicable.  Because this module is responsible for selecting
** indices, you might also think of this module as the "query optimizer".
**
** $Id: where.c,v 1.157 2005/07/29 15:10:18 drh Exp $
*/
#include "sqliteInt.h"

/*
** The number of bits in a Bitmask.  "BMS" means "BitMask Size".
*/
#define BMS  (sizeof(Bitmask)*8)
78
79
80
81
82
83
84
85
86
87
88

89
90
91
92
93
94
95
96
97
98
99
100


101
102
103
104
105
106
107
** would be mapped into integers 0 through 7.
*/
typedef struct WhereTerm WhereTerm;
struct WhereTerm {
  Expr *pExpr;            /* Pointer to the subexpression */
  u16 idx;                /* Index of this term in pWC->a[] */
  i16 iPartner;           /* Disable pWC->a[iPartner] when this term disabled */
  u16 flags;              /* Bit flags.  See below */
  i16 leftCursor;         /* Cursor number of X in "X <op> <expr>" */
  i16 leftColumn;         /* Column number of X in "X <op> <expr>" */
  u16 operator;           /* A WO_xx value describing <op> */

  u8 nPartner;            /* Number of partners that must disable us */
  WhereClause *pWC;       /* The clause this term is part of */
  Bitmask prereqRight;    /* Bitmask of tables used by pRight */
  Bitmask prereqAll;      /* Bitmask of tables referenced by p */
};

/*
** Allowed values of WhereTerm.flags
*/
#define TERM_DYNAMIC    0x0001   /* Need to call sqlite3ExprDelete(pExpr) */
#define TERM_VIRTUAL    0x0002   /* Added by the optimizer.  Do not code */
#define TERM_CODED      0x0004   /* This term is already coded */



/*
** An instance of the following structure holds all information about a
** WHERE clause.  Mostly this is a container for one or more WhereTerms.
*/
struct WhereClause {
  Parse *pParse;           /* The parser context */







<



>









|
|
|
>
>







78
79
80
81
82
83
84

85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
** would be mapped into integers 0 through 7.
*/
typedef struct WhereTerm WhereTerm;
struct WhereTerm {
  Expr *pExpr;            /* Pointer to the subexpression */
  u16 idx;                /* Index of this term in pWC->a[] */
  i16 iPartner;           /* Disable pWC->a[iPartner] when this term disabled */

  i16 leftCursor;         /* Cursor number of X in "X <op> <expr>" */
  i16 leftColumn;         /* Column number of X in "X <op> <expr>" */
  u16 operator;           /* A WO_xx value describing <op> */
  u8 flags;               /* Bit flags.  See below */
  u8 nPartner;            /* Number of partners that must disable us */
  WhereClause *pWC;       /* The clause this term is part of */
  Bitmask prereqRight;    /* Bitmask of tables used by pRight */
  Bitmask prereqAll;      /* Bitmask of tables referenced by p */
};

/*
** Allowed values of WhereTerm.flags
*/
#define TERM_DYNAMIC    0x01   /* Need to call sqlite3ExprDelete(pExpr) */
#define TERM_VIRTUAL    0x02   /* Added by the optimizer.  Do not code */
#define TERM_CODED      0x04   /* This term is already coded */
#define TERM_PARTNERED  0x08   /* Has a virtual partner */
#define TERM_OR_OK      0x10   /* Used during OR-clause processing */

/*
** An instance of the following structure holds all information about a
** WHERE clause.  Mostly this is a container for one or more WhereTerms.
*/
struct WhereClause {
  Parse *pParse;           /* The parser context */
221
222
223
224
225
226
227
228

229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
  pTerm->pWC = pWC;
  pTerm->iPartner = -1;
  return pTerm;
}

/*
** This routine identifies subexpressions in the WHERE clause where
** each subexpression is separate by the AND operator.  aSlot is 

** filled with pointers to the subexpressions.  For example:
**
**    WHERE  a=='hello' AND coalesce(b,11)<10 AND (c+12!=d OR c==22)
**           \________/     \_______________/     \________________/
**            slot[0]            slot[1]               slot[2]
**
** The original WHERE clause in pExpr is unaltered.  All this routine
** does is make slot[] entries point to substructure within pExpr.
**
** In the previous sentence and in the diagram, "slot[]" refers to
** the WhereClause.a[] array.  This array grows as needed to contain
** all terms of the WHERE clause.
*/
static void whereSplit(WhereClause *pWC, Expr *pExpr){
  if( pExpr==0 ) return;
  if( pExpr->op!=TK_AND ){
    whereClauseInsert(pWC, pExpr, 0);
  }else{
    whereSplit(pWC, pExpr->pLeft);
    whereSplit(pWC, pExpr->pRight);
  }
}

/*
** Initialize an expression mask set
*/
#define initMaskSet(P)  memset(P, 0, sizeof(*P))







|
>
|












|

|


|
|







223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
  pTerm->pWC = pWC;
  pTerm->iPartner = -1;
  return pTerm;
}

/*
** This routine identifies subexpressions in the WHERE clause where
** each subexpression is separate by the AND operator or some other
** operator specified in the op parameter.  The WhereClause structure
** is filled with pointers to subexpressions.  For example:
**
**    WHERE  a=='hello' AND coalesce(b,11)<10 AND (c+12!=d OR c==22)
**           \________/     \_______________/     \________________/
**            slot[0]            slot[1]               slot[2]
**
** The original WHERE clause in pExpr is unaltered.  All this routine
** does is make slot[] entries point to substructure within pExpr.
**
** In the previous sentence and in the diagram, "slot[]" refers to
** the WhereClause.a[] array.  This array grows as needed to contain
** all terms of the WHERE clause.
*/
static void whereSplit(WhereClause *pWC, Expr *pExpr, int op){
  if( pExpr==0 ) return;
  if( pExpr->op!=op ){
    whereClauseInsert(pWC, pExpr, 0);
  }else{
    whereSplit(pWC, pExpr->pLeft, op);
    whereSplit(pWC, pExpr->pRight, op);
  }
}

/*
** Initialize an expression mask set
*/
#define initMaskSet(P)  memset(P, 0, sizeof(*P))
428
429
430
431
432
433
434




















435
436
437
438
439
440
441
        if( pColl!=pIdx->keyInfo.aColl[k] ) continue;
      }
      return pTerm;
    }
  }
  return 0;
}





















/*
** The input to this routine is an WhereTerm structure with only the
** "pExpr" field filled in.  The job of this routine is to analyze the
** subexpression and populate all the other fields of the WhereTerm
** structure.
**







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
        if( pColl!=pIdx->keyInfo.aColl[k] ) continue;
      }
      return pTerm;
    }
  }
  return 0;
}

/* Forward reference */
static void exprAnalyze(SrcList*, ExprMaskSet*, WhereTerm*);

/*
** Call exprAnalyze on all terms in a WHERE clause.  
**
**
*/
static void exprAnalyzeAll(
  SrcList *pTabList,       /* the FROM clause */
  ExprMaskSet *pMaskSet,   /* table masks */
  WhereClause *pWC         /* the WHERE clause to be analyzed */
){
  WhereTerm *pTerm;
  int i;
  for(i=pWC->nTerm-1, pTerm=pWC->a; i>=0; i--, pTerm++){
    exprAnalyze(pTabList, pMaskSet, pTerm);
  }
}

/*
** The input to this routine is an WhereTerm structure with only the
** "pExpr" field filled in.  The job of this routine is to analyze the
** subexpression and populate all the other fields of the WhereTerm
** structure.
**
475
476
477
478
479
480
481

482
483
484
485
486
487
488
      Expr *pDup;
      if( pTerm->leftCursor>=0 ){
        pDup = sqlite3ExprDup(pExpr);
        pNew = whereClauseInsert(pTerm->pWC, pDup, TERM_VIRTUAL|TERM_DYNAMIC);
        if( pNew==0 ) return;
        pNew->iPartner = pTerm->idx;
        pTerm->nPartner = 1;

      }else{
        pDup = pExpr;
        pNew = pTerm;
      }
      exprCommute(pDup);
      pLeft = pDup->pLeft;
      pNew->leftCursor = pLeft->iTable;







>







498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
      Expr *pDup;
      if( pTerm->leftCursor>=0 ){
        pDup = sqlite3ExprDup(pExpr);
        pNew = whereClauseInsert(pTerm->pWC, pDup, TERM_VIRTUAL|TERM_DYNAMIC);
        if( pNew==0 ) return;
        pNew->iPartner = pTerm->idx;
        pTerm->nPartner = 1;
        pTerm->flags |= TERM_PARTNERED;
      }else{
        pDup = pExpr;
        pNew = pTerm;
      }
      exprCommute(pDup);
      pLeft = pDup->pLeft;
      pNew->leftCursor = pLeft->iTable;
511
512
513
514
515
516
517









518















































519
520
521
522
523
524
525
                                   TERM_VIRTUAL|TERM_DYNAMIC);
      exprAnalyze(pSrc, pMaskSet, pNewTerm);
      pNewTerm->iPartner = pTerm->idx;
    }
    pTerm->nPartner = 2;
  }










  















































}


/*
** This routine decides if pIdx can be used to satisfy the ORDER BY
** clause.  If it can, it returns 1.  If pIdx cannot satisfy the
** ORDER BY clause, this routine returns 0.







>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
                                   TERM_VIRTUAL|TERM_DYNAMIC);
      exprAnalyze(pSrc, pMaskSet, pNewTerm);
      pNewTerm->iPartner = pTerm->idx;
    }
    pTerm->nPartner = 2;
  }

  /* Attempt to convert OR-connected terms into an IN operator so that
  ** they can make use of indices.
  */
  else if( pExpr->op==TK_OR ){
    int ok;
    int i, j;
    int iColumn, iCursor;
    WhereClause sOr;
    WhereTerm *pOrTerm;

    assert( (pTerm->flags & TERM_DYNAMIC)==0 );
    whereClauseInit(&sOr, pTerm->pWC->pParse);
    whereSplit(&sOr, pExpr, TK_OR);
    exprAnalyzeAll(pSrc, pMaskSet, &sOr);
    assert( sOr.nTerm>0 );
    j = 0;
    do{
      iColumn = sOr.a[j].leftColumn;
      iCursor = sOr.a[j].leftCursor;
      ok = iCursor>=0;
      for(i=sOr.nTerm-1, pOrTerm=sOr.a; i>=0 && ok; i--, pOrTerm++){
        if( pOrTerm->operator!=WO_EQ ){
          goto or_not_possible;
        }
        if( pOrTerm->leftCursor==iCursor && pOrTerm->leftColumn==iColumn ){
          pOrTerm->flags |= TERM_OR_OK;
        }else if( (pOrTerm->flags & TERM_PARTNERED)!=0 ||
                    ((pOrTerm->flags & TERM_VIRTUAL)!=0 &&
                     (sOr.a[pOrTerm->iPartner].flags & TERM_OR_OK)!=0) ){
          pOrTerm->flags &= ~TERM_OR_OK;
        }else{
          ok = 0;
        }
      }
    }while( !ok && (sOr.a[j++].flags & TERM_PARTNERED)!=0 && j<sOr.nTerm );
    if( ok ){
      ExprList *pList = 0;
      Expr *pNew, *pDup;
      for(i=sOr.nTerm-1, pOrTerm=sOr.a; i>=0 && ok; i--, pOrTerm++){
        if( (pOrTerm->flags & TERM_OR_OK)==0 ) continue;
        pDup = sqlite3ExprDup(pOrTerm->pExpr->pRight);
        pList = sqlite3ExprListAppend(pList, pDup, 0);
      }
      pDup = sqlite3Expr(TK_COLUMN, 0, 0, 0);
      if( pDup ){
        pDup->iTable = iCursor;
        pDup->iColumn = iColumn;
      }
      pNew = sqlite3Expr(TK_IN, pDup, 0, 0);
      if( pNew ) pNew->pList = pList;
      pTerm->pExpr = pNew;
      pTerm->flags |= TERM_DYNAMIC;
      exprAnalyze(pSrc, pMaskSet, pTerm);
    }
or_not_possible:
    whereClauseClear(&sOr);
  }
}


/*
** This routine decides if pIdx can be used to satisfy the ORDER BY
** clause.  If it can, it returns 1.  If pIdx cannot satisfy the
** ORDER BY clause, this routine returns 0.
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
  }

  /* Split the WHERE clause into separate subexpressions where each
  ** subexpression is separated by an AND operator.
  */
  initMaskSet(&maskSet);
  whereClauseInit(&wc, pParse);
  whereSplit(&wc, pWhere);
    
  /* Allocate and initialize the WhereInfo structure that will become the
  ** return value.
  */
  pWInfo = sqliteMalloc( sizeof(WhereInfo) + pTabList->nSrc*sizeof(WhereLevel));
  if( sqlite3_malloc_failed ){
    goto whereBeginNoMem;







|







1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
  }

  /* Split the WHERE clause into separate subexpressions where each
  ** subexpression is separated by an AND operator.
  */
  initMaskSet(&maskSet);
  whereClauseInit(&wc, pParse);
  whereSplit(&wc, pWhere, TK_AND);
    
  /* Allocate and initialize the WhereInfo structure that will become the
  ** return value.
  */
  pWInfo = sqliteMalloc( sizeof(WhereInfo) + pTabList->nSrc*sizeof(WhereLevel));
  if( sqlite3_malloc_failed ){
    goto whereBeginNoMem;
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
  ** add new virtual terms onto the end of the WHERE clause.  We do not
  ** want to analyze these virtual terms, so start analyzing at the end
  ** and work forward so that they added virtual terms are never processed.
  */
  for(i=0; i<pTabList->nSrc; i++){
    createMask(&maskSet, pTabList->a[i].iCursor);
  }
  for(i=wc.nTerm-1; i>=0; i--){
    exprAnalyze(pTabList, &maskSet, &wc.a[i]);
  }

  /* Chose the best index to use for each table in the FROM clause.
  **
  ** This loop fills in the following fields:
  **
  **   pWInfo->a[].pIdx      The index to use for this level of the loop.
  **   pWInfo->a[].flags     WHERE_xxx flags associated with pIdx







<
|
<







1301
1302
1303
1304
1305
1306
1307

1308

1309
1310
1311
1312
1313
1314
1315
  ** add new virtual terms onto the end of the WHERE clause.  We do not
  ** want to analyze these virtual terms, so start analyzing at the end
  ** and work forward so that they added virtual terms are never processed.
  */
  for(i=0; i<pTabList->nSrc; i++){
    createMask(&maskSet, pTabList->a[i].iCursor);
  }

  exprAnalyzeAll(pTabList, &maskSet, &wc);


  /* Chose the best index to use for each table in the FROM clause.
  **
  ** This loop fills in the following fields:
  **
  **   pWInfo->a[].pIdx      The index to use for this level of the loop.
  **   pWInfo->a[].flags     WHERE_xxx flags associated with pIdx
Changes to test/where2.test.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the use of indices in WHERE clauses
# based on recent changes to the optimizer.
#
# $Id: where2.test,v 1.2 2005/07/28 23:12:08 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Build some test data
#
do_test where2-1.0 {







|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the use of indices in WHERE clauses
# based on recent changes to the optimizer.
#
# $Id: where2.test,v 1.3 2005/07/29 15:10:19 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Build some test data
#
do_test where2-1.0 {
197
198
199
200
201
202
203






































204
205

206
} {99 6 10000 10006 nosort t1 i1w}
do_test where2-5.2 {
  queryplan {
    SELECT * FROM t1 WHERE w IN (99) ORDER BY w
  }
} {99 6 10000 10006 sort t1 i1w}







































integrity_check {where2-99.0}


finish_test







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|

>

197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
} {99 6 10000 10006 nosort t1 i1w}
do_test where2-5.2 {
  queryplan {
    SELECT * FROM t1 WHERE w IN (99) ORDER BY w
  }
} {99 6 10000 10006 sort t1 i1w}

# Verify that OR clauses get translated into IN operators.
#
do_test where2-6.1 {
  queryplan {
    SELECT * FROM t1 WHERE w=99 OR w=100 ORDER BY +w
  }
} {99 6 10000 10006 100 6 10201 10207 sort t1 i1w}
do_test where2-6.2 {
  queryplan {
    SELECT * FROM t1 WHERE w=99 OR w=100 OR 6=w ORDER BY +w
  }
} {6 2 49 51 99 6 10000 10006 100 6 10201 10207 sort t1 i1w}
do_test where2-6.3 {
  queryplan {
    SELECT * FROM t1 WHERE w=99 OR w=100 OR 6=+w ORDER BY +w
  }
} {6 2 49 51 99 6 10000 10006 100 6 10201 10207 sort t1 {}}
do_test where2-6.4 {
  queryplan {
    SELECT * FROM t1 WHERE w=99 OR +w=100 OR 6=w ORDER BY +w
  }
} {6 2 49 51 99 6 10000 10006 100 6 10201 10207 sort t1 {}}
if 0 {  ##### FIX ME #####
do_test where2-6.5 {
  queryplan {
    SELECT b.* FROM t1 a, t1 b
     WHERE a.w=1 AND (a.y=b.z OR b.z=10)
     ORDER BY +b.w
  }
} {1 0 4 4 2 1 9 10 sort a i1w b i1zyx}
do_test where2-6.6 {
  queryplan {
    SELECT b.* FROM t1 a, t1 b
     WHERE a.w=1 AND (b.z=10 OR a.y=b.z OR b.z=10)
     ORDER BY +b.w
  }
} {1 0 4 4 2 1 9 10 sort a i1w b i1zyx}
}



finish_test