SQLite

Check-in [a212128433]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The new optimizer now passes all regression tests. (CVS 2566)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: a21212843359fb9fdbd60799ae50ad3566f4399a
User & Date: drh 2005-07-28 16:51:51.000
Context
2005-07-28
20:51
Test cases and tuning of the new optimizer code. (CVS 2567) (check-in: 4b02703dec user: drh tags: trunk)
16:51
The new optimizer now passes all regression tests. (CVS 2566) (check-in: a212128433 user: drh tags: trunk)
2005-07-27
20:41
More work on the new optimizer. Fewer tests fail now. (CVS 2565) (check-in: ee3a08e353 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/build.c.
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
**     CREATE INDEX
**     DROP INDEX
**     creating ID lists
**     BEGIN TRANSACTION
**     COMMIT
**     ROLLBACK
**
** $Id: build.c,v 1.337 2005/07/27 20:41:44 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** This routine is called when a new SQL statement is beginning to
** be parsed.  Initialize the pParse structure as needed.







|







18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
**     CREATE INDEX
**     DROP INDEX
**     creating ID lists
**     BEGIN TRANSACTION
**     COMMIT
**     ROLLBACK
**
** $Id: build.c,v 1.338 2005/07/28 16:51:51 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** This routine is called when a new SQL statement is beginning to
** be parsed.  Initialize the pParse structure as needed.
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
** are based on typical values found in actual indices.
*/
void sqlite3DefaultRowEst(Index *pIdx){
  int *a = pIdx->aiRowEst;
  int i;
  assert( a!=0 );
  a[0] = 1000000;
  switch( pIdx->nColumn ){
    case 1: {
      a[1] = 20;
      break;
    }
    case 2: {
      a[1] = 350;
      a[2] = 20;
      break;
    }
    default: {
      a[1] = 1250;
      a[2] = 350;
      a[3] = 20;
      for(i=pIdx->nColumn; i>=4; i--){
        a[i] = 10;
      }
    }
  }
  if( pIdx->onError!=OE_None ){
    a[pIdx->nColumn] = 1;
  }
}

/*







<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
<
<







2406
2407
2408
2409
2410
2411
2412














2413
2414


2415
2416
2417
2418
2419
2420
2421
** are based on typical values found in actual indices.
*/
void sqlite3DefaultRowEst(Index *pIdx){
  int *a = pIdx->aiRowEst;
  int i;
  assert( a!=0 );
  a[0] = 1000000;














  for(i=pIdx->nColumn; i>=1; i--){
    a[i] = 10;


  }
  if( pIdx->onError!=OE_None ){
    a[pIdx->nColumn] = 1;
  }
}

/*
Changes to src/where.c.
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  This module is reponsible for
** generating the code that loops through a table looking for applicable
** rows.  Indices are selected and used to speed the search when doing
** so is applicable.  Because this module is responsible for selecting
** indices, you might also think of this module as the "query optimizer".
**
** $Id: where.c,v 1.153 2005/07/27 20:41:44 drh Exp $
*/
#include "sqliteInt.h"

/*
** The number of bits in a Bitmask.  "BMS" means "BitMask Size".
*/
#define BMS  (sizeof(Bitmask)*8)







|







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  This module is reponsible for
** generating the code that loops through a table looking for applicable
** rows.  Indices are selected and used to speed the search when doing
** so is applicable.  Because this module is responsible for selecting
** indices, you might also think of this module as the "query optimizer".
**
** $Id: where.c,v 1.154 2005/07/28 16:51:51 drh Exp $
*/
#include "sqliteInt.h"

/*
** The number of bits in a Bitmask.  "BMS" means "BitMask Size".
*/
#define BMS  (sizeof(Bitmask)*8)
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
      lowestCost *= estLog(lowestCost);
    }else{
      /* Rowid IN (SELECT): cost is NlogN where N is the number of rows
      ** in the result of the inner select.  We have no way to estimate
      ** that value so make a wild guess. */
      lowestCost = 200.0;
    }
    TRACE(("... rowid IN cost: %g\n", lowestCost));
  }

  /* Estimate the cost of a table scan.  If we do not know how many
  ** entries are in the table, use 1 million as a guess.
  */
  pProbe = pSrc->pTab->pIndex;
  cost = pProbe ? pProbe->aiRowEst[0] : 1000000.0;
  TRACE(("... table scan base cost: %g\n", cost));
  flags = WHERE_ROWID_RANGE;

  /* Check for constraints on a range of rowids in a table scan.
  */
  pTerm = findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE|WO_GT|WO_GE, 0);
  if( pTerm ){
    if( findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE, 0) ){
      flags |= WHERE_TOP_LIMIT;
      cost *= 0.333;  /* Guess that rowid<EXPR eliminates two-thirds or rows */
    }
    if( findTerm(pWC, iCur, -1, notReady, WO_GT|WO_GE, 0) ){
      flags |= WHERE_BTM_LIMIT;
      cost *= 0.333;  /* Guess that rowid>EXPR eliminates two-thirds of rows */
    }
    TRACE(("... rowid range reduces cost to %g\n", cost));
  }else{
    flags = 0;
  }

  /* If the table scan does not satisfy the ORDER BY clause, increase
  ** the cost by NlogN to cover the expense of sorting. */
  if( pOrderBy ){
    if( sortableByRowid(iCur, pOrderBy, &rev) ){
      flags |= WHERE_ORDERBY|WHERE_ROWID_RANGE;
      if( rev ){
        flags |= WHERE_REVERSE;
      }
    }else{
      cost += cost*estLog(cost);
      TRACE(("... sorting increases cost to %g\n", cost));
    }
  }
  if( cost<lowestCost ){
    lowestCost = cost;
    bestFlags = flags;
  }








|







|














|














|







709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
      lowestCost *= estLog(lowestCost);
    }else{
      /* Rowid IN (SELECT): cost is NlogN where N is the number of rows
      ** in the result of the inner select.  We have no way to estimate
      ** that value so make a wild guess. */
      lowestCost = 200.0;
    }
    TRACE(("... rowid IN cost: %.9g\n", lowestCost));
  }

  /* Estimate the cost of a table scan.  If we do not know how many
  ** entries are in the table, use 1 million as a guess.
  */
  pProbe = pSrc->pTab->pIndex;
  cost = pProbe ? pProbe->aiRowEst[0] : 1000000.0;
  TRACE(("... table scan base cost: %.9g\n", cost));
  flags = WHERE_ROWID_RANGE;

  /* Check for constraints on a range of rowids in a table scan.
  */
  pTerm = findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE|WO_GT|WO_GE, 0);
  if( pTerm ){
    if( findTerm(pWC, iCur, -1, notReady, WO_LT|WO_LE, 0) ){
      flags |= WHERE_TOP_LIMIT;
      cost *= 0.333;  /* Guess that rowid<EXPR eliminates two-thirds or rows */
    }
    if( findTerm(pWC, iCur, -1, notReady, WO_GT|WO_GE, 0) ){
      flags |= WHERE_BTM_LIMIT;
      cost *= 0.333;  /* Guess that rowid>EXPR eliminates two-thirds of rows */
    }
    TRACE(("... rowid range reduces cost to %.9g\n", cost));
  }else{
    flags = 0;
  }

  /* If the table scan does not satisfy the ORDER BY clause, increase
  ** the cost by NlogN to cover the expense of sorting. */
  if( pOrderBy ){
    if( sortableByRowid(iCur, pOrderBy, &rev) ){
      flags |= WHERE_ORDERBY|WHERE_ROWID_RANGE;
      if( rev ){
        flags |= WHERE_REVERSE;
      }
    }else{
      cost += cost*estLog(cost);
      TRACE(("... sorting increases cost to %.9g\n", cost));
    }
  }
  if( cost<lowestCost ){
    lowestCost = cost;
    bestFlags = flags;
  }

783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
        }else if( pTerm->operator & WO_LIST ){
          inMultiplier *= pTerm->pExpr->pList->nExpr + 1.0;
        }
      }
    }
    cost = pProbe->aiRowEst[i] * inMultiplier * estLog(inMultiplier);
    nEq = i;
    TRACE(("...... nEq=%d inMult=%g cost=%g\n", nEq, inMultiplier, cost));

    /* Look for range constraints
    */
    if( nEq<pProbe->nColumn ){
      int j = pProbe->aiColumn[nEq];
      pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe);
      if( pTerm ){
        flags = WHERE_COLUMN_RANGE;
        if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){
          flags |= WHERE_TOP_LIMIT;
          cost *= 0.333;
        }
        if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){
          flags |= WHERE_BTM_LIMIT;
          cost *= 0.333;
        }
        TRACE(("...... range reduces cost to %g\n", cost));
      }
    }

    /* Add the additional cost of sorting if that is a factor.
    */
    if( pOrderBy ){
      if( (flags & WHERE_COLUMN_IN)==0 &&
        isSortingIndex(pParse, pProbe, pSrc->pTab, iCur, pOrderBy, nEq, &rev) ){
        if( flags==0 ){
          flags = WHERE_COLUMN_RANGE;
        }
        flags |= WHERE_ORDERBY;
        if( rev ){
          flags |= WHERE_REVERSE;
        }
      }else{
        cost += cost*estLog(cost);
        TRACE(("...... orderby reduces cost to %g\n", cost));
      }
    }

    /* Check to see if we can get away with using just the index without
    ** ever reading the table.  If that is the case, then halve the
    ** cost of this index.
    */
    if( flags && pSrc->colUsed < (((Bitmask)1)<<(BMS-1)) ){
      Bitmask m = pSrc->colUsed;
      int j;
      for(j=0; j<pProbe->nColumn; j++){
        int x = pProbe->aiColumn[j];
        if( x<BMS-1 ){
          m &= ~(((Bitmask)1)<<x);
        }
      }
      if( m==0 ){
        flags |= WHERE_IDX_ONLY;
        cost *= 0.5;
        TRACE(("...... idx-only reduces cost to %g\n", cost));
      }
    }

    /* If this index has achieved the lowest cost so far, then use it.
    */
    if( cost < lowestCost ){
      bestIdx = pProbe;
      lowestCost = cost;
      if( flags==0 ){
        flags = WHERE_COLUMN_RANGE;
      }
      bestFlags = flags;
      bestNEq = nEq;
    }
  }

  /* Report the best result
  */
  *ppIndex = bestIdx;
  TRACE(("best index is %s, cost=%g, flags=%x, nEq=%d\n",
        bestIdx ? bestIdx->zName : "(none)", lowestCost, bestFlags, bestNEq));
  *pFlags = bestFlags;
  *pnEq = bestNEq;
  return lowestCost;
}









|
















|

















|



















|



















|







783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
        }else if( pTerm->operator & WO_LIST ){
          inMultiplier *= pTerm->pExpr->pList->nExpr + 1.0;
        }
      }
    }
    cost = pProbe->aiRowEst[i] * inMultiplier * estLog(inMultiplier);
    nEq = i;
    TRACE(("...... nEq=%d inMult=%.9g cost=%.9g\n", nEq, inMultiplier, cost));

    /* Look for range constraints
    */
    if( nEq<pProbe->nColumn ){
      int j = pProbe->aiColumn[nEq];
      pTerm = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pProbe);
      if( pTerm ){
        flags = WHERE_COLUMN_RANGE;
        if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe) ){
          flags |= WHERE_TOP_LIMIT;
          cost *= 0.333;
        }
        if( findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe) ){
          flags |= WHERE_BTM_LIMIT;
          cost *= 0.333;
        }
        TRACE(("...... range reduces cost to %.9g\n", cost));
      }
    }

    /* Add the additional cost of sorting if that is a factor.
    */
    if( pOrderBy ){
      if( (flags & WHERE_COLUMN_IN)==0 &&
        isSortingIndex(pParse, pProbe, pSrc->pTab, iCur, pOrderBy, nEq, &rev) ){
        if( flags==0 ){
          flags = WHERE_COLUMN_RANGE;
        }
        flags |= WHERE_ORDERBY;
        if( rev ){
          flags |= WHERE_REVERSE;
        }
      }else{
        cost += cost*estLog(cost);
        TRACE(("...... orderby increases cost to %.9g\n", cost));
      }
    }

    /* Check to see if we can get away with using just the index without
    ** ever reading the table.  If that is the case, then halve the
    ** cost of this index.
    */
    if( flags && pSrc->colUsed < (((Bitmask)1)<<(BMS-1)) ){
      Bitmask m = pSrc->colUsed;
      int j;
      for(j=0; j<pProbe->nColumn; j++){
        int x = pProbe->aiColumn[j];
        if( x<BMS-1 ){
          m &= ~(((Bitmask)1)<<x);
        }
      }
      if( m==0 ){
        flags |= WHERE_IDX_ONLY;
        cost *= 0.5;
        TRACE(("...... idx-only reduces cost to %.9g\n", cost));
      }
    }

    /* If this index has achieved the lowest cost so far, then use it.
    */
    if( cost < lowestCost ){
      bestIdx = pProbe;
      lowestCost = cost;
      if( flags==0 ){
        flags = WHERE_COLUMN_RANGE;
      }
      bestFlags = flags;
      bestNEq = nEq;
    }
  }

  /* Report the best result
  */
  *ppIndex = bestIdx;
  TRACE(("best index is %s, cost=%.9g, flags=%x, nEq=%d\n",
        bestIdx ? bestIdx->zName : "(none)", lowestCost, bestFlags, bestNEq));
  *pFlags = bestFlags;
  *pnEq = bestNEq;
  return lowestCost;
}


Changes to test/where.test.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 2001 September 15
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the use of indices in WHERE clases.
#
# $Id: where.test,v 1.31 2005/07/21 03:48:20 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Build some test data
#
do_test where-1.0 {













|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 2001 September 15
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the use of indices in WHERE clases.
#
# $Id: where.test,v 1.32 2005/07/28 16:51:51 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Build some test data
#
do_test where-1.0 {
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410





411
412
413
414
415
416
417
418
      SELECT * FROM t1 WHERE y IN (6400,8100) ORDER BY 1;
    }
  } {79 6 6400 89 6 8100 199}
  do_test where-5.12 {
    count {
      SELECT * FROM t1 WHERE x=6 AND y IN (6400,8100) ORDER BY 1;
    }
  } {79 6 6400 89 6 8100 74}
  do_test where-5.13 {
    count {
      SELECT * FROM t1 WHERE x IN (1,7) AND y NOT IN (6400,8100) ORDER BY 1;
    }
  } {2 1 9 3 1 16 7}
  do_test where-5.14 {
    count {
      SELECT * FROM t1 WHERE x IN (1,7) AND y IN (9,10) ORDER BY 1;
    }





  } {2 1 9 7}
}

# This procedure executes the SQL.  Then it checks to see if the OP_Sort
# opcode was executed.  If an OP_Sort did occur, then "sort" is appended
# to the result.  If no OP_Sort happened, then "nosort" is appended.
#
# This procedure is used to check to make sure sorting is or is not







|









>
>
>
>
>
|







394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
      SELECT * FROM t1 WHERE y IN (6400,8100) ORDER BY 1;
    }
  } {79 6 6400 89 6 8100 199}
  do_test where-5.12 {
    count {
      SELECT * FROM t1 WHERE x=6 AND y IN (6400,8100) ORDER BY 1;
    }
  } {79 6 6400 89 6 8100 7}
  do_test where-5.13 {
    count {
      SELECT * FROM t1 WHERE x IN (1,7) AND y NOT IN (6400,8100) ORDER BY 1;
    }
  } {2 1 9 3 1 16 7}
  do_test where-5.14 {
    count {
      SELECT * FROM t1 WHERE x IN (1,7) AND y IN (9,10) ORDER BY 1;
    }
  } {2 1 9 9}
  do_test where-5.15 {
    count {
      SELECT * FROM t1 WHERE x IN (1,7) AND y IN (9,16) ORDER BY 1;
    }
  } {2 1 9 3 1 16 11}
}

# This procedure executes the SQL.  Then it checks to see if the OP_Sort
# opcode was executed.  If an OP_Sort did occur, then "sort" is appended
# to the result.  If no OP_Sort happened, then "nosort" is appended.
#
# This procedure is used to check to make sure sorting is or is not
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
    SELECT * FROM t3 WHERE a>0 ORDER BY a LIMIT 3
  }
} {1 100 4 2 99 9 3 98 16 nosort}
do_test where-6.7 {
  cksort {
    SELECT * FROM t3 WHERE b>0 ORDER BY a LIMIT 3
  }
} {1 100 4 2 99 9 3 98 16 sort}
ifcapable subquery {
  do_test where-6.8 {
    cksort {
      SELECT * FROM t3 WHERE a IN (3,5,7,1,9,4,2) ORDER BY a LIMIT 3
    }
  } {1 100 4 2 99 9 3 98 16 sort}
}







|







468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
    SELECT * FROM t3 WHERE a>0 ORDER BY a LIMIT 3
  }
} {1 100 4 2 99 9 3 98 16 nosort}
do_test where-6.7 {
  cksort {
    SELECT * FROM t3 WHERE b>0 ORDER BY a LIMIT 3
  }
} {1 100 4 2 99 9 3 98 16 nosort}
ifcapable subquery {
  do_test where-6.8 {
    cksort {
      SELECT * FROM t3 WHERE a IN (3,5,7,1,9,4,2) ORDER BY a LIMIT 3
    }
  } {1 100 4 2 99 9 3 98 16 sort}
}