/ Check-in [e7a714b5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:If the outer loop of a join must be a full table scan, make sure that an incomplete ANALYZE does not trick the planner into use a table that might be indexable in an inner loop. Ticket [13f033c865f878]
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:e7a714b52c45af096af74049826d32c647abfe3f
User & Date: drh 2010-08-04 21:17:16
Context
2010-08-05
02:52
Fix the query planner so that when it has a choice of full-scan tables to move to the outer loop, it chooses the one that is likely to give the fewest output rows. Ticket [13f033c865f878]. check-in: 309bbedf user: drh tags: trunk
2010-08-04
21:17
If the outer loop of a join must be a full table scan, make sure that an incomplete ANALYZE does not trick the planner into use a table that might be indexable in an inner loop. Ticket [13f033c865f878] check-in: e7a714b5 user: drh tags: trunk
11:34
When opening a write-transaction on a database file that has been appended to or truncated by a pre-3.7.0 client, update the database-size field in the database header. Fix for [51ae9cad31]. check-in: 65b8636a user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/where.c.

4060
4061
4062
4063
4064
4065
4066

4067
4068
4069
4070
4071
4072
4073
....
4101
4102
4103
4104
4105
4106
4107

4108
4109
4110
4111
4112
4113
4114
....
4117
4118
4119
4120
4121
4122
4123

4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136















4137


4138
4139


4140
4141
4142
4143
4144
4145
4146
  for(i=iFrom=0, pLevel=pWInfo->a; i<nTabList; i++, pLevel++){
    WhereCost bestPlan;         /* Most efficient plan seen so far */
    Index *pIdx;                /* Index for FROM table at pTabItem */
    int j;                      /* For looping over FROM tables */
    int bestJ = -1;             /* The value of j */
    Bitmask m;                  /* Bitmask value for j or bestJ */
    int isOptimal;              /* Iterator for optimal/non-optimal search */


    memset(&bestPlan, 0, sizeof(bestPlan));
    bestPlan.rCost = SQLITE_BIG_DBL;

    /* Loop through the remaining entries in the FROM clause to find the
    ** next nested loop. The loop tests all FROM clause entries
    ** either once or twice. 
................................................................................
    ** The best strategy is to iterate through table t1 first. However it
    ** is not possible to determine this with a simple greedy algorithm.
    ** However, since the cost of a linear scan through table t2 is the same 
    ** as the cost of a linear scan through table t1, a simple greedy 
    ** algorithm may choose to use t2 for the outer loop, which is a much
    ** costlier approach.
    */

    for(isOptimal=(iFrom<nTabList-1); isOptimal>=0; isOptimal--){
      Bitmask mask;  /* Mask of tables not yet ready */
      for(j=iFrom, pTabItem=&pTabList->a[j]; j<nTabList; j++, pTabItem++){
        int doNotReorder;    /* True if this table should not be reordered */
        WhereCost sCost;     /* Cost information from best[Virtual]Index() */
        ExprList *pOrderBy;  /* ORDER BY clause for index to optimize */
  
................................................................................
        m = getMask(pMaskSet, pTabItem->iCursor);
        if( (m & notReady)==0 ){
          if( j==iFrom ) iFrom++;
          continue;
        }
        mask = (isOptimal ? m : notReady);
        pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0);

  
        assert( pTabItem->pTab );
#ifndef SQLITE_OMIT_VIRTUALTABLE
        if( IsVirtual(pTabItem->pTab) ){
          sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo;
          bestVirtualIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost, pp);
        }else 
#endif
        {
          bestBtreeIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost);
        }
        assert( isOptimal || (sCost.used&notReady)==0 );
















        if( (sCost.used&notReady)==0


         && (bestJ<0 || sCost.rCost<bestPlan.rCost
             || (sCost.rCost<=bestPlan.rCost && sCost.nRow<bestPlan.nRow))


        ){
          WHERETRACE(("... best so far with cost=%g and nRow=%g\n",
                      sCost.rCost, sCost.nRow));
          bestPlan = sCost;
          bestJ = j;
        }
        if( doNotReorder ) break;







>







 







>







 







>













>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
|
|
>
>







4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
....
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
....
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
  for(i=iFrom=0, pLevel=pWInfo->a; i<nTabList; i++, pLevel++){
    WhereCost bestPlan;         /* Most efficient plan seen so far */
    Index *pIdx;                /* Index for FROM table at pTabItem */
    int j;                      /* For looping over FROM tables */
    int bestJ = -1;             /* The value of j */
    Bitmask m;                  /* Bitmask value for j or bestJ */
    int isOptimal;              /* Iterator for optimal/non-optimal search */
    int nUnconstrained;         /* Number tables without INDEXED BY */

    memset(&bestPlan, 0, sizeof(bestPlan));
    bestPlan.rCost = SQLITE_BIG_DBL;

    /* Loop through the remaining entries in the FROM clause to find the
    ** next nested loop. The loop tests all FROM clause entries
    ** either once or twice. 
................................................................................
    ** The best strategy is to iterate through table t1 first. However it
    ** is not possible to determine this with a simple greedy algorithm.
    ** However, since the cost of a linear scan through table t2 is the same 
    ** as the cost of a linear scan through table t1, a simple greedy 
    ** algorithm may choose to use t2 for the outer loop, which is a much
    ** costlier approach.
    */
    nUnconstrained = 0;
    for(isOptimal=(iFrom<nTabList-1); isOptimal>=0; isOptimal--){
      Bitmask mask;  /* Mask of tables not yet ready */
      for(j=iFrom, pTabItem=&pTabList->a[j]; j<nTabList; j++, pTabItem++){
        int doNotReorder;    /* True if this table should not be reordered */
        WhereCost sCost;     /* Cost information from best[Virtual]Index() */
        ExprList *pOrderBy;  /* ORDER BY clause for index to optimize */
  
................................................................................
        m = getMask(pMaskSet, pTabItem->iCursor);
        if( (m & notReady)==0 ){
          if( j==iFrom ) iFrom++;
          continue;
        }
        mask = (isOptimal ? m : notReady);
        pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0);
        if( pTabItem->pIndex==0 ) nUnconstrained++;
  
        assert( pTabItem->pTab );
#ifndef SQLITE_OMIT_VIRTUALTABLE
        if( IsVirtual(pTabItem->pTab) ){
          sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo;
          bestVirtualIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost, pp);
        }else 
#endif
        {
          bestBtreeIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost);
        }
        assert( isOptimal || (sCost.used&notReady)==0 );

        /* Conditions under which this table becomes the best so far:
        **
        **   (1) The table must not depend on other tables that have not
        **       yet run.
        **
        **   (2) A full-table-scan plan cannot supercede another plan unless
        **       the full-table-scan is an optimal plan.
        **
        **   (3) The plan cost must be lower than prior plans or else the
        **       cost must be the same and the number of rows must be lower.
        **
        **   (4) All tables have an INDEXED BY clause or this table lacks an
        **       INDEXED BY clause or this table uses the specific
        **       index specified by its INDEXED BY clause.
        */
        if( (sCost.used&notReady)==0                       /* (1) */
            && (bestJ<0 || isOptimal                       /* (2) */
                || (sCost.plan.wsFlags & WHERE_NOT_FULLSCAN)!=0)
            && (bestJ<0 || sCost.rCost<bestPlan.rCost      /* (3) */
                || (sCost.rCost<=bestPlan.rCost && sCost.nRow<bestPlan.nRow))
            && (nUnconstrained==0 || pTabItem->pIndex==0   /* (4) */
                || pTabItem->pIndex==sCost.plan.u.pIdx)
        ){
          WHERETRACE(("... best so far with cost=%g and nRow=%g\n",
                      sCost.rCost, sCost.nRow));
          bestPlan = sCost;
          bestJ = j;
        }
        if( doNotReorder ) break;

Changes to test/indexedby.test.

119
120
121
122
123
124
125










126
127
128
129
130
131
132
#
do_test indexedby-4.1 {
  EQP { SELECT * FROM t1, t2 WHERE a = c }
} {0 0 {TABLE t1} 1 1 {TABLE t2 WITH INDEX i3}}
do_test indexedby-4.2 {
  EQP { SELECT * FROM t1 INDEXED BY i1, t2 WHERE a = c }
} {0 1 {TABLE t2} 1 0 {TABLE t1 WITH INDEX i1}}











# Test embedding an INDEXED BY in a CREATE VIEW statement. This block
# also tests that nothing bad happens if an index refered to by
# a CREATE VIEW statement is dropped and recreated.
#
do_test indexedby-5.1 {
  execsql {







>
>
>
>
>
>
>
>
>
>







119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#
do_test indexedby-4.1 {
  EQP { SELECT * FROM t1, t2 WHERE a = c }
} {0 0 {TABLE t1} 1 1 {TABLE t2 WITH INDEX i3}}
do_test indexedby-4.2 {
  EQP { SELECT * FROM t1 INDEXED BY i1, t2 WHERE a = c }
} {0 1 {TABLE t2} 1 0 {TABLE t1 WITH INDEX i1}}
do_test indexedby-4.3 {
  catchsql {
    SELECT * FROM t1 INDEXED BY i1, t2 INDEXED BY i3 WHERE a=c
  }
} {1 {cannot use index: i1}}
do_test indexedby-4.4 {
  catchsql {
    SELECT * FROM t2 INDEXED BY i3, t1 INDEXED BY i1 WHERE a=c
  }
} {1 {cannot use index: i3}}

# Test embedding an INDEXED BY in a CREATE VIEW statement. This block
# also tests that nothing bad happens if an index refered to by
# a CREATE VIEW statement is dropped and recreated.
#
do_test indexedby-5.1 {
  execsql {

Changes to test/where3.test.

208
209
210
211
212
213
214




215


















216
do_test where3-2.7 {
  queryplan {
    SELECT * FROM tA, tB, tC LEFT JOIN tD ON dpk=cx
     WHERE cpk=bx AND apk=cx
  }
} {tB {} tC * tA * tD *}
























finish_test







>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
do_test where3-2.7 {
  queryplan {
    SELECT * FROM tA, tB, tC LEFT JOIN tD ON dpk=cx
     WHERE cpk=bx AND apk=cx
  }
} {tB {} tC * tA * tD *}

# Ticket [13f033c865f878953]
# If the outer loop must be a full table scan, do not let ANALYZE trick
# the planner into use a table for the outer loop that might be indexable
# if held until an inner loop.
# 
do_test where3-3.0 {
  execsql {
    CREATE TABLE t301(a INTEGER PRIMARY KEY,b,c);
    CREATE INDEX t301c ON t301(c);
    INSERT INTO t301 VALUES(1,2,3);
    CREATE TABLE t302(x, y);
    ANALYZE;
    explain query plan
    SELECT * FROM t302, t301 WHERE t302.x=5 AND t301.a=t302.y;
  }
} {0 0 {TABLE t302} 1 1 {TABLE t301 USING PRIMARY KEY}}
do_test where3-3.1 {
  execsql {
    explain query plan
    SELECT * FROM t301, t302 WHERE t302.x=5 AND t301.a=t302.y;
  }
} {0 1 {TABLE t302} 1 0 {TABLE t301 USING PRIMARY KEY}}

finish_test