/ Check-in [309bbedf]
Login
Overview
Comment:Fix the query planner so that when it has a choice of full-scan tables to move to the outer loop, it chooses the one that is likely to give the fewest output rows.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:309bbedf9648c750d7b8aedbc15d4fd68f846824
User & Date: drh 2010-08-05 02:52:32
Original Comment: Fix the query planner so that when it has a choice of full-scan tables to move to the outer loop, it chooses the one that is likely to give the fewest output rows.
Context
2010-08-05
03:21
Do not read the database file size on a SAVEPOINT rollback any more since after checkin [65b8636ac6e5] the in-header-size field is always valid. check-in: fbe70e11 user: drh tags: trunk
02:52
Fix the query planner so that when it has a choice of full-scan tables to move to the outer loop, it chooses the one that is likely to give the fewest output rows. Ticket [13f033c865f878]. check-in: 309bbedf user: drh tags: trunk
2010-08-04
21:17
If the outer loop of a join must be a full table scan, make sure that an incomplete ANALYZE does not trick the planner into use a table that might be indexable in an inner loop. Ticket [13f033c865f878] check-in: e7a714b5 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/where.c.

4061
4062
4063
4064
4065
4066
4067

4068
4069
4070
4071
4072
4073
4074
....
4103
4104
4105
4106
4107
4108
4109

4110
4111
4112
4113
4114
4115
4116
4117
4118
....
4132
4133
4134
4135
4136
4137
4138










4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153








4154
4155
4156

4157


4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
    WhereCost bestPlan;         /* Most efficient plan seen so far */
    Index *pIdx;                /* Index for FROM table at pTabItem */
    int j;                      /* For looping over FROM tables */
    int bestJ = -1;             /* The value of j */
    Bitmask m;                  /* Bitmask value for j or bestJ */
    int isOptimal;              /* Iterator for optimal/non-optimal search */
    int nUnconstrained;         /* Number tables without INDEXED BY */


    memset(&bestPlan, 0, sizeof(bestPlan));
    bestPlan.rCost = SQLITE_BIG_DBL;

    /* Loop through the remaining entries in the FROM clause to find the
    ** next nested loop. The loop tests all FROM clause entries
    ** either once or twice. 
................................................................................
    ** is not possible to determine this with a simple greedy algorithm.
    ** However, since the cost of a linear scan through table t2 is the same 
    ** as the cost of a linear scan through table t1, a simple greedy 
    ** algorithm may choose to use t2 for the outer loop, which is a much
    ** costlier approach.
    */
    nUnconstrained = 0;

    for(isOptimal=(iFrom<nTabList-1); isOptimal>=0; isOptimal--){
      Bitmask mask;  /* Mask of tables not yet ready */
      for(j=iFrom, pTabItem=&pTabList->a[j]; j<nTabList; j++, pTabItem++){
        int doNotReorder;    /* True if this table should not be reordered */
        WhereCost sCost;     /* Cost information from best[Virtual]Index() */
        ExprList *pOrderBy;  /* ORDER BY clause for index to optimize */
  
        doNotReorder =  (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0;
        if( j!=iFrom && doNotReorder ) break;
................................................................................
          bestVirtualIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost, pp);
        }else 
#endif
        {
          bestBtreeIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost);
        }
        assert( isOptimal || (sCost.used&notReady)==0 );











        /* Conditions under which this table becomes the best so far:
        **
        **   (1) The table must not depend on other tables that have not
        **       yet run.
        **
        **   (2) A full-table-scan plan cannot supercede another plan unless
        **       the full-table-scan is an optimal plan.
        **
        **   (3) The plan cost must be lower than prior plans or else the
        **       cost must be the same and the number of rows must be lower.
        **
        **   (4) All tables have an INDEXED BY clause or this table lacks an
        **       INDEXED BY clause or this table uses the specific
        **       index specified by its INDEXED BY clause.








        */
        if( (sCost.used&notReady)==0                       /* (1) */
            && (bestJ<0 || isOptimal                       /* (2) */

                || (sCost.plan.wsFlags & WHERE_NOT_FULLSCAN)!=0)


            && (bestJ<0 || sCost.rCost<bestPlan.rCost      /* (3) */
                || (sCost.rCost<=bestPlan.rCost && sCost.nRow<bestPlan.nRow))
            && (nUnconstrained==0 || pTabItem->pIndex==0   /* (4) */
                || pTabItem->pIndex==sCost.plan.u.pIdx)
        ){
          WHERETRACE(("... best so far with cost=%g and nRow=%g\n",
                      sCost.rCost, sCost.nRow));
          bestPlan = sCost;
          bestJ = j;
        }
        if( doNotReorder ) break;







>







 







>

|







 







>
>
>
>
>
>
>
>
>
>







|

<
<
<
|

|
>
>
>
>
>
>
>
>


<
>

>
>
|

<
<







4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
....
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
....
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159



4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172

4173
4174
4175
4176
4177
4178


4179
4180
4181
4182
4183
4184
4185
    WhereCost bestPlan;         /* Most efficient plan seen so far */
    Index *pIdx;                /* Index for FROM table at pTabItem */
    int j;                      /* For looping over FROM tables */
    int bestJ = -1;             /* The value of j */
    Bitmask m;                  /* Bitmask value for j or bestJ */
    int isOptimal;              /* Iterator for optimal/non-optimal search */
    int nUnconstrained;         /* Number tables without INDEXED BY */
    Bitmask notIndexed;         /* Mask of tables that cannot use an index */

    memset(&bestPlan, 0, sizeof(bestPlan));
    bestPlan.rCost = SQLITE_BIG_DBL;

    /* Loop through the remaining entries in the FROM clause to find the
    ** next nested loop. The loop tests all FROM clause entries
    ** either once or twice. 
................................................................................
    ** is not possible to determine this with a simple greedy algorithm.
    ** However, since the cost of a linear scan through table t2 is the same 
    ** as the cost of a linear scan through table t1, a simple greedy 
    ** algorithm may choose to use t2 for the outer loop, which is a much
    ** costlier approach.
    */
    nUnconstrained = 0;
    notIndexed = 0;
    for(isOptimal=(iFrom<nTabList-1); isOptimal>=0; isOptimal--){
      Bitmask mask;             /* Mask of tables not yet ready */
      for(j=iFrom, pTabItem=&pTabList->a[j]; j<nTabList; j++, pTabItem++){
        int doNotReorder;    /* True if this table should not be reordered */
        WhereCost sCost;     /* Cost information from best[Virtual]Index() */
        ExprList *pOrderBy;  /* ORDER BY clause for index to optimize */
  
        doNotReorder =  (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0;
        if( j!=iFrom && doNotReorder ) break;
................................................................................
          bestVirtualIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost, pp);
        }else 
#endif
        {
          bestBtreeIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost);
        }
        assert( isOptimal || (sCost.used&notReady)==0 );

        /* If an INDEXED BY clause is present, then the plan must use that
        ** index if it uses any index at all */
        assert( pTabItem->pIndex==0 
                  || (sCost.plan.wsFlags & WHERE_NOT_FULLSCAN)==0
                  || sCost.plan.u.pIdx==pTabItem->pIndex );

        if( isOptimal && (sCost.plan.wsFlags & WHERE_NOT_FULLSCAN)==0 ){
          notIndexed |= m;
        }

        /* Conditions under which this table becomes the best so far:
        **
        **   (1) The table must not depend on other tables that have not
        **       yet run.
        **
        **   (2) A full-table-scan plan cannot supercede another plan unless
        **       it is an "optimal" plan as defined above.
        **



        **   (3) All tables have an INDEXED BY clause or this table lacks an
        **       INDEXED BY clause or this table uses the specific
        **       index specified by its INDEXED BY clause.  This rule ensures
        **       that a best-so-far is always selected even if an impossible
        **       combination of INDEXED BY clauses are given.  The error
        **       will be detected and relayed back to the application later.
        **       The NEVER() comes about because rule (2) above prevents
        **       An indexable full-table-scan from reaching rule (3).
        **
        **   (4) The plan cost must be lower than prior plans or else the
        **       cost must be the same and the number of rows must be lower.
        */
        if( (sCost.used&notReady)==0                       /* (1) */

            && (bestJ<0 || (notIndexed&m)!=0               /* (2) */
                || (sCost.plan.wsFlags & WHERE_NOT_FULLSCAN)!=0)
            && (nUnconstrained==0 || pTabItem->pIndex==0   /* (3) */
                || NEVER((sCost.plan.wsFlags & WHERE_NOT_FULLSCAN)!=0))
            && (bestJ<0 || sCost.rCost<bestPlan.rCost      /* (4) */
                || (sCost.rCost<=bestPlan.rCost && sCost.nRow<bestPlan.nRow))


        ){
          WHERETRACE(("... best so far with cost=%g and nRow=%g\n",
                      sCost.rCost, sCost.nRow));
          bestPlan = sCost;
          bestJ = j;
        }
        if( doNotReorder ) break;

Changes to test/where3.test.

230
231
232
233
234
235
236
237


























238
} {0 0 {TABLE t302} 1 1 {TABLE t301 USING PRIMARY KEY}}
do_test where3-3.1 {
  execsql {
    explain query plan
    SELECT * FROM t301, t302 WHERE t302.x=5 AND t301.a=t302.y;
  }
} {0 1 {TABLE t302} 1 0 {TABLE t301 USING PRIMARY KEY}}



























finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
} {0 0 {TABLE t302} 1 1 {TABLE t301 USING PRIMARY KEY}}
do_test where3-3.1 {
  execsql {
    explain query plan
    SELECT * FROM t301, t302 WHERE t302.x=5 AND t301.a=t302.y;
  }
} {0 1 {TABLE t302} 1 0 {TABLE t301 USING PRIMARY KEY}}

# Verify that when there are multiple tables in a join which must be
# full table scans that the query planner attempts put the table with
# the fewest number of output rows as the outer loop.
#
do_test where3-4.0 {
  execsql {
    CREATE TABLE t400(a INTEGER PRIMARY KEY, b, c);
    CREATE TABLE t401(p INTEGER PRIMARY KEY, q, r);
    CREATE TABLE t402(x INTEGER PRIMARY KEY, y, z);
    EXPLAIN QUERY PLAN
    SELECT * FROM t400, t401, t402 WHERE t402.z GLOB 'abc*';
  }
} {0 2 {TABLE t402} 1 0 {TABLE t400} 2 1 {TABLE t401}}
do_test where3-4.1 {
  execsql {
    EXPLAIN QUERY PLAN
    SELECT * FROM t400, t401, t402 WHERE t401.r GLOB 'abc*';
  }
} {0 1 {TABLE t401} 1 0 {TABLE t400} 2 2 {TABLE t402}}
do_test where3-4.2 {
  execsql {
    EXPLAIN QUERY PLAN
    SELECT * FROM t400, t401, t402 WHERE t400.c GLOB 'abc*';
  }
} {0 0 {TABLE t400} 1 1 {TABLE t401} 2 2 {TABLE t402}}

finish_test