/ Check-in [1272fb89]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements to query planning, especially in regards to estimating the cost and benefit of automatic indexes.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 1272fb8991b3888c72dcebf947415883d7727241
User & Date: drh 2014-06-17 15:53:33
Context
2014-06-17
16:11
Add the likely() function for symmetry with unlikely(). The likely(X) function means the same thing as likelihood(X,0.9375). check-in: 38965484 user: drh tags: trunk
15:53
Improvements to query planning, especially in regards to estimating the cost and benefit of automatic indexes. check-in: 1272fb89 user: drh tags: trunk
13:23
Add the autoindex2.test testing module. check-in: ffe3fea4 user: drh tags: autoindex-improvements
2014-06-16
22:45
Fix CSV import issue, reported via the mailing list, in the shell when the file to be imported ends with an empty line. check-in: fc918f7d user: mistachkin tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/rtree/rtree6.test.

    88     88   do_eqp_test rtree6.2.3 {
    89     89     SELECT * FROM t1,t2 WHERE k=ii
    90     90   } {
    91     91     0 0 0 {SCAN TABLE t1 VIRTUAL TABLE INDEX 2:} 
    92     92     0 1 1 {SEARCH TABLE t2 USING INTEGER PRIMARY KEY (rowid=?)}
    93     93   }
    94     94   
    95         -do_eqp_test rtree6.2.4 {
           95  +do_eqp_test rtree6.2.4.1 {
           96  +  SELECT * FROM t1,t2 WHERE v=+ii and x1<10 and x2>10
           97  +} {
           98  +  0 0 0 {SCAN TABLE t1 VIRTUAL TABLE INDEX 2:C0E1} 
           99  +  0 1 1 {SEARCH TABLE t2 USING AUTOMATIC COVERING INDEX (v=?)}
          100  +}
          101  +do_eqp_test rtree6.2.4.2 {
    96    102     SELECT * FROM t1,t2 WHERE v=10 and x1<10 and x2>10
    97    103   } {
    98    104     0 0 0 {SCAN TABLE t1 VIRTUAL TABLE INDEX 2:C0E1} 
    99    105     0 1 1 {SEARCH TABLE t2 USING AUTOMATIC COVERING INDEX (v=?)}
   100    106   }
   101    107   
   102    108   do_eqp_test rtree6.2.5 {

Changes to src/where.c.

  5274   5274     int iLoop;                /* Loop counter over the terms of the join */
  5275   5275     int ii, jj;               /* Loop counters */
  5276   5276     int mxI = 0;              /* Index of next entry to replace */
  5277   5277     int nOrderBy;             /* Number of ORDER BY clause terms */
  5278   5278     LogEst rCost;             /* Cost of a path */
  5279   5279     LogEst nOut;              /* Number of outputs */
  5280   5280     LogEst mxCost = 0;        /* Maximum cost of a set of paths */
  5281         -  LogEst mxOut = 0;         /* Maximum nOut value on the set of paths */
  5282   5281     int nTo, nFrom;           /* Number of valid entries in aTo[] and aFrom[] */
  5283   5282     WherePath *aFrom;         /* All nFrom paths at the previous level */
  5284   5283     WherePath *aTo;           /* The nTo best paths at the current level */
  5285   5284     WherePath *pFrom;         /* An element of aFrom[] that we are working on */
  5286   5285     WherePath *pTo;           /* An element of aTo[] that we are working on */
  5287   5286     WhereLoop *pWLoop;        /* One of the WhereLoop objects */
  5288   5287     WhereLoop **pX;           /* Used to divy up the pSpace memory */
................................................................................
  5384   5383           }else{
  5385   5384             revMask = pFrom->revLoop;
  5386   5385           }
  5387   5386           /* Check to see if pWLoop should be added to the mxChoice best so far */
  5388   5387           for(jj=0, pTo=aTo; jj<nTo; jj++, pTo++){
  5389   5388             if( pTo->maskLoop==maskNew
  5390   5389              && ((pTo->isOrdered^isOrdered)&80)==0
  5391         -           && ((pTo->rCost<=rCost && pTo->nRow<=nOut) ||
  5392         -                (pTo->rCost>=rCost && pTo->nRow>=nOut))
  5393   5390             ){
  5394   5391               testcase( jj==nTo-1 );
  5395   5392               break;
  5396   5393             }
  5397   5394           }
  5398   5395           if( jj>=nTo ){
  5399   5396             if( nTo>=mxChoice && rCost>=mxCost ){
................................................................................
  5419   5416             if( sqlite3WhereTrace&0x4 ){
  5420   5417               sqlite3DebugPrintf("New    %s cost=%-3d,%3d order=%c\n",
  5421   5418                   wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
  5422   5419                   isOrdered>=0 ? isOrdered+'0' : '?');
  5423   5420             }
  5424   5421   #endif
  5425   5422           }else{
  5426         -          if( pTo->rCost<=rCost && pTo->nRow<=nOut ){
         5423  +          if( pTo->rCost<=rCost ){
  5427   5424   #ifdef WHERETRACE_ENABLED /* 0x4 */
  5428   5425               if( sqlite3WhereTrace&0x4 ){
  5429   5426                 sqlite3DebugPrintf(
  5430   5427                     "Skip   %s cost=%-3d,%3d order=%c",
  5431   5428                     wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
  5432   5429                     isOrdered>=0 ? isOrdered+'0' : '?');
  5433   5430                 sqlite3DebugPrintf("   vs %s cost=%-3d,%d order=%c\n",
................................................................................
  5459   5456           pTo->rCost = rCost;
  5460   5457           pTo->isOrdered = isOrdered;
  5461   5458           memcpy(pTo->aLoop, pFrom->aLoop, sizeof(WhereLoop*)*iLoop);
  5462   5459           pTo->aLoop[iLoop] = pWLoop;
  5463   5460           if( nTo>=mxChoice ){
  5464   5461             mxI = 0;
  5465   5462             mxCost = aTo[0].rCost;
  5466         -          mxOut = aTo[0].nRow;
  5467   5463             for(jj=1, pTo=&aTo[1]; jj<mxChoice; jj++, pTo++){
  5468         -            if( pTo->rCost>mxCost || (pTo->rCost==mxCost && pTo->nRow>mxOut) ){
         5464  +            if( pTo->rCost>mxCost ){
  5469   5465                 mxCost = pTo->rCost;
  5470         -              mxOut = pTo->nRow;
  5471   5466                 mxI = jj;
  5472   5467               }
  5473   5468             }
  5474   5469           }
  5475   5470         }
  5476   5471       }
  5477   5472   

Added test/autoindex2.test.

            1  +# 2014-06-17
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +#
           12  +# This file implements regression tests for SQLite library.  The
           13  +# focus of this script is testing automatic index creation logic.
           14  +#
           15  +# This file contains a single real-world test case that was giving
           16  +# suboptimal performance because of over-use of automatic indexes.
           17  +#
           18  +
           19  +set testdir [file dirname $argv0]
           20  +source $testdir/tester.tcl
           21  +
           22  +
           23  +do_execsql_test autoindex2-100 {
           24  +  CREATE TABLE t1(
           25  +    t1_id largeint,
           26  +    did char(9),
           27  +    ptime largeint,
           28  +    exbyte char(4),
           29  +    pe_id int,
           30  +    field_id int,
           31  +    mass float,
           32  +    param10 float,
           33  +    param11 float,
           34  +    exmass float,
           35  +    deviation float,
           36  +    trange float,
           37  +    vstatus int,
           38  +    commit_status int,
           39  +    formula char(329),
           40  +    tier int DEFAULT 2,
           41  +    ssid int DEFAULT 0,
           42  +    last_operation largeint DEFAULT 0,
           43  +    admin_uuid int DEFAULT 0,
           44  +    previous_value float,
           45  +    job_id largeint,
           46  +    last_t1 largeint DEFAULT 0,
           47  +    data_t1 int,
           48  +    previous_date largeint DEFAULT 0,
           49  +    flg8 int DEFAULT 1,
           50  +    failed_fields char(100)
           51  +  );
           52  +  CREATE INDEX t1x0 on t1 (t1_id);
           53  +  CREATE INDEX t1x1 on t1 (ptime, vstatus);
           54  +  CREATE INDEX t1x2 on t1 (did, ssid, ptime, vstatus, exbyte, t1_id);
           55  +  CREATE INDEX t1x3 on t1 (job_id);
           56  +  
           57  +  CREATE TABLE t2(
           58  +    did char(9),
           59  +    client_did char(30),
           60  +    description char(49),
           61  +    uid int,
           62  +    tzid int,
           63  +    privilege int,
           64  +    param2 int,
           65  +    type char(30),
           66  +    subtype char(32),
           67  +    dparam1 char(7) DEFAULT '',
           68  +    param5 char(3) DEFAULT '',
           69  +    notional float DEFAULT 0.000000,
           70  +    create_time largeint,
           71  +    sample_time largeint DEFAULT 0,
           72  +    param6 largeint,
           73  +    frequency int,
           74  +    expiration largeint,
           75  +    uw_status int,
           76  +    next_sample largeint,
           77  +    last_sample largeint,
           78  +    reserve1 char(29) DEFAULT '',
           79  +    reserve2 char(29) DEFAULT '',
           80  +    reserve3 char(29) DEFAULT '',
           81  +    bxcdr char(19) DEFAULT 'XY',
           82  +    ssid int DEFAULT 1,
           83  +    last_t1_id largeint,
           84  +    reserve4 char(29) DEFAULT '',
           85  +    reserve5 char(29) DEFAULT '',
           86  +    param12 int DEFAULT 0,
           87  +    long_did char(100) DEFAULT '',
           88  +    gr_code int DEFAULT 0,
           89  +    drx char(100) DEFAULT '',
           90  +    parent_id char(9) DEFAULT '',
           91  +    param13 int DEFAULT 0,
           92  +    position float DEFAULT 1.000000,
           93  +    client_did3 char(100) DEFAULT '',
           94  +    client_did4 char(100) DEFAULT '',
           95  +    dlib_id char(9) DEFAULT ''
           96  +  );
           97  +  CREATE INDEX t2x0 on t2 (did);
           98  +  CREATE INDEX t2x1 on t2 (client_did);
           99  +  CREATE INDEX t2x2 on t2 (long_did);
          100  +  CREATE INDEX t2x3 on t2 (uid);
          101  +  CREATE INDEX t2x4 on t2 (param2);
          102  +  CREATE INDEX t2x5 on t2 (type);
          103  +  CREATE INDEX t2x6 on t2 (subtype);
          104  +  CREATE INDEX t2x7 on t2 (last_sample);
          105  +  CREATE INDEX t2x8 on t2 (param6);
          106  +  CREATE INDEX t2x9 on t2 (frequency);
          107  +  CREATE INDEX t2x10 on t2 (privilege);
          108  +  CREATE INDEX t2x11 on t2 (sample_time);
          109  +  CREATE INDEX t2x12 on t2 (notional);
          110  +  CREATE INDEX t2x13 on t2 (tzid);
          111  +  CREATE INDEX t2x14 on t2 (gr_code);
          112  +  CREATE INDEX t2x15 on t2 (parent_id);
          113  +  
          114  +  CREATE TABLE t3(
          115  +    uid int,
          116  +    param3 int,
          117  +    uuid int,
          118  +    acc_id int,
          119  +    cust_num int,
          120  +    numerix_id int,
          121  +    pfy char(29),
          122  +    param4 char(29),
          123  +    param15 int DEFAULT 0,
          124  +    flg7 int DEFAULT 0,
          125  +    param21 int DEFAULT 0,
          126  +    bxcdr char(2) DEFAULT 'PC',
          127  +    c31 int DEFAULT 0,
          128  +    c33 int DEFAULT 0,
          129  +    c35 int DEFAULT 0,
          130  +    c37 int,
          131  +    mgr_uuid int,
          132  +    back_up_uuid int,
          133  +    priv_mars int DEFAULT 0,
          134  +    is_qc int DEFAULT 0,
          135  +    c41 int DEFAULT 0,
          136  +    deleted int DEFAULT 0,
          137  +    c47 int DEFAULT 1
          138  +  );
          139  +  CREATE INDEX t3x0 on t3 (uid);
          140  +  CREATE INDEX t3x1 on t3 (param3);
          141  +  CREATE INDEX t3x2 on t3 (uuid);
          142  +  CREATE INDEX t3x3 on t3 (acc_id);
          143  +  CREATE INDEX t3x4 on t3 (param4);
          144  +  CREATE INDEX t3x5 on t3 (pfy);
          145  +  CREATE INDEX t3x6 on t3 (is_qc);
          146  +  SELECT count(*) FROM sqlite_master;
          147  +} {30}
          148  +do_execsql_test autoindex2-110 {
          149  +  ANALYZE sqlite_master;
          150  +  INSERT INTO sqlite_stat1 VALUES('t1','t1x3','10747267 260');
          151  +  INSERT INTO sqlite_stat1 VALUES('t1','t1x2','10747267 121 113 2 2 2 1');
          152  +  INSERT INTO sqlite_stat1 VALUES('t1','t1x1','10747267 50 40');
          153  +  INSERT INTO sqlite_stat1 VALUES('t1','t1x0','10747267 1');
          154  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x15','39667 253');
          155  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x14','39667 19834');
          156  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x13','39667 13223');
          157  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x12','39667 7');
          158  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x11','39667 17');
          159  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x10','39667 19834');
          160  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x9','39667 7934');
          161  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x8','39667 11');
          162  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x7','39667 5');
          163  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x6','39667 242');
          164  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x5','39667 1984');
          165  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x4','39667 4408');
          166  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x3','39667 81');
          167  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x2','39667 551');
          168  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x1','39667 2');
          169  +  INSERT INTO sqlite_stat1 VALUES('t2','t2x0','39667 1');
          170  +  INSERT INTO sqlite_stat1 VALUES('t3','t3x6','569 285');
          171  +  INSERT INTO sqlite_stat1 VALUES('t3','t3x5','569 2');
          172  +  INSERT INTO sqlite_stat1 VALUES('t3','t3x4','569 2');
          173  +  INSERT INTO sqlite_stat1 VALUES('t3','t3x3','569 5');
          174  +  INSERT INTO sqlite_stat1 VALUES('t3','t3x2','569 3');
          175  +  INSERT INTO sqlite_stat1 VALUES('t3','t3x1','569 6');
          176  +  INSERT INTO sqlite_stat1 VALUES('t3','t3x0','569 1');
          177  +  ANALYZE sqlite_master;
          178  +} {}
          179  +do_execsql_test autoindex2-120 {
          180  +  EXPLAIN QUERY PLAN
          181  +  SELECT
          182  +     t1_id,
          183  +     t1.did,
          184  +     param2,
          185  +     param3,
          186  +     t1.ptime,
          187  +     t1.trange,
          188  +     t1.exmass,
          189  +     t1.mass,
          190  +     t1.vstatus,
          191  +     type,
          192  +     subtype,
          193  +     t1.deviation,
          194  +     t1.formula,
          195  +     dparam1,
          196  +     reserve1,
          197  +     reserve2,
          198  +     param4,
          199  +     t1.last_operation,
          200  +     t1.admin_uuid,
          201  +     t1.previous_value,
          202  +     t1.job_id,
          203  +     client_did, 
          204  +     t1.last_t1,
          205  +     t1.data_t1,
          206  +     t1.previous_date,
          207  +     param5,
          208  +     param6,
          209  +     mgr_uuid
          210  +  FROM
          211  +     t1,
          212  +     t2,
          213  +     t3
          214  +  WHERE
          215  +     t1.ptime > 1393520400
          216  +     AND param3<>9001
          217  +     AND t3.flg7 = 1
          218  +     AND t1.did = t2.did
          219  +     AND t2.uid = t3.uid
          220  +  ORDER BY t1.ptime desc LIMIT 500;
          221  +} {0 0 0 {SEARCH TABLE t1 USING INDEX t1x1 (ptime>?)} 0 1 1 {SEARCH TABLE t2 USING INDEX t2x0 (did=?)} 0 2 2 {SEARCH TABLE t3 USING INDEX t3x0 (uid=?)}}
          222  +#
          223  +# ^^^--- Before being fixed, the above was using an automatic covering
          224  +# on t3 and reordering the tables so that t3 was in the outer loop and
          225  +# implementing the ORDER BY clause using a B-Tree.
          226  +
          227  +do_execsql_test autoindex2-120 {
          228  +  EXPLAIN QUERY PLAN
          229  +  SELECT
          230  +     t1_id,
          231  +     t1.did,
          232  +     param2,
          233  +     param3,
          234  +     t1.ptime,
          235  +     t1.trange,
          236  +     t1.exmass,
          237  +     t1.mass,
          238  +     t1.vstatus,
          239  +     type,
          240  +     subtype,
          241  +     t1.deviation,
          242  +     t1.formula,
          243  +     dparam1,
          244  +     reserve1,
          245  +     reserve2,
          246  +     param4,
          247  +     t1.last_operation,
          248  +     t1.admin_uuid,
          249  +     t1.previous_value,
          250  +     t1.job_id,
          251  +     client_did, 
          252  +     t1.last_t1,
          253  +     t1.data_t1,
          254  +     t1.previous_date,
          255  +     param5,
          256  +     param6,
          257  +     mgr_uuid
          258  +  FROM
          259  +     t3,
          260  +     t2,
          261  +     t1
          262  +  WHERE
          263  +     t1.ptime > 1393520400
          264  +     AND param3<>9001
          265  +     AND t3.flg7 = 1
          266  +     AND t1.did = t2.did
          267  +     AND t2.uid = t3.uid
          268  +  ORDER BY t1.ptime desc LIMIT 500;
          269  +} {0 0 2 {SEARCH TABLE t1 USING INDEX t1x1 (ptime>?)} 0 1 1 {SEARCH TABLE t2 USING INDEX t2x0 (did=?)} 0 2 0 {SEARCH TABLE t3 USING INDEX t3x0 (uid=?)}}
          270  +
          271  +finish_test

Changes to test/tpch01.test.

   164    164                  o_year
   165    165          order by
   166    166                  o_year;}]
   167    167     set ::eqpres
   168    168   } {/0 0 0 {SEARCH TABLE part USING INDEX bootleg_pti .P_TYPE=..} 0 1 2 {SEARCH TABLE lineitem USING INDEX lpki2 .L_PARTKEY=..}.*/}
   169    169   do_test tpch01-1.1b {
   170    170     set ::eqpres
   171         -} {/.* customer .* nation AS n1 .* nation AS n2 .*/}
          171  +} {/.* customer .* nation AS n1 .*/}
          172  +do_test tpch01-1.1c {
          173  +  set ::eqpres
          174  +} {/.* supplier .* nation AS n2 .*/}
   172    175   
   173    176   do_eqp_test tpch01-1.2 {
   174    177   select
   175    178       c_custkey,    c_name,    sum(l_extendedprice * (1 - l_discount)) as revenue,
   176    179       c_acctbal,    n_name,    c_address,    c_phone,    c_comment
   177    180   from
   178    181       customer,    orders,    lineitem,    nation
................................................................................
   181    184       and o_orderdate >=  '1994-08-01'    and o_orderdate < date('1994-08-01', '+3 month')
   182    185       and l_returnflag = 'R'    and c_nationkey = n_nationkey
   183    186   group by
   184    187       c_custkey,    c_name,    c_acctbal,    c_phone,    n_name, c_address,    c_comment
   185    188   order by
   186    189       revenue desc;
   187    190   } {0 0 1 {SEARCH TABLE orders USING INDEX odi (O_ORDERDATE>? AND O_ORDERDATE<?)} 0 1 0 {SEARCH TABLE customer USING INDEX cpki (C_CUSTKEY=?)} 0 2 3 {SEARCH TABLE nation USING INDEX npki (N_NATIONKEY=?)} 0 3 2 {SEARCH TABLE lineitem USING INDEX lpki (L_ORDERKEY=?)} 0 0 0 {USE TEMP B-TREE FOR GROUP BY} 0 0 0 {USE TEMP B-TREE FOR ORDER BY}}
          191  +
          192  +finish_test