Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch analyze-worst-case Excluding Merge-Ins
This is equivalent to a diff from cb9302cc to c4488730
2016-03-04
| ||
21:18 | Fix an assert() in sqlite3VarintLen(), even though it is impossible to hit in SQLite due to the way sqlite3VarintLen() is used. (check-in: 251424c5 user: drh tags: trunk) | |
19:55 | Simplify the computation of Index.aAvgEq. (Leaf check-in: c4488730 user: drh tags: analyze-worst-case) | |
18:45 | Merge changes from trunk. (check-in: 5294c977 user: drh tags: analyze-worst-case) | |
16:42 | Merge recent enhancements from trunk. Default page size is 4096. Writes to statement journals are avoided. (check-in: 456df336 user: drh tags: sessions) | |
14:57 | Merge recent enhancements from trunk, and especially the changes that reduce the heap-memory footprint of schemas, and defer opening and writing to statement journals. (check-in: 2f0c195c user: drh tags: apple-osx) | |
14:43 | Defer opening and writing statement journals until the size reaches a threshold (currently 64KiB). (check-in: cb9302cc user: drh tags: trunk) | |
14:23 | Update test cases to taken deferred statement-journal opening into account. (Closed-Leaf check-in: 5b2fe521 user: drh tags: memjournal-exp) | |
04:01 | Change the default cache_size to -2000 (which means 2000*1024 bytes independent of page_size). (check-in: 2682e8e4 user: drh tags: trunk) | |
Changes to src/analyze.c.
︙ | ︙ | |||
263 264 265 266 267 268 269 270 271 272 273 274 275 276 | ** information. */ typedef struct Stat4Accum Stat4Accum; typedef struct Stat4Sample Stat4Sample; struct Stat4Sample { tRowcnt *anEq; /* sqlite_stat4.nEq */ tRowcnt *anDLt; /* sqlite_stat4.nDLt */ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 tRowcnt *anLt; /* sqlite_stat4.nLt */ union { i64 iRowid; /* Rowid in main table of the key */ u8 *aRowid; /* Key for WITHOUT ROWID tables */ } u; u32 nRowid; /* Sizeof aRowid[] */ | > | 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 | ** information. */ typedef struct Stat4Accum Stat4Accum; typedef struct Stat4Sample Stat4Sample; struct Stat4Sample { tRowcnt *anEq; /* sqlite_stat4.nEq */ tRowcnt *anDLt; /* sqlite_stat4.nDLt */ tRowcnt *amxEq; /* Maximum length run of equal values */ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 tRowcnt *anLt; /* sqlite_stat4.nLt */ union { i64 iRowid; /* Rowid in main table of the key */ u8 *aRowid; /* Key for WITHOUT ROWID tables */ } u; u32 nRowid; /* Sizeof aRowid[] */ |
︙ | ︙ | |||
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 | nKeyCol = sqlite3_value_int(argv[1]); assert( nKeyCol<=nCol ); assert( nKeyCol>0 ); /* Allocate the space required for the Stat4Accum object */ n = sizeof(*p) + sizeof(tRowcnt)*nColUp /* Stat4Accum.anEq */ + sizeof(tRowcnt)*nColUp /* Stat4Accum.anDLt */ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 + sizeof(tRowcnt)*nColUp /* Stat4Accum.anLt */ + sizeof(Stat4Sample)*(nCol+mxSample) /* Stat4Accum.aBest[], a[] */ + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample) #endif ; db = sqlite3_context_db_handle(context); p = sqlite3DbMallocZero(db, n); if( p==0 ){ sqlite3_result_error_nomem(context); return; } p->db = db; p->nRow = 0; p->nCol = nCol; p->nKeyCol = nKeyCol; | > | | > | | | 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 | nKeyCol = sqlite3_value_int(argv[1]); assert( nKeyCol<=nCol ); assert( nKeyCol>0 ); /* Allocate the space required for the Stat4Accum object */ n = sizeof(*p) + sizeof(tRowcnt)*nColUp /* Stat4Accum.anEq */ + sizeof(tRowcnt)*nColUp /* Stat4Accum.amxEq */ + sizeof(tRowcnt)*nColUp /* Stat4Accum.anDLt */ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 + sizeof(tRowcnt)*nColUp /* Stat4Accum.anLt */ + sizeof(Stat4Sample)*(nCol+mxSample) /* Stat4Accum.aBest[], a[] */ + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample) #endif ; db = sqlite3_context_db_handle(context); p = sqlite3DbMallocZero(db, n); if( p==0 ){ sqlite3_result_error_nomem(context); return; } p->db = db; p->nRow = 0; p->nCol = nCol; p->nKeyCol = nKeyCol; p->current.anEq = (tRowcnt*)&p[1]; p->current.amxEq = &p->current.anEq[nColUp]; p->current.anDLt = &p->current.amxEq[nColUp]; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 { u8 *pSpace; /* Allocated space not yet assigned */ int i; /* Loop counter */ p->iGet = -1; p->mxSample = mxSample; p->nPSample = (tRowcnt)(sqlite3_value_int64(argv[2])/(mxSample/3+1) + 1); p->current.anLt = &p->current.anDLt[nColUp]; p->iPrn = 0x689e962d*(u32)nCol ^ 0xd0944565*(u32)sqlite3_value_int(argv[2]); /* Set up the Stat4Accum.a[] and aBest[] arrays */ p->a = (struct Stat4Sample*)&p->current.anLt[nColUp]; p->aBest = &p->a[mxSample]; pSpace = (u8*)(&p->a[mxSample+nCol]); for(i=0; i<(mxSample+nCol); i++){ |
︙ | ︙ | |||
717 718 719 720 721 722 723 | UNUSED_PARAMETER( argc ); UNUSED_PARAMETER( context ); assert( p->nCol>0 ); assert( iChng<p->nCol ); if( p->nRow==0 ){ /* This is the first call to this function. Do initialization. */ | | > > > > | 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 | UNUSED_PARAMETER( argc ); UNUSED_PARAMETER( context ); assert( p->nCol>0 ); assert( iChng<p->nCol ); if( p->nRow==0 ){ /* This is the first call to this function. Do initialization. */ for(i=0; i<p->nCol; i++){ p->current.anEq[i] = 1; p->current.amxEq[i] = 1; } }else{ /* Second and subsequent calls get processed here */ samplePushPrevious(p, iChng); /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply ** to the current row of the index. */ for(i=0; i<iChng; i++){ if( p->current.amxEq[i]==p->current.anEq[i] ) p->current.amxEq[i]++; p->current.anEq[i]++; } for(i=iChng; i<p->nCol; i++){ p->current.anDLt[i]++; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 p->current.anLt[i] += p->current.anEq[i]; #endif |
︙ | ︙ | |||
817 818 819 820 821 822 823 | || eCall==STAT_GET_NDLT ); if( eCall==STAT_GET_STAT1 ) #else assert( argc==1 ); #endif { | | | | | | | > | < | | > | | > | | > > > > > > > > > > > > > > > > > > > > | | > > | > > | < | > | 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 | || eCall==STAT_GET_NDLT ); if( eCall==STAT_GET_STAT1 ) #else assert( argc==1 ); #endif { /* Return a value for the "stat" column of the sqlite_stat1 ** table for this index. ** ** The value is a string composed of a list of integers describing ** the index. The first integer is the (estimated) total number ** entries in the index. There is one additional integer for each ** column in the index. The first added integer is an estimate of ** the number of rows that match a single key in the first column of ** the index. The second added integer is an estimate on of the number ** of rows that match a single key consisting of the first two columns ** of the index. And so forth. ** ** For example, for an index on columns (a,b), if the sqlite_stat1.stat ** values is "100 10 2", that means there are about 100 rows in the ** index, and that a query against a=$key1 will match about 10 rows ** and a query against "a=$key1 AND b=$key2" will match about 2 rows. ** ** Let V be the average number of rows that match a key, and let M ** be the most number of rows that match the key for any possible value ** of that key. The estimate is computed as: ** ** E = (2*V + M)/3 ** ** Consider two indexes. Index X has with 100 values of exactly 0 and ** 100 singleton values between 1 and 100. Index Y has 200 values ** evenly distributed between 1 and 20. If only the average (V) is ** used in the estimate, X would have "200 2" and Y would have "200 10" ** and so the planner would think X is the more selective index. And ** X often would be more selective. But when searching for 0, index X ** would perform badly. To avoid this problem, the M is added into the ** estimate so that the stat for X is "200 34" and Y is still "200 10". ** In this way, Y is the preferred index (all else being equal) and ** the pathological case is avoided. ** ** For deciding whether or not to do a skip-scan, we want to know the ** average number of rows (V) with the same key, not the mixed estimate ** E shown above. Usually E will be close enough. However, if E is ** large but V is small, that could trick the query planner into thinking ** that a skip-scan might work well on this index. To avoid that, the ** "noskipscan" flag is added in cases where the divergence between E ** and V might mislead the query planner. */ char *z; int i; int noSkipScan = 0; char *zRet = sqlite3MallocZero( (p->nKeyCol+2)*25 ); if( zRet==0 ){ sqlite3_result_error_nomem(context); return; } sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow); z = zRet + sqlite3Strlen30(zRet); for(i=0; i<p->nKeyCol; i++){ u64 nDistinct = p->current.anDLt[i]; u64 iMx = p->current.amxEq[i]; /* M: Most rows per key */ u64 iAvg = (p->nRow+nDistinct)/(nDistinct+1); /* V: Average per key */ u64 iVal = (iMx+iAvg*2)/3; /* E: The estimate */ sqlite3_snprintf(24, z, " %llu", iVal); z += sqlite3Strlen30(z); assert( p->current.anEq[i] ); if( iVal>=WHERE_SKIPSCAN_ONSET && iAvg<(WHERE_SKIPSCAN_ONSET*2/3) ){ noSkipScan = 1; } } if( noSkipScan ) sqlite3_snprintf(14, z, " noskipscan"); sqlite3_result_text(context, zRet, -1, sqlite3_free); } #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 else if( eCall==STAT_GET_ROWID ){ if( p->iGet<0 ){ samplePushPrevious(p, 0); p->iGet = 0; |
︙ | ︙ | |||
1567 1568 1569 1570 1571 1572 1573 | #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 /* ** Populate the pIdx->aAvgEq[] array based on the samples currently ** stored in pIdx->aSample[]. */ static void initAvgEq(Index *pIdx){ if( pIdx ){ | > > | | | < | | < > | | | | < < < | < < < < < | < < < > > | < > > | | 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 | #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 /* ** Populate the pIdx->aAvgEq[] array based on the samples currently ** stored in pIdx->aSample[]. */ static void initAvgEq(Index *pIdx){ if( pIdx ){ int nSample = pIdx->nSample; /* Number of samples */ int nCol = pIdx->nSampleCol; /* Number of columns sampled */ IndexSample *aSample = pIdx->aSample; /* The samples */ IndexSample *pFinal = &aSample[nSample-1]; /* The last sample */ int iCol; /* Loop counter of columns */ if( nCol>1 ){ /* If this is stat4 data, then calculate aAvgEq[] values for all ** sample columns except the last. The last is always set to 1, as ** once the trailing PK fields are considered all index keys are ** unique. */ nCol--; pIdx->aAvgEq[nCol] = 1; } for(iCol=0; iCol<nCol; iCol++){ int i; /* Used to iterate through samples */ u32 nDSample = 0; /* Number of distinct samples less than pFinal*/ tRowcnt sumEq = 0; /* Sum of the nEq values */ tRowcnt avgEq; /* Average repeats for unsampled entries */ tRowcnt nRow; /* Number of rows in index */ tRowcnt prevLt = 0; /* Number of values less than previous sample */ tRowcnt thisLt; /* Number of values less than current sample */ if( !pIdx->aiRowEst || iCol>=pIdx->nKeyCol || pIdx->aiRowEst[iCol+1]==0 ){ nRow = pFinal->anLt[iCol] + pFinal->anEq[iCol]; }else{ nRow = pIdx->aiRowEst[0]; } pIdx->nRowEst0 = nRow; for(i=0; (thisLt = aSample[i].anLt[iCol])<pFinal->anLt[iCol]; i++){ if( i==0 || thisLt!=prevLt ){ sumEq += aSample[i].anEq[iCol]; nDSample++; } prevLt = thisLt; } if( pFinal->anDLt[iCol] > nDSample ){ avgEq = (pFinal->anLt[iCol] - sumEq)/(pFinal->anDLt[iCol] - nDSample); }else{ avgEq = 1; } if( avgEq==0 ) avgEq = 1; pIdx->aAvgEq[iCol] = avgEq; } } } |
︙ | ︙ |
Changes to src/sqliteInt.h.
︙ | ︙ | |||
2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 | #define JT_CROSS 0x0002 /* Explicit use of the CROSS keyword */ #define JT_NATURAL 0x0004 /* True for a "natural" join */ #define JT_LEFT 0x0008 /* Left outer join */ #define JT_RIGHT 0x0010 /* Right outer join */ #define JT_OUTER 0x0020 /* The "OUTER" keyword is present */ #define JT_ERROR 0x0040 /* unknown or unsupported join type */ /* ** Flags appropriate for the wctrlFlags parameter of sqlite3WhereBegin() ** and the WhereInfo.wctrlFlags member. */ #define WHERE_ORDERBY_NORMAL 0x0000 /* No-op */ #define WHERE_ORDERBY_MIN 0x0001 /* ORDER BY processing for min() func */ | > > > > > > > > > | 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 | #define JT_CROSS 0x0002 /* Explicit use of the CROSS keyword */ #define JT_NATURAL 0x0004 /* True for a "natural" join */ #define JT_LEFT 0x0008 /* Left outer join */ #define JT_RIGHT 0x0010 /* Right outer join */ #define JT_OUTER 0x0020 /* The "OUTER" keyword is present */ #define JT_ERROR 0x0040 /* unknown or unsupported join type */ /* ** TUNING: The skip-scan optimization is only profitable if the average ** number of repeats of an entry in the index is greater than or equal to ** WHERE_SKIPSCAN_ONSET. If the average numbe of repeats is less ** than WHERE_SKIPSCAN_ONSET, then it is faster to do a full table ** scan. */ #define WHERE_SKIPSCAN_ONSET 18 #define WHERE_SKIPSCAN_ONSET_LOG 42 /* ** Flags appropriate for the wctrlFlags parameter of sqlite3WhereBegin() ** and the WhereInfo.wctrlFlags member. */ #define WHERE_ORDERBY_NORMAL 0x0000 /* No-op */ #define WHERE_ORDERBY_MIN 0x0001 /* ORDER BY processing for min() func */ |
︙ | ︙ |
Changes to src/where.c.
︙ | ︙ | |||
2408 2409 2410 2411 2412 2413 2414 | ** ** The magic number 18 is selected on the basis that scanning 17 rows ** is almost always quicker than an index seek (even though if the index ** contains fewer than 2^17 rows we assume otherwise in other parts of ** the code). And, even if it is not, it should not be too much slower. ** On the other hand, the extra seeks could end up being significantly ** more expensive. */ | | | | 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 | ** ** The magic number 18 is selected on the basis that scanning 17 rows ** is almost always quicker than an index seek (even though if the index ** contains fewer than 2^17 rows we assume otherwise in other parts of ** the code). And, even if it is not, it should not be too much slower. ** On the other hand, the extra seeks could end up being significantly ** more expensive. */ assert( WHERE_SKIPSCAN_ONSET_LOG==sqlite3LogEst(WHERE_SKIPSCAN_ONSET) ); if( saved_nEq==saved_nSkip && saved_nEq+1<pProbe->nKeyCol && pProbe->noSkipScan==0 && pProbe->aiRowLogEst[saved_nEq+1]>=WHERE_SKIPSCAN_ONSET_LOG && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK ){ LogEst nIter; pNew->u.btree.nEq++; pNew->nSkip++; pNew->aLTerm[pNew->nLTerm++] = 0; pNew->wsFlags |= WHERE_SKIPSCAN; |
︙ | ︙ |
Changes to test/skipscan2.test.
︙ | ︙ | |||
44 45 46 47 48 49 50 | INSERT INTO people VALUES('Nathan','student',163); INSERT INTO people VALUES('Olivia','student',161); INSERT INTO people VALUES('Patrick','teacher',180); INSERT INTO people VALUES('Quiana','student',182); INSERT INTO people VALUES('Robert','student',159); INSERT INTO people VALUES('Sally','student',166); INSERT INTO people VALUES('Tom','student',171); | < < < < < < | | | | | | | | | | | | | | | < < < < < < < | | < > | < < < < < < < < < < < < < < < < < < < | | | | | | | | | | | | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | INSERT INTO people VALUES('Nathan','student',163); INSERT INTO people VALUES('Olivia','student',161); INSERT INTO people VALUES('Patrick','teacher',180); INSERT INTO people VALUES('Quiana','student',182); INSERT INTO people VALUES('Robert','student',159); INSERT INTO people VALUES('Sally','student',166); INSERT INTO people VALUES('Tom','student',171); } # Without ANALYZE, a skip-scan is not used # do_execsql_test skipscan2-1.3 { SELECT name FROM people WHERE height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-1.3eqp { EXPLAIN QUERY PLAN SELECT name FROM people WHERE height<=161 ORDER BY +name; } {~/*INDEX people_idx1 */} # Now do an ANALYZE. A skip-scan can be used after ANALYZE. # do_execsql_test skipscan2-1.4 { ANALYZE; -- We do not have enough people above to actually force the use -- of a skip-scan. So make a manual adjustment to the stat1 table -- to make it seem like there are many more. UPDATE sqlite_stat1 SET stat='10000 5000 20' WHERE idx='people_idx1'; UPDATE sqlite_stat1 SET stat='10000 1' WHERE idx='sqlite_autoindex_people_1'; ANALYZE sqlite_master; } db cache flush do_execsql_test skipscan2-1.5 { SELECT name FROM people WHERE height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-1.5eqp { EXPLAIN QUERY PLAN SELECT name FROM people WHERE height<=161 ORDER BY +name; } {/*INDEX people_idx1 */} # Same answer with other formulations of the same query # do_execsql_test skipscan2-1.6 { SELECT name FROM people WHERE role IN (SELECT DISTINCT role FROM people) AND height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-1.7 { SELECT name FROM people WHERE role='teacher' AND height<=161 UNION ALL SELECT name FROM people WHERE role='student' AND height<=161 ORDER BY 1; } {Alice Bob Cindy Emily Megan Olivia Robert} # Add more people so that the estimated number of rows per key is # equal to the skip-scan threshold of 18. Then run an (unfudged) # ANALYZE. skip-scan should work. # do_execsql_test skipscan2-1.8 { INSERT INTO people VALUES('Ursula','student',170); ANALYZE; SELECT stat FROM sqlite_stat1 WHERE idx='people_idx1'; } {{21 13 2}} db cache flush do_execsql_test skipscan2-1.9 { SELECT name FROM people WHERE height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-1.9eqp { EXPLAIN QUERY PLAN SELECT name FROM people WHERE height<=161 ORDER BY +name; } {~/*INDEX people_idx1 */} # Repeat using a WITHOUT ROWID table. # do_execsql_test skipscan2-2.1 { CREATE TABLE peoplew( name TEXT PRIMARY KEY, role TEXT NOT NULL, height INT NOT NULL, -- in cm CHECK( role IN ('student','teacher') ) ) WITHOUT ROWID; CREATE INDEX peoplew_idx1 ON peoplew(role, height); INSERT INTO peoplew(name,role,height) SELECT name, role, height FROM people; ALTER TABLE people RENAME TO old_people; SELECT name FROM peoplew WHERE height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-2.2 { SELECT name FROM peoplew WHERE role IN (SELECT DISTINCT role FROM peoplew) AND height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-2.2 { SELECT name FROM peoplew WHERE role='teacher' AND height<=161 UNION ALL SELECT name FROM peoplew WHERE role='student' AND height<=161 ORDER BY 1; } {Alice Bob Cindy Emily Megan Olivia Robert} # Now do an ANALYZE. A skip-scan can be used after ANALYZE. # do_execsql_test skipscan2-2.4 { ANALYZE; } db cache flush do_execsql_test skipscan2-2.5 { SELECT name FROM peoplew WHERE height<=161 ORDER BY +name; } {Alice Bob Cindy Emily Megan Olivia Robert} do_execsql_test skipscan2-2.5eqp { EXPLAIN QUERY PLAN SELECT name FROM peoplew WHERE height<=161 ORDER BY +name; } {/*INDEX peoplew_idx1*/} # A skip-scan on a PK index of a WITHOUT ROWID table. # do_execsql_test skipscan2-3.1 { CREATE TABLE t3(a, b, c, PRIMARY KEY(a, b)) WITHOUT ROWID; } do_test skipscan2-3.2 { |
︙ | ︙ |