Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Use key-prefixes instead of full keys on interior nodes. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
449433ea21655ca6bc6a42d32d7ddf0b |
User & Date: | dan 2013-10-14 14:03:06.140 |
Context
2013-10-18
| ||
08:28 | Trim overflow pages when the corresponding record is deleted from the database. check-in: 547b950db0 user: dan tags: trunk | |
2013-10-14
| ||
14:03 | Use key-prefixes instead of full keys on interior nodes. check-in: 449433ea21 user: dan tags: trunk | |
2013-10-12
| ||
20:03 | Speed up the assert() used to check that no page references are being leaked. check-in: d276ad3f9e user: dan tags: trunk | |
Changes
Changes to src/btInt.h.
︙ | ︙ | |||
65 66 67 68 69 70 71 72 73 74 75 76 77 78 | */ u32 sqlite4BtPagerRootpgno(BtPager*); /* ** Read, write and trim existing database pages. */ int sqlite4BtPageGet(BtPager*, u32 pgno, BtPage **ppPage); int sqlite4BtPageWrite(BtPage*); int sqlite4BtPageTrim(BtPage*); int sqlite4BtPageRelease(BtPage*); void sqlite4BtPageReference(BtPage*); /* ** Allocate a new database page and return a writable reference to it. | > | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | */ u32 sqlite4BtPagerRootpgno(BtPager*); /* ** Read, write and trim existing database pages. */ int sqlite4BtPageGet(BtPager*, u32 pgno, BtPage **ppPage); int sqlite4BtPageTrimPgno(BtPager*, u32 pgno); int sqlite4BtPageWrite(BtPage*); int sqlite4BtPageTrim(BtPage*); int sqlite4BtPageRelease(BtPage*); void sqlite4BtPageReference(BtPage*); /* ** Allocate a new database page and return a writable reference to it. |
︙ | ︙ |
Changes to src/bt_main.c.
︙ | ︙ | |||
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | ** b-tree pages. */ #define BT_PGFLAGS_INTERNAL 0x01 /* True for non-leaf nodes */ #ifndef MIN # define MIN(a,b) (((a)<(b))?(a):(b)) #endif #define BT_EXPENSIVE_ASSERT 0 struct bt_db { sqlite4_env *pEnv; /* SQLite environment */ BtPager *pPager; /* Underlying page-based database */ bt_cursor *pAllCsr; /* List of all open cursors */ }; | > > > > | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | ** b-tree pages. */ #define BT_PGFLAGS_INTERNAL 0x01 /* True for non-leaf nodes */ #ifndef MIN # define MIN(a,b) (((a)<(b))?(a):(b)) #endif #ifndef MAX # define MAX(a,b) (((a)>(b))?(a):(b)) #endif #define BT_EXPENSIVE_ASSERT 0 /* #define BT_STDERR_DEBUG 1 */ struct bt_db { sqlite4_env *pEnv; /* SQLite environment */ BtPager *pPager; /* Underlying page-based database */ bt_cursor *pAllCsr; /* List of all open cursors */ }; |
︙ | ︙ | |||
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 | rc = btErrorBkpt(SQLITE4_NOMEM); }else{ btCsrSetup(db, pCsr); pCsr->pNextCsr = db->pAllCsr; db->pAllCsr = pCsr; } return rc; } static void btCsrReset(bt_cursor *pCsr, int bFreeBuffer){ int i; for(i=0; i<pCsr->nPg; i++){ sqlite4BtPageRelease(pCsr->apPage[i]); } if( bFreeBuffer ) sqlite4_free(pCsr->pDb->pEnv, pCsr->ovfl.pBuf); pCsr->nPg = 0; } int sqlite4BtCsrClose(bt_cursor *pCsr){ if( pCsr ){ bt_cursor **pp; btCsrReset(pCsr, 1); | > > > | | > > > | 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 | rc = btErrorBkpt(SQLITE4_NOMEM); }else{ btCsrSetup(db, pCsr); pCsr->pNextCsr = db->pAllCsr; db->pAllCsr = pCsr; } btCheckPageRefs(db); return rc; } static void btCsrReset(bt_cursor *pCsr, int bFreeBuffer){ int i; for(i=0; i<pCsr->nPg; i++){ sqlite4BtPageRelease(pCsr->apPage[i]); } if( bFreeBuffer ) sqlite4_free(pCsr->pDb->pEnv, pCsr->ovfl.pBuf); pCsr->nPg = 0; } int sqlite4BtCsrClose(bt_cursor *pCsr){ if( pCsr ){ bt_db *pDb = pCsr->pDb; bt_cursor **pp; btCheckPageRefs(pDb); btCsrReset(pCsr, 1); for(pp=&pDb->pAllCsr; *pp!=pCsr; pp=&(*pp)->pNextCsr); *pp = pCsr->pNextCsr; sqlite4_free(pDb->pEnv, pCsr); btCheckPageRefs(pDb); } return SQLITE4_OK; } void *sqlite4BtCsrExtra(bt_cursor *pCsr){ return (void*)&pCsr[1]; } /* ** Set pCsr->apPage[pCsr->nPg] to a reference to database page pgno. */ static int btCsrDescend(bt_cursor *pCsr, u32 pgno){ int rc; if( pCsr->nPg>=BT_MAX_DEPTH ){ rc = btErrorBkpt(SQLITE4_CORRUPT); }else{ rc = sqlite4BtPageGet(pCsr->pDb->pPager, pgno, &pCsr->apPage[pCsr->nPg]); if( rc==SQLITE4_OK ){ assert( pCsr->apPage[pCsr->nPg] ); pCsr->nPg++; } } return rc; } /* ** Move the cursor from the current page to the parent. Return ** SQLITE4_NOTFOUND if the cursor already points to the root page, ** or SQLITE4_OK otherwise. */ static int btCsrAscend(bt_cursor *pCsr, int nLvl){ int i; for(i=0; i<nLvl && ( pCsr->nPg>0 ); i++){ pCsr->nPg--; sqlite4BtPageRelease(pCsr->apPage[pCsr->nPg]); pCsr->apPage[pCsr->nPg] = 0; } return (pCsr->nPg==0 ? SQLITE4_NOTFOUND : SQLITE4_OK); } /************************************************************************** ** The functions in this section are used to extract data from buffers ** containing formatted b-tree pages. They do not entirely encapsulate all |
︙ | ︙ | |||
377 378 379 380 381 382 383 | return 0; } #endif /* ** This function compares the key passed via parameters pK and nK to the | | < | | < < | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < > | | > < | 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 | return 0; } #endif /* ** This function compares the key passed via parameters pK and nK to the ** key that cursor pCsr currently points to. ** ** If the cursor key is C, and the user key K, then this function sets: ** ** *piRes = (C - K). ** ** In other words, *piRes is +ve, zero or -ve if C is respectively larger, ** equal to or smaller than K. */ static int btCellKeyCompare( bt_cursor *pCsr, /* Cursor handle */ int bLeaf, /* True if cursor currently points to leaf */ const void *pK, int nK, /* Key to compare against cursor key */ int *piRes /* OUT: Result of comparison */ ){ const void *pCsrKey; int nCsrKey; int nCmp; int nAscend = 0; int rc = SQLITE4_OK; int res; if( bLeaf ){ rc = sqlite4BtCsrKey(pCsr, &pCsrKey, &nCsrKey); }else{ const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager); u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]); u8 *pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]); pCsrKey = pCell + sqlite4BtVarintGet32(pCell, &nCsrKey); if( nCsrKey==0 ){ int iCell = pCsr->aiCell[pCsr->nPg-1]+1; while( 1 ){ u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]); u32 pgno = btChildPgno(aData, pgsz, iCell); nAscend++; rc = btCsrDescend(pCsr, pgno); if( rc!=SQLITE4_OK ) break; aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]); pCsr->aiCell[pCsr->nPg-1] = 0; if( (btFlags(aData) & BT_PGFLAGS_INTERNAL)==0 ) break; iCell = 0; } rc = sqlite4BtCsrKey(pCsr, &pCsrKey, &nCsrKey); } } if( rc==SQLITE4_OK ){ nCmp = MIN(nCsrKey, nK); res = memcmp(pCsrKey, pK, nCmp); |
︙ | ︙ | |||
532 533 534 535 536 537 538 | }else{ /* Cell iTst is LARGER than pK/nK */ iHi = iTst; } } if( rc!=SQLITE4_OK ) break; assert( iHi==iLo ); | < < < < | > | 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 | }else{ /* Cell iTst is LARGER than pK/nK */ iHi = iTst; } } if( rc!=SQLITE4_OK ) break; assert( iHi==iLo ); iHi += (bLeaf==0 && res==0); pCsr->aiCell[pCsr->nPg-1] = iHi; if( bLeaf==0 ){ pgno = btChildPgno(aData, pgsz, iHi); }else{ pgno = 0; if( res!=0 ){ assert( BT_SEEK_LEFAST<0 && BT_SEEK_LE<0 ); |
︙ | ︙ | |||
573 574 575 576 577 578 579 | int sqlite4BtCsrSeek( bt_cursor *pCsr, const void *pK, /* Key to seek for */ int nK, /* Size of key pK in bytes */ int eSeek /* Seek mode (a BT_SEEK_XXX constant) */ ){ | > > | > > | 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 | int sqlite4BtCsrSeek( bt_cursor *pCsr, const void *pK, /* Key to seek for */ int nK, /* Size of key pK in bytes */ int eSeek /* Seek mode (a BT_SEEK_XXX constant) */ ){ int rc; btCheckPageRefs(pCsr->pDb); rc = btCsrSeek(pCsr, pK, nK, eSeek, 0); btCheckPageRefs(pCsr->pDb); return rc; } /* ** This function seeks the cursor as required for either sqlite4BtCsrFirst() ** (if parameter bLast is false) or sqlite4BtCsrLast() (if bLast is true). */ static int btCsrEnd(bt_cursor *pCsr, int bLast){ |
︙ | ︙ | |||
1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 | } } assert( rc!=SQLITE4_OK || iCall==p->nCell ); return rc; } int btBalance( bt_cursor *pCsr, /* Cursor pointed to page to rebalance */ int bLeaf, /* True if rebalancing leaf pages */ int nKV, /* Number of entries in apKV[] array */ KeyValue *apKV /* Extra entries to add while rebalancing */ ){ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 | } } assert( rc!=SQLITE4_OK || iCall==p->nCell ); return rc; } /* ** Extract a key-prefix from page pPg, which resides in a database with ** page size pgsz. If parameter bLast is true, the key-prefix is extracted ** from the right-most cell on the page. If bLast is false, the key-prefix ** is extracted from the left-most cell. ** ** A pointer to the key-prefix is returned. Before returning, *pnByte is ** set to the size of the prefix in bytes. */ static u8 *btKeyPrefix(const int pgsz, BtPage *pPg, int bLast, int *pnByte){ u8 *p; int n; u8 *aData; aData = sqlite4BtPageData(pPg); p = btCellFind(aData, pgsz, bLast ? btCellCount(aData, pgsz)-1 : 0); p += sqlite4BtVarintGet32(p, &n); if( n==0 ) p += sqlite4BtVarintGet32(p, &n); *pnByte = n; return p; } /* ** Parameters pLeft and pRight point to a pair of adjacent leaf pages in ** a database with page size pgsz. The keys in pRight are larger than those ** in pLeft. This function populates pKV->pK and pKV->nK with a separator ** key that is: ** ** * larger than all keys on pLeft, and ** * smaller than or equal to all keys on pRight. */ static void btPrefixKey( const int pgsz, BtPage *pLeft, BtPage *pRight, KeyValue *pKV ){ int nMax; int nMaxPrefix = pgsz/4; u8 *aLeft; int nLeft; u8 *aRight; int nRight; u8 *aData; int i; aLeft = btKeyPrefix(pgsz, pLeft, 1, &nLeft); aRight = btKeyPrefix(pgsz, pRight, 0, &nRight); nMax = MIN(nLeft, nMaxPrefix); for(i=0; i<nMax && aLeft[i]==aRight[i]; i++); if( i<nMaxPrefix ){ pKV->pK = aRight; pKV->nK = i + 1; assert( pKV->nK<=nRight ); } } int btBalance( bt_cursor *pCsr, /* Cursor pointed to page to rebalance */ int bLeaf, /* True if rebalancing leaf pages */ int nKV, /* Number of entries in apKV[] array */ KeyValue *apKV /* Extra entries to add while rebalancing */ ){ |
︙ | ︙ | |||
1853 1854 1855 1856 1857 1858 1859 | /* The leaves are written. Now gather the keys and page numbers to ** push up into the parent page. This is only required when rebalancing ** b-tree leaves. When internal nodes are balanced, the btBalanceOutput ** loop accumulates the cells destined for the parent page. */ for(iPg=0; iPg<(ctx.nOut-1); iPg++){ ctx.aPCell[iPg].pgno = sqlite4BtPagePgno(ctx.apPg[iPg]); | < < < < < < | | < | 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 | /* The leaves are written. Now gather the keys and page numbers to ** push up into the parent page. This is only required when rebalancing ** b-tree leaves. When internal nodes are balanced, the btBalanceOutput ** loop accumulates the cells destined for the parent page. */ for(iPg=0; iPg<(ctx.nOut-1); iPg++){ ctx.aPCell[iPg].pgno = sqlite4BtPagePgno(ctx.apPg[iPg]); if( bLeaf ){ assert( ctx.aPCell[iPg].nK==0 ); btPrefixKey(pgsz, ctx.apPg[iPg], ctx.apPg[iPg+1], &ctx.aPCell[iPg]); } } rc = btSetChildPgno( pDb, pPar, iSib+ctx.nIn-1, sqlite4BtPagePgno(ctx.apPg[ctx.nOut-1]) ); if( rc==SQLITE4_OK ){ |
︙ | ︙ | |||
2178 2179 2180 2181 2182 2183 2184 | btCheckPageRefs(db); return rc; } int sqlite4BtDelete(bt_cursor *pCsr){ int rc; | < < | 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 | btCheckPageRefs(db); return rc; } int sqlite4BtDelete(bt_cursor *pCsr){ int rc; rc = btDeleteFromPage(pCsr, 1); if( rc==SQLITE4_OK ){ rc = btBalanceIfUnderfull(pCsr); } return rc; } int sqlite4BtSetCookie(bt_db *db, unsigned int iVal){ return sqlite4BtPagerSetCookie(db->pPager, iVal); } int sqlite4BtGetCookie(bt_db *db, unsigned int *piVal){ return sqlite4BtPagerGetCookie(db->pPager, piVal); } |
Changes to src/bt_pager.c.
︙ | ︙ | |||
497 498 499 500 501 502 503 504 505 506 | if( rc!=SQLITE4_OK ){ btFreePage(p, pRet); pRet = 0; } } } if( rc==SQLITE4_OK ){ p->nTotalRef++; pRet->nRef++; | > < > | 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 | if( rc!=SQLITE4_OK ){ btFreePage(p, pRet); pRet = 0; } } } assert( (pRet!=0)==(rc==SQLITE4_OK) ); if( rc==SQLITE4_OK ){ p->nTotalRef++; pRet->nRef++; } *ppPg = pRet; return rc; } int sqlite4BtPageWrite(BtPage *pPg){ if( (pPg->flags & BT_PAGE_DIRTY)==0 ){ pPg->flags |= BT_PAGE_DIRTY; pPg->pNextDirty = pPg->pPager->pDirty; |
︙ | ︙ | |||
523 524 525 526 527 528 529 530 531 532 533 534 535 536 | ** no longer in use. */ int sqlite4BtPageTrim(BtPage *pPg){ /* assert( !"todo" ); */ sqlite4BtPageRelease(pPg); return SQLITE4_OK; } int sqlite4BtPageRelease(BtPage *pPg){ if( pPg ){ assert( pPg->nRef>=1 ); pPg->nRef--; pPg->pPager->nTotalRef--; } | > > > > > > > | 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 | ** no longer in use. */ int sqlite4BtPageTrim(BtPage *pPg){ /* assert( !"todo" ); */ sqlite4BtPageRelease(pPg); return SQLITE4_OK; } /* ** Page number pgno is no longer in use. */ int sqlite4BtPageTrimPgno(BtPager *pPager, u32 pgno){ return SQLITE4_OK; } int sqlite4BtPageRelease(BtPage *pPg){ if( pPg ){ assert( pPg->nRef>=1 ); pPg->nRef--; pPg->pPager->nTotalRef--; } |
︙ | ︙ |