Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Enable the b-tree cursor object's overflow page-number cache, which is normally enabled only for incr-blob cursors, for all cursors. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | overflow-pgno-cache |
Files: | files | file ages | folders |
SHA1: |
da59198505990a4fe832be7932117c7e |
User & Date: | dan 2014-03-11 20:33:04.219 |
Original Comment: | Enable the b-tree cursor objects overflow page-number cache, which is normally enabled only for incr-blob cursors, for all cursors. |
References
2017-01-27
| ||
00:31 | Simplify the accessPayload() routine so that it always populates the overflow page cache. In the one case where populating the page cache can lead to problems, simply invalidate the cache as soon as accessPayload() returns. This simplification reduces code size and helps accessPayload() to run a little faster. This backs out the eOp==2 mode of accessPayload() added by check-in [da59198505]. (check-in: 68e7a8c676 user: drh tags: trunk) | |
Context
2014-03-11
| ||
23:40 | Combine the various boolean fields of the BtCursor object into a single bit-vector. This allows setting or clearing more than one boolean at a time and makes the overflow-pgno-cache branch faster than trunk on speedtest1. (check-in: 968fec44d7 user: drh tags: overflow-pgno-cache) | |
20:33 | Enable the b-tree cursor object's overflow page-number cache, which is normally enabled only for incr-blob cursors, for all cursors. (check-in: da59198505 user: dan tags: overflow-pgno-cache) | |
15:27 | Version 3.8.4.1 (check-in: 018d317b12 user: drh tags: trunk, release, version-3.8.4.1) | |
Changes
Changes to src/btree.c.
︙ | ︙ | |||
442 443 444 445 446 447 448 | */ #ifdef SQLITE_DEBUG static int cursorHoldsMutex(BtCursor *p){ return sqlite3_mutex_held(p->pBt->mutex); } #endif | < < | > | < < < < > | 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 | */ #ifdef SQLITE_DEBUG static int cursorHoldsMutex(BtCursor *p){ return sqlite3_mutex_held(p->pBt->mutex); } #endif /* ** Invalidate the overflow cache of the cursor passed as the first argument. ** on the shared btree structure pBt. */ #define invalidateOverflowCache(pCur) (pCur->bOvflValid = 0) /* ** Invalidate the overflow page-list cache for all cursors opened ** on the shared btree structure pBt. */ static void invalidateAllOverflowCache(BtShared *pBt){ BtCursor *p; assert( sqlite3_mutex_held(pBt->mutex) ); for(p=pBt->pCursor; p; p=p->pNext){ invalidateOverflowCache(p); } } #ifndef SQLITE_OMIT_INCRBLOB /* ** This function is called before modifying the contents of a table ** to invalidate any incrblob cursors that are open on the ** row or one of the rows being modified. ** ** If argument isClearTable is true, then the entire contents of the ** table is about to be deleted. In this case invalidate all incrblob |
︙ | ︙ | |||
494 495 496 497 498 499 500 | if( p->isIncrblobHandle && (isClearTable || p->info.nKey==iRow) ){ p->eState = CURSOR_INVALID; } } } #else | | < < | 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 | if( p->isIncrblobHandle && (isClearTable || p->info.nKey==iRow) ){ p->eState = CURSOR_INVALID; } } } #else /* Stub function when INCRBLOB is omitted */ #define invalidateIncrblobCursors(x,y,z) #endif /* SQLITE_OMIT_INCRBLOB */ /* ** Set bit pgno of the BtShared.pHasContent bitvec. This is called ** when a page that previously contained data becomes a free-list leaf ** page. |
︙ | ︙ | |||
3690 3691 3692 3693 3694 3695 3696 | if( pCur->pNext ){ pCur->pNext->pPrev = pCur->pPrev; } for(i=0; i<=pCur->iPage; i++){ releasePage(pCur->apPage[i]); } unlockBtreeIfUnused(pBt); | | | 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 | if( pCur->pNext ){ pCur->pNext->pPrev = pCur->pPrev; } for(i=0; i<=pCur->iPage; i++){ releasePage(pCur->apPage[i]); } unlockBtreeIfUnused(pBt); sqlite3DbFree(pBtree->db, pCur->aOverflow); /* sqlite3_free(pCur); */ sqlite3BtreeLeave(pBtree); } return SQLITE_OK; } /* |
︙ | ︙ | |||
3911 3912 3913 3914 3915 3916 3917 | memcpy(pBuf, pPayload, nByte); } return SQLITE_OK; } /* ** This function is used to read or overwrite payload information | | < | | > > > < | > | | | | 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 | memcpy(pBuf, pPayload, nByte); } return SQLITE_OK; } /* ** This function is used to read or overwrite payload information ** for the entry that the pCur cursor is pointing to. The eOp ** argument is interpreted as follows: ** ** 0: The operation is a read. Populate the overflow cache. ** 1: The operation is a write. Populate the overflow cache. ** 2: The operation is a read. Do not populate the overflow cache. ** ** A total of "amt" bytes are read or written beginning at "offset". ** Data is read to or from the buffer pBuf. ** ** The content being read or written might appear on the main page ** or be scattered out on multiple overflow pages. ** ** If the current cursor entry uses one or more overflow pages and the ** eOp argument is not 2, this function may allocate space for and lazily ** popluates the overflow page-list cache array (BtCursor.aOverflow). ** Subsequent calls use this cache to make seeking to the supplied offset ** more efficient. ** ** Once an overflow page-list cache has been allocated, it may be ** invalidated if some other cursor writes to the same table, or if ** the cursor is moved to a different row. Additionally, in auto-vacuum ** mode, the following events may invalidate an overflow page-list cache. ** ** * An incremental vacuum, |
︙ | ︙ | |||
3973 3974 3975 3976 3977 3978 3979 | /* Check if data must be read/written to/from the btree page itself. */ if( offset<pCur->info.nLocal ){ int a = amt; if( a+offset>pCur->info.nLocal ){ a = pCur->info.nLocal - offset; } | | < | > | | < > | | > > > > > > > > | < < | < < | < | 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 | /* Check if data must be read/written to/from the btree page itself. */ if( offset<pCur->info.nLocal ){ int a = amt; if( a+offset>pCur->info.nLocal ){ a = pCur->info.nLocal - offset; } rc = copyPayload(&aPayload[offset], pBuf, a, (eOp & 0x01), pPage->pDbPage); offset = 0; pBuf += a; amt -= a; }else{ offset -= pCur->info.nLocal; } if( rc==SQLITE_OK && amt>0 ){ const u32 ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */ Pgno nextPage; nextPage = get4byte(&aPayload[pCur->info.nLocal]); /* If the isIncrblobHandle flag is set and the BtCursor.aOverflow[] ** has not been allocated, allocate it now. The array is sized at ** one entry for each overflow page in the overflow chain. The ** page number of the first overflow page is stored in aOverflow[0], ** etc. A value of 0 in the aOverflow[] array means "not yet known" ** (the cache is lazily populated). */ if( eOp!=2 && !pCur->bOvflValid ){ int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize; if( nOvfl>pCur->nOvflAlloc ){ Pgno *aNew = (Pgno*)sqlite3DbRealloc( pCur->pBtree->db, pCur->aOverflow, nOvfl*2*sizeof(Pgno) ); if( aNew==0 ){ rc = SQLITE_NOMEM; }else{ pCur->nOvflAlloc = nOvfl*2; pCur->aOverflow = aNew; } } if( rc==SQLITE_OK ){ memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno)); pCur->bOvflValid = 1; } } /* If the overflow page-list cache has been allocated and the ** entry for the first required overflow page is valid, skip ** directly to it. */ if( pCur->bOvflValid && pCur->aOverflow[offset/ovflSize] ){ iIdx = (offset/ovflSize); nextPage = pCur->aOverflow[iIdx]; offset = (offset%ovflSize); } for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){ /* If required, populate the overflow page-list cache. */ if( pCur->bOvflValid ){ assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage); pCur->aOverflow[iIdx] = nextPage; } if( offset>=ovflSize ){ /* The only reason to read this page is to obtain the page ** number for the next page in the overflow chain. The page ** data is not required. So first try to lookup the overflow ** page-list cache, if any, then fall back to the getOverflowPage() ** function. */ if( pCur->bOvflValid && pCur->aOverflow[iIdx+1] ){ nextPage = pCur->aOverflow[iIdx+1]; } else rc = getOverflowPage(pBt, nextPage, 0, &nextPage); offset -= ovflSize; }else{ /* Need to read this page properly. It contains some of the ** range of data that is being read (eOp==0) or written (eOp!=0). */ #ifdef SQLITE_DIRECT_OVERFLOW_READ |
︙ | ︙ | |||
4065 4066 4067 4068 4069 4070 4071 | ** 4) there is no open write-transaction, and ** 5) the database is not a WAL database, ** ** then data can be read directly from the database file into the ** output buffer, bypassing the page-cache altogether. This speeds ** up loading large records that span many overflow pages. */ | | | | | 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 | ** 4) there is no open write-transaction, and ** 5) the database is not a WAL database, ** ** then data can be read directly from the database file into the ** output buffer, bypassing the page-cache altogether. This speeds ** up loading large records that span many overflow pages. */ if( (eOp&0x01)==0 /* (1) */ && offset==0 /* (2) */ && pBt->inTransaction==TRANS_READ /* (4) */ && (fd = sqlite3PagerFile(pBt->pPager))->pMethods /* (3) */ && pBt->pPage1->aData[19]==0x01 /* (5) */ ){ u8 aSave[4]; u8 *aWrite = &pBuf[-4]; memcpy(aSave, aWrite, 4); rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); nextPage = get4byte(aWrite); memcpy(aWrite, aSave, 4); }else #endif { DbPage *pDbPage; rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage, ((eOp&0x01)==0 ? PAGER_GET_READONLY : 0) ); if( rc==SQLITE_OK ){ aPayload = sqlite3PagerGetData(pDbPage); nextPage = get4byte(aPayload); rc = copyPayload(&aPayload[offset+4], pBuf, a, (eOp&0x01), pDbPage); sqlite3PagerUnref(pDbPage); offset = 0; } } amt -= a; pBuf += a; } |
︙ | ︙ | |||
4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 | MemPage *pRoot; int rc = SQLITE_OK; assert( cursorHoldsMutex(pCur) ); assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); assert( CURSOR_VALID < CURSOR_REQUIRESEEK ); assert( CURSOR_FAULT > CURSOR_REQUIRESEEK ); if( pCur->eState>=CURSOR_REQUIRESEEK ){ if( pCur->eState==CURSOR_FAULT ){ assert( pCur->skipNext!=SQLITE_OK ); return pCur->skipNext; } sqlite3BtreeClearCursor(pCur); } | > | 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 | MemPage *pRoot; int rc = SQLITE_OK; assert( cursorHoldsMutex(pCur) ); assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); assert( CURSOR_VALID < CURSOR_REQUIRESEEK ); assert( CURSOR_FAULT > CURSOR_REQUIRESEEK ); invalidateOverflowCache(pCur); if( pCur->eState>=CURSOR_REQUIRESEEK ){ if( pCur->eState==CURSOR_FAULT ){ assert( pCur->skipNext!=SQLITE_OK ); return pCur->skipNext; } sqlite3BtreeClearCursor(pCur); } |
︙ | ︙ | |||
4682 4683 4684 4685 4686 4687 4688 | nCell = (int)pCur->info.nKey; pCellKey = sqlite3Malloc( nCell ); if( pCellKey==0 ){ rc = SQLITE_NOMEM; goto moveto_finish; } pCur->aiIdx[pCur->iPage] = (u16)idx; | | | 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 | nCell = (int)pCur->info.nKey; pCellKey = sqlite3Malloc( nCell ); if( pCellKey==0 ){ rc = SQLITE_NOMEM; goto moveto_finish; } pCur->aiIdx[pCur->iPage] = (u16)idx; rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 2); if( rc ){ sqlite3_free(pCellKey); goto moveto_finish; } c = xRecordCompare(nCell, pCellKey, pIdxKey, 0); sqlite3_free(pCellKey); } |
︙ | ︙ | |||
4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 | int idx; MemPage *pPage; assert( cursorHoldsMutex(pCur) ); assert( pRes!=0 ); assert( *pRes==0 || *pRes==1 ); assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); if( pCur->eState!=CURSOR_VALID ){ rc = restoreCursorPosition(pCur); if( rc!=SQLITE_OK ){ *pRes = 0; return rc; } if( CURSOR_INVALID==pCur->eState ){ | > | 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 | int idx; MemPage *pPage; assert( cursorHoldsMutex(pCur) ); assert( pRes!=0 ); assert( *pRes==0 || *pRes==1 ); assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); invalidateOverflowCache(pCur); if( pCur->eState!=CURSOR_VALID ){ rc = restoreCursorPosition(pCur); if( rc!=SQLITE_OK ){ *pRes = 0; return rc; } if( CURSOR_INVALID==pCur->eState ){ |
︙ | ︙ | |||
4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 | int rc; MemPage *pPage; assert( cursorHoldsMutex(pCur) ); assert( pRes!=0 ); assert( *pRes==0 || *pRes==1 ); assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); pCur->atLast = 0; if( pCur->eState!=CURSOR_VALID ){ if( ALWAYS(pCur->eState>=CURSOR_REQUIRESEEK) ){ rc = btreeRestoreCursorPosition(pCur); if( rc!=SQLITE_OK ){ *pRes = 0; return rc; | > | 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 | int rc; MemPage *pPage; assert( cursorHoldsMutex(pCur) ); assert( pRes!=0 ); assert( *pRes==0 || *pRes==1 ); assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); invalidateOverflowCache(pCur); pCur->atLast = 0; if( pCur->eState!=CURSOR_VALID ){ if( ALWAYS(pCur->eState>=CURSOR_REQUIRESEEK) ){ rc = btreeRestoreCursorPosition(pCur); if( rc!=SQLITE_OK ){ *pRes = 0; return rc; |
︙ | ︙ | |||
8420 8421 8422 8423 8424 8425 8426 | assert( !hasReadConflicts(pCsr->pBtree, pCsr->pgnoRoot) ); assert( pCsr->apPage[pCsr->iPage]->intKey ); return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1); } /* | < < | < < < < < | < < < | 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 | assert( !hasReadConflicts(pCsr->pBtree, pCsr->pgnoRoot) ); assert( pCsr->apPage[pCsr->iPage]->intKey ); return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1); } /* ** Mark this cursor as an incremental blob cursor. */ void sqlite3BtreeIncrblobCursor(BtCursor *pCur){ pCur->isIncrblobHandle = 1; } #endif /* ** Set both the "read version" (single byte at byte offset 18) and ** "write version" (single byte at byte offset 19) fields in the database |
︙ | ︙ |
Changes to src/btree.h.
︙ | ︙ | |||
183 184 185 186 187 188 189 | int sqlite3BtreeDataSize(BtCursor*, u32 *pSize); int sqlite3BtreeData(BtCursor*, u32 offset, u32 amt, void*); char *sqlite3BtreeIntegrityCheck(Btree*, int *aRoot, int nRoot, int, int*); struct Pager *sqlite3BtreePager(Btree*); int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*); | | | 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 | int sqlite3BtreeDataSize(BtCursor*, u32 *pSize); int sqlite3BtreeData(BtCursor*, u32 offset, u32 amt, void*); char *sqlite3BtreeIntegrityCheck(Btree*, int *aRoot, int nRoot, int, int*); struct Pager *sqlite3BtreePager(Btree*); int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*); void sqlite3BtreeIncrblobCursor(BtCursor *); void sqlite3BtreeClearCursor(BtCursor *); int sqlite3BtreeSetVersion(Btree *pBt, int iVersion); void sqlite3BtreeCursorHints(BtCursor *, unsigned int mask); #ifndef NDEBUG int sqlite3BtreeCursorIsValid(BtCursor*); #endif |
︙ | ︙ |
Changes to src/btreeInt.h.
︙ | ︙ | |||
493 494 495 496 497 498 499 500 501 502 503 504 505 506 | */ struct BtCursor { Btree *pBtree; /* The Btree to which this cursor belongs */ BtShared *pBt; /* The BtShared this cursor points to */ BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */ struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ #ifndef SQLITE_OMIT_INCRBLOB Pgno *aOverflow; /* Cache of overflow page locations */ #endif Pgno pgnoRoot; /* The root page of this tree */ CellInfo info; /* A parse of the cell we are pointing at */ i64 nKey; /* Size of pKey, or last integer key */ void *pKey; /* Saved key that was cursor's last known position */ int skipNext; /* Prev() is noop if negative. Next() is noop if positive */ | > > | 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 | */ struct BtCursor { Btree *pBtree; /* The Btree to which this cursor belongs */ BtShared *pBt; /* The BtShared this cursor points to */ BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */ struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ #ifndef SQLITE_OMIT_INCRBLOB int nOvflAlloc; /* Allocated size of aOverflow[] array */ u8 bOvflValid; /* True if size and contents of aOverflow[] valid */ Pgno *aOverflow; /* Cache of overflow page locations */ #endif Pgno pgnoRoot; /* The root page of this tree */ CellInfo info; /* A parse of the cell we are pointing at */ i64 nKey; /* Size of pKey, or last integer key */ void *pKey; /* Saved key that was cursor's last known position */ int skipNext; /* Prev() is noop if negative. Next() is noop if positive */ |
︙ | ︙ |
Changes to src/vdbeblob.c.
︙ | ︙ | |||
73 74 75 76 77 78 79 | rc = SQLITE_ERROR; sqlite3_finalize(p->pStmt); p->pStmt = 0; }else{ p->iOffset = pC->aType[p->iCol + pC->nField]; p->nByte = sqlite3VdbeSerialTypeLen(type); p->pCsr = pC->pCursor; | | < < | 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | rc = SQLITE_ERROR; sqlite3_finalize(p->pStmt); p->pStmt = 0; }else{ p->iOffset = pC->aType[p->iCol + pC->nField]; p->nByte = sqlite3VdbeSerialTypeLen(type); p->pCsr = pC->pCursor; sqlite3BtreeIncrblobCursor(p->pCsr); } } if( rc==SQLITE_ROW ){ rc = SQLITE_OK; }else if( p->pStmt ){ rc = sqlite3_finalize(p->pStmt); |
︙ | ︙ |