Overview
Comment: Simplify the accessPayload() routine so that it always populates the overflow page cache. In the one case where populating the page cache can lead to problems, simply invalidate the cache as soon as accessPayload() returns. This simplification reduces code size and helps accessPayload() to run a little faster. This backs out the eOp==2 mode of accessPayload() added by check-in [da59198505].
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 68e7a8c6765649195ef1ad9407d87d44
User & Date: drh 2017-01-27 00:31:59.289
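The "one case" the comment refers to is visible in the last hunk of the diff below, where an index key is read during a cursor seek. A minimal sketch of that caller-side pattern, using the identifiers that appear in the diff (the explanatory comments are added here and are not part of the check-in):

  /* Read the payload; accessPayload() now always populates the overflow
  ** page-list cache (BtCursor.aOverflow) as a side effect of the read. */
  rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0);

  /* The cursor is about to be repositioned, so the freshly populated cache
  ** would go stale; invalidate it as soon as accessPayload() returns. */
  pCur->curFlags &= ~BTCF_ValidOvfl;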
Context
2017-01-27
01:13  Performance optimization in accessPayload(). (check-in: ebb1fd98d4 user: drh tags: trunk)
00:31  Simplify the accessPayload() routine so that it always populates the overflow page cache. In the one case where populating the page cache can lead to problems, simply invalidate the cache as soon as accessPayload() returns. This simplification reduces code size and helps accessPayload() to run a little faster. This backs out the eOp==2 mode of accessPayload() added by check-in [da59198505]. (check-in: 68e7a8c676 user: drh tags: trunk)
2017-01-26
21:30  Remove an unreachable branch in the error handling logic for sqlite3BtreePayloadChecked(). (check-in: 293bf3ed7e user: drh tags: trunk)
Changes
Changes to src/btree.c.
︙
/*
** This function is used to read or overwrite payload information
** for the entry that the pCur cursor is pointing to. The eOp
** argument is interpreted as follows:
**
**   0: The operation is a read. Populate the overflow cache.
**   1: The operation is a write. Populate the overflow cache.
**
** A total of "amt" bytes are read or written beginning at "offset".
** Data is read to or from the buffer pBuf.
**
** The content being read or written might appear on the main page
** or be scattered out on multiple overflow pages.
**
** If the current cursor entry uses one or more overflow pages
** this function may allocate space for and lazily populate
** the overflow page-list cache array (BtCursor.aOverflow).
** Subsequent calls use this cache to make seeking to the supplied offset
** more efficient.
**
** Once an overflow page-list cache has been allocated, it must be
** invalidated if some other cursor writes to the same table, or if
** the cursor is moved to a different row. Additionally, in auto-vacuum
** mode, the following events may invalidate an overflow page-list cache.
**
**   * An incremental vacuum,
**   * A commit in auto_vacuum="full" mode,
**   * Creating a table (may require moving an overflow page).
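The offset-to-overflow-page mapping that makes this cache useful is plain integer arithmetic. Below is a small, self-contained sketch of that mapping; the function name overflowIndexForOffset() is invented here for illustration, while ovflSize, nLocal and usableSize mirror the variables used in accessPayload() further down.

#include <assert.h>
#include <stdint.h>

typedef uint32_t u32;

/* Sketch only: given a byte offset into a cell's payload, the number of
** bytes stored locally on the btree page (nLocal), and the page's usable
** size, return the index into aOverflow[] of the overflow page holding
** that byte, and write the byte's offset within that page to *pOfstInPage. */
u32 overflowIndexForOffset(u32 offset, u32 nLocal, u32 usableSize,
                           u32 *pOfstInPage){
  u32 ovflSize = usableSize - 4;  /* first 4 bytes of each overflow page hold
                                  ** the page number of the next page */
  assert( offset>=nLocal );       /* the local portion is handled separately */
  offset -= nLocal;
  *pOfstInPage = offset % ovflSize;
  return offset / ovflSize;       /* 0 means the first page, aOverflow[0] */
}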
︙
  MemPage *pPage = pCur->apPage[pCur->iPage]; /* Btree page of current entry */
  BtShared *pBt = pCur->pBt;                  /* Btree this cursor belongs to */
#ifdef SQLITE_DIRECT_OVERFLOW_READ
  unsigned char * const pBufStart = pBuf;     /* Start of original out buffer */
#endif

  assert( pPage );
  assert( eOp==0 || eOp==1 );
  assert( pCur->eState==CURSOR_VALID );
  assert( pCur->aiIdx[pCur->iPage]<pPage->nCell );
  assert( cursorHoldsMutex(pCur) );

  getCellInfo(pCur);
  aPayload = pCur->info.pPayload;
  assert( offset+amt <= pCur->info.nPayload );

  assert( aPayload > pPage->aData );
  if( (uptr)(aPayload - pPage->aData) > (pBt->usableSize - pCur->info.nLocal) ){
︙
  /* Check if data must be read/written to/from the btree page itself. */
  if( offset<pCur->info.nLocal ){
    int a = amt;
    if( a+offset>pCur->info.nLocal ){
      a = pCur->info.nLocal - offset;
    }
    rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage);
    offset = 0;
    pBuf += a;
    amt -= a;
  }else{
    offset -= pCur->info.nLocal;
  }

  if( rc==SQLITE_OK && amt>0 ){
    const u32 ovflSize = pBt->usableSize - 4;  /* Bytes content per ovfl page */
    Pgno nextPage;

    nextPage = get4byte(&aPayload[pCur->info.nLocal]);

    /* If the BtCursor.aOverflow[] has not been allocated, allocate it now.
    **
    ** The aOverflow[] array is sized at one entry for each overflow page
    ** in the overflow chain. The page number of the first overflow page is
    ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array
    ** means "not yet known" (the cache is lazily populated).
    */
    if( (pCur->curFlags & BTCF_ValidOvfl)==0 ){
      int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
      if( nOvfl>pCur->nOvflAlloc ){
        Pgno *aNew = (Pgno*)sqlite3Realloc(
            pCur->aOverflow, nOvfl*2*sizeof(Pgno)
        );
        if( aNew==0 ){
          return SQLITE_NOMEM_BKPT;
        }else{
          pCur->nOvflAlloc = nOvfl*2;
          pCur->aOverflow = aNew;
        }
      }
      memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno));
      pCur->curFlags |= BTCF_ValidOvfl;
    }

    /* If the overflow page-list cache has been allocated and the
    ** entry for the first required overflow page is valid, skip
    ** directly to it.
    */
    if( pCur->aOverflow[offset/ovflSize] ){
      iIdx = (offset/ovflSize);
      nextPage = pCur->aOverflow[iIdx];
      offset = (offset%ovflSize);
    }

    assert( rc==SQLITE_OK && amt>0 );
    while( nextPage ){
      /* If required, populate the overflow page-list cache. */
      assert( pCur->aOverflow[iIdx]==0
              || pCur->aOverflow[iIdx]==nextPage
              || CORRUPT_DB );
      pCur->aOverflow[iIdx] = nextPage;

      if( offset>=ovflSize ){
        /* The only reason to read this page is to obtain the page
        ** number for the next page in the overflow chain. The page
        ** data is not required. So first try to lookup the overflow
        ** page-list cache, if any, then fall back to the getOverflowPage()
        ** function.
        */
        assert( pCur->curFlags & BTCF_ValidOvfl );
        assert( pCur->pBtree->db==pBt->db );
        if( pCur->aOverflow[iIdx+1] ){
          nextPage = pCur->aOverflow[iIdx+1];
        }else{
          rc = getOverflowPage(pBt, nextPage, 0, &nextPage);
        }
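As a rough worked example of the sizing formula above (all numbers are illustrative, not taken from the check-in): with a 4096-byte usable page size each overflow page carries 4092 bytes of content, so a cell whose payload overflows by roughly 99 KB needs 25 overflow pages, and aOverflow[] is sized accordingly.

#include <stdio.h>
#include <stdint.h>

typedef uint32_t u32;

int main(void){
  /* Hypothetical numbers, only to illustrate the nOvfl formula above. */
  u32 usableSize = 4096;              /* usable bytes per database page */
  u32 ovflSize   = usableSize - 4;    /* 4092 content bytes per overflow page */
  u32 nPayload   = 100000;            /* total payload bytes for the cell */
  u32 nLocal     = 988;               /* bytes kept on the btree page itself */
  u32 nOvfl      = (nPayload - nLocal + ovflSize - 1)/ovflSize;
  printf("overflow pages needed: %u\n", nOvfl);   /* prints 25 */
  return 0;
}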
︙
      ** 5) the page is not in the WAL file
      ** 6) at least 4 bytes have already been read into the output buffer
      **
      ** then data can be read directly from the database file into the
      ** output buffer, bypassing the page-cache altogether. This speeds
      ** up loading large records that span many overflow pages.
      */
      if( eOp==0                                             /* (1) */
       && offset==0                                          /* (2) */
       && pBt->inTransaction==TRANS_READ                     /* (3) */
       && (fd = sqlite3PagerFile(pBt->pPager))->pMethods     /* (4) */
       && 0==sqlite3PagerUseWal(pBt->pPager, nextPage)       /* (5) */
       && &pBuf[-4]>=pBufStart                               /* (6) */
      ){
        u8 aSave[4];
        u8 *aWrite = &pBuf[-4];
        assert( aWrite>=pBufStart );                         /* due to (6) */
        memcpy(aSave, aWrite, 4);
        rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1));
        nextPage = get4byte(aWrite);
        memcpy(aWrite, aSave, 4);
      }else
#endif

      {
        DbPage *pDbPage;
        rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage,
            (eOp==0 ? PAGER_GET_READONLY : 0)
        );
        if( rc==SQLITE_OK ){
          aPayload = sqlite3PagerGetData(pDbPage);
          nextPage = get4byte(aPayload);
          rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
          sqlite3PagerUnref(pDbPage);
          offset = 0;
        }
      }
      amt -= a;
      pBuf += a;
    }
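The save/overwrite/restore dance in the direct-read branch can be isolated into a few lines. The sketch below is not SQLite code: a memcpy() from an in-memory page image stands in for sqlite3OsRead(), and readOvflDirect() and get4() are invented names, but the 4-byte borrow-and-restore trick is the same as in the branch above.

#include <string.h>
#include <stdint.h>

typedef uint8_t u8;
typedef uint32_t u32;

static u32 get4(const u8 *p){   /* big-endian 32-bit read, like get4byte() */
  return ((u32)p[0]<<24) | ((u32)p[1]<<16) | ((u32)p[2]<<8) | (u32)p[3];
}

/* Sketch only: copy `a` content bytes of one overflow page into pBuf with a
** single read, returning the page number of the next page in the chain.
** pBuf must have at least 4 writable bytes in front of it (condition (6)),
** because those bytes are briefly borrowed to receive the chain pointer. */
static u32 readOvflDirect(const u8 *pageImage, u8 *pBuf, int a){
  u8 aSave[4];
  u8 *aWrite = &pBuf[-4];
  memcpy(aSave, aWrite, 4);         /* save the 4 bytes about to be clobbered */
  memcpy(aWrite, pageImage, a+4);   /* one "read": next-page number + content */
  u32 nextPage = get4(aWrite);      /* extract the chain pointer */
  memcpy(aWrite, aSave, 4);         /* restore the caller's bytes */
  return nextPage;
}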
︙
      }
      pCellKey = sqlite3Malloc( nCell+18 );
      if( pCellKey==0 ){
        rc = SQLITE_NOMEM_BKPT;
        goto moveto_finish;
      }
      pCur->aiIdx[pCur->iPage] = (u16)idx;
      rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0);
      pCur->curFlags &= ~BTCF_ValidOvfl;
      if( rc ){
        sqlite3_free(pCellKey);
        goto moveto_finish;
      }
      c = xRecordCompare(nCell, pCellKey, pIdxKey);
      sqlite3_free(pCellKey);
    }
︙