Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Allow read-only cursors to use mmap pages even if there is an open write transaction. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | experimental-mmap |
Files: | files | file ages | folders |
SHA1: |
b387e2f9d24dccac1fd040e309f6fc7e |
User & Date: | dan 2013-03-15 18:29:18.146 |
Context
2013-03-15
| ||
19:13 | Fix a dropped error code in pager.c. (check-in: 022fdc986b user: dan tags: experimental-mmap) | |
18:29 | Allow read-only cursors to use mmap pages even if there is an open write transaction. (check-in: b387e2f9d2 user: dan tags: experimental-mmap) | |
2013-03-14
| ||
18:34 | Use mmap() to read from the database file in rollback mode. This branch is unix only for now. (check-in: 6f21d9cbf5 user: dan tags: experimental-mmap) | |
Changes
Changes to src/btree.c.
︙ | |||
1565 1566 1567 1568 1569 1570 1571 | 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 | - + + + + + - + | ** means we have started to be concerned about content and the disk ** read should occur at that point. */ static int btreeGetPage( BtShared *pBt, /* The btree */ Pgno pgno, /* Number of the page to fetch */ MemPage **ppPage, /* Return the page in this parameter */ |
︙ | |||
1614 1615 1616 1617 1618 1619 1620 | 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 | - - - + + + + - + | ** convenience wrapper around separate calls to btreeGetPage() and ** btreeInitPage(). ** ** If an error occurs, then the value *ppPage is set to is undefined. It ** may remain unchanged, or it may be set to an invalid value. */ static int getAndInitPage( |
︙ | |||
2346 2347 2348 2349 2350 2351 2352 | 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 | - + | int nPageFile = 0; /* Number of pages in the database file */ int nPageHeader; /* Number of pages in the database according to hdr */ assert( sqlite3_mutex_held(pBt->mutex) ); assert( pBt->pPage1==0 ); rc = sqlite3PagerSharedLock(pBt->pPager); if( rc!=SQLITE_OK ) return rc; |
︙ | |||
2561 2562 2563 2564 2565 2566 2567 | 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 | - - + + - + - - + + + - - - + + - - - + + - + - + | p->pBt->nPage = 0; rc = newDatabase(p->pBt); sqlite3BtreeLeave(p); return rc; } /* |
︙ | |||
2936 2937 2938 2939 2940 2941 2942 | 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 | - + | } /* Fix the database pointer on page iPtrPage that pointed at iDbPage so ** that it points at iFreePage. Also fix the pointer map entry for ** iPtrPage. */ if( eType!=PTRMAP_ROOTPAGE ){ |
︙ | |||
3020 3021 3022 3023 3024 3025 3026 | 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 | - + | } } else { Pgno iFreePg; /* Index of free page to move pLastPg to */ MemPage *pLastPg; u8 eMode = BTALLOC_ANY; /* Mode parameter for allocateBtreePage() */ Pgno iNear = 0; /* nearby parameter for allocateBtreePage() */ |
︙ | |||
3112 3113 3114 3115 3116 3117 3118 | 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 | + + - - + + + | Pgno nOrig = btreePagecount(pBt); Pgno nFree = get4byte(&pBt->pPage1->aData[36]); Pgno nFin = finalDbSize(pBt, nOrig, nFree); if( nOrig<nFin ){ rc = SQLITE_CORRUPT_BKPT; }else if( nFree>0 ){ rc = saveAllCursors(pBt, 0, 0); if( rc==SQLITE_OK ){ |
︙ | |||
3434 3435 3436 3437 3438 3439 3440 | 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 | - + | if( rc2!=SQLITE_OK ){ rc = rc2; } /* The rollback may have destroyed the pPage1->aData value. So ** call btreeGetPage() on page 1 again to make ** sure pPage1->aData is set correctly. */ |
︙ | |||
3868 3869 3870 3871 3872 3873 3874 | 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 | - + | } } } #endif assert( next==0 || rc==SQLITE_DONE ); if( rc==SQLITE_OK ){ |
︙ | |||
4089 4090 4091 4092 4093 4094 4095 | 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 | - + + + | nextPage = get4byte(aWrite); memcpy(aWrite, aSave, 4); }else #endif { DbPage *pDbPage; |
︙ | |||
4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 | 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 | + - + | int i = pCur->iPage; MemPage *pNewPage; BtShared *pBt = pCur->pBt; assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPage<BTCURSOR_MAX_DEPTH ); assert( pCur->iPage>=0 ); if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ return SQLITE_CORRUPT_BKPT; } |
︙ | |||
4388 4389 4390 4391 4392 4393 4394 | 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 | - + | releasePage(pCur->apPage[i]); } pCur->iPage = 0; }else if( pCur->pgnoRoot==0 ){ pCur->eState = CURSOR_INVALID; return SQLITE_OK; }else{ |
︙ | |||
5002 5003 5004 5005 5006 5007 5008 | 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 | - + | }else{ iTrunk = get4byte(&pPage1->aData[32]); } testcase( iTrunk==mxPage ); if( iTrunk>mxPage ){ rc = SQLITE_CORRUPT_BKPT; }else{ |
︙ | |||
5066 5067 5068 5069 5070 5071 5072 | 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 | - + | MemPage *pNewTrunk; Pgno iNewTrunk = get4byte(&pTrunk->aData[8]); if( iNewTrunk>mxPage ){ rc = SQLITE_CORRUPT_BKPT; goto end_allocate_page; } testcase( iNewTrunk==mxPage ); |
︙ | |||
5146 5147 5148 5149 5150 5151 5152 | 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 | - + | rc = sqlite3PagerWrite(pTrunk->pDbPage); if( rc ) goto end_allocate_page; if( closest<k-1 ){ memcpy(&aData[8+closest*4], &aData[4+k*4], 4); } put4byte(&aData[4], k-1); noContent = !btreeGetHasContent(pBt, *pPgno); |
︙ | |||
5194 5195 5196 5197 5198 5199 5200 | 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 | - + - + | /* If *pPgno refers to a pointer-map page, allocate two new pages ** at the end of the file instead of one. The first allocated page ** becomes a new pointer-map page, the second is used by the caller. */ MemPage *pPg = 0; TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); |
︙ | |||
5276 5277 5278 5279 5280 5281 5282 | 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 | - + | nFree = get4byte(&pPage1->aData[36]); put4byte(&pPage1->aData[36], nFree+1); if( pBt->btsFlags & BTS_SECURE_DELETE ){ /* If the secure_delete option is enabled, then ** always fully overwrite deleted information with zeros. */ |
︙ | |||
5303 5304 5305 5306 5307 5308 5309 | 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 | - + | ** first trunk page in the current free-list. This block tests if it ** is possible to add the page as a new free-list leaf. */ if( nFree!=0 ){ u32 nLeaf; /* Initial number of leaf cells on trunk page */ iTrunk = get4byte(&pPage1->aData[32]); |
︙ | |||
5349 5350 5351 5352 5353 5354 5355 | 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 | - + | /* If control flows to this point, then it was not possible to add the ** the page being freed as a leaf page of the first trunk in the free-list. ** Possibly because the free-list is empty, or possibly because the ** first trunk in the free-list is full. Either way, the page being freed ** will become the new first trunk page in the free-list. */ |
︙ | |||
6150 6151 6152 6153 6154 6155 6156 | 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 | - + | if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){ pRight = &pParent->aData[pParent->hdrOffset+8]; }else{ pRight = findCell(pParent, i+nxDiv-pParent->nOverflow); } pgno = get4byte(pRight); while( 1 ){ |
︙ | |||
7241 7242 7243 7244 7245 7246 7247 | 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 | - + - + | */ u8 eType = 0; Pgno iPtrPage = 0; releasePage(pPageMove); /* Move the page currently at pgnoRoot to pgnoMove. */ |
︙ | |||
7338 7339 7340 7341 7342 7343 7344 | 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 | - + | int i; assert( sqlite3_mutex_held(pBt->mutex) ); if( pgno>btreePagecount(pBt) ){ return SQLITE_CORRUPT_BKPT; } |
︙ | |||
7440 7441 7442 7443 7444 7445 7446 | 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 | - + | ** This error is caught long before control reaches this point. */ if( NEVER(pBt->pCursor) ){ sqlite3ConnectionBlocked(p->db, pBt->pCursor->pBtree->db); return SQLITE_LOCKED_SHAREDCACHE; } |
︙ | |||
7475 7476 7477 7478 7479 7480 7481 | 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 | - + - + | }else{ /* The table being dropped does not have the largest root-page ** number in the database. So move the page that does into the ** gap left by the deleted root-page. */ MemPage *pMove; releasePage(pPage); |
︙ | |||
7897 7898 7899 7900 7901 7902 7903 | 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 | - + | /* Check that the page exists */ pBt = pCheck->pBt; usableSize = pBt->usableSize; if( iPage==0 ) return 0; if( checkRef(pCheck, iPage, zParentContext) ) return 0; |
︙ |
Changes to src/pager.c.
︙ | |||
654 655 656 657 658 659 660 661 662 663 664 665 666 667 | 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 | + | sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ PagerSavepoint *aSavepoint; /* Array of active savepoints */ int nSavepoint; /* Number of elements in aSavepoint[] */ char dbFileVers[16]; /* Changes whenever database file changes */ void *pMap; /* Memory mapped prefix of database file */ i64 nMap; /* Size of mapping at pMap in bytes */ i64 nMapValid; /* Bytes at pMap known to be valid */ int nMmapOut; /* Number of mmap pages currently outstanding */ PgHdr *pFree; /* List of free mmap page headers (pDirty) */ /* ** End of the routinely-changing class members ***************************************************************************/ u16 nExtra; /* Add this many bytes to each in-memory page */ |
︙ | |||
2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 | 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 | + + + | assert( pPager->eLock==EXCLUSIVE_LOCK ); /* TODO: Is it safe to use Pager.dbFileSize here? */ rc = sqlite3OsFileSize(pPager->fd, ¤tSize); newSize = szPage*(i64)nPage; if( rc==SQLITE_OK && currentSize!=newSize ){ if( currentSize>newSize ){ rc = sqlite3OsTruncate(pPager->fd, newSize); if( newSize<pPager->nMapValid ){ pPager->nMapValid = newSize; } }else if( (currentSize+szPage)<=newSize ){ char *pTmp = pPager->pTmpSpace; memset(pTmp, 0, szPage); testcase( (newSize-szPage) == currentSize ); testcase( (newSize-szPage) > currentSize ); rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage); } |
︙ | |||
3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 | 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 | + | ** Unmap any mapping of the database file. */ static int pagerUnmap(Pager *pPager){ if( pPager->pMap ){ munmap(pPager->pMap, pPager->nMap); pPager->pMap = 0; pPager->nMap = 0; pPager->nMapValid = 0; } return SQLITE_OK; } static int pagerMap(Pager *pPager){ int rc; i64 sz = 0; |
︙ | |||
3835 3836 3837 3838 3839 3840 3841 | 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 | - + - + + + | if( rc==SQLITE_OK ){ void *pMap = mmap(0, sz, PROT_READ, MAP_SHARED, fd, 0); if( pMap==MAP_FAILED ){ assert( 0 ); return SQLITE_IOERR; } pPager->pMap = pMap; |
︙ | |||
5031 5032 5033 5034 5035 5036 5037 | 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 | - - - + + + + + | assert( pPager->eState==PAGER_OPEN ); assert( (pPager->eLock==SHARED_LOCK) || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK) ); } |
︙ | |||
5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 | 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 | + + + + + + + + + | } }else{ memset(dbFileVers, 0, sizeof(dbFileVers)); } if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ pager_reset(pPager); /* Unmap the database file. It is possible that external processes ** may have truncated the database file and then extended it back ** to its original size while this process was not holding a lock. ** In this case there may exist a Pager.pMap mapping that appears ** to be the right size but is not actually valid. Avoid this ** possibility by unmapping the db here. */ pagerUnmap(pPager); }else if( ((i64)nPage*pPager->pageSize)!=pPager->nMap ){ pagerUnmap(pPager); } } /* If there is a WAL file in the file-system, open this database in WAL ** mode. Otherwise, the following function call is a no-op. */ |
︙ | |||
5169 5170 5171 5172 5173 5174 5175 | 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 | - + - - + + + + + + + + + + + + + + + + - - + + + + + - - - - + + + + + + + + | ** Since Lookup() never goes to disk, it never has to deal with locks ** or journal files. */ int sqlite3PagerAcquire( Pager *pPager, /* The pager open on the database file */ Pgno pgno, /* Page number to fetch */ DbPage **ppPage, /* Write a pointer to the page here */ |
︙ | |||
5429 5430 5431 5432 5433 5434 5435 | 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 | - - | int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ int rc = SQLITE_OK; if( pPager->errCode ) return pPager->errCode; assert( pPager->eState>=PAGER_READER && pPager->eState<PAGER_ERROR ); pPager->subjInMemory = (u8)subjInMemory; |
︙ | |||
5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 | 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 | + - - - | int sqlite3PagerWrite(DbPage *pDbPage){ int rc = SQLITE_OK; PgHdr *pPg = pDbPage; Pager *pPager = pPg->pPager; Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); assert( (pPg->flags & PGHDR_MMAP)==0 ); assert( pPager->eState>=PAGER_WRITER_LOCKED ); assert( pPager->eState!=PAGER_ERROR ); assert( assert_pager_state(pPager) ); |
︙ |
Changes to src/pager.h.
︙ | |||
74 75 76 77 78 79 80 81 82 83 84 85 86 87 | 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | + + + + + + | #define PAGER_JOURNALMODE_DELETE 0 /* Commit by deleting journal file */ #define PAGER_JOURNALMODE_PERSIST 1 /* Commit by zeroing journal header */ #define PAGER_JOURNALMODE_OFF 2 /* Journal omitted. */ #define PAGER_JOURNALMODE_TRUNCATE 3 /* Commit by truncating journal */ #define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ #define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ /* ** Flags that make up the mask passed to sqlite3PagerAcquire(). */ #define PAGER_ACQUIRE_NOCONTENT 0x01 /* Do not load data from disk */ #define PAGER_ACQUIRE_READONLY 0x02 /* Read-only page is acceptable */ /* ** The remainder of this file contains the declarations of the functions ** that make up the Pager sub-system API. See source code comments for ** a detailed description of each routine. */ /* Open and close a Pager connection. */ |
︙ |