Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Use a different free-list format for server-mode databases in order to reduce contention. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | server-edition |
Files: | files | file ages | folders |
SHA3-256: | 778e8a102d8dc7b0fa006c2d90b0a8cd |
User & Date: | dan 2017-04-28 10:20:03.891 |
Context
2017-04-28
| ||
14:09 | Add a missing releasePage() call to the server-mode free-list management code. (check-in: a5a085483c user: dan tags: server-edition) | |
10:20 | Use a different free-list format for server-mode databases in order to reduce contention. (check-in: 778e8a102d user: dan tags: server-edition) | |
2017-04-27
| ||
14:12 | Do not write master journal filenames into server-mode journal files. Use SQLITE_MUTEX_STATIC_APP1 to protect critical sections in server.c. (check-in: 3144ae40d2 user: dan tags: server-edition) | |
Changes
Changes to src/btree.c.
︙ | ︙ | |||
5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 | ){ return btreePrevious(pCur, pRes); } pCur->ix--; return SQLITE_OK; } /* ** Allocate a new page from the database file. ** ** The new page is marked as dirty. (In other words, sqlite3PagerWrite() ** has already been called on the new page.) The new page has also ** been referenced and the calling routine is responsible for calling ** sqlite3PagerUnref() on the new page when it is done. | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 
5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 | ){ return btreePrevious(pCur, pRes); } pCur->ix--; return SQLITE_OK; } #ifdef SQLITE_SERVER_EDITION #define SERVER_DEFAULT_FREELISTS 16 #define SERVER_DEFAULT_FREELIST_SIZE 128 /* ** Allocate the free-node and the first SERVER_DEFAULT_FREELISTS ** trunk pages. */ static int allocateServerFreenode(BtShared *pBt){ int rc; MemPage *pPage1 = pBt->pPage1; rc = sqlite3PagerWrite(pPage1->pDbPage); if( rc==SQLITE_OK ){ Pgno pgnoNode = (++pBt->nPage); MemPage *pNode = 0; int i; put4byte(&pPage1->aData[32], pgnoNode); rc = btreeGetUnusedPage(pBt, pgnoNode, &pNode, PAGER_GET_NOCONTENT); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pNode->pDbPage); } if( rc==SQLITE_OK ){ put4byte(&pNode->aData[0], 0); put4byte(&pNode->aData[4], SERVER_DEFAULT_FREELISTS); } for(i=0; rc==SQLITE_OK && i<SERVER_DEFAULT_FREELISTS; i++){ MemPage *pTrunk = 0; Pgno pgnoTrunk; if( ++pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; pgnoTrunk = pBt->nPage; rc = btreeGetUnusedPage(pBt, pgnoTrunk, &pTrunk, PAGER_GET_NOCONTENT); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pTrunk->pDbPage); } if( rc==SQLITE_OK ){ memset(pTrunk->aData, 0, 8); put4byte(&pNode->aData[8+i*4], pgnoTrunk); } releasePage(pTrunk); } releasePage(pNode); } return rc; } /* ** Return a reference to the first trunk page in one of the database free-lists. ** Allocate the database free-lists if required. */ static int findServerTrunk(BtShared *pBt, int bAlloc, MemPage **ppTrunk){ MemPage *pPage1 = pBt->pPage1; MemPage *pNode = 0; /* The node page */ MemPage *pTrunk = 0; /* The returned page */ Pgno iNode; /* Page number of node page */ int rc = SQLITE_OK; /* If the node page and free-list trunks have not yet been allocated, allocate ** them now. 
*/ pPage1 = pBt->pPage1; iNode = get4byte(&pPage1->aData[32]); if( iNode==0 ){ rc = allocateServerFreenode(pBt); iNode = get4byte(&pPage1->aData[32]); } /* Grab the node page */ if( rc==SQLITE_OK ){ rc = btreeGetUnusedPage(pBt, iNode, &pNode, 0); } if( rc==SQLITE_OK ){ int nList; /* Number of free-lists in this db */ int i; /* Try to lock a free-list trunk. If bAlloc is true, it has to be a ** free-list trunk with at least one entry in the free-list. */ nList = (int)get4byte(&pNode->aData[4]); for(i=0; i<nList; i++){ Pgno iTrunk = get4byte(&pNode->aData[8+i*4]); if( SQLITE_OK==sqlite3PagerWritelock(pBt->pPager, iTrunk) ){ rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0); if( rc==SQLITE_OK && bAlloc ){ if( !get4byte(&pTrunk->aData[0]) && !get4byte(&pTrunk->aData[4]) ){ releasePage(pTrunk); pTrunk = 0; } } if( rc!=SQLITE_OK || pTrunk ) break; } } /* No free pages in any free-list. Or perhaps we were locked out. In ** either case, try to allocate more from the end of the file now. */ if( i==nList ){ assert( rc==SQLITE_OK && pTrunk==0 ); rc = sqlite3PagerWrite(pPage1->pDbPage); for(i=0; rc==SQLITE_OK && i<nList; i++){ /* Add some free pages to each free-list. No server-locks are required ** to do this as we have a write-lock on page 1 - guaranteeing ** exclusive access to the db file. 
*/ MemPage *pT = 0; Pgno iTrunk = get4byte(&pNode->aData[8+i*4]); rc = btreeGetUnusedPage(pBt, iTrunk, &pT, 0); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pT->pDbPage); } if( rc==SQLITE_OK ){ int iPg = get4byte(&pT->aData[4]); for(/*no-op*/; iPg<SERVER_DEFAULT_FREELIST_SIZE; iPg++){ if( ++pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; put4byte(&pT->aData[8+iPg*4], pBt->nPage); } put4byte(&pT->aData[4], iPg); if( pTrunk==0 ){ pTrunk = pT; pT = 0; } } releasePage(pT); } if( rc==SQLITE_OK ){ MemPage *pLast = 0; rc = btreeGetUnusedPage(pBt, pBt->nPage, &pLast, 0); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pLast->pDbPage); releasePage(pLast); put4byte(28 + (u8*)pPage1->aData, pBt->nPage); } } } } releasePage(pNode); if( rc==SQLITE_OK ){ assert( pTrunk ); rc = sqlite3PagerWrite(pTrunk->pDbPage); } if( rc!=SQLITE_OK ){ releasePage(pTrunk); pTrunk = 0; } *ppTrunk = pTrunk; return rc; } static int allocateServerPage( BtShared *pBt, /* The btree */ MemPage **ppPage, /* Store pointer to the allocated page here */ Pgno *pPgno, /* Store the page number here */ Pgno nearby, /* Search for a page near this one */ u8 eMode /* BTALLOC_EXACT, BTALLOC_LT, or BTALLOC_ANY */ ){ int rc; /* Return code */ MemPage *pTrunk = 0; /* The node page */ Pgno pgnoNew = 0; assert( eMode==BTALLOC_ANY ); assert( sqlite3_mutex_held(pBt->mutex) ); rc = findServerTrunk(pBt, 1, &pTrunk); if( rc==SQLITE_OK ){ int nFree; /* Number of free pages on this trunk page */ nFree = (int)get4byte(&pTrunk->aData[4]); if( nFree==0 ){ pgnoNew = get4byte(&pTrunk->aData[0]); assert( pgnoNew ); }else{ nFree--; pgnoNew = get4byte(&pTrunk->aData[8+4*nFree]); put4byte(&pTrunk->aData[4], (u32)nFree); releasePage(pTrunk); pTrunk = 0; } } if( rc==SQLITE_OK ){ MemPage *pNew = 0; rc = btreeGetUnusedPage(pBt, pgnoNew, &pNew, pTrunk?0:PAGER_GET_NOCONTENT); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pNew->pDbPage); } if( rc==SQLITE_OK && pTrunk ){ memcpy(pTrunk->aData, pNew->aData, pBt->usableSize); } *ppPage = pNew; *pPgno 
= pgnoNew; } releasePage(pTrunk); return rc; } static int freeServerPage2(BtShared *pBt, MemPage *pPage, Pgno iPage){ int rc; /* Return code */ MemPage *pTrunk = 0; /* The node page */ assert( sqlite3_mutex_held(pBt->mutex) ); rc = findServerTrunk(pBt, 0, &pTrunk); if( rc==SQLITE_OK ){ int nFree; /* Number of free pages on this trunk page */ nFree = (int)get4byte(&pTrunk->aData[4]); if( nFree>=((pBt->usableSize / 4) - 2) ){ if( pPage==0 ){ rc = btreeGetUnusedPage(pBt, iPage, &pPage, 0); }else{ sqlite3PagerRef(pPage->pDbPage); } rc = sqlite3PagerWrite(pPage->pDbPage); if( rc==SQLITE_OK ){ memcpy(pPage->aData, pTrunk->aData, pBt->usableSize); put4byte(&pTrunk->aData[0], iPage); put4byte(&pTrunk->aData[4], 0); } releasePage(pPage); }else{ put4byte(&pTrunk->aData[8+nFree*4], iPage); put4byte(&pTrunk->aData[4], (u32)nFree+1); } } return rc; } #else # define allocateServerPage(v, w, x, y, z) SQLITE_OK # define freeServerPage2(x, y, z) SQLITE_OK #endif /* SQLITE_SERVER_EDITION */ /* ** Allocate a new page from the database file. ** ** The new page is marked as dirty. (In other words, sqlite3PagerWrite() ** has already been called on the new page.) The new page has also ** been referenced and the calling routine is responsible for calling ** sqlite3PagerUnref() on the new page when it is done. |
︙ | ︙ | |||
5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 | MemPage *pPage1; int rc; u32 n; /* Number of pages on the freelist */ u32 k; /* Number of leaves on the trunk of the freelist */ MemPage *pTrunk = 0; MemPage *pPrevTrunk = 0; Pgno mxPage; /* Total size of the database file */ assert( sqlite3_mutex_held(pBt->mutex) ); assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); pPage1 = pBt->pPage1; mxPage = btreePagecount(pBt); /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36 ** stores stores the total number of pages on the freelist. */ | > > > > | 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 | MemPage *pPage1; int rc; u32 n; /* Number of pages on the freelist */ u32 k; /* Number of leaves on the trunk of the freelist */ MemPage *pTrunk = 0; MemPage *pPrevTrunk = 0; Pgno mxPage; /* Total size of the database file */ if( sqlite3PagerIsServer(pBt->pPager) ){ return allocateServerPage(pBt, ppPage, pPgno, nearby, eMode); } assert( sqlite3_mutex_held(pBt->mutex) ); assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); pPage1 = pBt->pPage1; mxPage = btreePagecount(pBt); /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36 ** stores stores the total number of pages on the freelist. */ |
︙ | ︙ | |||
5976 5977 5978 5979 5980 5981 5982 | if( pMemPage ){ pPage = pMemPage; sqlite3PagerRef(pPage->pDbPage); }else{ pPage = btreePageLookup(pBt, iPage); } | < < < < < < > > > > > > > > > > > | 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 | if( pMemPage ){ pPage = pMemPage; sqlite3PagerRef(pPage->pDbPage); }else{ pPage = btreePageLookup(pBt, iPage); } if( pBt->btsFlags & BTS_SECURE_DELETE ){ /* If the secure_delete option is enabled, then ** always fully overwrite deleted information with zeros. */ if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0) ){ goto freepage_out; } memset(pPage->aData, 0, pPage->pBt->pageSize); } if( sqlite3PagerIsServer(pBt->pPager) ){ rc = freeServerPage2(pBt, pPage, iPage); goto freepage_out; } /* Increment the free page count on pPage1 */ rc = sqlite3PagerWrite(pPage1->pDbPage); if( rc ) goto freepage_out; nFree = get4byte(&pPage1->aData[36]); put4byte(&pPage1->aData[36], nFree+1); /* If the database supports auto-vacuum, write an entry in the pointer-map ** to indicate that the page is free. */ if( ISAUTOVACUUM ){ ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc); if( rc ) goto freepage_out; |
︙ | ︙ | |||
9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 | pCheck->v1 = saved_v1; pCheck->v2 = saved_v2; return depth+1; } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ #ifndef SQLITE_OMIT_INTEGRITY_CHECK /* ** This routine does a complete check of the given BTree file. aRoot[] is ** an array of pages numbers were each page number is the root page of ** a table. nRoot is the number of entries in aRoot. ** ** A read-only or read-write transaction must be opened before calling ** this function. | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 | pCheck->v1 = saved_v1; pCheck->v2 = saved_v2; return depth+1; } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ #ifndef SQLITE_OMIT_INTEGRITY_CHECK #if !defined(SQLITE_OMIT_INTEGRITY_CHECK) && defined(SQLITE_SERVER_EDITION) static void checkServerList(IntegrityCk *pCheck){ u32 pgnoNode = get4byte(&pCheck->pBt->pPage1->aData[32]); if( pgnoNode ){ DbPage *pNode = 0; u8 *aNodeData; u32 nList; /* Number of free-lists */ int i; checkRef(pCheck, pgnoNode); if( sqlite3PagerGet(pCheck->pPager, (Pgno)pgnoNode, &pNode, 0) ){ checkAppendMsg(pCheck, "failed to get node page %d", pgnoNode); return; } aNodeData = sqlite3PagerGetData(pNode); nList = get4byte(&aNodeData[4]); for(i=0; i<nList; i++){ u32 pgnoTrunk = get4byte(&aNodeData[8+4*i]); while( pgnoTrunk ){ DbPage *pTrunk = 0; checkRef(pCheck, pgnoTrunk); if( sqlite3PagerGet(pCheck->pPager, (Pgno)pgnoTrunk, &pTrunk, 0) ){ checkAppendMsg(pCheck, "failed to get page %d", pgnoTrunk); pgnoTrunk = 0; }else{ u8 *aTrunkData = sqlite3PagerGetData(pTrunk); int nLeaf = (int)get4byte(&aTrunkData[4]); int iLeaf; for(iLeaf=0; iLeaf<nLeaf; iLeaf++){ u32 pgnoLeaf = 
get4byte(&aTrunkData[8+iLeaf*4]); checkRef(pCheck, pgnoLeaf); } pgnoTrunk = get4byte(&aTrunkData[0]); sqlite3PagerUnref(pTrunk); } } } sqlite3PagerUnref(pNode); } } #endif /* ** This routine does a complete check of the given BTree file. aRoot[] is ** an array of pages numbers were each page number is the root page of ** a table. nRoot is the number of entries in aRoot. ** ** A read-only or read-write transaction must be opened before calling ** this function. |
︙ | ︙ | |||
9503 9504 9505 9506 9507 9508 9509 | i = PENDING_BYTE_PAGE(pBt); if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); /* Check the integrity of the freelist */ sCheck.zPfx = "Main freelist: "; | > > > > > > | | > | 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 | i = PENDING_BYTE_PAGE(pBt); if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); /* Check the integrity of the freelist */ sCheck.zPfx = "Main freelist: "; #ifdef SQLITE_SERVER_EDITION if( sqlite3PagerIsServer(pBt->pPager) ){ checkServerList(&sCheck); }else #endif { checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), get4byte(&pBt->pPage1->aData[36])); } sCheck.zPfx = 0; /* Check all the tables. */ testcase( pBt->db->flags & SQLITE_CellSizeCk ); pBt->db->flags &= ~SQLITE_CellSizeCk; for(i=0; (int)i<nRoot && sCheck.mxErr; i++){ |
︙ | ︙ |
Changes to src/pager.c.
︙ | ︙ | |||
7656 7657 7658 7659 7660 7661 7662 7663 7664 | ** is empty, return 0. */ int sqlite3PagerWalFramesize(Pager *pPager){ assert( pPager->eState>=PAGER_READER ); return sqlite3WalFramesize(pPager->pWal); } #endif #endif /* SQLITE_OMIT_DISKIO */ | > > > > > > > > > | 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 | ** is empty, return 0. */ int sqlite3PagerWalFramesize(Pager *pPager){ assert( pPager->eState>=PAGER_READER ); return sqlite3WalFramesize(pPager->pWal); } #endif #ifdef SQLITE_SERVER_EDITION int sqlite3PagerIsServer(Pager *pPager){ return pagerIsServer(pPager); } int sqlite3PagerWritelock(Pager *pPager, Pgno pgno){ return sqlite3ServerLock(pPager->pServer, pgno, 1); } #endif #endif /* SQLITE_OMIT_DISKIO */ |
Changes to src/pager.h.
︙ | ︙ | |||
234 235 236 237 238 239 240 241 242 243 | #else # define disable_simulated_io_errors() # define enable_simulated_io_errors() #endif #ifdef SQLITE_SERVER_EDITION int sqlite3PagerRollbackJournal(Pager*, int); #endif #endif /* SQLITE_PAGER_H */ | > > | 234 235 236 237 238 239 240 241 242 243 244 245 | #else # define disable_simulated_io_errors() # define enable_simulated_io_errors() #endif #ifdef SQLITE_SERVER_EDITION int sqlite3PagerRollbackJournal(Pager*, int); int sqlite3PagerIsServer(Pager *pPager); int sqlite3PagerWritelock(Pager *pPager, Pgno); #endif #endif /* SQLITE_PAGER_H */ |
Changes to src/server.c.
︙ | ︙ | |||
476 477 478 479 480 481 482 483 | sqlite3_log(SQLITE_BUSY_DEADLOCK, "Conflict at page %d", (int)pgno); rc = SQLITE_BUSY_DEADLOCK; goto server_lock_out; } v = *pSlot; } if( bWrite ){ | > | < < | 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 | sqlite3_log(SQLITE_BUSY_DEADLOCK, "Conflict at page %d", (int)pgno); rc = SQLITE_BUSY_DEADLOCK; goto server_lock_out; } v = *pSlot; } n = v | (1 << p->iClient); if( bWrite ){ n = n | ((p->iClient+1) << HMA_CLIENT_SLOTS); } if( __sync_val_compare_and_swap(pSlot, v, n)==v ) break; v = *pSlot; } } server_lock_out: |
︙ | ︙ |
Changes to test/server2.test.
︙ | ︙ | |||
37 38 39 40 41 42 43 | db2 eval { CREATE TABLE t3(a, b) } file exists test.db-hma } {1} do_test 1.4 { db2 close file exists test.db-hma } {1} | > | | 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | db2 eval { CREATE TABLE t3(a, b) } file exists test.db-hma } {1} do_test 1.4 { db2 close file exists test.db-hma } {1} integrity_check 1.5 do_test 1.6 { db close file exists test.db-hma } {0} #------------------------------------------------------------------------- # |
︙ | ︙ |