Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Rework the SQLITE_MUTEXFREE_SHMLOCK code to reduce contention. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | mutexfree-shmlock |
Files: | files | file ages | folders |
SHA3-256: |
d9157dd176a2d18c6e02a2a0c7e16cef |
User & Date: | dan 2018-12-10 15:24:29.922 |
Context
2018-12-10
| ||
15:51 | Add extra tests to shmlock.test (direct testing of xShmLock methods). (check-in: d2c785f94c user: dan tags: mutexfree-shmlock) | |
15:24 | Rework the SQLITE_MUTEXFREE_SHMLOCK code to reduce contention. (check-in: d9157dd176 user: dan tags: mutexfree-shmlock) | |
09:45 | Avoid a mutex in-and-out in unixShmBarrier() on this branch. Use __sync_synchronize() instead. (check-in: a8c5fd86ce user: dan tags: mutexfree-shmlock) | |
Changes
Changes to src/os_unix.c.
︙ | ︙ | |||
44 45 46 47 48 49 50 51 52 53 54 55 56 57 | ** plus implementations of sqlite3_os_init() and sqlite3_os_end(). */ #include "sqliteInt.h" #if SQLITE_OS_UNIX /* This file is used on unix only */ /* Turn this feature on in all builds for now */ #define SQLITE_MUTEXFREE_SHMLOCK 1 /* ** There are various methods for file locking used for concurrency ** control: ** ** 1. POSIX locking (the default), ** 2. No locking, | > > | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | ** plus implementations of sqlite3_os_init() and sqlite3_os_end(). */ #include "sqliteInt.h" #if SQLITE_OS_UNIX /* This file is used on unix only */ /* Turn this feature on in all builds for now */ #define SQLITE_MUTEXFREE_SHMLOCK 1 #define SQLITE_MFS_NSHARD 5 #define SQLITE_MFS_EXCLUSIVE 255 /* ** There are various methods for file locking used for concurrency ** control: ** ** 1. POSIX locking (the default), ** 2. No locking, |
︙ | ︙ | |||
4251 4252 4253 4254 4255 4256 4257 | ** 0, and so on. ** ** If the 8-bits corresponding to a shm-locking locking slot are set to ** 0xFF, then a write-lock is held on the slot. Or, if they are set to ** a non-zero value smaller than 0xFF, then they represent the total ** number of read-locks held on the slot. There is no way to distinguish ** between a write-lock and 255 read-locks. */ | > > | > | 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 | ** 0, and so on. ** ** If the 8-bits corresponding to a shm-locking locking slot are set to ** 0xFF, then a write-lock is held on the slot. Or, if they are set to ** a non-zero value smaller than 0xFF, then they represent the total ** number of read-locks held on the slot. There is no way to distinguish ** between a write-lock and 255 read-locks. */ struct LockingSlot { u32 nLock; u64 aPadding[7]; } aMFSlot[3 + SQLITE_MFS_NSHARD*5]; #endif }; /* ** Atomic CAS primitive used in multi-process mode. Equivalent to: ** ** int unixCompareAndSwap(u32 *ptr, u32 oldval, u32 newval){ |
︙ | ︙ | |||
4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 | struct unixShm { unixShmNode *pShmNode; /* The underlying unixShmNode object */ unixShm *pNext; /* Next unixShm with the same unixShmNode */ u8 hasMutex; /* True if holding the unixShmNode->pShmMutex */ u8 id; /* Id of this connection within its unixShmNode */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ }; /* ** Constants used for locking */ #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ | > > > | 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 | struct unixShm { unixShmNode *pShmNode; /* The underlying unixShmNode object */ unixShm *pNext; /* Next unixShm with the same unixShmNode */ u8 hasMutex; /* True if holding the unixShmNode->pShmMutex */ u8 id; /* Id of this connection within its unixShmNode */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ #ifdef SQLITE_MUTEXFREE_SHMLOCK u8 aMFCurrent[8]; /* Current slot used for each shared lock */ #endif }; /* ** Constants used for locking */ #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ |
︙ | ︙ | |||
4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 | *pp = 0; } if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; sqlite3_mutex_leave(pShmNode->pShmMutex); return rc; } /* ** Change the lock state for a shared-memory segment. ** ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little ** different here than in posix. In xShmLock(), one can go from unlocked ** to shared and back or from unlocked to exclusive and back. But one may ** not go from shared to exclusive or from exclusive to shared. | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 | *pp = 0; } if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; sqlite3_mutex_leave(pShmNode->pShmMutex); return rc; } #ifdef SQLITE_MUTEXFREE_SHMLOCK static int unixMutexFreeShmlock( unixFile *pFd, /* Database file holding the shared memory */ int ofst, /* First lock to acquire or release */ int n, /* Number of locks to acquire or release */ int flags /* What to do with the lock */ ){ struct LockMapEntry { int iFirst; int nSlot; } aMap[9] = { { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3+0*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, { 3+1*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, { 3+2*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, { 3+3*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, { 3+4*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, { 3+5*SQLITE_MFS_NSHARD, 0 }, }; unixShm *p = pFd->pShm; /* The shared memory being locked */ unixShm *pX; /* For looping over all siblings */ unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ int rc = SQLITE_OK; int iIncr; u16 mask; /* Mask of locks to take or release */ if( flags & SQLITE_SHM_SHARED ){ /* SHARED locks */ u32 iOld, iNew, *ptr; int iIncr = -1; if( (flags & SQLITE_SHM_UNLOCK)==0 ){ p->aMFCurrent[ofst] = (p->aMFCurrent[ofst] + 1) % aMap[ofst].nSlot; iIncr = 1; } ptr = &pShmNode->aMFSlot[aMap[ofst].iFirst + p->aMFCurrent[ofst]].nLock; do { iOld = *ptr; iNew = iOld + iIncr; if( iNew>SQLITE_MFS_EXCLUSIVE ){ return SQLITE_BUSY; } }while( 0==unixCompareAndSwap(ptr, iOld, iNew) ); }else{ /* EXCLUSIVE locks */ int iFirst = aMap[ofst].iFirst; int iLast = aMap[ofst+n].iFirst; int i; for(i=iFirst; i<iLast; i++){ u32 *ptr = &pShmNode->aMFSlot[i].nLock; if( flags & SQLITE_SHM_UNLOCK ){ assert( (*ptr)==SQLITE_MFS_EXCLUSIVE ); *ptr = 0; }else{ u32 iOld; do { iOld = *ptr; if( iOld>0 ){ while( i>iFirst ){ i--; pShmNode->aMFSlot[i].nLock = 0; } return SQLITE_BUSY; } }while( 0==unixCompareAndSwap(ptr, iOld, SQLITE_MFS_EXCLUSIVE) ); } } } return SQLITE_OK; } #else # define unixMutexFreeShmlock(a,b,c,d) SQLITE_OK #endif /* ** Change the lock state for a shared-memory segment. ** ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little ** different here than in posix. In xShmLock(), one can go from unlocked ** to shared and back or from unlocked to exclusive and back. But one may ** not go from shared to exclusive or from exclusive to shared. |
︙ | ︙ | |||
4826 4827 4828 4829 4830 4831 4832 4833 | assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); | < < < < | < < < < < < < < < < < | < < < | < < < < < < < < < < < < < < < | < < | | | < | < | | < | < < | 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 | assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); if( pDbFd->pInode->bProcessLock ){ return unixMutexFreeShmlock(pDbFd, ofst, n, flags); } mask = (1<<(ofst+n)) - (1<<ofst); assert( n>1 || mask==(1<<ofst) ); if( flags & SQLITE_SHM_LOCK ){ assert( !(flags&SQLITE_SHM_SHARED) || (p->sharedMask&mask)==0 ); assert( !(flags&SQLITE_SHM_EXCLUSIVE) || !(p->exclMask&mask) ); }else{ assert( !(flags&SQLITE_SHM_SHARED) || (p->sharedMask&mask)==mask ); assert( !(flags&SQLITE_SHM_EXCLUSIVE) || (p->exclMask&mask)==mask ); } sqlite3_mutex_enter(pShmNode->pShmMutex); if( flags & SQLITE_SHM_UNLOCK ){ u16 allMask = 0; /* Mask of locks held by siblings */ /* See if any siblings hold this same lock */ for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ |
︙ | ︙ |
Changes to src/test_superlock.c.
︙ | ︙ | |||
37 38 39 40 41 42 43 44 45 46 47 48 49 50 | ** An instance of the following structure is allocated for each active ** superlock. The opaque handle returned by sqlite3demo_superlock() is ** actually a pointer to an instance of this structure. */ struct Superlock { sqlite3 *db; /* Database handle used to lock db */ int bWal; /* True if db is a WAL database */ }; typedef struct Superlock Superlock; /* ** The pCtx pointer passed to this function is actually a pointer to a ** SuperlockBusy structure. Invoke the busy-handler function encapsulated ** by the structure and return the result. | > > | 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | ** An instance of the following structure is allocated for each active ** superlock. The opaque handle returned by sqlite3demo_superlock() is ** actually a pointer to an instance of this structure. */ struct Superlock { sqlite3 *db; /* Database handle used to lock db */ int bWal; /* True if db is a WAL database */ int bRecoveryLocked; /* True if WAL RECOVERY lock is held */ int bReaderLocked; /* True if WAL READER locks are held */ }; typedef struct Superlock Superlock; /* ** The pCtx pointer passed to this function is actually a pointer to a ** SuperlockBusy structure. Invoke the busy-handler function encapsulated ** by the structure and return the result. |
︙ | ︙ | |||
103 104 105 106 107 108 109 | } /* ** Obtain the extra locks on the database file required for WAL databases. ** Invoke the supplied busy-handler as required. */ static int superlockWalLock( | | > > > > > > | > > > | > > | 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | } /* ** Obtain the extra locks on the database file required for WAL databases. ** Invoke the supplied busy-handler as required. */ static int superlockWalLock( Superlock *pLock, /* Superlock handle */ SuperlockBusy *pBusy /* Busy handler wrapper object */ ){ int rc; /* Return code */ sqlite3_file *fd = 0; /* Main database file handle */ void volatile *p = 0; /* Pointer to first page of shared memory */ sqlite3 *db = pLock->db; /* Obtain a pointer to the sqlite3_file object open on the main db file. */ rc = sqlite3_file_control(db, "main", SQLITE_FCNTL_FILE_POINTER, (void *)&fd); if( rc!=SQLITE_OK ) return rc; /* Obtain the "recovery" lock. Normally, this lock is only obtained by ** clients running database recovery. */ assert( pLock->bRecoveryLocked==0 ); rc = superlockShmLock(fd, 2, 1, pBusy); if( rc!=SQLITE_OK ) return rc; pLock->bRecoveryLocked = 1; /* Zero the start of the first shared-memory page. This means that any ** clients that open read or write transactions from this point on will ** have to run recovery before proceeding. Since they need the "recovery" ** lock that this process is holding to do that, no new read or write ** transactions may now be opened. Nor can a checkpoint be run, for the ** same reason. */ rc = fd->pMethods->xShmMap(fd, 0, 32*1024, 1, &p); if( rc!=SQLITE_OK ) return rc; memset((void *)p, 0, 32); /* Obtain exclusive locks on all the "read-lock" slots. Once these locks ** are held, it is guaranteed that there are no active reader, writer or ** checkpointer clients. */ assert( pLock->bReaderLocked==0 ); rc = superlockShmLock(fd, 3, SQLITE_SHM_NLOCK-3, pBusy); if( rc==SQLITE_OK ) pLock->bReaderLocked = 1; return rc; } /* ** Release a superlock held on a database file. The argument passed to ** this function must have been obtained from a successful call to ** sqlite3demo_superlock(). */ void sqlite3demo_superunlock(void *pLock){ Superlock *p = (Superlock *)pLock; if( p->bWal ){ int rc; /* Return code */ int flags = SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE; sqlite3_file *fd = 0; rc = sqlite3_file_control(p->db, "main", SQLITE_FCNTL_FILE_POINTER, (void *)&fd); if( rc==SQLITE_OK ){ if( p->bRecoveryLocked ){ fd->pMethods->xShmLock(fd, 2, 1, flags); p->bRecoveryLocked = 0; } if( p->bReaderLocked ){ fd->pMethods->xShmLock(fd, 3, SQLITE_SHM_NLOCK-3, flags); p->bReaderLocked = 0; } } } sqlite3_close(p->db); sqlite3_free(p); } /* |
︙ | ︙ | |||
228 229 230 231 232 233 234 | ** to drop the WAL read and write locks currently held. Otherwise, the ** new WAL locks may conflict with the old. */ if( rc==SQLITE_OK ){ if( SQLITE_OK==(rc = superlockIsWal(pLock)) && pLock->bWal ){ rc = sqlite3_exec(pLock->db, "COMMIT", 0, 0, 0); if( rc==SQLITE_OK ){ | | | 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | ** to drop the WAL read and write locks currently held. Otherwise, the ** new WAL locks may conflict with the old. */ if( rc==SQLITE_OK ){ if( SQLITE_OK==(rc = superlockIsWal(pLock)) && pLock->bWal ){ rc = sqlite3_exec(pLock->db, "COMMIT", 0, 0, 0); if( rc==SQLITE_OK ){ rc = superlockWalLock(pLock, &busy); } } } if( rc!=SQLITE_OK ){ sqlite3demo_superunlock(pLock); *ppLock = 0; |
︙ | ︙ |
Changes to src/wal.c.
︙ | ︙ | |||
2773 2774 2775 2776 2777 2778 2779 | ** checkpoint need not have completed for this to cause problems. */ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 ); assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame ); | | | | | | | < < < < < < < | 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 | ** checkpoint need not have completed for this to cause problems. */ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 ); assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame ); /* If it were possible for a checkpointer thread to run concurrent ** with this code, it would be a problem. In this case, it could be ** that the checkpointer has already determined that it will checkpoint ** snapshot X, where X is later in the wal file than pSnapshot, but ** has not yet set the pInfo->nBackfillAttempted variable to indicate ** its intent. Fortunately this is not possible, as the call to ** sqlite3WalSnapshotOpen() that sets pWal->pSnapshot also takes a ** SHARED lock on the checkpointer slot. */ if( rc==SQLITE_OK ){ /* Check that the wal file has not been wrapped. Assuming that it has ** not, also check that no checkpointer has attempted to checkpoint any ** frames beyond pSnapshot->mxFrame. If either of these conditions are ** true, return SQLITE_ERROR_SNAPSHOT. Otherwise, overwrite pWal->hdr ** with *pSnapshot and set *pChanged as appropriate for opening the ** snapshot. */ if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) && pSnapshot->mxFrame>=pInfo->nBackfillAttempted ){ assert( pWal->readLock>0 ); memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr)); *pChanged = bChanged; }else{ rc = SQLITE_ERROR_SNAPSHOT; } pWal->minFrame = 1; } if( rc!=SQLITE_OK ){ sqlite3WalEndReadTransaction(pWal); } |
︙ | ︙ |