Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -46,10 +46,12 @@ #include "sqliteInt.h" #if SQLITE_OS_UNIX /* This file is used on unix only */ /* Turn this feature on in all builds for now */ #define SQLITE_MUTEXFREE_SHMLOCK 1 +#define SQLITE_MFS_NSHARD 5 +#define SQLITE_MFS_EXCLUSIVE 255 /* ** There are various methods for file locking used for concurrency ** control: ** @@ -4275,11 +4277,14 @@ ** If the 8-bits corresponding to a shm-locking locking slot are set to ** 0xFF, then a write-lock is held on the slot. Or, if they are set to ** a non-zero value smaller than 0xFF, then they represent the total ** number of read-locks held on the slot. There is no way to distinguish ** between a write-lock and 255 read-locks. */ - u64 lockmask; + struct LockingSlot { + u32 nLock; + u64 aPadding[7]; + } aMFSlot[3 + SQLITE_MFS_NSHARD*5]; #endif }; /* ** Atomic CAS primitive used in multi-process mode. Equivalent to: @@ -4314,10 +4319,13 @@ unixShm *pNext; /* Next unixShm with the same unixShmNode */ u8 hasMutex; /* True if holding the unixShmNode->pShmMutex */ u8 id; /* Id of this connection within its unixShmNode */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ +#ifdef SQLITE_MUTEXFREE_SHMLOCK + u8 aMFCurrent[8]; /* Current slot used for each shared lock */ +#endif }; /* ** Constants used for locking */ @@ -4818,10 +4826,87 @@ if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; sqlite3_mutex_leave(pShmNode->pShmMutex); return rc; } +#ifdef SQLITE_MUTEXFREE_SHMLOCK +static int unixMutexFreeShmlock( + unixFile *pFd, /* Database file holding the shared memory */ + int ofst, /* First lock to acquire or release */ + int n, /* Number of locks to acquire or release */ + int flags /* What to do with the lock */ +){ + struct LockMapEntry { + int iFirst; + int nSlot; + } aMap[9] = { + { 0, 1 }, + { 1, 1 }, + { 2, 1 }, + { 3+0*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, + { 3+1*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, + { 3+2*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, + { 3+3*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, + { 3+4*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, + { 3+5*SQLITE_MFS_NSHARD, 0 }, + }; + + unixShm *p = pFd->pShm; /* The shared memory being locked */ + unixShm *pX; /* For looping over all siblings */ + unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ + int rc = SQLITE_OK; + int iIncr; + u16 mask; /* Mask of locks to take or release */ + + if( flags & SQLITE_SHM_SHARED ){ + /* SHARED locks */ + u32 iOld, iNew, *ptr; + int iIncr = -1; + if( (flags & SQLITE_SHM_UNLOCK)==0 ){ + p->aMFCurrent[ofst] = (p->aMFCurrent[ofst] + 1) % aMap[ofst].nSlot; + iIncr = 1; + } + ptr = &pShmNode->aMFSlot[aMap[ofst].iFirst + p->aMFCurrent[ofst]].nLock; + do { + iOld = *ptr; + iNew = iOld + iIncr; + if( iNew>SQLITE_MFS_EXCLUSIVE ){ + return SQLITE_BUSY; + } + }while( 0==unixCompareAndSwap(ptr, iOld, iNew) ); + }else{ + /* EXCLUSIVE locks */ + int iFirst = aMap[ofst].iFirst; + int iLast = aMap[ofst+n].iFirst; + int i; + for(i=iFirst; iaMFSlot[i].nLock; + if( flags & SQLITE_SHM_UNLOCK ){ + assert( (*ptr)==SQLITE_MFS_EXCLUSIVE ); + *ptr = 0; + }else{ + u32 iOld; + do { + iOld = *ptr; + if( iOld>0 ){ + while( i>iFirst ){ + i--; + pShmNode->aMFSlot[i].nLock = 0; + } + return SQLITE_BUSY; + } + }while( 0==unixCompareAndSwap(ptr, iOld, SQLITE_MFS_EXCLUSIVE) ); + } + } + } + + return SQLITE_OK; +} +#else +# define unixMutexFreeShmlock(a,b,c,d) SQLITE_OK +#endif + /* ** Change the lock state for a shared-memory segment. ** ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little ** different here than in posix. In xShmLock(), one can go from unlocked @@ -4850,64 +4935,24 @@ || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); + + if( pDbFd->pInode->bProcessLock ){ + return unixMutexFreeShmlock(pDbFd, ofst, n, flags); + } mask = (1<<(ofst+n)) - (1<1 || mask==(1<pInode->bProcessLock ){ - - while( 1 ){ - u64 lockmask = pShmNode->lockmask; - u64 newmask = lockmask; - int i; - for(i=ofst; i> (ix8)) & 0xFF; - if( flags & SQLITE_SHM_UNLOCK ){ - if( flags & SQLITE_SHM_EXCLUSIVE ){ - if( p->exclMask & (1 << i) ){ - newmask = newmask & ~((u64)0xFF<sharedMask & (1 << i) ){ - newmask = newmask & ~((u64)0xFF<exclMask & (1 << i))==0 ){ - newmask = newmask | ((u64)0xFF<sharedMask & (1 << i))==0 ){ - newmask = newmask & ~((u64)0xFF<lockmask, lockmask, newmask) ) break; - } - - if( flags & SQLITE_SHM_UNLOCK ){ - p->sharedMask &= ~mask; - p->exclMask &= ~mask; - }else if( flags & SQLITE_SHM_EXCLUSIVE ){ - p->exclMask |= mask; - }else{ - p->sharedMask |= mask; - } - - return SQLITE_OK; - } -#endif + if( flags & SQLITE_SHM_LOCK ){ + assert( !(flags&SQLITE_SHM_SHARED) || (p->sharedMask&mask)==0 ); + assert( !(flags&SQLITE_SHM_EXCLUSIVE) || !(p->exclMask&mask) ); + }else{ + assert( !(flags&SQLITE_SHM_SHARED) || (p->sharedMask&mask)==mask ); + assert( !(flags&SQLITE_SHM_EXCLUSIVE) || (p->exclMask&mask)==mask ); + } sqlite3_mutex_enter(pShmNode->pShmMutex); if( flags & SQLITE_SHM_UNLOCK ){ u16 allMask = 0; /* Mask of locks held by siblings */ Index: src/test_superlock.c ================================================================== --- src/test_superlock.c +++ src/test_superlock.c @@ -39,10 +39,12 @@ ** actually a pointer to an instance of this structure. */ struct Superlock { sqlite3 *db; /* Database handle used to lock db */ int bWal; /* True if db is a WAL database */ + int bRecoveryLocked; /* True if WAL RECOVERY lock is held */ + int bReaderLocked; /* True if WAL READER locks are held */ }; typedef struct Superlock Superlock; /* ** The pCtx pointer passed to this function is actually a pointer to a @@ -105,26 +107,29 @@ /* ** Obtain the extra locks on the database file required for WAL databases. ** Invoke the supplied busy-handler as required. */ static int superlockWalLock( - sqlite3 *db, /* Database handle open on WAL database */ + Superlock *pLock, /* Superlock handle */ SuperlockBusy *pBusy /* Busy handler wrapper object */ ){ int rc; /* Return code */ sqlite3_file *fd = 0; /* Main database file handle */ void volatile *p = 0; /* Pointer to first page of shared memory */ + sqlite3 *db = pLock->db; /* Obtain a pointer to the sqlite3_file object open on the main db file. */ rc = sqlite3_file_control(db, "main", SQLITE_FCNTL_FILE_POINTER, (void *)&fd); if( rc!=SQLITE_OK ) return rc; /* Obtain the "recovery" lock. Normally, this lock is only obtained by ** clients running database recovery. */ + assert( pLock->bRecoveryLocked==0 ); rc = superlockShmLock(fd, 2, 1, pBusy); if( rc!=SQLITE_OK ) return rc; + pLock->bRecoveryLocked = 1; /* Zero the start of the first shared-memory page. This means that any ** clients that open read or write transactions from this point on will ** have to run recovery before proceeding. Since they need the "recovery" ** lock that this process is holding to do that, no new read or write @@ -137,11 +142,13 @@ /* Obtain exclusive locks on all the "read-lock" slots. Once these locks ** are held, it is guaranteed that there are no active reader, writer or ** checkpointer clients. */ + assert( pLock->bReaderLocked==0 ); rc = superlockShmLock(fd, 3, SQLITE_SHM_NLOCK-3, pBusy); + if( rc==SQLITE_OK ) pLock->bReaderLocked = 1; return rc; } /* ** Release a superlock held on a database file. The argument passed to @@ -154,12 +161,18 @@ int rc; /* Return code */ int flags = SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE; sqlite3_file *fd = 0; rc = sqlite3_file_control(p->db, "main", SQLITE_FCNTL_FILE_POINTER, (void *)&fd); if( rc==SQLITE_OK ){ - fd->pMethods->xShmLock(fd, 2, 1, flags); - fd->pMethods->xShmLock(fd, 3, SQLITE_SHM_NLOCK-3, flags); + if( p->bRecoveryLocked ){ + fd->pMethods->xShmLock(fd, 2, 1, flags); + p->bRecoveryLocked = 0; + } + if( p->bReaderLocked ){ + fd->pMethods->xShmLock(fd, 3, SQLITE_SHM_NLOCK-3, flags); + p->bReaderLocked = 0; + } } } sqlite3_close(p->db); sqlite3_free(p); } @@ -230,11 +243,11 @@ */ if( rc==SQLITE_OK ){ if( SQLITE_OK==(rc = superlockIsWal(pLock)) && pLock->bWal ){ rc = sqlite3_exec(pLock->db, "COMMIT", 0, 0, 0); if( rc==SQLITE_OK ){ - rc = superlockWalLock(pLock->db, &busy); + rc = superlockWalLock(pLock, &busy); } } } if( rc!=SQLITE_OK ){ Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -2795,23 +2795,18 @@ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 ); assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame ); - /* It is possible that there is a checkpointer thread running - ** concurrent with this code. If this is the case, it may be that the - ** checkpointer has already determined that it will checkpoint + /* If it were possible for a checkpointer thread to run concurrent + ** with this code, it would be a problem. In this case, it could be + ** that the checkpointer has already determined that it will checkpoint ** snapshot X, where X is later in the wal file than pSnapshot, but ** has not yet set the pInfo->nBackfillAttempted variable to indicate - ** its intent. To avoid the race condition this leads to, ensure that - ** there is no checkpointer process by taking a shared CKPT lock - ** before checking pInfo->nBackfillAttempted. - ** - ** TODO: Does the aReadMark[] lock prevent a checkpointer from doing - ** this already? - */ - rc = walLockShared(pWal, WAL_CKPT_LOCK); + ** its intent. Fortunately this is not possible, as the call to + ** sqlite3WalSnapshotOpen() that sets pWal->pSnapshot also takes a + ** SHARED lock on the checkpointer slot. */ if( rc==SQLITE_OK ){ /* Check that the wal file has not been wrapped. Assuming that it has ** not, also check that no checkpointer has attempted to checkpoint any ** frames beyond pSnapshot->mxFrame. If either of these conditions are @@ -2826,12 +2821,10 @@ *pChanged = bChanged; }else{ rc = SQLITE_ERROR_SNAPSHOT; } - /* Release the shared CKPT lock obtained above. */ - walUnlockShared(pWal, WAL_CKPT_LOCK); pWal->minFrame = 1; } if( rc!=SQLITE_OK ){ Index: test/shmlock.test ================================================================== --- test/shmlock.test +++ test/shmlock.test @@ -102,10 +102,72 @@ vfs_shmlock db255 main exclusive lock 4 1 } {SQLITE_OK} vfs_shmlock db255 main exclusive unlock 4 1 + + for {set i 0} {$i < 256} {incr i} { + db$i close + } +} + +sqlite3 db0 test.db +sqlite3 db1 test.db +do_test 3.1 { execsql { SELECT * FROM t1 } db0 } {1 2} +do_test 3.2 { execsql { SELECT * FROM t1 } db1 } {1 2} + +set L(0) {n n n n n n n n} +set L(1) {n n n n n n n n} +proc random_lock_test {idx} { + global L + set iSlot [expr int(rand()*8)] + if {[expr int(rand()*2)]} { + # Unlock operation + if {[lindex $L($idx) $iSlot]!="n"} { + vfs_shmlock db$idx main [lindex $L($idx) $iSlot] unlock $iSlot 1 + lset L($idx) $iSlot n + } + } else { + # Lock operation + if {[lindex $L($idx) $iSlot]=="n"} { + set locktype [lindex {e s} [expr int(rand()*2)]] + set n 1 + if {$locktype=="e"} { + for {set l $iSlot} {$l<8 && [lindex $L($idx) $l]=="n"} {incr l} {} + set n [expr int(rand()*($l-$iSlot))+1] + # puts "iSlot=$iSlot l=$l L=$L($idx)" + # puts "$iSlot $n" + } + set res [vfs_shmlock db$idx main $locktype lock $iSlot $n] + + set bBusy 0 + for {set i $iSlot} {$i<($iSlot+$n)} {incr i} { + set other [lindex $L([expr ($idx+1)%2]) $i] + if {($other!="n" && $locktype=="e")||($other=="e" && $locktype=="s")} { + if {$res != "SQLITE_BUSY"} { error "BUSY not detected" } + set bBusy 1 + break + } + } + + if {$bBusy==0} { + if {$res != "SQLITE_OK"} { error "BUSY false-positive" } + for {set i $iSlot} {$i<($iSlot+$n)} {incr i} { + lset L($idx) $i $locktype + } + } + } + } +} + +set nStep 100000 +for {set i 0} {$i < $nStep} {incr i} { + random_lock_test 0 + random_lock_test 1 } + +db0 close +db1 close finish_test