Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -246,10 +246,11 @@ #define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ #define UNIXFILE_DELETE 0x20 /* Delete on close */ #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ #define UNIXFILE_WARNED 0x0100 /* verifyDbFile() warnings issued */ +#define UNIXFILE_BLOCK 0x0200 /* Next SHM lock might block */ /* ** Include code that is common to all os_*.c files */ #include "os_common.h" @@ -4088,37 +4089,42 @@ ** ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking ** otherwise. */ static int unixShmSystemLock( - unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */ + unixFile *pFile, /* Open connection to the WAL file */ int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ int ofst, /* First byte of the locking range */ int n /* Number of bytes to lock */ ){ - struct flock f; /* The posix advisory locking structure */ - int rc = SQLITE_OK; /* Result code form fcntl() */ + unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ + struct flock f; /* The posix advisory locking structure */ + int rc = SQLITE_OK; /* Result code form fcntl() */ /* Access to the unixShmNode object is serialized by the caller */ + pShmNode = pFile->pInode->pShmNode; assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 ); /* Shared locks never span more than one byte */ assert( n==1 || lockType!=F_RDLCK ); /* Locks are within range */ assert( n>=1 && nh>=0 ){ + int lkType; /* Initialize the locking parameters */ memset(&f, 0, sizeof(f)); f.l_type = lockType; f.l_whence = SEEK_SET; f.l_start = ofst; f.l_len = n; - rc = osFcntl(pShmNode->h, F_SETLK, &f); + lkType = (pFile->ctrlFlags & UNIXFILE_BLOCK)!=0 ? F_SETLKW : F_SETLK; + rc = osFcntl(pShmNode->h, lkType, &f); rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY; + pFile->ctrlFlags &= ~UNIXFILE_BLOCK; } /* Update the global lock state and do debug tracing */ #ifdef SQLITE_DEBUG { u16 mask; @@ -4324,17 +4330,17 @@ /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. */ rc = SQLITE_OK; - if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ + if( unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ if( robust_ftruncate(pShmNode->h, 0) ){ rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename); } } if( rc==SQLITE_OK ){ - rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1); + rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); } if( rc ) goto shm_open_err; } } @@ -4562,11 +4568,11 @@ allMask |= pX->sharedMask; } /* Unlock the system-level locks */ if( (mask & allMask)==0 ){ - rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+UNIX_SHM_BASE, n); + rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n); }else{ rc = SQLITE_OK; } /* Undo the local locks */ @@ -4590,11 +4596,11 @@ } /* Get shared locks at the system level, if necessary */ if( rc==SQLITE_OK ){ if( (allShared & mask)==0 ){ - rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+UNIX_SHM_BASE, n); + rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n); }else{ rc = SQLITE_OK; } } @@ -4615,11 +4621,11 @@ /* Get the exclusive locks at the system level. Then if successful ** also mark the local connection as being locked. */ if( rc==SQLITE_OK ){ - rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+UNIX_SHM_BASE, n); + rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n); if( rc==SQLITE_OK ){ assert( (p->sharedMask & mask)==0 ); p->exclMask |= mask; } } @@ -7220,10 +7226,14 @@ ** This routine handles sqlite3_file_control() calls that are specific ** to proxy locking. */ static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ switch( op ){ + case SQLITE_FCNTL_WAL_BLOCK: { + id->ctrlFlags |= UNIXFILE_BLOCK; + return SQLITE_OK; + } case SQLITE_FCNTL_GET_LOCKPROXYFILE: { unixFile *pFile = (unixFile*)id; if( pFile->pMethod == &proxyIoMethods ){ proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; proxyTakeConch(pFile); Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -943,10 +943,17 @@ ** The [SQLITE_FCNTL_WIN32_SET_HANDLE] opcode is used for debugging. This ** opcode causes the xFileControl method to swap the file handle with the one ** pointed to by the pArg argument. This capability is used during testing ** and only needs to be supported when SQLITE_TEST is defined. ** +**
  • [[SQLITE_FCNTL_WAL_BLOCK]] +** The [SQLITE_FCNTL_WAL_BLOCK] is a signal to the VFS layer that it might +** be advantageous to block on the next WAL lock if the lock is not immediately +** available. The WAL subsystem issues this signal during rare +** circumstances in order to fix a problem with priority inversion. +** Applications should not use this file-control. +** ** */ #define SQLITE_FCNTL_LOCKSTATE 1 #define SQLITE_FCNTL_GET_LOCKPROXYFILE 2 #define SQLITE_FCNTL_SET_LOCKPROXYFILE 3 @@ -967,10 +974,11 @@ #define SQLITE_FCNTL_TRACE 19 #define SQLITE_FCNTL_HAS_MOVED 20 #define SQLITE_FCNTL_SYNC 21 #define SQLITE_FCNTL_COMMIT_PHASETWO 22 #define SQLITE_FCNTL_WIN32_SET_HANDLE 23 +#define SQLITE_FCNTL_WAL_BLOCK 24 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE #define SQLITE_SET_LOCKPROXYFILE SQLITE_FCNTL_SET_LOCKPROXYFILE #define SQLITE_LAST_ERRNO SQLITE_FCNTL_LAST_ERRNO Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -786,13 +786,14 @@ if( pWal->exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx))); } -static int walLockExclusive(Wal *pWal, int lockIdx, int n){ +static int walLockExclusive(Wal *pWal, int lockIdx, int n, int fBlock){ int rc; if( pWal->exclusiveMode ) return SQLITE_OK; + if( fBlock ) sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_WAL_BLOCK, 0); rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE); WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal, walLockName(lockIdx), n, rc ? "failed" : "ok")); VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); ) @@ -1074,11 +1075,11 @@ assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 ); assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE ); assert( pWal->writeLock ); iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock; nLock = SQLITE_SHM_NLOCK - iLock; - rc = walLockExclusive(pWal, iLock, nLock); + rc = walLockExclusive(pWal, iLock, nLock, 0); if( rc ){ return rc; } WALTRACE(("WAL%p: recovery begin...\n", pWal)); @@ -1608,11 +1609,11 @@ int lockIdx, /* Offset of first byte to lock */ int n /* Number of bytes to lock */ ){ int rc; do { - rc = walLockExclusive(pWal, lockIdx, n); + rc = walLockExclusive(pWal, lockIdx, n, 0); }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) ); return rc; } /* @@ -2041,11 +2042,11 @@ if( pWal->readOnly & WAL_SHM_RDONLY ){ if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ walUnlockShared(pWal, WAL_WRITE_LOCK); rc = SQLITE_READONLY_RECOVERY; } - }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ + }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 1)) ){ pWal->writeLock = 1; if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ badHdr = walIndexTryHdr(pWal, pChanged); if( badHdr ){ /* If the wal-index header is still malformed even while holding @@ -2247,11 +2248,11 @@ { if( (pWal->readOnly & WAL_SHM_RDONLY)==0 && (mxReadMarkhdr.mxFrame || mxI==0) ){ for(i=1; iaReadMark[i] = pWal->hdr.mxFrame; mxI = i; walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); break; @@ -2503,11 +2504,11 @@ } /* Only one writer allowed at a time. Get the write lock. Return ** SQLITE_BUSY if unable. */ - rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 0); if( rc ){ return rc; } pWal->writeLock = 1; @@ -2648,11 +2649,11 @@ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ u32 salt1; sqlite3_randomness(4, &salt1); - rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); + rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1, 0); if( rc==SQLITE_OK ){ /* If all readers are using WAL_READ_LOCK(0) (in other words if no ** readers are currently using the WAL), then the transactions ** frames will overwrite the start of the existing log. Update the ** wal-index header to reflect this. @@ -2973,11 +2974,11 @@ if( pWal->readOnly ) return SQLITE_READONLY; WALTRACE(("WAL%p: checkpoint begins\n", pWal)); /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive ** "checkpoint" lock on the database file. */ - rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); + rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1, 0); if( rc ){ /* EVIDENCE-OF: R-10421-19736 If any other process is running a ** checkpoint operation at the same time, the lock cannot be obtained and ** SQLITE_BUSY is returned. ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured,