Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Transient locks in WAL mode can now block in order to resolve priority inversions. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
ec2f46de531ec8ef91981b19b48ab64d |
User & Date: | drh 2015-03-17 16:59:57 |
Context
2015-03-17
| ||
17:08 | Also merge the WAL blocking lock tests that were somehow missed on the previous check-in. check-in: 7214dab7 user: drh tags: trunk | |
16:59 | Transient locks in WAL mode can now block in order to resolve priority inversions. check-in: ec2f46de user: drh tags: trunk | |
2015-03-16
| ||
20:40 | Make SQLite slightly more likely to use an auto-index within a sub-query. check-in: ab832336 user: dan tags: trunk | |
2015-03-10
| ||
20:22 | Arrange for some of the transient locks in WAL mode to block, as a signal to the OS to fix priority inversions. check-in: c6e6d5f4 user: drh tags: wal-blocking-lock | |
Changes
Changes to src/os_unix.c.
244 244 # define UNIXFILE_DIRSYNC 0x00 245 245 #endif 246 246 #define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ 247 247 #define UNIXFILE_DELETE 0x20 /* Delete on close */ 248 248 #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ 249 249 #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ 250 250 #define UNIXFILE_WARNED 0x0100 /* verifyDbFile() warnings issued */ 251 +#define UNIXFILE_BLOCK 0x0200 /* Next SHM lock might block */ 251 252 252 253 /* 253 254 ** Include code that is common to all os_*.c files 254 255 */ 255 256 #include "os_common.h" 256 257 257 258 /* ................................................................................ 4086 4087 /* 4087 4088 ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. 4088 4089 ** 4089 4090 ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking 4090 4091 ** otherwise. 4091 4092 */ 4092 4093 static int unixShmSystemLock( 4093 - unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */ 4094 + unixFile *pFile, /* Open connection to the WAL file */ 4094 4095 int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ 4095 4096 int ofst, /* First byte of the locking range */ 4096 4097 int n /* Number of bytes to lock */ 4097 4098 ){ 4098 - struct flock f; /* The posix advisory locking structure */ 4099 - int rc = SQLITE_OK; /* Result code form fcntl() */ 4099 + unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ 4100 + struct flock f; /* The posix advisory locking structure */ 4101 + int rc = SQLITE_OK; /* Result code form fcntl() */ 4100 4102 4101 4103 /* Access to the unixShmNode object is serialized by the caller */ 4104 + pShmNode = pFile->pInode->pShmNode; 4102 4105 assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 ); 4103 4106 4104 4107 /* Shared locks never span more than one byte */ 4105 4108 assert( n==1 || lockType!=F_RDLCK ); 4106 4109 4107 4110 /* Locks are within range */ 4108 4111 assert( 
n>=1 && n<SQLITE_SHM_NLOCK ); 4109 4112 4110 4113 if( pShmNode->h>=0 ){ 4114 + int lkType; 4111 4115 /* Initialize the locking parameters */ 4112 4116 memset(&f, 0, sizeof(f)); 4113 4117 f.l_type = lockType; 4114 4118 f.l_whence = SEEK_SET; 4115 4119 f.l_start = ofst; 4116 4120 f.l_len = n; 4117 4121 4118 - rc = osFcntl(pShmNode->h, F_SETLK, &f); 4122 + lkType = (pFile->ctrlFlags & UNIXFILE_BLOCK)!=0 ? F_SETLKW : F_SETLK; 4123 + rc = osFcntl(pShmNode->h, lkType, &f); 4119 4124 rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY; 4125 + pFile->ctrlFlags &= ~UNIXFILE_BLOCK; 4120 4126 } 4121 4127 4122 4128 /* Update the global lock state and do debug tracing */ 4123 4129 #ifdef SQLITE_DEBUG 4124 4130 { u16 mask; 4125 4131 OSTRACE(("SHM-LOCK ")); 4126 4132 mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst); ................................................................................ 4322 4328 */ 4323 4329 osFchown(pShmNode->h, sStat.st_uid, sStat.st_gid); 4324 4330 4325 4331 /* Check to see if another process is holding the dead-man switch. 4326 4332 ** If not, truncate the file to zero length. 4327 4333 */ 4328 4334 rc = SQLITE_OK; 4329 - if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ 4335 + if( unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ 4330 4336 if( robust_ftruncate(pShmNode->h, 0) ){ 4331 4337 rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename); 4332 4338 } 4333 4339 } 4334 4340 if( rc==SQLITE_OK ){ 4335 - rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1); 4341 + rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); 4336 4342 } 4337 4343 if( rc ) goto shm_open_err; 4338 4344 } 4339 4345 } 4340 4346 4341 4347 /* Make the new connection a child of the unixShmNode */ 4342 4348 p->pShmNode = pShmNode; ................................................................................ 
4560 4566 if( pX==p ) continue; 4561 4567 assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); 4562 4568 allMask |= pX->sharedMask; 4563 4569 } 4564 4570 4565 4571 /* Unlock the system-level locks */ 4566 4572 if( (mask & allMask)==0 ){ 4567 - rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+UNIX_SHM_BASE, n); 4573 + rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n); 4568 4574 }else{ 4569 4575 rc = SQLITE_OK; 4570 4576 } 4571 4577 4572 4578 /* Undo the local locks */ 4573 4579 if( rc==SQLITE_OK ){ 4574 4580 p->exclMask &= ~mask; ................................................................................ 4588 4594 } 4589 4595 allShared |= pX->sharedMask; 4590 4596 } 4591 4597 4592 4598 /* Get shared locks at the system level, if necessary */ 4593 4599 if( rc==SQLITE_OK ){ 4594 4600 if( (allShared & mask)==0 ){ 4595 - rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+UNIX_SHM_BASE, n); 4601 + rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n); 4596 4602 }else{ 4597 4603 rc = SQLITE_OK; 4598 4604 } 4599 4605 } 4600 4606 4601 4607 /* Get the local shared locks */ 4602 4608 if( rc==SQLITE_OK ){ ................................................................................ 4613 4619 } 4614 4620 } 4615 4621 4616 4622 /* Get the exclusive locks at the system level. Then if successful 4617 4623 ** also mark the local connection as being locked. 4618 4624 */ 4619 4625 if( rc==SQLITE_OK ){ 4620 - rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+UNIX_SHM_BASE, n); 4626 + rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n); 4621 4627 if( rc==SQLITE_OK ){ 4622 4628 assert( (p->sharedMask & mask)==0 ); 4623 4629 p->exclMask |= mask; 4624 4630 } 4625 4631 } 4626 4632 } 4627 4633 sqlite3_mutex_leave(pShmNode->mutex); ................................................................................ 7218 7224 7219 7225 /* 7220 7226 ** This routine handles sqlite3_file_control() calls that are specific 7221 7227 ** to proxy locking. 
7222 7228 */ 7223 7229 static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ 7224 7230 switch( op ){ 7231 + case SQLITE_FCNTL_WAL_BLOCK: { 7232 + id->ctrlFlags |= UNIXFILE_BLOCK; 7233 + return SQLITE_OK; 7234 + } 7225 7235 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 7226 7236 unixFile *pFile = (unixFile*)id; 7227 7237 if( pFile->pMethod == &proxyIoMethods ){ 7228 7238 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7229 7239 proxyTakeConch(pFile); 7230 7240 if( pCtx->lockProxyPath ){ 7231 7241 *(const char **)pArg = pCtx->lockProxyPath;
Changes to src/sqlite.h.in.
941 941 ** 942 942 ** <li>[[SQLITE_FCNTL_WIN32_SET_HANDLE]] 943 943 ** The [SQLITE_FCNTL_WIN32_SET_HANDLE] opcode is used for debugging. This 944 944 ** opcode causes the xFileControl method to swap the file handle with the one 945 945 ** pointed to by the pArg argument. This capability is used during testing 946 946 ** and only needs to be supported when SQLITE_TEST is defined. 947 947 ** 948 +** <li>[[SQLITE_FCNTL_WAL_BLOCK]] 949 +** The [SQLITE_FCNTL_WAL_BLOCK] is a signal to the VFS layer that it might 950 +** be advantageous to block on the next WAL lock if the lock is not immediately 951 +** available. The WAL subsystem issues this signal during rare 952 +** circumstances in order to fix a problem with priority inversion. 953 +** Applications should <em>not</em> use this file-control. 954 +** 948 955 ** </ul> 949 956 */ 950 957 #define SQLITE_FCNTL_LOCKSTATE 1 951 958 #define SQLITE_FCNTL_GET_LOCKPROXYFILE 2 952 959 #define SQLITE_FCNTL_SET_LOCKPROXYFILE 3 953 960 #define SQLITE_FCNTL_LAST_ERRNO 4 954 961 #define SQLITE_FCNTL_SIZE_HINT 5 ................................................................................ 965 972 #define SQLITE_FCNTL_TEMPFILENAME 16 966 973 #define SQLITE_FCNTL_MMAP_SIZE 18 967 974 #define SQLITE_FCNTL_TRACE 19 968 975 #define SQLITE_FCNTL_HAS_MOVED 20 969 976 #define SQLITE_FCNTL_SYNC 21 970 977 #define SQLITE_FCNTL_COMMIT_PHASETWO 22 971 978 #define SQLITE_FCNTL_WIN32_SET_HANDLE 23 979 +#define SQLITE_FCNTL_WAL_BLOCK 24 972 980 973 981 /* deprecated names */ 974 982 #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE 975 983 #define SQLITE_SET_LOCKPROXYFILE SQLITE_FCNTL_SET_LOCKPROXYFILE 976 984 #define SQLITE_LAST_ERRNO SQLITE_FCNTL_LAST_ERRNO 977 985 978 986
Changes to src/wal.c.
784 784 } 785 785 static void walUnlockShared(Wal *pWal, int lockIdx){ 786 786 if( pWal->exclusiveMode ) return; 787 787 (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, 788 788 SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); 789 789 WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx))); 790 790 } 791 -static int walLockExclusive(Wal *pWal, int lockIdx, int n){ 791 +static int walLockExclusive(Wal *pWal, int lockIdx, int n, int fBlock){ 792 792 int rc; 793 793 if( pWal->exclusiveMode ) return SQLITE_OK; 794 + if( fBlock ) sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_WAL_BLOCK, 0); 794 795 rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, 795 796 SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE); 796 797 WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal, 797 798 walLockName(lockIdx), n, rc ? "failed" : "ok")); 798 799 VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); ) 799 800 return rc; 800 801 } ................................................................................ 1072 1073 */ 1073 1074 assert( pWal->ckptLock==1 || pWal->ckptLock==0 ); 1074 1075 assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 ); 1075 1076 assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE ); 1076 1077 assert( pWal->writeLock ); 1077 1078 iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock; 1078 1079 nLock = SQLITE_SHM_NLOCK - iLock; 1079 - rc = walLockExclusive(pWal, iLock, nLock); 1080 + rc = walLockExclusive(pWal, iLock, nLock, 0); 1080 1081 if( rc ){ 1081 1082 return rc; 1082 1083 } 1083 1084 WALTRACE(("WAL%p: recovery begin...\n", pWal)); 1084 1085 1085 1086 memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); 1086 1087 ................................................................................ 
1606 1607 int (*xBusy)(void*), /* Function to call when busy */ 1607 1608 void *pBusyArg, /* Context argument for xBusyHandler */ 1608 1609 int lockIdx, /* Offset of first byte to lock */ 1609 1610 int n /* Number of bytes to lock */ 1610 1611 ){ 1611 1612 int rc; 1612 1613 do { 1613 - rc = walLockExclusive(pWal, lockIdx, n); 1614 + rc = walLockExclusive(pWal, lockIdx, n, 0); 1614 1615 }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) ); 1615 1616 return rc; 1616 1617 } 1617 1618 1618 1619 /* 1619 1620 ** The cache of the wal-index header must be valid to call this function. 1620 1621 ** Return the page-size in bytes used by the database. ................................................................................ 2039 2040 assert( badHdr==0 || pWal->writeLock==0 ); 2040 2041 if( badHdr ){ 2041 2042 if( pWal->readOnly & WAL_SHM_RDONLY ){ 2042 2043 if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ 2043 2044 walUnlockShared(pWal, WAL_WRITE_LOCK); 2044 2045 rc = SQLITE_READONLY_RECOVERY; 2045 2046 } 2046 - }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ 2047 + }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 1)) ){ 2047 2048 pWal->writeLock = 1; 2048 2049 if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ 2049 2050 badHdr = walIndexTryHdr(pWal, pChanged); 2050 2051 if( badHdr ){ 2051 2052 /* If the wal-index header is still malformed even while holding 2052 2053 ** a WRITE lock, it can only mean that the header is corrupted and 2053 2054 ** needs to be reconstructed. So run recovery to do exactly that. ................................................................................ 2245 2246 } 2246 2247 /* There was once an "if" here. The extra "{" is to preserve indentation. 
*/ 2247 2248 { 2248 2249 if( (pWal->readOnly & WAL_SHM_RDONLY)==0 2249 2250 && (mxReadMark<pWal->hdr.mxFrame || mxI==0) 2250 2251 ){ 2251 2252 for(i=1; i<WAL_NREADER; i++){ 2252 - rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); 2253 + rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1, 0); 2253 2254 if( rc==SQLITE_OK ){ 2254 2255 mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame; 2255 2256 mxI = i; 2256 2257 walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); 2257 2258 break; 2258 2259 }else if( rc!=SQLITE_BUSY ){ 2259 2260 return rc; ................................................................................ 2501 2502 if( pWal->readOnly ){ 2502 2503 return SQLITE_READONLY; 2503 2504 } 2504 2505 2505 2506 /* Only one writer allowed at a time. Get the write lock. Return 2506 2507 ** SQLITE_BUSY if unable. 2507 2508 */ 2508 - rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); 2509 + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 0); 2509 2510 if( rc ){ 2510 2511 return rc; 2511 2512 } 2512 2513 pWal->writeLock = 1; 2513 2514 2514 2515 /* If another connection has written to the database file since the 2515 2516 ** time the read transaction on this connection was started, then ................................................................................ 2646 2647 2647 2648 if( pWal->readLock==0 ){ 2648 2649 volatile WalCkptInfo *pInfo = walCkptInfo(pWal); 2649 2650 assert( pInfo->nBackfill==pWal->hdr.mxFrame ); 2650 2651 if( pInfo->nBackfill>0 ){ 2651 2652 u32 salt1; 2652 2653 sqlite3_randomness(4, &salt1); 2653 - rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); 2654 + rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1, 0); 2654 2655 if( rc==SQLITE_OK ){ 2655 2656 /* If all readers are using WAL_READ_LOCK(0) (in other words if no 2656 2657 ** readers are currently using the WAL), then the transactions 2657 2658 ** frames will overwrite the start of the existing log. Update the 2658 2659 ** wal-index header to reflect this. 
2659 2660 ** 2660 2661 ** In theory it would be Ok to update the cache of the header only ................................................................................ 2971 2972 assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); 2972 2973 2973 2974 if( pWal->readOnly ) return SQLITE_READONLY; 2974 2975 WALTRACE(("WAL%p: checkpoint begins\n", pWal)); 2975 2976 2976 2977 /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive 2977 2978 ** "checkpoint" lock on the database file. */ 2978 - rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); 2979 + rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1, 0); 2979 2980 if( rc ){ 2980 2981 /* EVIDENCE-OF: R-10421-19736 If any other process is running a 2981 2982 ** checkpoint operation at the same time, the lock cannot be obtained and 2982 2983 ** SQLITE_BUSY is returned. 2983 2984 ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured, 2984 2985 ** it will not be invoked in this case. 2985 2986 */