Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Rework mutexes on the SHM implementation for os_unix to avoid a deadlock during WAL recovery. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | wal |
Files: | files | file ages | folders |
SHA1: |
1a0f69bef2c489e81a3d4b910b426972 |
User & Date: | drh 2010-05-01 17:50:38.000 |
Context
2010-05-01
| ||
17:57 | Define an invariant to guarantee deadlock-free operation of SHM in os_unix.c and check that invariant with assert() statements. (check-in: 6af2dca75b user: drh tags: wal) | |
17:50 | Rework mutexes on the SHM implementation for os_unix to avoid a deadlock during WAL recovery. (check-in: 1a0f69bef2 user: drh tags: wal) | |
16:40 | Support compile-time option SQLITE_OMIT_WAL, for building without WAL support. (check-in: 9b230c43df user: dan tags: wal) | |
Changes
Changes to src/os_unix.c.
︙ | ︙ | |||
4590 4591 4592 4593 4594 4595 4596 | ** unixMutexHeld() is true when reading or writing any other field ** in this structure. */ struct unixShmFile { struct unixFileId fid; /* Unique file identifier */ sqlite3_mutex *mutex; /* Mutex to access this object */ sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */ | < | 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 | ** unixMutexHeld() is true when reading or writing any other field ** in this structure. */ struct unixShmFile { struct unixFileId fid; /* Unique file identifier */ sqlite3_mutex *mutex; /* Mutex to access this object */ sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */ char *zFilename; /* Name of the file */ int h; /* Open file descriptor */ int szMap; /* Size of the mapping of file into memory */ char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */ int nRef; /* Number of unixShm objects pointing to this */ unixShm *pFirst; /* All unixShm objects pointing to this */ unixShmFile *pNext; /* Next in list of all unixShmFile objects */ |
︙ | ︙ | |||
4629 4630 4631 4632 4633 4634 4635 | struct unixShm { unixShmFile *pFile; /* The underlying unixShmFile object */ unixShm *pNext; /* Next unixShm with the same unixShmFile */ u8 lockState; /* Current lock state */ u8 readLock; /* Which of the two read-lock states to use */ u8 hasMutex; /* True if holding the unixShmFile mutex */ u8 hasMutexBuf; /* True if holding pFile->mutexBuf */ | < | 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 | struct unixShm { unixShmFile *pFile; /* The underlying unixShmFile object */ unixShm *pNext; /* Next unixShm with the same unixShmFile */ u8 lockState; /* Current lock state */ u8 readLock; /* Which of the two read-lock states to use */ u8 hasMutex; /* True if holding the unixShmFile mutex */ u8 hasMutexBuf; /* True if holding pFile->mutexBuf */ u8 sharedMask; /* Mask of shared locks held */ u8 exclMask; /* Mask of exclusive locks held */ #ifdef SQLITE_DEBUG u8 id; /* Id of this connection with its unixShmFile */ #endif }; |
︙ | ︙ | |||
4905 4906 4907 4908 4909 4910 4911 | unixShmFile *p; assert( unixMutexHeld() ); pp = &unixShmFileList; while( (p = *pp)!=0 ){ if( p->nRef==0 ){ if( p->mutex ) sqlite3_mutex_free(p->mutex); if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf); | < | 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 | unixShmFile *p; assert( unixMutexHeld() ); pp = &unixShmFileList; while( (p = *pp)!=0 ){ if( p->nRef==0 ){ if( p->mutex ) sqlite3_mutex_free(p->mutex); if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf); if( p->h>=0 ) close(p->h); *pp = p->pNext; sqlite3_free(p); }else{ pp = &p->pNext; } } |
︙ | ︙ | |||
4973 4974 4975 4976 4977 4978 4979 | if( pFile->mutex==0 ){ rc = SQLITE_NOMEM; goto shm_open_err; } pFile->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); if( pFile->mutexBuf==0 ){ rc = SQLITE_NOMEM; | < < < < < | 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 | if( pFile->mutex==0 ){ rc = SQLITE_NOMEM; goto shm_open_err; } pFile->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); if( pFile->mutexBuf==0 ){ rc = SQLITE_NOMEM; goto shm_open_err; } pFile->h = open(zName, O_RDWR|O_CREAT, 0664); if( pFile->h<0 ){ rc = SQLITE_CANTOPEN_BKPT; goto shm_open_err; |
︙ | ︙ | |||
5120 5121 5122 5123 5124 5125 5126 | /* ** Map the shared storage into memory. The minimum size of the ** mapping should be reqMapSize if reqMapSize is positive. If ** reqMapSize is zero or negative, the implementation can choose ** whatever mapping size is convenient. ** ** *ppBuf is made to point to the memory which is a mapping of the | | > > | > > > > > > > > > | | 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 | /* ** Map the shared storage into memory. The minimum size of the ** mapping should be reqMapSize if reqMapSize is positive. If ** reqMapSize is zero or negative, the implementation can choose ** whatever mapping size is convenient. ** ** *ppBuf is made to point to the memory which is a mapping of the ** underlying storage. A mutex is acquired to prevent other threads ** from running while *ppBuf is in use in order to prevent other threads ** remapping *ppBuf out from under this thread. The unixShmRelease() ** call will release the mutex. However, if the lock state is CHECKPOINT, ** the mutex is not acquired because CHECKPOINT will never remap the ** buffer. RECOVER might remap, though, so CHECKPOINT will acquire ** the mutex if and when it promotes to RECOVER. ** ** RECOVER needs to be atomic. The same mutex that prevents *ppBuf from ** being remapped also prevents more than one thread from being in ** RECOVER at a time. But, RECOVER sometimes wants to remap itself. ** To prevent RECOVER from losing its lock while remapping, the ** mutex is not released by unixShmRelease() when in RECOVER. ** ** *pNewMapSize is set to the size of the mapping. ** ** *ppBuf and *pNewMapSize might be NULL and zero if no space has ** yet been allocated to the underlying storage. 
*/ static int unixShmGet( sqlite3_shm *pSharedMem, /* Pointer returned by unixShmOpen() */ int reqMapSize, /* Requested size of mapping. -1 means don't care */ int *pNewMapSize, /* Write new size of mapping here */ void **ppBuf /* Write mapping buffer origin here */ ){ unixShm *p = (unixShm*)pSharedMem; unixShmFile *pFile = p->pFile; int rc = SQLITE_OK; if( p->lockState!=SQLITE_SHM_CHECKPOINT && p->hasMutexBuf==0 ){ sqlite3_mutex_enter(pFile->mutexBuf); p->hasMutexBuf = 1; } sqlite3_mutex_enter(pFile->mutex); if( pFile->szMap==0 || reqMapSize>pFile->szMap ){ int actualSize; if( unixShmSize(pSharedMem, -1, &actualSize)==SQLITE_OK |
︙ | ︙ | |||
5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 | sqlite3_mutex_leave(pFile->mutex); return rc; } /* ** Release the lock held on the shared memory segment to that other ** threads are free to resize it if necessary. */ static int unixShmRelease(sqlite3_shm *pSharedMem){ unixShm *p = (unixShm*)pSharedMem; | > > > > > > | | 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 | sqlite3_mutex_leave(pFile->mutex); return rc; } /* ** Release the lock held on the shared memory segment to that other ** threads are free to resize it if necessary. ** ** If the lock is not currently held, this routine is a harmless no-op. ** ** If the shared-memory object is in lock state RECOVER, then we do not ** really want to release the lock, so in that case too, this routine ** is a no-op. */ static int unixShmRelease(sqlite3_shm *pSharedMem){ unixShm *p = (unixShm*)pSharedMem; if( p->hasMutexBuf && p->lockState!=SQLITE_SHM_RECOVER ){ unixShmFile *pFile = p->pFile; sqlite3_mutex_leave(pFile->mutexBuf); p->hasMutexBuf = 0; } return SQLITE_OK; } |
︙ | ︙ | |||
5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 | p->id, getpid(), azLkName[desiredLock], azLkName[p->lockState])); if( pGotLock ) *pGotLock = p->lockState; return SQLITE_OK; } OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s->%s\n", p->id, getpid(), azLkName[p->lockState], azLkName[desiredLock])); sqlite3_mutex_enter(pFile->mutex); switch( desiredLock ){ case SQLITE_SHM_UNLOCK: { assert( p->lockState!=SQLITE_SHM_RECOVER ); unixShmUnlock(pFile, p, UNIX_SHM_A|UNIX_SHM_B|UNIX_SHM_C|UNIX_SHM_D); rc = SQLITE_OK; p->lockState = SQLITE_SHM_UNLOCK; | > > > > > | 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 | p->id, getpid(), azLkName[desiredLock], azLkName[p->lockState])); if( pGotLock ) *pGotLock = p->lockState; return SQLITE_OK; } OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s->%s\n", p->id, getpid(), azLkName[p->lockState], azLkName[desiredLock])); if( desiredLock==SQLITE_SHM_RECOVER && !p->hasMutexBuf ){ sqlite3_mutex_enter(pFile->mutexBuf); p->hasMutexBuf = 1; } sqlite3_mutex_enter(pFile->mutex); switch( desiredLock ){ case SQLITE_SHM_UNLOCK: { assert( p->lockState!=SQLITE_SHM_RECOVER ); unixShmUnlock(pFile, p, UNIX_SHM_A|UNIX_SHM_B|UNIX_SHM_C|UNIX_SHM_D); rc = SQLITE_OK; p->lockState = SQLITE_SHM_UNLOCK; |
︙ | ︙ | |||
5264 5265 5266 5267 5268 5269 5270 | }else if( p->lockState==SQLITE_SHM_WRITE ){ rc = unixShmSharedLock(pFile, p, UNIX_SHM_A); unixShmUnlock(pFile, p, UNIX_SHM_C|UNIX_SHM_D); p->lockState = p->readLock = SQLITE_SHM_READ; }else{ assert( p->lockState==SQLITE_SHM_RECOVER ); unixShmUnlock(pFile, p, UNIX_SHM_MUTEX); | < < | 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 | }else if( p->lockState==SQLITE_SHM_WRITE ){ rc = unixShmSharedLock(pFile, p, UNIX_SHM_A); unixShmUnlock(pFile, p, UNIX_SHM_C|UNIX_SHM_D); p->lockState = p->readLock = SQLITE_SHM_READ; }else{ assert( p->lockState==SQLITE_SHM_RECOVER ); unixShmUnlock(pFile, p, UNIX_SHM_MUTEX); p->lockState = p->readLock; rc = SQLITE_OK; } break; } case SQLITE_SHM_WRITE: { assert( p->lockState==SQLITE_SHM_READ || p->lockState==SQLITE_SHM_READ_FULL ); rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_C|UNIX_SHM_D); if( rc==SQLITE_OK ){ p->lockState = SQLITE_SHM_WRITE; } break; } case SQLITE_SHM_CHECKPOINT: { assert( p->lockState==SQLITE_SHM_UNLOCK || p->lockState==SQLITE_SHM_PENDING || p->lockState==SQLITE_SHM_RECOVER ); if( p->lockState==SQLITE_SHM_RECOVER ){ unixShmUnlock(pFile, p, UNIX_SHM_MUTEX); p->lockState = SQLITE_SHM_CHECKPOINT; rc = SQLITE_OK; } if( p->lockState==SQLITE_SHM_UNLOCK ){ rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_B|UNIX_SHM_C); if( rc==SQLITE_OK ){ p->lockState = SQLITE_SHM_PENDING; |
︙ | ︙ | |||
5308 5309 5310 5311 5312 5313 5314 | break; } default: { assert( desiredLock==SQLITE_SHM_RECOVER ); assert( p->lockState==SQLITE_SHM_READ || p->lockState==SQLITE_SHM_READ_FULL || p->lockState==SQLITE_SHM_CHECKPOINT ); | | < < | 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 | break; } default: { assert( desiredLock==SQLITE_SHM_RECOVER ); assert( p->lockState==SQLITE_SHM_READ || p->lockState==SQLITE_SHM_READ_FULL || p->lockState==SQLITE_SHM_CHECKPOINT ); assert( sqlite3_mutex_held(pFile->mutexBuf) ); rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_MUTEX); if( rc==SQLITE_OK ){ p->lockState = SQLITE_SHM_RECOVER; } break; } } |
︙ | ︙ |