/ Check-in [1a0f69be]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Rework mutexes on the SHM implementation for os_unix to avoid a deadlock during WAL recovery.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | wal
Files: files | file ages | folders
SHA1: 1a0f69bef2c489e81a3d4b910b426972e9ed4054
User & Date: drh 2010-05-01 17:50:38
Context
2010-05-01
17:57
Define an invariant to guarantee deadlock-free operation of SHM in os_unix.c and check that invariant with assert() statements. check-in: 6af2dca7 user: drh tags: wal
17:50
Rework mutexes on the SHM implementation for os_unix to avoid a deadlock during WAL recovery. check-in: 1a0f69be user: drh tags: wal
16:40
Support compile-time option SQLITE_OMIT_WAL, for building without WAL support. check-in: 9b230c43 user: dan tags: wal
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
....
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
....
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
....
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
....
5120
5121
5122
5123
5124
5125
5126
5127
5128











5129
5130
5131
5132
5133
5134
5135
....
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
....
5166
5167
5168
5169
5170
5171
5172






5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
....
5231
5232
5233
5234
5235
5236
5237





5238
5239
5240
5241
5242
5243
5244
....
5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
....
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
....
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
5322
5323
5324
** unixMutexHeld() is true when reading or writing any other field
** in this structure.
*/
struct unixShmFile {
  struct unixFileId fid;     /* Unique file identifier */
  sqlite3_mutex *mutex;      /* Mutex to access this object */
  sqlite3_mutex *mutexBuf;   /* Mutex to access zBuf[] */
  sqlite3_mutex *mutexRecov; /* The RECOVER mutex */
  char *zFilename;           /* Name of the file */
  int h;                     /* Open file descriptor */
  int szMap;                 /* Size of the mapping of file into memory */
  char *pMMapBuf;            /* Where currently mmapped().  NULL if unmapped */
  int nRef;                  /* Number of unixShm objects pointing to this */
  unixShm *pFirst;           /* All unixShm objects pointing to this */
  unixShmFile *pNext;        /* Next in list of all unixShmFile objects */
................................................................................
/*
** State kept for one connection to a shared-memory file.  Each unixShm
** refers to its underlying unixShmFile through pFile, and the unixShm
** objects that share the same unixShmFile are chained through pNext.
** The hasMutex* flags record which of the unixShmFile mutexes this
** object currently holds.
*/
struct unixShm {
  unixShmFile *pFile;        /* The underlying unixShmFile object */
  unixShm *pNext;            /* Next unixShm with the same unixShmFile */
  u8 lockState;              /* Current lock state */
  u8 readLock;               /* Which of the two read-lock states to use */
  u8 hasMutex;               /* True if holding the unixShmFile mutex */
  u8 hasMutexBuf;            /* True if holding pFile->mutexBuf */
  u8 hasMutexRecov;          /* True if holding pFile->mutexRecov */
  u8 sharedMask;             /* Mask of shared locks held */
  u8 exclMask;               /* Mask of exclusive locks held */
#ifdef SQLITE_DEBUG
  u8 id;                     /* Id of this connection with its unixShmFile */
#endif
};

................................................................................
  unixShmFile *p;
  assert( unixMutexHeld() );
  pp = &unixShmFileList;
  while( (p = *pp)!=0 ){
    if( p->nRef==0 ){
      if( p->mutex ) sqlite3_mutex_free(p->mutex);
      if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf);
      if( p->mutexRecov ) sqlite3_mutex_free(p->mutexRecov);
      if( p->h>=0 ) close(p->h);
      *pp = p->pNext;
      sqlite3_free(p);
    }else{
      pp = &p->pNext;
    }
  }
................................................................................
    if( pFile->mutex==0 ){
      rc = SQLITE_NOMEM;
      goto shm_open_err;
    }
    pFile->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
    if( pFile->mutexBuf==0 ){
      rc = SQLITE_NOMEM;
      goto shm_open_err;
    }
    pFile->mutexRecov = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
    if( pFile->mutexRecov==0 ){
      rc = SQLITE_NOMEM;
      goto shm_open_err;
    }

    pFile->h = open(zName, O_RDWR|O_CREAT, 0664);
    if( pFile->h<0 ){
      rc = SQLITE_CANTOPEN_BKPT;
      goto shm_open_err;
................................................................................
/*
** Map the shared storage into memory.  The minimum size of the
** mapping should be reqMapSize if reqMapSize is positive.  If
** reqMapSize is zero or negative, the implementation can choose
** whatever mapping size is convenient.
**
** *ppBuf is made to point to the memory which is a mapping of the
** underlying storage.  This segment is locked.  unixShmRelease()
** must be called to release the lock.











**
** *pNewMapSize is set to the size of the mapping.
**
** *ppBuf and *pNewMapSize might be NULL and zero if no space has
** yet been allocated to the underlying storage.
*/
static int unixShmGet(
................................................................................
  int *pNewMapSize,        /* Write new size of mapping here */
  void **ppBuf             /* Write mapping buffer origin here */
){
  unixShm *p = (unixShm*)pSharedMem;
  unixShmFile *pFile = p->pFile;
  int rc = SQLITE_OK;

  if( p->lockState!=SQLITE_SHM_CHECKPOINT ){
    sqlite3_mutex_enter(pFile->mutexBuf);
    p->hasMutexBuf = 1;
  }
  sqlite3_mutex_enter(pFile->mutex);
  if( pFile->szMap==0 || reqMapSize>pFile->szMap ){
    int actualSize;
    if( unixShmSize(pSharedMem, -1, &actualSize)==SQLITE_OK
................................................................................
  sqlite3_mutex_leave(pFile->mutex);
  return rc;
}

/*
** Release the lock held on the shared memory segment so that other
** threads are free to resize it if necessary.






*/
static int unixShmRelease(sqlite3_shm *pSharedMem){
  unixShm *p = (unixShm*)pSharedMem;
  /* Only leave mutexBuf if this object recorded that it holds it */
  if( p->hasMutexBuf ){
    unixShmFile *pFile = p->pFile;
    sqlite3_mutex_leave(pFile->mutexBuf);
    p->hasMutexBuf = 0;   /* Record that mutexBuf is no longer held */
  }
  return SQLITE_OK;       /* This routine always reports success */
}

................................................................................
             p->id, getpid(), azLkName[desiredLock], azLkName[p->lockState]));
    if( pGotLock ) *pGotLock = p->lockState;
    return SQLITE_OK;
  }

  OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s->%s\n",
            p->id, getpid(), azLkName[p->lockState], azLkName[desiredLock]));





  sqlite3_mutex_enter(pFile->mutex);
  switch( desiredLock ){
    case SQLITE_SHM_UNLOCK: {
      assert( p->lockState!=SQLITE_SHM_RECOVER );
      unixShmUnlock(pFile, p, UNIX_SHM_A|UNIX_SHM_B|UNIX_SHM_C|UNIX_SHM_D);
      rc = SQLITE_OK;
      p->lockState = SQLITE_SHM_UNLOCK;
................................................................................
      }else if( p->lockState==SQLITE_SHM_WRITE ){
        rc = unixShmSharedLock(pFile, p, UNIX_SHM_A);
        unixShmUnlock(pFile, p, UNIX_SHM_C|UNIX_SHM_D);
        p->lockState = p->readLock = SQLITE_SHM_READ;
      }else{
        assert( p->lockState==SQLITE_SHM_RECOVER );
        unixShmUnlock(pFile, p, UNIX_SHM_MUTEX);
        sqlite3_mutex_leave(pFile->mutexRecov);
        p->lockState = p->readLock;
        rc = SQLITE_OK;
      }
      break;
    }
    case SQLITE_SHM_WRITE: {
      assert( p->lockState==SQLITE_SHM_READ 
................................................................................
    }
    case SQLITE_SHM_CHECKPOINT: {
      assert( p->lockState==SQLITE_SHM_UNLOCK
           || p->lockState==SQLITE_SHM_PENDING
           || p->lockState==SQLITE_SHM_RECOVER );
      if( p->lockState==SQLITE_SHM_RECOVER ){
        unixShmUnlock(pFile, p, UNIX_SHM_MUTEX);
        sqlite3_mutex_leave(pFile->mutexRecov);
        p->lockState = SQLITE_SHM_CHECKPOINT;
        rc = SQLITE_OK;
      }
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_B|UNIX_SHM_C);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_PENDING;
................................................................................
      break;
    }
    default: {
      assert( desiredLock==SQLITE_SHM_RECOVER );
      assert( p->lockState==SQLITE_SHM_READ
           || p->lockState==SQLITE_SHM_READ_FULL
           || p->lockState==SQLITE_SHM_CHECKPOINT );
      sqlite3_mutex_leave(pFile->mutex);
      sqlite3_mutex_enter(pFile->mutexRecov);
      sqlite3_mutex_enter(pFile->mutex);
      rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_MUTEX);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_RECOVER;
      }
      break;
    }
  }







<







 







<







 







<







 







<
<
<
<
<







 







|
|
>
>
>
>
>
>
>
>
>
>
>







 







|







 







>
>
>
>
>
>



|







 







>
>
>
>
>







 







<







 







<







 







|
<
<







4590
4591
4592
4593
4594
4595
4596

4597
4598
4599
4600
4601
4602
4603
....
4628
4629
4630
4631
4632
4633
4634

4635
4636
4637
4638
4639
4640
4641
....
4903
4904
4905
4906
4907
4908
4909

4910
4911
4912
4913
4914
4915
4916
....
4970
4971
4972
4973
4974
4975
4976





4977
4978
4979
4980
4981
4982
4983
....
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
....
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
....
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
....
5240
5241
5242
5243
5244
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256
5257
5258
....
5278
5279
5280
5281
5282
5283
5284

5285
5286
5287
5288
5289
5290
5291
....
5298
5299
5300
5301
5302
5303
5304

5305
5306
5307
5308
5309
5310
5311
....
5320
5321
5322
5323
5324
5325
5326
5327


5328
5329
5330
5331
5332
5333
5334
** unixMutexHeld() is true when reading or writing any other field
** in this structure.
*/
struct unixShmFile {
  struct unixFileId fid;     /* Unique file identifier */
  sqlite3_mutex *mutex;      /* Mutex to access this object */
  sqlite3_mutex *mutexBuf;   /* Mutex to access zBuf[] */

  char *zFilename;           /* Name of the file */
  int h;                     /* Open file descriptor */
  int szMap;                 /* Size of the mapping of file into memory */
  char *pMMapBuf;            /* Where currently mmapped().  NULL if unmapped */
  int nRef;                  /* Number of unixShm objects pointing to this */
  unixShm *pFirst;           /* All unixShm objects pointing to this */
  unixShmFile *pNext;        /* Next in list of all unixShmFile objects */
................................................................................
/*
** State kept for one connection to a shared-memory file.  Each unixShm
** refers to its underlying unixShmFile through pFile, and the unixShm
** objects that share the same unixShmFile are chained through pNext.
** The hasMutex* flags record which of the unixShmFile mutexes this
** object currently holds.
*/
struct unixShm {
  unixShmFile *pFile;        /* The underlying unixShmFile object */
  unixShm *pNext;            /* Next unixShm with the same unixShmFile */
  u8 lockState;              /* Current lock state */
  u8 readLock;               /* Which of the two read-lock states to use */
  u8 hasMutex;               /* True if holding the unixShmFile mutex */
  u8 hasMutexBuf;            /* True if holding pFile->mutexBuf */

  u8 sharedMask;             /* Mask of shared locks held */
  u8 exclMask;               /* Mask of exclusive locks held */
#ifdef SQLITE_DEBUG
  u8 id;                     /* Id of this connection with its unixShmFile */
#endif
};

................................................................................
  unixShmFile *p;
  assert( unixMutexHeld() );
  pp = &unixShmFileList;
  while( (p = *pp)!=0 ){
    if( p->nRef==0 ){
      if( p->mutex ) sqlite3_mutex_free(p->mutex);
      if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf);

      if( p->h>=0 ) close(p->h);
      *pp = p->pNext;
      sqlite3_free(p);
    }else{
      pp = &p->pNext;
    }
  }
................................................................................
    if( pFile->mutex==0 ){
      rc = SQLITE_NOMEM;
      goto shm_open_err;
    }
    pFile->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
    if( pFile->mutexBuf==0 ){
      rc = SQLITE_NOMEM;





      goto shm_open_err;
    }

    pFile->h = open(zName, O_RDWR|O_CREAT, 0664);
    if( pFile->h<0 ){
      rc = SQLITE_CANTOPEN_BKPT;
      goto shm_open_err;
................................................................................
/*
** Map the shared storage into memory.  The minimum size of the
** mapping should be reqMapSize if reqMapSize is positive.  If
** reqMapSize is zero or negative, the implementation can choose
** whatever mapping size is convenient.
**
** *ppBuf is made to point to the memory which is a mapping of the
** underlying storage.  A mutex is acquired to prevent other threads
** from running while *ppBuf is in use in order to prevent other threads
** remapping *ppBuf out from under this thread.  The unixShmRelease()
** call will release the mutex.  However, if the lock state is CHECKPOINT,
** the mutex is not acquired because CHECKPOINT will never remap the
** buffer.  RECOVER might remap, though, so CHECKPOINT will acquire
** the mutex if and when it promotes to RECOVER.
**
** RECOVER needs to be atomic.  The same mutex that prevents *ppBuf from
** being remapped also prevents more than one thread from being in
** RECOVER at a time.  But, RECOVER sometimes wants to remap itself.
** To prevent RECOVER from losing its lock while remapping, the
** mutex is not released by unixShmRelease() when in RECOVER.
**
** *pNewMapSize is set to the size of the mapping.
**
** *ppBuf and *pNewMapSize might be NULL and zero if no space has
** yet been allocated to the underlying storage.
*/
static int unixShmGet(
................................................................................
  int *pNewMapSize,        /* Write new size of mapping here */
  void **ppBuf             /* Write mapping buffer origin here */
){
  unixShm *p = (unixShm*)pSharedMem;
  unixShmFile *pFile = p->pFile;
  int rc = SQLITE_OK;

  if( p->lockState!=SQLITE_SHM_CHECKPOINT && p->hasMutexBuf==0 ){
    sqlite3_mutex_enter(pFile->mutexBuf);
    p->hasMutexBuf = 1;
  }
  sqlite3_mutex_enter(pFile->mutex);
  if( pFile->szMap==0 || reqMapSize>pFile->szMap ){
    int actualSize;
    if( unixShmSize(pSharedMem, -1, &actualSize)==SQLITE_OK
................................................................................
  sqlite3_mutex_leave(pFile->mutex);
  return rc;
}

/*
** Release the lock held on the shared memory segment so that other
** threads are free to resize it if necessary.
**
** If the lock is not currently held, this routine is a harmless no-op.
**
** If the shared-memory object is in lock state RECOVER, then we do not
** really want to release the lock, so in that case too, this routine
** is a no-op.
**
** This routine always returns SQLITE_OK.
*/
static int unixShmRelease(sqlite3_shm *pSharedMem){
  unixShm *p = (unixShm*)pSharedMem;
  /* Leave mutexBuf only if this object recorded that it holds it and
  ** the lock state is not RECOVER. */
  if( p->hasMutexBuf && p->lockState!=SQLITE_SHM_RECOVER ){
    unixShmFile *pFile = p->pFile;
    sqlite3_mutex_leave(pFile->mutexBuf);
    p->hasMutexBuf = 0;   /* Record that mutexBuf is no longer held */
  }
  return SQLITE_OK;
}

................................................................................
             p->id, getpid(), azLkName[desiredLock], azLkName[p->lockState]));
    if( pGotLock ) *pGotLock = p->lockState;
    return SQLITE_OK;
  }

  OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s->%s\n",
            p->id, getpid(), azLkName[p->lockState], azLkName[desiredLock]));
  
  if( desiredLock==SQLITE_SHM_RECOVER && !p->hasMutexBuf ){
    sqlite3_mutex_enter(pFile->mutexBuf);
    p->hasMutexBuf = 1;
  }
  sqlite3_mutex_enter(pFile->mutex);
  switch( desiredLock ){
    case SQLITE_SHM_UNLOCK: {
      assert( p->lockState!=SQLITE_SHM_RECOVER );
      unixShmUnlock(pFile, p, UNIX_SHM_A|UNIX_SHM_B|UNIX_SHM_C|UNIX_SHM_D);
      rc = SQLITE_OK;
      p->lockState = SQLITE_SHM_UNLOCK;
................................................................................
      }else if( p->lockState==SQLITE_SHM_WRITE ){
        rc = unixShmSharedLock(pFile, p, UNIX_SHM_A);
        unixShmUnlock(pFile, p, UNIX_SHM_C|UNIX_SHM_D);
        p->lockState = p->readLock = SQLITE_SHM_READ;
      }else{
        assert( p->lockState==SQLITE_SHM_RECOVER );
        unixShmUnlock(pFile, p, UNIX_SHM_MUTEX);

        p->lockState = p->readLock;
        rc = SQLITE_OK;
      }
      break;
    }
    case SQLITE_SHM_WRITE: {
      assert( p->lockState==SQLITE_SHM_READ 
................................................................................
    }
    case SQLITE_SHM_CHECKPOINT: {
      assert( p->lockState==SQLITE_SHM_UNLOCK
           || p->lockState==SQLITE_SHM_PENDING
           || p->lockState==SQLITE_SHM_RECOVER );
      if( p->lockState==SQLITE_SHM_RECOVER ){
        unixShmUnlock(pFile, p, UNIX_SHM_MUTEX);

        p->lockState = SQLITE_SHM_CHECKPOINT;
        rc = SQLITE_OK;
      }
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_B|UNIX_SHM_C);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_PENDING;
................................................................................
      break;
    }
    default: {
      assert( desiredLock==SQLITE_SHM_RECOVER );
      assert( p->lockState==SQLITE_SHM_READ
           || p->lockState==SQLITE_SHM_READ_FULL
           || p->lockState==SQLITE_SHM_CHECKPOINT );
      assert( sqlite3_mutex_held(pFile->mutexBuf) );


      rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_MUTEX);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_RECOVER;
      }
      break;
    }
  }