SQLite

Check-in [d9157dd176]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Rework the SQLITE_MUTEXFREE_SHMLOCK code to reduce contention.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | mutexfree-shmlock
Files: files | file ages | folders
SHA3-256: d9157dd176a2d18c6e02a2a0c7e16cef2da43bf44be9765e0363f34aebad23e9
User & Date: dan 2018-12-10 15:24:29.922
Context
2018-12-10
15:51
Add extra tests to shmlock.test (direct testing of xShmLock methods). (check-in: d2c785f94c user: dan tags: mutexfree-shmlock)
15:24
Rework the SQLITE_MUTEXFREE_SHMLOCK code to reduce contention. (check-in: d9157dd176 user: dan tags: mutexfree-shmlock)
09:45
Avoid a mutex in-and-out in unixShmBarrier() on this branch. Use __sync_synchronize() instead. (check-in: a8c5fd86ce user: dan tags: mutexfree-shmlock)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/os_unix.c.
44
45
46
47
48
49
50


51
52
53
54
55
56
57
**      plus implementations of sqlite3_os_init() and sqlite3_os_end().
*/
#include "sqliteInt.h"
#if SQLITE_OS_UNIX              /* This file is used on unix only */

/* Turn this feature on in all builds for now */
#define SQLITE_MUTEXFREE_SHMLOCK 1



/*
** There are various methods for file locking used for concurrency
** control:
**
**   1. POSIX locking (the default),
**   2. No locking,







>
>







44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
**      plus implementations of sqlite3_os_init() and sqlite3_os_end().
*/
#include "sqliteInt.h"
#if SQLITE_OS_UNIX              /* This file is used on unix only */

/* Turn this feature on in all builds for now */
#define SQLITE_MUTEXFREE_SHMLOCK 1
/* Number of locking slots that unixMutexFreeShmlock() allots to each of
** shm locks 3..7; locks 0..2 get a single slot each. */
#define SQLITE_MFS_NSHARD        5
/* Value stored in a locking slot while an EXCLUSIVE lock is held on it.
** Smaller non-zero values are a count of SHARED locks. */
#define SQLITE_MFS_EXCLUSIVE     255

/*
** There are various methods for file locking used for concurrency
** control:
**
**   1. POSIX locking (the default),
**   2. No locking,
4251
4252
4253
4254
4255
4256
4257


4258

4259
4260
4261
4262
4263
4264
4265
  ** 0, and so on.
  **
  ** If the 8 bits corresponding to a shm-locking slot are set to
  ** 0xFF, then a write-lock is held on the slot. Or, if they are set to
  ** a non-zero value smaller than 0xFF, then they represent the total 
  ** number of read-locks held on the slot. There is no way to distinguish
  ** between a write-lock and 255 read-locks.  */


  u64 lockmask;

#endif
};

/*
** Atomic CAS primitive used in multi-process mode. Equivalent to:
** 
**   int unixCompareAndSwap(u32 *ptr, u32 oldval, u32 newval){







>
>
|
>







4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
  ** 0, and so on.
  **
  ** If the 8 bits corresponding to a shm-locking slot are set to
  ** 0xFF, then a write-lock is held on the slot. Or, if they are set to
  ** a non-zero value smaller than 0xFF, then they represent the total 
  ** number of read-locks held on the slot. There is no way to distinguish
  ** between a write-lock and 255 read-locks.  */
  /* Array of mutex-free locking slots. unixMutexFreeShmlock() maps shm
  ** locks 0..2 to one slot each (indexes 0..2) and each of locks 3..7 to a
  ** run of SQLITE_MFS_NSHARD consecutive slots, which gives the array size
  ** used below.
  **
  ** NOTE(review): the comment preceding this member still talks about
  ** 8-bit fields; with this layout each slot is a separate u32 - confirm
  ** and refresh that text. */
  struct LockingSlot {
    u32 nLock;               /* SQLITE_MFS_EXCLUSIVE, or count of SHARED locks */
    u64 aPadding[7];         /* Not read by the locking code. NOTE(review):
                             ** presumably spaces slots apart to avoid
                             ** cache-line sharing - confirm */
  } aMFSlot[3 + SQLITE_MFS_NSHARD*5];
#endif
};

/*
** Atomic CAS primitive used in multi-process mode. Equivalent to:
** 
**   int unixCompareAndSwap(u32 *ptr, u32 oldval, u32 newval){
4290
4291
4292
4293
4294
4295
4296



4297
4298
4299
4300
4301
4302
4303
/*
** Per-connection shared-memory handle. Every unixShm refers to one
** unixShmNode; handles that share a node are linked through pNext.
** Bit i of sharedMask/exclMask corresponds to shm lock number i.
*/
struct unixShm {
  unixShmNode *pShmNode;     /* The underlying unixShmNode object */
  unixShm *pNext;            /* Next unixShm with the same unixShmNode */
  u8 hasMutex;               /* True if holding the unixShmNode->pShmMutex */
  u8 id;                     /* Id of this connection within its unixShmNode */
  u16 sharedMask;            /* Mask of shared locks held */
  u16 exclMask;              /* Mask of exclusive locks held */



};

/*
** Constants used for locking
*/
#define UNIX_SHM_BASE   ((22+SQLITE_SHM_NLOCK)*4)         /* first lock byte */
#define UNIX_SHM_DMS    (UNIX_SHM_BASE+SQLITE_SHM_NLOCK)  /* deadman switch */







>
>
>







4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
/*
** Per-connection shared-memory handle. Every unixShm refers to one
** unixShmNode; handles that share a node are linked through pNext.
** Bit i of sharedMask/exclMask corresponds to shm lock number i.
*/
struct unixShm {
  unixShmNode *pShmNode;     /* The underlying unixShmNode object */
  unixShm *pNext;            /* Next unixShm with the same unixShmNode */
  u8 hasMutex;               /* True if holding the unixShmNode->pShmMutex */
  u8 id;                     /* Id of this connection within its unixShmNode */
  u16 sharedMask;            /* Mask of shared locks held */
  u16 exclMask;              /* Mask of exclusive locks held */
#ifdef SQLITE_MUTEXFREE_SHMLOCK
  /* aMFCurrent[i] is the offset, within the run of slots assigned to shm
  ** lock i, of the slot this connection last chose for a SHARED lock on i.
  ** unixMutexFreeShmlock() advances it before each SHARED lock attempt and
  ** reuses it unchanged for the matching SHARED unlock.
  ** NOTE(review): the size 8 presumably mirrors SQLITE_SHM_NLOCK - confirm. */
  u8 aMFCurrent[8];          /* Current slot used for each shared lock */
#endif
};

/*
** Constants used for locking
*/
#define UNIX_SHM_BASE   ((22+SQLITE_SHM_NLOCK)*4)         /* first lock byte */
#define UNIX_SHM_DMS    (UNIX_SHM_BASE+SQLITE_SHM_NLOCK)  /* deadman switch */
4794
4795
4796
4797
4798
4799
4800













































































4801
4802
4803
4804
4805
4806
4807
    *pp = 0;
  }
  if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;
  sqlite3_mutex_leave(pShmNode->pShmMutex);
  return rc;
}














































































/*
** Change the lock state for a shared-memory segment.
**
** Note that the relationship between SHAREd and EXCLUSIVE locks is a little
** different here than in posix.  In xShmLock(), one can go from unlocked
** to shared and back or from unlocked to exclusive and back.  But one may
** not go from shared to exclusive or from exclusive to shared.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
    *pp = 0;
  }
  if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;
  sqlite3_mutex_leave(pShmNode->pShmMutex);
  return rc;
}

#ifdef SQLITE_MUTEXFREE_SHMLOCK
/*
** Mutex-free implementation of shm locking, driven entirely by
** compare-and-swap operations on the unixShmNode.aMFSlot[] counters.
**
** Each slot holds either 0 (unlocked), SQLITE_MFS_EXCLUSIVE (write-locked)
** or a count of SHARED locks. Shm locks 0..2 map to one slot each; locks
** 3..7 are each spread ("sharded") over SQLITE_MFS_NSHARD slots so that
** concurrent readers of the same lock usually touch different counters.
**
**   SHARED lock:      advance this connection's current shard for lock
**                     ofst, then atomically increment that slot. Fails with
**                     SQLITE_BUSY if the slot is write-locked or if the
**                     increment would make the reader count equal to
**                     SQLITE_MFS_EXCLUSIVE (which would be indistinguishable
**                     from a write-lock).
**   SHARED unlock:    atomically decrement the slot chosen by the matching
**                     lock call. SQLITE_BUSY is returned, and nothing is
**                     changed, if the slot is already 0.
**   EXCLUSIVE lock:   swap every slot belonging to locks ofst..ofst+n-1
**                     from 0 to SQLITE_MFS_EXCLUSIVE. If any slot is
**                     non-zero, slots already claimed by this call are reset
**                     to 0 and SQLITE_BUSY is returned.
**   EXCLUSIVE unlock: store 0 into every slot of locks ofst..ofst+n-1.
**
** This routine does not update p->sharedMask or p->exclMask.
**
** Returns SQLITE_OK on success or SQLITE_BUSY if the lock cannot be taken.
*/
static int unixMutexFreeShmlock(
  unixFile *pFd,             /* Database file holding the shared memory */
  int ofst,                  /* First lock to acquire or release */
  int n,                     /* Number of locks to acquire or release */
  int flags                  /* What to do with the lock */
){
  /* aMap[i].iFirst is the index in aMFSlot[] of the first slot used by shm
  ** lock i and aMap[i].nSlot the number of slots it uses. The final entry
  ** is a sentinel so that aMap[ofst+n].iFirst is always one past the last
  ** slot of lock (ofst+n-1). */
  static const struct LockMapEntry {
    int iFirst;
    int nSlot;
  } aMap[9] = {
    { 0, 1 },
    { 1, 1 },
    { 2, 1 },
    { 3+0*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD },
    { 3+1*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD },
    { 3+2*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD },
    { 3+3*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD },
    { 3+4*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD },
    { 3+5*SQLITE_MFS_NSHARD, 0 },
  };

  unixShm *p = pFd->pShm;               /* The shared memory being locked */
  unixShmNode *pShmNode = p->pShmNode;  /* The underlying file iNode */

  /* Both aMap[ofst+n] and (for SHARED) aMap[ofst].nSlot!=0 rely on this. */
  assert( ofst>=0 && n>=1 && ofst+n<(int)(sizeof(aMap)/sizeof(aMap[0])) );

  if( flags & SQLITE_SHM_SHARED ){
    /* SHARED locks */
    u32 iOld, iNew, *ptr;
    int iIncr = -1;                     /* +1 to lock, -1 to unlock */

    assert( n==1 );
    if( (flags & SQLITE_SHM_UNLOCK)==0 ){
      /* Move on to the next shard. The new value is kept even if the lock
      ** attempt below fails; it only matters once a lock is actually held. */
      p->aMFCurrent[ofst] = (p->aMFCurrent[ofst] + 1) % aMap[ofst].nSlot;
      iIncr = 1;
    }
    ptr = &pShmNode->aMFSlot[aMap[ofst].iFirst + p->aMFCurrent[ofst]].nLock;
    do {
      iOld = *ptr;
      /* Unsigned arithmetic: decrementing 0 wraps to a huge value, which
      ** the range check below rejects. */
      iNew = iOld + (u32)iIncr;
      if( iNew>SQLITE_MFS_EXCLUSIVE
       || (iIncr>0 && iNew==SQLITE_MFS_EXCLUSIVE)
      ){
        /* Either the slot is write-locked, an unlock found the slot empty,
        ** or one more reader would make the count look like a write-lock. */
        return SQLITE_BUSY;
      }
    }while( 0==unixCompareAndSwap(ptr, iOld, iNew) );
  }else{
    /* EXCLUSIVE locks */
    int iFirst = aMap[ofst].iFirst;
    int iLast = aMap[ofst+n].iFirst;
    int i;
    for(i=iFirst; i<iLast; i++){
      u32 *ptr = &pShmNode->aMFSlot[i].nLock;
      if( flags & SQLITE_SHM_UNLOCK ){
        assert( (*ptr)==SQLITE_MFS_EXCLUSIVE );
        *ptr = 0;
      }else{
        u32 iOld;
        do {
          iOld = *ptr;
          if( iOld>0 ){
            /* Slot i is in use. Release the slots claimed so far by this
            ** call, in reverse order, and report the conflict. */
            while( i>iFirst ){
              i--;
              pShmNode->aMFSlot[i].nLock = 0;
            }
            return SQLITE_BUSY;
          }
        }while( 0==unixCompareAndSwap(ptr, iOld, SQLITE_MFS_EXCLUSIVE) );
      }
    }
  }

  return SQLITE_OK;
}
#else
# define unixMutexFreeShmlock(a,b,c,d) SQLITE_OK
#endif

/*
** Change the lock state for a shared-memory segment.
**
** Note that the relationship between SHAREd and EXCLUSIVE locks is a little
** different here than in posix.  In xShmLock(), one can go from unlocked
** to shared and back or from unlocked to exclusive and back.  But one may
** not go from shared to exclusive or from exclusive to shared.
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
  assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
  assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
  assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 );
  assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 );

  mask = (1<<(ofst+n)) - (1<<ofst);
  assert( n>1 || mask==(1<<ofst) );

#ifdef SQLITE_MUTEXFREE_SHMLOCK
  if( pDbFd->pInode->bProcessLock ){

    while( 1 ){
      u64 lockmask = pShmNode->lockmask;
      u64 newmask = lockmask;
      int i;
      for(i=ofst; i<n+ofst; i++){
        int ix8 = i*8;
        u8 v = (lockmask >> (ix8)) & 0xFF;
        if( flags & SQLITE_SHM_UNLOCK ){
          if( flags & SQLITE_SHM_EXCLUSIVE ){
            if( p->exclMask & (1 << i) ){
              newmask = newmask & ~((u64)0xFF<<ix8);
            }
          }else{
            if( p->sharedMask & (1 << i) ){
              newmask = newmask & ~((u64)0xFF<<ix8) | ((u64)(v-1)<<ix8);
            }
          }
        }else{
          if( flags & SQLITE_SHM_EXCLUSIVE ){
            if( v ) return SQLITE_BUSY;
            if( (p->exclMask & (1 << i))==0 ){
              newmask = newmask | ((u64)0xFF<<ix8);
            }
          }else{
            if( v==0xFF ) return SQLITE_BUSY;
            if( (p->sharedMask & (1 << i))==0 ){
              newmask = newmask & ~((u64)0xFF<<ix8) | ((u64)(v+1)<<ix8);
            }
          }
        }
      }

      if( unixCompareAndSwap(&pShmNode->lockmask, lockmask, newmask) ) break;
    }

    if( flags & SQLITE_SHM_UNLOCK ){
      p->sharedMask &= ~mask;
      p->exclMask &= ~mask;
    }else if( flags & SQLITE_SHM_EXCLUSIVE ){
      p->exclMask |= mask;
    }else{
      p->sharedMask |= mask;
    }

    return SQLITE_OK;
  }
#endif

  sqlite3_mutex_enter(pShmNode->pShmMutex);
  if( flags & SQLITE_SHM_UNLOCK ){
    u16 allMask = 0; /* Mask of locks held by siblings */

    /* See if any siblings hold this same lock */
    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){








<
<
<
<

|
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
|
|
|
<
|
<
|
|
<
|
<

<







4911
4912
4913
4914
4915
4916
4917
4918




4919
4920











4921



4922















4923


4924
4925
4926

4927

4928
4929

4930

4931

4932
4933
4934
4935
4936
4937
4938
  assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
  assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
  assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 );
  assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 );





  if( pDbFd->pInode->bProcessLock ){
    return unixMutexFreeShmlock(pDbFd, ofst, n, flags);











  }



















  mask = (1<<(ofst+n)) - (1<<ofst);


  assert( n>1 || mask==(1<<ofst) );
  if( flags & SQLITE_SHM_LOCK ){
    assert( !(flags&SQLITE_SHM_SHARED) || (p->sharedMask&mask)==0 );

    assert( !(flags&SQLITE_SHM_EXCLUSIVE) || !(p->exclMask&mask) );

  }else{
    assert( !(flags&SQLITE_SHM_SHARED) || (p->sharedMask&mask)==mask );

    assert( !(flags&SQLITE_SHM_EXCLUSIVE) || (p->exclMask&mask)==mask );

  }


  sqlite3_mutex_enter(pShmNode->pShmMutex);
  if( flags & SQLITE_SHM_UNLOCK ){
    u16 allMask = 0; /* Mask of locks held by siblings */

    /* See if any siblings hold this same lock */
    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
Changes to src/test_superlock.c.
37
38
39
40
41
42
43


44
45
46
47
48
49
50
** An instance of the following structure is allocated for each active
** superlock. The opaque handle returned by sqlite3demo_superlock() is
** actually a pointer to an instance of this structure.
*/
struct Superlock {
  sqlite3 *db;                    /* Database handle used to lock db */
  int bWal;                       /* True if db is a WAL database */
  /* Note: nothing here records which WAL shm locks were actually obtained;
  ** sqlite3demo_superunlock() decides what to release from bWal alone. */


};
typedef struct Superlock Superlock;

/*
** The pCtx pointer passed to this function is actually a pointer to a
** SuperlockBusy structure. Invoke the busy-handler function encapsulated
** by the structure and return the result.







>
>







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
** An instance of the following structure is allocated for each active
** superlock. The opaque handle returned by sqlite3demo_superlock() is
** actually a pointer to an instance of this structure.
*/
struct Superlock {
  sqlite3 *db;                    /* Database handle used to lock db */
  int bWal;                       /* True if db is a WAL database */
  /* The next two flags are set by superlockWalLock() as each lock is
  ** obtained, and tested and cleared by sqlite3demo_superunlock() so that
  ** only locks that were really taken are released. */
  int bRecoveryLocked;            /* True if WAL RECOVERY lock is held */
  int bReaderLocked;              /* True if WAL READER locks are held */
};
typedef struct Superlock Superlock;

/*
** The pCtx pointer passed to this function is actually a pointer to a
** SuperlockBusy structure. Invoke the busy-handler function encapsulated
** by the structure and return the result.
103
104
105
106
107
108
109
110
111
112
113
114
115

116
117
118
119
120
121
122
123

124
125

126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

142

143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158

159



160


161
162
163
164
165
166
167
}

/*
** Obtain the extra locks on the database file required for WAL databases.
** Invoke the supplied busy-handler as required.
**
** Returns SQLITE_OK if both the recovery lock (shm lock 2) and the locks on
** slots 3..SQLITE_SHM_NLOCK-1 were obtained, or the first error code
** encountered otherwise.
**
** NOTE(review): when xShmMap() or the second superlockShmLock() call fails,
** this function returns with the recovery lock still held and gives the
** caller no way to tell which locks were taken.
*/
static int superlockWalLock(
  sqlite3 *db,                    /* Database handle open on WAL database */
  SuperlockBusy *pBusy            /* Busy handler wrapper object */
){
  int rc;                         /* Return code */
  sqlite3_file *fd = 0;           /* Main database file handle */
  void volatile *p = 0;           /* Pointer to first page of shared memory */


  /* Obtain a pointer to the sqlite3_file object open on the main db file. */
  rc = sqlite3_file_control(db, "main", SQLITE_FCNTL_FILE_POINTER, (void *)&fd);
  if( rc!=SQLITE_OK ) return rc;

  /* Obtain the "recovery" lock. Normally, this lock is only obtained by
  ** clients running database recovery.  
  */

  rc = superlockShmLock(fd, 2, 1, pBusy);
  if( rc!=SQLITE_OK ) return rc;


  /* Zero the start of the first shared-memory page. This means that any
  ** clients that open read or write transactions from this point on will
  ** have to run recovery before proceeding. Since they need the "recovery"
  ** lock that this process is holding to do that, no new read or write
  ** transactions may now be opened. Nor can a checkpoint be run, for the
  ** same reason.
  */
  rc = fd->pMethods->xShmMap(fd, 0, 32*1024, 1, &p);
  if( rc!=SQLITE_OK ) return rc;
  memset((void *)p, 0, 32);

  /* Obtain exclusive locks on all the "read-lock" slots. Once these locks
  ** are held, it is guaranteed that there are no active reader, writer or 
  ** checkpointer clients.
  */

  rc = superlockShmLock(fd, 3, SQLITE_SHM_NLOCK-3, pBusy);

  return rc;
}

/*
** Release a superlock held on a database file. The argument passed to 
** this function must have been obtained from a successful call to
** sqlite3demo_superlock().
**
** For a WAL database, the recovery lock (shm lock 2) and the locks on slots
** 3..SQLITE_SHM_NLOCK-1 are released before the database handle is closed
** and the Superlock object freed.
**
** NOTE(review): both xShmLock() unlock calls are made whenever bWal is set,
** without checking that the corresponding locks were ever obtained. If this
** is reached after superlockWalLock() failed part-way, locks that are not
** held by this handle get "released" - confirm whether that is safe for the
** VFS in use.
*/
void sqlite3demo_superunlock(void *pLock){
  Superlock *p = (Superlock *)pLock;
  if( p->bWal ){
    int rc;                         /* Return code */
    int flags = SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE;
    sqlite3_file *fd = 0;
    rc = sqlite3_file_control(p->db, "main", SQLITE_FCNTL_FILE_POINTER, (void *)&fd);
    /* If the file handle cannot be fetched the shm locks are left alone. */
    if( rc==SQLITE_OK ){

      fd->pMethods->xShmLock(fd, 2, 1, flags);



      fd->pMethods->xShmLock(fd, 3, SQLITE_SHM_NLOCK-3, flags);


    }
  }
  sqlite3_close(p->db);
  sqlite3_free(p);
}

/*







|





>








>


>
















>

>
















>
|
>
>
>
|
>
>







105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
}

/*
** Take the additional shared-memory locks that a superlock needs when the
** database is in WAL mode, calling the busy-handler wrapped by pBusy
** whenever a lock is contended.
**
** On success SQLITE_OK is returned and both pLock->bRecoveryLocked and
** pLock->bReaderLocked are set. On failure the first error code hit is
** returned; each flag is set only if its lock was actually obtained, so
** the caller can tell what needs releasing.
*/
static int superlockWalLock(
  Superlock *pLock,               /* Superlock handle */
  SuperlockBusy *pBusy            /* Busy handler wrapper object */
){
  sqlite3_file *pMainFd = 0;      /* sqlite3_file open on the main db file */
  void volatile *pShm = 0;        /* Start of the first shared-memory page */
  int rc;                         /* Return code */

  /* Step 1: fetch the file handle of the main database. */
  rc = sqlite3_file_control(
      pLock->db, "main", SQLITE_FCNTL_FILE_POINTER, (void *)&pMainFd
  );

  /* Step 2: take the "recovery" lock, which is normally held only by a
  ** client that is running database recovery. */
  if( rc==SQLITE_OK ){
    assert( pLock->bRecoveryLocked==0 );
    rc = superlockShmLock(pMainFd, 2, 1, pBusy);
  }

  /* Step 3: map the first shared-memory page and zero its first 32 bytes.
  ** Any client that now tries to start a read or write transaction, or a
  ** checkpoint, must run recovery first - and cannot, because this handle
  ** holds the recovery lock. */
  if( rc==SQLITE_OK ){
    pLock->bRecoveryLocked = 1;
    rc = pMainFd->pMethods->xShmMap(pMainFd, 0, 32*1024, 1, &pShm);
  }

  /* Step 4: take exclusive locks on every "read-lock" slot. Once these are
  ** held there can be no active reader, writer or checkpointer. */
  if( rc==SQLITE_OK ){
    memset((void *)pShm, 0, 32);
    assert( pLock->bReaderLocked==0 );
    rc = superlockShmLock(pMainFd, 3, SQLITE_SHM_NLOCK-3, pBusy);
    if( rc==SQLITE_OK ){
      pLock->bReaderLocked = 1;
    }
  }

  return rc;
}

/*
** Drop a superlock. pLock must be a handle produced by a successful call
** to sqlite3demo_superlock().
**
** For a WAL database, whichever of the recovery lock and the read-slot
** locks are flagged as held get released (and their flags cleared). The
** database handle is then closed and the Superlock object freed.
*/
void sqlite3demo_superunlock(void *pLock){
  Superlock *pSuper = (Superlock *)pLock;

  if( pSuper->bWal ){
    sqlite3_file *pMainFd = 0;      /* sqlite3_file open on the main db file */
    int eUnlock = SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE;
    int rc;                         /* Return code */

    rc = sqlite3_file_control(
        pSuper->db, "main", SQLITE_FCNTL_FILE_POINTER, (void *)&pMainFd
    );
    /* Without the file handle there is no way to reach xShmLock(), so the
    ** shm locks are left untouched in that case. */
    if( rc==SQLITE_OK && pSuper->bRecoveryLocked ){
      pMainFd->pMethods->xShmLock(pMainFd, 2, 1, eUnlock);
      pSuper->bRecoveryLocked = 0;
    }
    if( rc==SQLITE_OK && pSuper->bReaderLocked ){
      pMainFd->pMethods->xShmLock(pMainFd, 3, SQLITE_SHM_NLOCK-3, eUnlock);
      pSuper->bReaderLocked = 0;
    }
  }

  sqlite3_close(pSuper->db);
  sqlite3_free(pSuper);
}

/*
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
  ** to drop the WAL read and write locks currently held. Otherwise, the
  ** new WAL locks may conflict with the old.
  */
  if( rc==SQLITE_OK ){
    if( SQLITE_OK==(rc = superlockIsWal(pLock)) && pLock->bWal ){
      rc = sqlite3_exec(pLock->db, "COMMIT", 0, 0, 0);
      if( rc==SQLITE_OK ){
        rc = superlockWalLock(pLock->db, &busy);
      }
    }
  }

  if( rc!=SQLITE_OK ){
    sqlite3demo_superunlock(pLock);
    *ppLock = 0;







|







241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
  ** to drop the WAL read and write locks currently held. Otherwise, the
  ** new WAL locks may conflict with the old.
  */
  if( rc==SQLITE_OK ){
    if( SQLITE_OK==(rc = superlockIsWal(pLock)) && pLock->bWal ){
      rc = sqlite3_exec(pLock->db, "COMMIT", 0, 0, 0);
      if( rc==SQLITE_OK ){
        rc = superlockWalLock(pLock, &busy);
      }
    }
  }

  if( rc!=SQLITE_OK ){
    sqlite3demo_superunlock(pLock);
    *ppLock = 0;
Changes to src/wal.c.
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
      **         checkpoint need not have completed for this to cause problems.
      */
      volatile WalCkptInfo *pInfo = walCkptInfo(pWal);

      assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 );
      assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );

      /* It is possible that there is a checkpointer thread running 
      ** concurrent with this code. If this is the case, it may be that the
      ** checkpointer has already determined that it will checkpoint 
      ** snapshot X, where X is later in the wal file than pSnapshot, but 
      ** has not yet set the pInfo->nBackfillAttempted variable to indicate 
      ** its intent. To avoid the race condition this leads to, ensure that
      ** there is no checkpointer process by taking a shared CKPT lock 
      ** before checking pInfo->nBackfillAttempted.  
      **
      ** TODO: Does the aReadMark[] lock prevent a checkpointer from doing
      **       this already?
      */
      rc = walLockShared(pWal, WAL_CKPT_LOCK);

      if( rc==SQLITE_OK ){
        /* Check that the wal file has not been wrapped. Assuming that it has
        ** not, also check that no checkpointer has attempted to checkpoint any
        ** frames beyond pSnapshot->mxFrame. If either of these conditions are
        ** true, return SQLITE_ERROR_SNAPSHOT. Otherwise, overwrite pWal->hdr
        ** with *pSnapshot and set *pChanged as appropriate for opening the
        ** snapshot.  */
        if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))
         && pSnapshot->mxFrame>=pInfo->nBackfillAttempted
        ){
          assert( pWal->readLock>0 );
          memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
          *pChanged = bChanged;
        }else{
          rc = SQLITE_ERROR_SNAPSHOT;
        }

        /* Release the shared CKPT lock obtained above. */
        walUnlockShared(pWal, WAL_CKPT_LOCK);
        pWal->minFrame = 1;
      }


      if( rc!=SQLITE_OK ){
        sqlite3WalEndReadTransaction(pWal);
      }







|
|
|


|
|
|
<
<
<
<
<


















<
<







2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787





2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805


2806
2807
2808
2809
2810
2811
2812
      **         checkpoint need not have completed for this to cause problems.
      */
      volatile WalCkptInfo *pInfo = walCkptInfo(pWal);

      assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 );
      assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );

      /* If it were possible for a checkpointer thread to run concurrent 
      ** with this code, it would be a problem. In this case, it could be
      ** that the checkpointer has already determined that it will checkpoint 
      ** snapshot X, where X is later in the wal file than pSnapshot, but 
      ** has not yet set the pInfo->nBackfillAttempted variable to indicate 
      ** its intent. Fortunately this is not possible, as the call to
      ** sqlite3WalSnapshotOpen() that sets pWal->pSnapshot also takes a
      ** SHARED lock on the checkpointer slot.  */






      if( rc==SQLITE_OK ){
        /* Check that the wal file has not been wrapped. Assuming that it has
        ** not, also check that no checkpointer has attempted to checkpoint any
        ** frames beyond pSnapshot->mxFrame. If either of these conditions are
        ** true, return SQLITE_ERROR_SNAPSHOT. Otherwise, overwrite pWal->hdr
        ** with *pSnapshot and set *pChanged as appropriate for opening the
        ** snapshot.  */
        if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))
         && pSnapshot->mxFrame>=pInfo->nBackfillAttempted
        ){
          assert( pWal->readLock>0 );
          memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
          *pChanged = bChanged;
        }else{
          rc = SQLITE_ERROR_SNAPSHOT;
        }



        pWal->minFrame = 1;
      }


      if( rc!=SQLITE_OK ){
        sqlite3WalEndReadTransaction(pWal);
      }