SQLite

Changes On Branch wal-incr-ckpt
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch wal-incr-ckpt Excluding Merge-Ins

This is equivalent to a diff from 15abbc34 to f4b9003a

2010-06-01
15:24
The incremental checkpoint feature is not perfect yet, but it is working well enough to merge it into the trunk. (check-in: 1d3e569e user: drh tags: trunk)
14:30
Merge with [15abbc3416]. (Closed-Leaf check-in: f4b9003a user: dan tags: wal-incr-ckpt)
14:12
If an attempt to get a read-lock on the WAL fails with SQLITE_BUSY_RECOVER, call the busy-handler at the btree level. (check-in: ce644965 user: dan tags: wal-incr-ckpt)
2010-05-31
06:38
Changes to the way one of the WAL/OOM tests works. (check-in: 15abbc34 user: dan tags: trunk)
2010-05-29
08:40
Add tests to fkey2.test to check that ON CONFLICT clauses do not affect SQLite's behaviour when an FK constraint is violated. (check-in: e9e5b100 user: dan tags: trunk)

Changes to src/btree.c.

2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
        }
      }
    }
  
    if( rc!=SQLITE_OK ){
      unlockBtreeIfUnused(pBt);
    }
  }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE &&
          btreeInvokeBusyHandler(pBt) );

  if( rc==SQLITE_OK ){
    if( p->inTrans==TRANS_NONE ){
      pBt->nTransaction++;
#ifndef SQLITE_OMIT_SHARED_CACHE
      if( p->sharable ){







|







2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
        }
      }
    }
  
    if( rc!=SQLITE_OK ){
      unlockBtreeIfUnused(pBt);
    }
  }while( (rc&0xFF)==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE &&
          btreeInvokeBusyHandler(pBt) );

  if( rc==SQLITE_OK ){
    if( p->inTrans==TRANS_NONE ){
      pBt->nTransaction++;
#ifndef SQLITE_OMIT_SHARED_CACHE
      if( p->sharable ){

Changes to src/os.c.

106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
}
/* Forward to the xShmGet method of the file's method table, passing every
** argument through unchanged and returning that method's result. */
int sqlite3OsShmGet(sqlite3_file *id,int reqSize,int *pSize,void volatile **pp){
  return id->pMethods->xShmGet(id, reqSize, pSize, pp);
}
/* Forward to the xShmRelease method of the file's method table and return
** that method's result. */
int sqlite3OsShmRelease(sqlite3_file *id){
  return id->pMethods->xShmRelease(id);
}
/* Forward to the xShmLock method of the file's method table, passing the
** desired lock and the output pointer through unchanged, and return that
** method's result. */
int sqlite3OsShmLock(sqlite3_file *id, int desiredLock, int *pGotLock){
  return id->pMethods->xShmLock(id, desiredLock, pGotLock);
}
/* Forward to the xShmBarrier method of the file's method table.  No value
** is returned. */
void sqlite3OsShmBarrier(sqlite3_file *id){
  id->pMethods->xShmBarrier(id);
}
/* Forward to the xShmClose method of the file's method table, passing
** deleteFlag through unchanged, and return that method's result. */
int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){
  return id->pMethods->xShmClose(id, deleteFlag);
}







|
|







106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
}
/* Forward to the xShmGet method of the file's method table, passing every
** argument through unchanged and returning that method's result. */
int sqlite3OsShmGet(sqlite3_file *id,int reqSize,int *pSize,void volatile **pp){
  return id->pMethods->xShmGet(id, reqSize, pSize, pp);
}
/* Forward to the xShmRelease method of the file's method table and return
** that method's result. */
int sqlite3OsShmRelease(sqlite3_file *id){
  return id->pMethods->xShmRelease(id);
}
/* Forward to the xShmLock method of the file's method table, passing the
** offset, count and flags through unchanged, and return that method's
** result. */
int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){
  return id->pMethods->xShmLock(id, offset, n, flags);
}
/* Forward to the xShmBarrier method of the file's method table.  No value
** is returned. */
void sqlite3OsShmBarrier(sqlite3_file *id){
  id->pMethods->xShmBarrier(id);
}
/* Forward to the xShmClose method of the file's method table, passing
** deleteFlag through unchanged, and return that method's result. */
int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){
  return id->pMethods->xShmClose(id, deleteFlag);
}

Changes to src/os.h.

243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0
int sqlite3OsSectorSize(sqlite3_file *id);
int sqlite3OsDeviceCharacteristics(sqlite3_file *id);
int sqlite3OsShmOpen(sqlite3_file *id);
int sqlite3OsShmSize(sqlite3_file *id, int, int*);
int sqlite3OsShmGet(sqlite3_file *id, int, int*, void volatile**);
int sqlite3OsShmRelease(sqlite3_file *id);
int sqlite3OsShmLock(sqlite3_file *id, int, int*);
void sqlite3OsShmBarrier(sqlite3_file *id);
int sqlite3OsShmClose(sqlite3_file *id, int);

/* 
** Functions for accessing sqlite3_vfs methods 
*/
int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *);







|







243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0
int sqlite3OsSectorSize(sqlite3_file *id);
int sqlite3OsDeviceCharacteristics(sqlite3_file *id);
int sqlite3OsShmOpen(sqlite3_file *id);
int sqlite3OsShmSize(sqlite3_file *id, int, int*);
int sqlite3OsShmGet(sqlite3_file *id, int, int*, void volatile**);
int sqlite3OsShmRelease(sqlite3_file *id);
int sqlite3OsShmLock(sqlite3_file *id, int, int, int);
void sqlite3OsShmBarrier(sqlite3_file *id);
int sqlite3OsShmClose(sqlite3_file *id, int);

/* 
** Functions for accessing sqlite3_vfs methods 
*/
int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *);

Changes to src/os_unix.c.

2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
 */
static int semClose(sqlite3_file *id) {
  unixFile *pSemFile;   /* The file handle viewed as a unixFile */

  /* A NULL handle has nothing to release */
  if( id==0 ) return SQLITE_OK;

  pSemFile = (unixFile*)id;
  semUnlock(id, NO_LOCK);          /* Drop any lock before tearing down */
  assert( pSemFile );

  /* The inode information is released while holding the unix mutex */
  unixEnterMutex();
  releaseLockInfo(pSemFile->pInode);
  unixLeaveMutex();

  closeUnixFile(id);
  return SQLITE_OK;
}

#endif /* OS_VXWORKS */







|







2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
 */
static int semClose(sqlite3_file *id) {
  unixFile *pSemFile;   /* The file handle viewed as a unixFile */

  /* A NULL handle has nothing to release */
  if( id==0 ) return SQLITE_OK;

  pSemFile = (unixFile*)id;
  semUnlock(id, NO_LOCK);          /* Drop any lock before tearing down */
  assert( pSemFile );

  /* The inode information is released while holding the unix mutex */
  unixEnterMutex();
  releaseInodeInfo(pSemFile->pInode);
  unixLeaveMutex();

  closeUnixFile(id);
  return SQLITE_OK;
}

#endif /* OS_VXWORKS */
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
      /* If there are outstanding locks, do not actually close the file just
      ** yet because that would clear those locks.  Instead, add the file
      ** descriptor to pInode->aPending.  It will be automatically closed when
      ** the last lock is cleared.
      */
      setPendingFd(pFile);
    }
    releaseLockInfo(pFile->pInode);
    sqlite3_free(pFile->lockingContext);
    rc = closeUnixFile(id);
    unixLeaveMutex();
  }
  return rc;
}








|







2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
      /* If there are outstanding locks, do not actually close the file just
      ** yet because that would clear those locks.  Instead, add the file
      ** descriptor to pInode->aPending.  It will be automatically closed when
      ** the last lock is cleared.
      */
      setPendingFd(pFile);
    }
    releaseInodeInfo(pFile->pInode);
    sqlite3_free(pFile->lockingContext);
    rc = closeUnixFile(id);
    unixLeaveMutex();
  }
  return rc;
}

3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238

3239
3240
3241
3242
3243
3244
3245
3246
3247
3248






3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282

3283

3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312

3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
**
** All other fields are read/write.  The unixShm.pFile->mutex must be held
** while accessing any read/write fields.
*/
struct unixShm {
  unixShmNode *pShmNode;     /* The underlying unixShmNode object */
  unixShm *pNext;            /* Next unixShm with the same unixShmNode */
  u8 lockState;              /* Current lock state (an SQLITE_SHM_xxxxx value) */
  u8 hasMutex;               /* True if holding the unixShmNode mutex */
  u8 hasMutexBuf;            /* True if holding pShmNode->mutexBuf */
  u8 sharedMask;             /* Mask of shared locks held */
  u8 exclMask;               /* Mask of exclusive locks held */
#ifdef SQLITE_DEBUG
  u8 id;                     /* Id of this connection within its unixShmNode */
#endif
};

/*
** Size increment by which shared memory grows
*/
#define SQLITE_UNIX_SHM_INCR  4096

/*
** Constants used for locking
*/
#define UNIX_SHM_BASE      80        /* Byte offset of the first lock byte */
#define UNIX_SHM_DMS       0x01      /* Mask for Dead-Man-Switch lock */
#define UNIX_SHM_A         0x10      /* Mask for region locks... */
#define UNIX_SHM_B         0x20
#define UNIX_SHM_C         0x40
#define UNIX_SHM_D         0x80

#ifdef SQLITE_DEBUG
/*
** Return a pointer to a nul-terminated string in static memory that
** describes a locking mask.  The string is of the form "SABCD" with
** each character representing a lock: "S" for the DMS lock and "A"
** through "D" for the region locks.  If a lock is held, its letter is
** shown.  If the lock is not held, the letter is replaced by ".".
**
** The result is written into one of several 8-byte slots of a static
** buffer that are used in rotation, so a returned string is overwritten
** after a few further calls.
**
** This routine is for debugging purposes only and does not appear
** in a production build.
*/
static const char *unixShmLockString(u8 mask){
  static char zBuf[48];               /* Rotating output slots */
  static int iBuf = 0;                /* Offset of the next slot to use */
  static const struct {
    u8 bit;                           /* Lock bit to test */
    char c;                           /* Letter shown when the bit is set */
  } aLock[] = {
    { UNIX_SHM_DMS, 'S' },
    { UNIX_SHM_A,   'A' },
    { UNIX_SHM_B,   'B' },
    { UNIX_SHM_C,   'C' },
    { UNIX_SHM_D,   'D' },
  };
  char *zOut = zBuf + iBuf;           /* Slot used by this call */
  int i;                              /* Loop counter */

  /* Advance to the next slot, wrapping at the end of the buffer */
  iBuf = (iBuf+8 >= (int)sizeof(zBuf)) ? 0 : iBuf+8;

  for(i=0; i<5; i++){
    zOut[i] = (mask & aLock[i].bit) ? aLock[i].c : '.';
  }
  zOut[5] = 0;
  return zOut;
}
#endif /* SQLITE_DEBUG */

/*
** Apply posix advisory locks for all bytes identified in lockMask.
**
** lockMask might contain multiple bits but all bits are guaranteed
** to be contiguous.
**
** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking
** otherwise.
*/
static int unixShmSystemLock(
  unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */
  int lockType,          /* F_UNLCK, F_RDLCK, or F_WRLCK */

  u8 lockMask            /* Which bytes to lock or unlock */
){
  struct flock f;       /* The posix advisory locking structure */
  int lockOp;           /* The opcode for fcntl() */
  int i;                /* Offset into the locking byte range */
  int rc;               /* Result code from fcntl() */
  u8 mask;              /* Mask of bits in lockMask */

  /* Access to the unixShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 );







  /* Initialize the locking parameters */
  memset(&f, 0, sizeof(f));
  f.l_type = lockType;
  f.l_whence = SEEK_SET;
  /* Only a lock (not an unlock) of exactly UNIX_SHM_C uses the blocking
  ** F_SETLKW opcode; every other request uses non-blocking F_SETLK. */
  if( lockMask==UNIX_SHM_C && lockType!=F_UNLCK ){
    lockOp = F_SETLKW;
    OSTRACE(("SHM-LOCK requesting blocking lock\n"));
  }else{
    lockOp = F_SETLK;
  }

  /* Find the first bit in lockMask that is set */
  for(i=0, mask=0x01; mask!=0 && (lockMask&mask)==0; mask <<= 1, i++){}
  assert( mask!=0 );
  /* Bit i of the mask corresponds to file byte UNIX_SHM_BASE+i */
  f.l_start = i+UNIX_SHM_BASE;
  f.l_len = 1;

  /* Extend the locking range for each additional bit that is set */
  mask <<= 1;
  while( mask!=0 && (lockMask & mask)!=0 ){
    f.l_len++;
    mask <<= 1;
  }

  /* Verify that all bits set in lockMask are contiguous */
  assert( mask==0 || (lockMask & ~(mask | (mask-1)))==0 );

  /* Acquire the system-level lock.  Any fcntl() failure (a return of -1)
  ** is reported to the caller as SQLITE_BUSY. */
  rc = fcntl(pShmNode->h, lockOp, &f);
  rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;

  /* Update the global lock state and do debug tracing.  The masks stored
  ** on pShmNode are only modified here, inside SQLITE_DEBUG builds. */
#ifdef SQLITE_DEBUG

  OSTRACE(("SHM-LOCK "));

  if( rc==SQLITE_OK ){
    if( lockType==F_UNLCK ){
      OSTRACE(("unlock ok"));
      pShmNode->exclMask &= ~lockMask;
      pShmNode->sharedMask &= ~lockMask;
    }else if( lockType==F_RDLCK ){
      OSTRACE(("read-lock ok"));
      pShmNode->exclMask &= ~lockMask;
      pShmNode->sharedMask |= lockMask;
    }else{
      assert( lockType==F_WRLCK );
      OSTRACE(("write-lock ok"));
      pShmNode->exclMask |= lockMask;
      pShmNode->sharedMask &= ~lockMask;
    }
  }else{
    if( lockType==F_UNLCK ){
      OSTRACE(("unlock failed"));
    }else if( lockType==F_RDLCK ){
      OSTRACE(("read-lock failed"));
    }else{
      assert( lockType==F_WRLCK );
      OSTRACE(("write-lock failed"));
    }
  }
  OSTRACE((" - change requested %s - afterwards %s:%s\n",
           unixShmLockString(lockMask),
           unixShmLockString(pShmNode->sharedMask),
           unixShmLockString(pShmNode->exclMask)));

#endif

  return rc;        
}

/*
** Release, on behalf of connection p, every lock named in unlockMask.
**
** A system-level unlock is issued only for those bits that no sibling
** connection on the same unixShmNode still holds as a shared lock.  The
** per-connection masks of p are cleared only if that system call (when
** one is needed) succeeds.  Returns SQLITE_OK or the error from
** unixShmSystemLock().
*/
static int unixShmUnlock(
  unixShmNode *pShmNode,   /* The underlying shared-memory file */
  unixShm *p,              /* The connection to be unlocked */
  u8 unlockMask            /* Mask of locks to be unlocked */
){
  int rc = SQLITE_OK;      /* Result code */
  unixShm *pSib;           /* Iterator over sibling connections */
  u8 siblingShared = 0;    /* Shared locks held by connections other than p */
  u8 toRelease;            /* Bits that must be unlocked at the system level */

  /* Access to the unixShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pShmNode->mutex) );

  /* Collect the shared locks held by every other connection */
  for(pSib=pShmNode->pFirst; pSib!=0; pSib=pSib->pNext){
    if( pSib!=p ){
      assert( (pSib->exclMask & (p->exclMask|p->sharedMask))==0 );
      siblingShared |= pSib->sharedMask;
    }
  }

  /* Only bits not shared with a sibling are dropped at the system level */
  toRelease = (u8)(unlockMask & ~siblingShared);
  if( toRelease!=0 ){
    rc = unixShmSystemLock(pShmNode, F_UNLCK, toRelease);
  }
  if( rc!=SQLITE_OK ) return rc;

  /* Undo the local locks */
  p->exclMask &= ~unlockMask;
  p->sharedMask &= ~unlockMask;
  return SQLITE_OK;
}

/*
** Acquire, for connection p, a shared lock on every lock named in readMask.
**
** Returns SQLITE_BUSY immediately if any sibling connection holds an
** exclusive lock on one of the requested bits.  A system-level read lock
** is requested only when at least one requested bit is not already held
** shared by a sibling.  On success the bits are added to p->sharedMask.
*/
static int unixShmSharedLock(
  unixShmNode *pShmNode,   /* The underlying shared-memory file */
  unixShm *p,              /* The connection to get the shared locks */
  u8 readMask              /* Mask of shared locks to be acquired */
){
  int rc = SQLITE_OK;      /* Result code */
  unixShm *pSib;           /* Iterator over sibling connections */
  u8 siblingShared = 0;    /* Shared locks held by connections other than p */

  /* Access to the unixShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pShmNode->mutex) );

  /* Scan the siblings: bail out on a conflicting exclusive lock, otherwise
  ** accumulate the shared locks they hold. */
  for(pSib=pShmNode->pFirst; pSib!=0; pSib=pSib->pNext){
    if( pSib!=p ){
      if( (pSib->exclMask & readMask)!=0 ) return SQLITE_BUSY;
      siblingShared |= pSib->sharedMask;
    }
  }

  /* Get shared locks at the system level, if necessary */
  if( (readMask & ~siblingShared)!=0 ){
    rc = unixShmSystemLock(pShmNode, F_RDLCK, readMask);
  }
  if( rc!=SQLITE_OK ) return rc;

  /* Record the local shared locks */
  p->sharedMask |= readMask;
  return SQLITE_OK;
}

/*
** Acquire, for connection p, an exclusive lock on every lock named in
** writeMask.
**
** Returns SQLITE_BUSY immediately if any sibling connection holds either
** a shared or an exclusive lock on one of the requested bits.  Otherwise a
** system-level write lock is requested and, if it succeeds, the bits are
** moved from p->sharedMask to p->exclMask.
*/
static int unixShmExclusiveLock(
  unixShmNode *pShmNode,    /* The underlying shared-memory file */
  unixShm *p,               /* The connection to get the exclusive locks */
  u8 writeMask              /* Mask of exclusive locks to be acquired */
){
  int rc;                   /* Result code */
  unixShm *pSib;            /* Iterator over sibling connections */

  /* Access to the unixShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pShmNode->mutex) );

  /* Any lock of either kind held by a sibling blocks this request */
  for(pSib=pShmNode->pFirst; pSib!=0; pSib=pSib->pNext){
    if( pSib!=p && ((pSib->exclMask | pSib->sharedMask) & writeMask)!=0 ){
      return SQLITE_BUSY;
    }
  }

  /* Get the exclusive locks at the system level */
  rc = unixShmSystemLock(pShmNode, F_WRLCK, writeMask);
  if( rc!=SQLITE_OK ) return rc;

  /* Mark the local connection as holding the locks exclusively */
  p->sharedMask &= ~writeMask;
  p->exclMask |= writeMask;
  return SQLITE_OK;
}

/*
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
**
** This is not a VFS shared-memory method; it is a utility function called
** by VFS shared-memory methods.
*/







<


|
|






<
<
<
<
<


<
<
<
|
<
|

<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<







>
|


<
<
|
<



>
>
>
>
>
>





<
<
<
<
<
<
<
<
<
<
|
|

<
<
<
<
<
<
<
<
<
<
<
|




>

>


|
|
|

|
|
|


|
|
|



|




|


|
<
|
<
>





<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







3164
3165
3166
3167
3168
3169
3170

3171
3172
3173
3174
3175
3176
3177
3178
3179
3180





3181
3182



3183

3184
3185

3186






























3187



3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198


3199

3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213










3214
3215
3216











3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250

3251

3252
3253
3254
3255
3256
3257

















































































































3258
3259
3260
3261
3262
3263
3264
**
** All other fields are read/write.  The unixShm.pFile->mutex must be held
** while accessing any read/write fields.
*/
struct unixShm {
  unixShmNode *pShmNode;     /* The underlying unixShmNode object */
  unixShm *pNext;            /* Next unixShm with the same unixShmNode */

  u8 hasMutex;               /* True if holding the unixShmNode mutex */
  u8 hasMutexBuf;            /* True if holding pShmNode->mutexBuf */
  u16 sharedMask;            /* Mask of shared locks held */
  u16 exclMask;              /* Mask of exclusive locks held */
#ifdef SQLITE_DEBUG
  u8 id;                     /* Id of this connection within its unixShmNode */
#endif
};

/*





** Constants used for locking
*/



#define UNIX_SHM_BASE   ((18+SQLITE_SHM_NLOCK)*4)         /* first lock byte */

#define UNIX_SHM_DMS    (UNIX_SHM_BASE+SQLITE_SHM_NLOCK)  /* deadman switch */


/*






























** Apply posix advisory locks for all bytes from ofst through ofst+n-1.



**
** All requests are non-blocking (F_SETLK): if the lock cannot be obtained
** immediately, SQLITE_BUSY is returned.
*/
static int unixShmSystemLock(
  unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */
  int lockType,          /* F_UNLCK, F_RDLCK, or F_WRLCK */
  int ofst,              /* First byte of the locking range */
  int n                  /* Number of bytes to lock */
){
  struct flock f;       /* The posix advisory locking structure */
  int rc = SQLITE_OK;   /* Result code from fcntl() */

  /* Access to the unixShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 );

  /* Shared locks never span more than one byte */
  assert( n==1 || lockType!=F_RDLCK );

  /* Locks are within range */
  assert( n>=1 && n<SQLITE_SHM_NLOCK );

  /* Initialize the locking parameters: bytes ofst through ofst+n-1,
  ** measured from the start of the file. */
  memset(&f, 0, sizeof(f));
  f.l_type = lockType;
  f.l_whence = SEEK_SET;
  f.l_start = ofst;
  f.l_len = n;

  /* The request never blocks.  Any fcntl() failure (a return of -1) is
  ** reported to the caller as SQLITE_BUSY. */
  rc = fcntl(pShmNode->h, F_SETLK, &f);
  rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;

  /* Update the global lock state and do debug tracing */
#ifdef SQLITE_DEBUG
  { u16 mask;
  int iSlot;            /* Lock index relative to UNIX_SHM_BASE */
  OSTRACE(("SHM-LOCK "));
  /* ofst is a file byte offset, so shifting by it directly would exceed
  ** the width of an int (undefined behaviour).  Build the mask from the
  ** position relative to the first lock byte instead, and fall back to an
  ** all-ones mask for any range that does not fit in the 16-bit mask. */
  iSlot = ofst - UNIX_SHM_BASE;
  if( iSlot>=0 && iSlot+n<=16 ){
    mask = (u16)((1<<(iSlot+n)) - (1<<iSlot));
  }else{
    mask = 0xffff;
  }
  if( rc==SQLITE_OK ){
    if( lockType==F_UNLCK ){
      OSTRACE(("unlock %d ok", ofst));
      pShmNode->exclMask &= ~mask;
      pShmNode->sharedMask &= ~mask;
    }else if( lockType==F_RDLCK ){
      OSTRACE(("read-lock %d ok", ofst));
      pShmNode->exclMask &= ~mask;
      pShmNode->sharedMask |= mask;
    }else{
      assert( lockType==F_WRLCK );
      OSTRACE(("write-lock %d ok", ofst));
      pShmNode->exclMask |= mask;
      pShmNode->sharedMask &= ~mask;
    }
  }else{
    if( lockType==F_UNLCK ){
      OSTRACE(("unlock %d failed", ofst));
    }else if( lockType==F_RDLCK ){
      OSTRACE(("read-lock failed"));
    }else{
      assert( lockType==F_WRLCK );
      OSTRACE(("write-lock %d failed", ofst));
    }
  }
  OSTRACE((" - afterwards %03x,%03x\n",
           pShmNode->sharedMask, pShmNode->exclMask));
  }
#endif

  return rc;
}



















































































































/*
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
**
** This is not a VFS shared-memory method; it is a utility function called
** by VFS shared-memory methods.
*/
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
      goto shm_open_err;
    }

    /* Check to see if another process is holding the dead-man switch.
    ** If not, truncate the file to zero length. 
    */
    rc = SQLITE_OK;
    if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS)==SQLITE_OK ){
      if( ftruncate(pShmNode->h, 0) ){
        rc = SQLITE_IOERR;
      }
    }
    if( rc==SQLITE_OK ){
      rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS);
    }
    if( rc ) goto shm_open_err;
  }

  /* Make the new connection a child of the unixShmNode */
  p->pShmNode = pShmNode;
  p->pNext = pShmNode->pFirst;







|





|







3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
      goto shm_open_err;
    }

    /* Check to see if another process is holding the dead-man switch.
    ** If not, truncate the file to zero length. 
    */
    rc = SQLITE_OK;
    if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
      if( ftruncate(pShmNode->h, 0) ){
        rc = SQLITE_IOERR;
      }
    }
    if( rc==SQLITE_OK ){
      rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1);
    }
    if( rc ) goto shm_open_err;
  }

  /* Make the new connection a child of the unixShmNode */
  p->pShmNode = pShmNode;
  p->pNext = pShmNode->pFirst;
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585

3586
3587
3588
3589
3590
3591
3592
  p = pDbFd->pShm;
  if( p==0 ) return SQLITE_OK;
  pShmNode = p->pShmNode;

  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );

  /* Verify that the connection being closed holds no locks */
  assert( p->exclMask==0 );
  assert( p->sharedMask==0 );

  /* Remove connection p from the set of connections associated
  ** with pShmNode */
  sqlite3_mutex_enter(pShmNode->mutex);
  for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){}
  *pp = p->pNext;

  /* Free the connection p */

  sqlite3_free(p);
  pDbFd->pShm = 0;
  sqlite3_mutex_leave(pShmNode->mutex);

  /* If pShmNode->nRef has reached 0, then close the underlying
  ** shared-memory file, too */
  unixEnterMutex();







<
<
<
<







>







3395
3396
3397
3398
3399
3400
3401




3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
  p = pDbFd->pShm;
  if( p==0 ) return SQLITE_OK;
  pShmNode = p->pShmNode;

  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );





  /* Remove connection p from the set of connections associated
  ** with pShmNode */
  sqlite3_mutex_enter(pShmNode->mutex);
  for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){}
  *pp = p->pNext;

  /* Free the connection p */
  assert( p->hasMutexBuf==0 );
  sqlite3_free(p);
  pDbFd->pShm = 0;
  sqlite3_mutex_leave(pShmNode->mutex);

  /* If pShmNode->nRef has reached 0, then close the underlying
  ** shared-memory file, too */
  unixEnterMutex();
3637
3638
3639
3640
3641
3642
3643





















3644
3645
3646
3647
3648
3649
3650
    }
    rc = ftruncate(pShmNode->h, reqSize);
    reqSize = -1;
  }
  return rc;
}























/*
** Map the shared storage into memory. 
**
** If reqMapSize is positive, then an attempt is made to make the
** mapping at least reqMapSize bytes in size.  However, the mapping
** will never be larger than the size of the underlying shared memory







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
    }
    rc = ftruncate(pShmNode->h, reqSize);
    reqSize = -1;
  }
  return rc;
}

/*
** Release the pShmNode->mutexBuf mutex held by this connection so that
** other threads are free to resize the shared memory if necessary.
**
** If the mutex is not currently held by this connection, this routine is
** a harmless no-op.  SQLITE_OK is always returned.
*/
static int unixShmRelease(sqlite3_file *fd){
  unixShm *pConn = ((unixFile*)fd)->pShm;   /* Connection being released */

  /* Nothing to do unless this connection holds mutexBuf */
  if( pConn->hasMutexBuf==0 ) return SQLITE_OK;

  assert( sqlite3_mutex_notheld(pConn->pShmNode->mutex) );
  sqlite3_mutex_leave(pConn->pShmNode->mutexBuf);
  pConn->hasMutexBuf = 0;
  return SQLITE_OK;
}

/*
** Map the shared storage into memory. 
**
** If reqMapSize is positive, then an attempt is made to make the
** mapping at least reqMapSize bytes in size.  However, the mapping
** will never be larger than the size of the underlying shared memory
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
  unixShm *p = pDbFd->pShm;
  unixShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_OK;

  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );

  if( p->lockState!=SQLITE_SHM_CHECKPOINT && p->hasMutexBuf==0 ){
    assert( sqlite3_mutex_notheld(pShmNode->mutex) );
    sqlite3_mutex_enter(pShmNode->mutexBuf);
    p->hasMutexBuf = 1;
  }
  sqlite3_mutex_enter(pShmNode->mutex);
  if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){
    int actualSize;







|







3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
  unixShm *p = pDbFd->pShm;
  unixShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_OK;

  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );

  if( p->hasMutexBuf==0 ){
    assert( sqlite3_mutex_notheld(pShmNode->mutex) );
    sqlite3_mutex_enter(pShmNode->mutexBuf);
    p->hasMutexBuf = 1;
  }
  sqlite3_mutex_enter(pShmNode->mutex);
  if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){
    int actualSize;
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759





3760
3761
3762
3763

3764
3765
3766
3767

3768
3769

3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781

3782
3783
3784
3785
3786

3787

3788

3789
3790
3791


3792

3793
3794
3795


3796
3797

3798
3799
3800
3801
3802

3803


3804
3805


3806
3807

3808

3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819

3820
3821
3822

3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845







3846
3847
3848
3849
3850

3851
3852
3853
3854
3855
3856
3857
3858
3859


3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873

3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
      pShmNode->pMMapBuf = 0;
      pShmNode->szMap = 0;
    }
  }
  *pNewMapSize = pShmNode->szMap;
  *ppBuf = pShmNode->pMMapBuf;
  sqlite3_mutex_leave(pShmNode->mutex);
  return rc;
}

/*
** Release the pShmNode->mutexBuf mutex held by this connection so that
** other threads are free to resize the shared memory if necessary.
**
** If the mutex is not currently held by this connection, this routine is
** a harmless no-op.
**
** If the connection is in lock state RECOVER, then we do not really want
** to release the mutex, so in that case too, this routine is a no-op.
** SQLITE_OK is always returned.
*/
static int unixShmRelease(sqlite3_file *fd){
  unixShm *pConn = ((unixFile*)fd)->pShm;   /* Connection being released */

  /* Nothing to do if mutexBuf is not held, or while recovering */
  if( !pConn->hasMutexBuf || pConn->lockState==SQLITE_SHM_RECOVER ){
    return SQLITE_OK;
  }

  assert( sqlite3_mutex_notheld(pConn->pShmNode->mutex) );
  sqlite3_mutex_leave(pConn->pShmNode->mutexBuf);
  pConn->hasMutexBuf = 0;
  return SQLITE_OK;
}

/*
** Symbolic names for LOCK states used for debugging.
*/
#ifdef SQLITE_DEBUG
/* Indexed by lock state in the OSTRACE calls of unixShmLock().
** NOTE(review): the entry order presumably matches the numeric values of
** the SQLITE_SHM_xxxxx constants - confirm against their definitions. */
static const char *azLkName[] = {
  "UNLOCK",
  "READ",
  "READ_FULL",
  "WRITE",
  "PENDING",
  "CHECKPOINT",
  "RECOVER"
};
#endif


/*
** Change the lock state for a shared-memory segment.





*/
static int unixShmLock(
  sqlite3_file *fd,          /* Database file holding the shared memory */
  int desiredLock,           /* One of SQLITE_SHM_xxxxx locking states */

  int *pGotLock              /* The lock you actually got */
){
  unixFile *pDbFd = (unixFile*)fd;
  unixShm *p = pDbFd->pShm;

  unixShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_PROTOCOL;


  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );

  /* Note that SQLITE_SHM_READ_FULL and SQLITE_SHM_PENDING are never
  ** directly requested; they are side effects from requesting
  ** SQLITE_SHM_READ and SQLITE_SHM_CHECKPOINT, respectively.
  */
  assert( desiredLock==SQLITE_SHM_UNLOCK
       || desiredLock==SQLITE_SHM_READ
       || desiredLock==SQLITE_SHM_WRITE
       || desiredLock==SQLITE_SHM_CHECKPOINT

       || desiredLock==SQLITE_SHM_RECOVER );

  /* Return directly if this is just a lock state query, or if
  ** the connection is already in the desired locking state.
  */

  if( desiredLock==p->lockState

   || (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL)

  ){
    OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s and got %s\n",
             p->id, getpid(), azLkName[desiredLock], azLkName[p->lockState]));


    if( pGotLock ) *pGotLock = p->lockState;

    return SQLITE_OK;
  }



  OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s->%s\n",
            p->id, getpid(), azLkName[p->lockState], azLkName[desiredLock]));

  
  /* A RECOVER request takes mutexBuf, if not already held, before the
  ** node mutex is entered. */
  if( desiredLock==SQLITE_SHM_RECOVER && !p->hasMutexBuf ){
    assert( sqlite3_mutex_notheld(pShmNode->mutex) );
    sqlite3_mutex_enter(pShmNode->mutexBuf);
    p->hasMutexBuf = 1;

  }


  sqlite3_mutex_enter(pShmNode->mutex);
  switch( desiredLock ){


    case SQLITE_SHM_UNLOCK: {
      assert( p->lockState!=SQLITE_SHM_RECOVER );

      /* Drop all four region locks.  The result of the unlock is not
      ** checked; SQLITE_OK is reported regardless. */
      unixShmUnlock(pShmNode, p, UNIX_SHM_A|UNIX_SHM_B|UNIX_SHM_C|UNIX_SHM_D);

      rc = SQLITE_OK;
      p->lockState = SQLITE_SHM_UNLOCK;
      break;
    }
    case SQLITE_SHM_READ: {
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        int nAttempt;
        rc = SQLITE_BUSY;
        assert( p->lockState==SQLITE_SHM_UNLOCK );
        /* Up to five attempts: try shared A+B first; if that is busy,
        ** try shared D, which yields READ_FULL.  If A+B succeeds, B is
        ** released again and the state becomes READ. */
        for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){
          rc = unixShmSharedLock(pShmNode, p, UNIX_SHM_A|UNIX_SHM_B);

          if( rc==SQLITE_BUSY ){
            rc = unixShmSharedLock(pShmNode, p, UNIX_SHM_D);
            if( rc==SQLITE_OK ){

              p->lockState = SQLITE_SHM_READ_FULL;
            }
          }else{
            unixShmUnlock(pShmNode, p, UNIX_SHM_B);
            p->lockState = SQLITE_SHM_READ;
          }
        }
      }else{
       assert( p->lockState==SQLITE_SHM_WRITE
               || p->lockState==SQLITE_SHM_RECOVER );
        /* Downgrade: take shared A, then release C and D.
        ** NOTE(review): lockState is set to READ even when the shared
        ** lock on A fails (rc!=SQLITE_OK) - confirm this is intended. */
        rc = unixShmSharedLock(pShmNode, p, UNIX_SHM_A);
        unixShmUnlock(pShmNode, p, UNIX_SHM_C|UNIX_SHM_D);
        p->lockState = SQLITE_SHM_READ;
      }
      break;
    }
    case SQLITE_SHM_WRITE: {
      assert( p->lockState==SQLITE_SHM_READ 
              || p->lockState==SQLITE_SHM_READ_FULL );
      /* WRITE requires exclusive locks on C and D */
      rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_C|UNIX_SHM_D);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_WRITE;
      }







      break;
    }
    case SQLITE_SHM_CHECKPOINT: {
      assert( p->lockState==SQLITE_SHM_UNLOCK
           || p->lockState==SQLITE_SHM_PENDING

      );
      /* Two steps: exclusive B+C moves UNLOCK to PENDING, then exclusive
      ** A moves PENDING to CHECKPOINT.  If the second step fails the
      ** connection is left in PENDING. */
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_B|UNIX_SHM_C);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_PENDING;
        }
      }
      if( p->lockState==SQLITE_SHM_PENDING ){
        rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_A);


        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_CHECKPOINT;
        }
      }
      break;
    }
    default: {
      assert( desiredLock==SQLITE_SHM_RECOVER );
      assert( p->lockState==SQLITE_SHM_READ
           || p->lockState==SQLITE_SHM_READ_FULL
      );
      assert( sqlite3_mutex_held(pShmNode->mutexBuf) );
      /* RECOVER requires an exclusive lock on C */
      rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_C);
      if( rc==SQLITE_OK ){

        p->lockState = SQLITE_SHM_RECOVER;
      }
      break;
    }
  }
  sqlite3_mutex_leave(pShmNode->mutex);
  OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %s\n",
           p->id, getpid(), azLkName[p->lockState]));
  /* Report the state actually reached, which may differ from desiredLock */
  if( pGotLock ) *pGotLock = p->lockState;
  return rc;
}

/*
** Implement a memory barrier or memory fence on shared memory.  
**
** All loads and stores begun before the barrier must complete before
** any load or store begun after the barrier.
**
** With GCC-compatible compilers the __sync_synchronize() builtin is used.
** Otherwise the barrier is approximated by entering and leaving the
** global unix mutex.
*/
static void unixShmBarrier(
  sqlite3_file *fd           /* Database file holding the shared memory */
){
  (void)fd;                  /* The file handle is not needed */
#ifdef __GNUC__
  __sync_synchronize();
#else
  /* Use the same mutex helpers as the rest of this file.  The previous
  ** names, unixMutexEnter()/unixMutexLeave(), are not used anywhere else
  ** in this file. */
  unixEnterMutex();
  unixLeaveMutex();
#endif
}


#else
# define unixShmOpen    0
# define unixShmSize    0
# define unixShmGet     0







|
<
|
<
<
<
<
<
<
<
<
<
<
|
<
<
|
<
<
<
<
<
|


<
<
<
<
<
<
<
<
<
<
<
<
<
<

<


>
>
>
>
>



|
>
|

|
|
>
|
|
>



|
<
<
<
<
|
|
|
|
>
|

<
<
<
>
|
>
|
>
|
<
<
>
>
|
>
|
|

>
>
|
|
>
|
|
|
|
|
>
|
>
>
|
<
>
>
|
<
>
|
>
|
<
|
|
<
<
<
<
<
<
|
>
|
|
|
>
|
<
|
<
|
|
|
<
<
<
<
<
<
|
|
<
<
<
<
<
|
|
|
>
>
>
>
>
>
>
|
|
<
<
<
>
<
<
<
<
<
|
<
<
|
>
>
|
<
<
<
<
<
<
<
<
<
<
<
|

>
|

<



|
|
<












<
<
<
|
|
<







3555
3556
3557
3558
3559
3560
3561
3562

3563










3564


3565





3566
3567
3568














3569

3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593




3594
3595
3596
3597
3598
3599
3600



3601
3602
3603
3604
3605
3606


3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628

3629
3630
3631

3632
3633
3634
3635

3636
3637






3638
3639
3640
3641
3642
3643
3644

3645

3646
3647
3648






3649
3650





3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662



3663





3664


3665
3666
3667
3668











3669
3670
3671
3672
3673

3674
3675
3676
3677
3678

3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690



3691
3692

3693
3694
3695
3696
3697
3698
3699
      pShmNode->pMMapBuf = 0;
      pShmNode->szMap = 0;
    }
  }
  *pNewMapSize = pShmNode->szMap;
  *ppBuf = pShmNode->pMMapBuf;
  sqlite3_mutex_leave(pShmNode->mutex);
  if( *ppBuf==0 ){

    /* Do not hold the mutex if a NULL pointer is being returned. */










    unixShmRelease(fd);


  }





  return rc;
}

















/*
** Acquire or release a contiguous range of locks on a shared-memory
** segment.
**
** The range covers lock slots ofst through ofst+n-1.  The flags argument
** must be exactly one of SQLITE_SHM_LOCK or SQLITE_SHM_UNLOCK combined
** with one of SQLITE_SHM_SHARED or SQLITE_SHM_EXCLUSIVE.  More than one
** slot (n>1) may only be requested for EXCLUSIVE operations.
**
** Unlike posix locks, a connection may move unlocked<->shared or
** unlocked<->exclusive, but never directly between shared and exclusive.
**
** Returns SQLITE_OK on success.  SQLITE_BUSY is returned if a connection
** on the same unixShmNode holds a conflicting lock; otherwise the result
** of unixShmSystemLock() is returned.
*/
static int unixShmLock(
  sqlite3_file *fd,          /* Database file holding the shared memory */
  int ofst,                  /* First lock to acquire or release */
  int n,                     /* Number of locks to acquire or release */
  int flags                  /* What to do with the lock */
){
  unixFile *pDbFd = (unixFile*)fd;      /* Connection holding shared memory */
  unixShm *p = pDbFd->pShm;             /* The shared memory being locked */
  unixShmNode *pShmNode = p->pShmNode;  /* The underlying file iNode */
  unixShm *pSib;                        /* Iterator over all connections */
  int rc = SQLITE_OK;                   /* Result code */
  u16 mask;                             /* Bit for every slot in the range */

  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );
  assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
  assert( n>=1 );
  assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
  assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );

  /* One bit per slot: bits ofst .. ofst+n-1 */
  mask = (1<<(ofst+n)) - (1<<ofst);
  assert( n>1 || mask==(1<<ofst) );

  sqlite3_mutex_enter(pShmNode->mutex);
  if( flags & SQLITE_SHM_UNLOCK ){
    u16 sibShared = 0;       /* Shared locks held by the other connections */

    /* Collect the shared locks of every connection other than p */
    for(pSib=pShmNode->pFirst; pSib; pSib=pSib->pNext){
      if( pSib!=p ){
        assert( (pSib->exclMask & (p->exclMask|p->sharedMask))==0 );
        sibShared |= pSib->sharedMask;
      }
    }

    /* Only drop the system-level lock when no other connection still
    ** holds a shared lock on any slot in the range. */
    if( (mask & sibShared)==0 ){
      rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+UNIX_SHM_BASE, n);
    }

    /* Clear the connection-local record of the locks */
    if( rc==SQLITE_OK ){
      p->exclMask &= ~mask;
      p->sharedMask &= ~mask;
    }
  }else if( flags & SQLITE_SHM_SHARED ){
    u16 heldShared = 0;      /* Shared locks seen while scanning the list */

    /* An exclusive lock on the slot held by any listed connection means
    ** SQLITE_BUSY.  Otherwise accumulate the shared locks already held. */
    for(pSib=pShmNode->pFirst; pSib; pSib=pSib->pNext){
      if( (pSib->exclMask & mask)!=0 ){
        rc = SQLITE_BUSY;
        break;
      }
      heldShared |= pSib->sharedMask;
    }

    /* A system-level shared lock is needed only if none is held yet */
    if( rc==SQLITE_OK && (heldShared & mask)==0 ){
      rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+UNIX_SHM_BASE, n);
    }

    /* Record the shared lock against this connection */
    if( rc==SQLITE_OK ){
      p->sharedMask |= mask;
    }
  }else{
    /* Any lock, shared or exclusive, on any slot in the range held by a
    ** listed connection blocks an exclusive request. */
    for(pSib=pShmNode->pFirst; pSib; pSib=pSib->pNext){
      if( ((pSib->exclMask | pSib->sharedMask) & mask)!=0 ){
        rc = SQLITE_BUSY;
        break;
      }
    }

    /* Take the system-level exclusive lock and, on success, record it
    ** against this connection. */
    if( rc==SQLITE_OK ){
      rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+UNIX_SHM_BASE, n);
      if( rc==SQLITE_OK ){
        assert( (p->sharedMask & mask)==0 );
        p->exclMask |= mask;
      }
    }
  }
  sqlite3_mutex_leave(pShmNode->mutex);
  OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n",
           p->id, getpid(), p->sharedMask, p->exclMask));

  return rc;
}

/*
** Memory barrier (fence) for shared memory.
**
** Every load and store started before the barrier must finish before
** any load or store started after it.  This implementation enters and
** then immediately leaves the unix mutex (presumably relying on the
** mutex primitives to act as a barrier - confirm for new platforms).
** The fd argument is not used.
*/
static void unixShmBarrier(sqlite3_file *fd /* File holding the shared memory */){
  unixEnterMutex();
  unixLeaveMutex();
}


#else
# define unixShmOpen    0
# define unixShmSize    0
# define unixShmGet     0

Changes to src/os_win.c.

1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
  DWORD lastErrno;           /* The Windows errno from the last I/O error */
  int szMap;                 /* Size of the mapping of file into memory */
  char *pMMapBuf;            /* Where currently mmapped().  NULL if unmapped */
  int nRef;                  /* Number of winShm objects pointing to this */
  winShm *pFirst;            /* All winShm objects pointing to this */
  winShmNode *pNext;         /* Next in list of all winShmNode objects */
#ifdef SQLITE_DEBUG
  u8 exclMask;               /* Mask of exclusive locks held */
  u8 sharedMask;             /* Mask of shared locks held */
  u8 nextShmId;              /* Next available winShm.id value */
#endif
};

/*
** A global array of all winShmNode objects.
**







<
<







1223
1224
1225
1226
1227
1228
1229


1230
1231
1232
1233
1234
1235
1236
  DWORD lastErrno;           /* The Windows errno from the last I/O error */
  int szMap;                 /* Size of the mapping of file into memory */
  char *pMMapBuf;            /* Where currently mmapped().  NULL if unmapped */
  int nRef;                  /* Number of winShm objects pointing to this */
  winShm *pFirst;            /* All winShm objects pointing to this */
  winShmNode *pNext;         /* Next in list of all winShmNode objects */
#ifdef SQLITE_DEBUG


  u8 nextShmId;              /* Next available winShm.id value */
#endif
};

/*
** A global array of all winShmNode objects.
**
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326

1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
**
** The winShm.pFile element is initialized when the object is created
** and is read-only thereafter.
*/
struct winShm {
  winShmNode *pShmNode;      /* The underlying winShmNode object */
  winShm *pNext;             /* Next winShm with the same winShmNode */
  u8 lockState;              /* Current lock state (an SQLITE_SHM_xxx value) */
  u8 hasMutex;               /* True if holding the winShmNode mutex */
  u8 hasMutexBuf;            /* True if holding pShmNode->mutexBuf */
  u8 sharedMask;             /* Mask of shared locks held by this connection */
  u8 exclMask;               /* Mask of exclusive locks held by this connection */
#ifdef SQLITE_DEBUG
  u8 id;                     /* Id of this connection with its winShmNode;
                             ** only used in trace output */
#endif
};

/*
** Size increment by which shared memory grows
*/
#define SQLITE_WIN_SHM_INCR  4096

/*
** Constants used for locking
**
** Each WIN_SHM_xxx mask is a single bit.  winShmSystemLock() maps bit
** number k of a mask onto the lock byte at file offset WIN_SHM_BASE+k.
*/
#define WIN_SHM_BASE      80        /* Byte offset of the first lock byte */
#define WIN_SHM_DMS       0x01      /* Mask for Dead-Man-Switch lock */
#define WIN_SHM_A         0x10      /* Mask for region locks... */
#define WIN_SHM_B         0x20
#define WIN_SHM_C         0x40
#define WIN_SHM_D         0x80

#ifdef SQLITE_DEBUG
/*
** Return a pointer to a nul-terminated string in static memory that
** describes a locking mask.  The string has the form "SABCD": "S" for
** the DMS lock and "A" through "D" for the region locks.  The letter is
** shown when the corresponding bit is set in mask and replaced by "."
** when it is not.
**
** The result lives in one of six 8-byte slots of a static buffer that
** are handed out in rotation, so a returned string is overwritten by
** the sixth subsequent call.
**
** This routine is for debugging purposes only and does not appear
** in a production build.
*/
static const char *winShmLockString(u8 mask){
  static const u8 aBit[5] = {
    WIN_SHM_DMS, WIN_SHM_A, WIN_SHM_B, WIN_SHM_C, WIN_SHM_D
  };
  static const char zLetter[] = "SABCD";
  static char zBuf[48];      /* Six rotating 8-byte output slots */
  static int iBuf = 0;       /* Offset of the next slot to hand out */
  char *zOut = &zBuf[iBuf];
  int i;

  /* Advance to the next slot, wrapping at the end of the buffer */
  iBuf += 8;
  if( iBuf>=(int)sizeof(zBuf) ) iBuf = 0;

  for(i=0; i<5; i++){
    zOut[i] = (mask & aBit[i]) ? zLetter[i] : '.';
  }
  zOut[5] = 0;
  return zOut;
}
#endif /* SQLITE_DEBUG */

/*
** Apply system-level byte-range locks (LockFileEx/UnlockFileEx) for all
** bytes identified in lockMask.  Bit number k of lockMask corresponds to
** the single byte at file offset WIN_SHM_BASE+k.
**
** lockMask might contain multiple bits but all bits are guaranteed
** to be contiguous.
**
** Locks block if the mask is exactly WIN_SHM_C and are non-blocking
** otherwise.
**
** Returns SQLITE_OK if every byte was locked/unlocked, or SQLITE_BUSY
** if any LockFileEx()/UnlockFileEx() call failed.  After a failure an
** unlock is attempted on every byte of the range.
*/
#define _SHM_UNLCK  1
#define _SHM_RDLCK  2
#define _SHM_WRLCK  3
static int winShmSystemLock(
  winShmNode *pFile,    /* Apply locks to this open shared-memory segment */
  int lockType,         /* _SHM_UNLCK, _SHM_RDLCK, or _SHM_WRLCK */

  u8 lockMask           /* Which bytes to lock or unlock */
){
  OVERLAPPED ovlp;      /* Carries the file offset of the byte being locked */
  DWORD dwFlags;        /* Flags passed to LockFileEx() */
  int nBytes;           /* Number of bytes to lock */
  int i;                /* Offset into the locking byte range */
  int rc = 0;           /* Result code from Lock/UnlockFileEx() */
  u8 mask;              /* Mask of bits in lockMask */

  /* Access to the winShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pFile->mutex) || pFile->nRef==0 );

  /* Initialize the locking parameters.  Leaving LOCKFILE_FAIL_IMMEDIATELY
  ** out of dwFlags makes the LockFileEx() call wait for the lock. */
  if( lockMask==WIN_SHM_C && lockType!=_SHM_UNLCK ){
    dwFlags = 0;
    OSTRACE(("SHM-LOCK %d requesting blocking lock %s\n", 
             pFile->hFile.h,
             winShmLockString(lockMask)));
  }else{
    dwFlags = LOCKFILE_FAIL_IMMEDIATELY;
    OSTRACE(("SHM-LOCK %d requesting %s %s\n", 
             pFile->hFile.h,
             lockType!=_SHM_UNLCK ? "lock" : "unlock", 
             winShmLockString(lockMask)));
  }
  if( lockType == _SHM_WRLCK ) dwFlags |= LOCKFILE_EXCLUSIVE_LOCK;

  /* Find the first bit in lockMask that is set */
  for(i=0, mask=0x01; mask!=0 && (lockMask&mask)==0; mask <<= 1, i++){}
  assert( mask!=0 );
  memset(&ovlp, 0, sizeof(OVERLAPPED));
  ovlp.Offset = i+WIN_SHM_BASE;
  nBytes = 1;

  /* Extend the locking range for each additional bit that is set */
  mask <<= 1;
  while( mask!=0 && (lockMask & mask)!=0 ){
    nBytes++;
    mask <<= 1;
  }

  /* Verify that all bits set in lockMask are contiguous */
  assert( mask==0 || (lockMask & ~(mask | (mask-1)))==0 );

  /* Release/Acquire the system-level lock, one byte at a time.  Each loop
  ** stops at the first failing call, leaving i equal to the number of
  ** bytes already processed. */
  if( lockType==_SHM_UNLCK ){
    for(i=0; i<nBytes; i++, ovlp.Offset++){
      rc = UnlockFileEx(pFile->hFile.h, 0, 1, 0, &ovlp);
      if( !rc ) break;
    }
  }else{
    /* release old individual byte locks (if any)
    ** and set new individual byte locks */
    for(i=0; i<nBytes; i++, ovlp.Offset++){
      UnlockFileEx(pFile->hFile.h, 0, 1, 0, &ovlp);
      rc = LockFileEx(pFile->hFile.h, dwFlags, 0, 1, 0, &ovlp);
      if( !rc ) break;
    }
  }
  if( !rc ){
    OSTRACE(("SHM-LOCK %d %s ERROR 0x%08lx\n", 
             pFile->hFile.h,
             lockType==_SHM_UNLCK ? "UnlockFileEx" : "LockFileEx",
             GetLastError()));
    /* release individual byte locks (if any).  ovlp.Offset was advanced
    ** i times before the failing call, so subtracting i rewinds it to
    ** the first byte of the range. */
    ovlp.Offset-=i;
    for(i=0; i<nBytes; i++, ovlp.Offset++){
      UnlockFileEx(pFile->hFile.h, 0, 1, 0, &ovlp);
    }
  }
  /* Map the Win32 success flag onto an SQLite result code */
  rc = (rc!=0) ? SQLITE_OK : SQLITE_BUSY;

  /* Update the global lock state and do debug tracing */
#ifdef SQLITE_DEBUG
  OSTRACE(("SHM-LOCK %d ", pFile->hFile.h));
  if( rc==SQLITE_OK ){
    if( lockType==_SHM_UNLCK ){
      OSTRACE(("unlock ok"));
      pFile->exclMask &= ~lockMask;
      pFile->sharedMask &= ~lockMask;
    }else if( lockType==_SHM_RDLCK ){
      OSTRACE(("read-lock ok"));
      pFile->exclMask &= ~lockMask;
      pFile->sharedMask |= lockMask;
    }else{
      assert( lockType==_SHM_WRLCK );
      OSTRACE(("write-lock ok"));
      pFile->exclMask |= lockMask;
      pFile->sharedMask &= ~lockMask;
    }
  }else{
    if( lockType==_SHM_UNLCK ){
      OSTRACE(("unlock failed"));
    }else if( lockType==_SHM_RDLCK ){
      OSTRACE(("read-lock failed"));
    }else{
      assert( lockType==_SHM_WRLCK );
      OSTRACE(("write-lock failed"));
    }
  }
  OSTRACE((" - change requested %s - afterwards %s:%s\n",
           winShmLockString(lockMask),
           winShmLockString(pFile->sharedMask),
           winShmLockString(pFile->exclMask)));
#endif

  return rc;
}

/*
** Release, on behalf of connection p, every lock named in unlockMask.
**
** Bits for locks that p does not actually hold are ignored (and traced).
** The system-level lock is dropped only for those bits on which no other
** connection to the same winShmNode still holds a shared lock.  The
** connection-local masks are cleared only if the system-level unlock
** succeeds (or is not needed).
*/
static int winShmUnlock(
  winShmNode *pFile,   /* The underlying shared-memory file */
  winShm *p,           /* The connection to be unlocked */
  u8 unlockMask         /* Mask of locks to be unlocked */
){
  int rc = SQLITE_OK;  /* Result code */
  winShm *pSib;        /* Iterator over all connections on pFile */
  u8 sibShared = 0;    /* Shared locks held by connections other than p */
  u8 held;             /* Every lock currently held by p */
  u8 sysMask;          /* Locks to drop at the system level */

  /* Access to the winShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pFile->mutex) );

  /* Never try to unlock something this connection has not locked */
  held = p->exclMask | p->sharedMask;
  if( (unlockMask & held)!=unlockMask ){
    OSTRACE(("SHM-LOCK %d unlocking more than we have locked - requested %s - have %s\n",
             pFile->hFile.h,
             winShmLockString(unlockMask),
             winShmLockString(held)));
    unlockMask &= held;
  }

  /* Gather the shared locks held by the other connections */
  for(pSib=pFile->pFirst; pSib; pSib=pSib->pNext){
    if( pSib!=p ){
      assert( (pSib->exclMask & held)==0 );
      sibShared |= pSib->sharedMask;
    }
  }

  /* Drop the system-level locks nobody else is sharing */
  sysMask = unlockMask & ~sibShared;
  if( sysMask!=0 ){
    rc = winShmSystemLock(pFile, _SHM_UNLCK, sysMask);
  }

  /* Clear the connection-local record of the locks */
  if( rc==SQLITE_OK ){
    p->exclMask &= ~unlockMask;
    p->sharedMask &= ~unlockMask;
  }
  return rc;
}

/*
** Obtain shared (reader) locks for connection p on every lock named in
** readMask.
**
** Returns SQLITE_BUSY at once if another connection to the same
** winShmNode holds an exclusive lock on any requested bit.  A
** system-level lock is requested only when at least one requested bit is
** not already share-locked by another connection.
*/
static int winShmSharedLock(
  winShmNode *pFile,   /* The underlying shared-memory file */
  winShm *p,           /* The connection to get the shared locks */
  u8 readMask           /* Mask of shared locks to be acquired */
){
  int rc = SQLITE_OK;  /* Result code */
  winShm *pSib;        /* Iterator over all connections on pFile */
  u8 sibShared = 0;    /* Shared locks held by connections other than p */

  /* Access to the winShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pFile->mutex) );

  /* Scan the other connections: fail on a conflicting exclusive lock,
  ** otherwise remember which shared locks are already in place. */
  for(pSib=pFile->pFirst; pSib; pSib=pSib->pNext){
    if( pSib!=p ){
      if( (pSib->exclMask & readMask)!=0 ) return SQLITE_BUSY;
      sibShared |= pSib->sharedMask;
    }
  }

  /* Go to the system only if some requested bit is not yet shared */
  if( (readMask & ~sibShared)!=0 ){
    rc = winShmSystemLock(pFile, _SHM_RDLCK, readMask);
  }

  /* Record the shared locks against this connection */
  if( rc==SQLITE_OK ){
    p->sharedMask |= readMask;
  }
  return rc;
}

/*
** Obtain, for connection p, an exclusive lock on every lock named in
** writeMask.
**
** Returns SQLITE_BUSY at once if another connection to the same
** winShmNode holds any lock, shared or exclusive, on a requested bit.
** Otherwise the result of the system-level lock request is returned,
** and on success the requested bits move from p's shared mask to its
** exclusive mask.
*/
static int winShmExclusiveLock(
  winShmNode *pFile,    /* The underlying shared-memory file */
  winShm *p,            /* The connection to get the exclusive locks */
  u8 writeMask           /* Mask of exclusive locks to be acquired */
){
  int rc;               /* Result code */
  winShm *pSib;         /* Iterator over all connections on pFile */

  /* Access to the winShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pFile->mutex) );

  /* Any lock of either kind held by another connection is a conflict */
  for(pSib=pFile->pFirst; pSib; pSib=pSib->pNext){
    if( pSib!=p && ((pSib->exclMask | pSib->sharedMask) & writeMask)!=0 ){
      return SQLITE_BUSY;
    }
  }

  /* Take the system-level lock; bail out unchanged on failure */
  rc = winShmSystemLock(pFile, _SHM_WRLCK, writeMask);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* Mark the local connection as holding the locks exclusively */
  p->sharedMask &= ~writeMask;
  p->exclMask |= writeMask;
  return SQLITE_OK;
}

/*
** Purge the winShmNodeList list of all entries with winShmNode.nRef==0.
**
** This is not a VFS shared-memory method; it is a utility function called







<


<
<






<
<
<
<
<


<
<
<
|
<
|

<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<







>
|



<
<

<





<
<
<
<
<
<
|
<
<
<
<
<



<
<

|
<
<
<
<
<
<
<
<
<
<
<



<
|
<
<

<
<
<
<
|
<
<






<
<
<
<
|
<


<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1247
1248
1249
1250
1251
1252
1253

1254
1255


1256
1257
1258
1259
1260
1261





1262
1263



1264

1265
1266

1267






























1268






1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280


1281

1282
1283
1284
1285
1286






1287





1288
1289
1290


1291
1292











1293
1294
1295

1296


1297




1298


1299
1300
1301
1302
1303
1304




1305

1306
1307





























































































































































1308
1309
1310
1311
1312
1313
1314
**
** The winShm.pFile element is initialized when the object is created
** and is read-only thereafter.
*/
struct winShm {
  winShmNode *pShmNode;      /* The underlying winShmNode object */
  winShm *pNext;             /* Next winShm with the same winShmNode */

  u8 hasMutex;               /* True if holding the winShmNode mutex */
  u8 hasMutexBuf;            /* True if holding pShmNode->mutexBuf */


#ifdef SQLITE_DEBUG
  u8 id;                     /* Id of this connection with its winShmNode;
                             ** assigned from winShmNode.nextShmId */
#endif
};

/*





** Constants used for locking
*/



#define WIN_SHM_BASE   ((18+SQLITE_SHM_NLOCK)*4)        /* first lock byte */

/* Byte offset handed to winShmSystemLock() for the dead-man-switch lock;
** it lies SQLITE_SHM_NLOCK bytes past WIN_SHM_BASE. */
#define WIN_SHM_DMS    (WIN_SHM_BASE+SQLITE_SHM_NLOCK)  /* deadman switch */


/*






























** Apply advisory locks for all n bytes beginning at ofst.






*/
#define _SHM_UNLCK  1
#define _SHM_RDLCK  2
#define _SHM_WRLCK  3
static int winShmSystemLock(
  winShmNode *pFile,    /* Apply locks to this open shared-memory segment */
  int lockType,         /* _SHM_UNLCK, _SHM_RDLCK, or _SHM_WRLCK */
  int ofst,             /* Offset to first byte to be locked/unlocked */
  int nByte             /* Number of bytes to lock or unlock */
){
  OVERLAPPED ovlp;                             /* Carries the start offset */
  DWORD dwFlags = LOCKFILE_FAIL_IMMEDIATELY;   /* Never wait for a lock */
  int isUnlock = (lockType==_SHM_UNLCK);       /* True when releasing */
  int ok;                                      /* Win32 success flag */

  /* Access to the winShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pFile->mutex) || pFile->nRef==0 );

  /* A write lock is an exclusive lock; a read lock is a shared one */
  if( lockType==_SHM_WRLCK ){
    dwFlags |= LOCKFILE_EXCLUSIVE_LOCK;
  }

  /* The byte range starts at ofst and is nByte bytes long */
  memset(&ovlp, 0, sizeof(ovlp));
  ovlp.Offset = ofst;

  /* Release/Acquire the system-level lock in a single call */
  if( isUnlock ){
    ok = UnlockFileEx(pFile->hFile.h, 0, nByte, 0, &ovlp);
  }else{
    ok = LockFileEx(pFile->hFile.h, dwFlags, 0, nByte, 0, &ovlp);
  }
  if( ok ){
    return SQLITE_OK;
  }

  /* Any failure is reported to the caller as SQLITE_BUSY */
  OSTRACE(("SHM-LOCK %d %s ERROR 0x%08lx\n", 
           pFile->hFile.h,
           isUnlock ? "UnlockFileEx" : "LockFileEx",
           GetLastError()));
  return SQLITE_BUSY;
}

/*
** Purge the winShmNodeList list of all entries with winShmNode.nRef==0.
**
** This is not a VFS shared-memory method; it is a utility function called
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686

1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
      rc = SQLITE_CANTOPEN_BKPT;
      goto shm_open_err;
    }

    /* Check to see if another process is holding the dead-man switch.
    ** If not, truncate the file to zero length. 
    */
    if( winShmSystemLock(pShmNode, _SHM_WRLCK, WIN_SHM_DMS)==SQLITE_OK ){
      rc = winTruncate((sqlite3_file *)&pShmNode->hFile, 0);
    }
    if( rc==SQLITE_OK ){

      rc = winShmSystemLock(pShmNode, _SHM_RDLCK, WIN_SHM_DMS);
    }
    if( rc ) goto shm_open_err;
  }

  /* Make the new connection a child of the winShmNode */
  p->pShmNode = pShmNode;
  p->pNext = pShmNode->pFirst;
#ifdef SQLITE_DEBUG
  p->id = pShmNode->nextShmId++;
#endif
  pShmNode->pFirst = p;
  pShmNode->nRef++;
  pDbFd->pShm = p;
  winShmLeaveMutex();
  return SQLITE_OK;

  /* Jump here on any error */
shm_open_err:
  winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS);
  winShmPurge();                 /* This call frees pShmNode if required */
  sqlite3_free(p);
  sqlite3_free(pNew);
  winShmLeaveMutex();
  return rc;
}








|



>
|


















|







1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
      rc = SQLITE_CANTOPEN_BKPT;
      goto shm_open_err;
    }

    /* Check to see if another process is holding the dead-man switch.
    ** If not, truncate the file to zero length. 
    */
    if( winShmSystemLock(pShmNode, _SHM_WRLCK, WIN_SHM_DMS, 1)==SQLITE_OK ){
      rc = winTruncate((sqlite3_file *)&pShmNode->hFile, 0);
    }
    if( rc==SQLITE_OK ){
      winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS, 1);
      rc = winShmSystemLock(pShmNode, _SHM_RDLCK, WIN_SHM_DMS, 1);
    }
    if( rc ) goto shm_open_err;
  }

  /* Make the new connection a child of the winShmNode */
  p->pShmNode = pShmNode;
  p->pNext = pShmNode->pFirst;
#ifdef SQLITE_DEBUG
  p->id = pShmNode->nextShmId++;
#endif
  pShmNode->pFirst = p;
  pShmNode->nRef++;
  pDbFd->pShm = p;
  winShmLeaveMutex();
  return SQLITE_OK;

  /* Jump here on any error */
shm_open_err:
  winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS, 1);
  winShmPurge();                 /* This call frees pShmNode if required */
  sqlite3_free(p);
  sqlite3_free(pNew);
  winShmLeaveMutex();
  return rc;
}

1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
  winShmNode *pShmNode; /* The underlying shared-memory file */
  winShm **pp;          /* For looping over sibling connections */

  pDbFd = (winFile*)fd;
  p = pDbFd->pShm;
  pShmNode = p->pShmNode;

  /* Verify that the connection being closed holds no locks */
  assert( p->exclMask==0 );
  assert( p->sharedMask==0 );

  /* Remove connection p from the set of connections associated
  ** with pShmNode */
  sqlite3_mutex_enter(pShmNode->mutex);
  for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){}
  *pp = p->pNext;

  /* Free the connection p */







<
<
<
<







1477
1478
1479
1480
1481
1482
1483




1484
1485
1486
1487
1488
1489
1490
  winShmNode *pShmNode; /* The underlying shared-memory file */
  winShm **pp;          /* For looping over sibling connections */

  pDbFd = (winFile*)fd;
  p = pDbFd->pShm;
  pShmNode = p->pShmNode;





  /* Remove connection p from the set of connections associated
  ** with pShmNode */
  sqlite3_mutex_enter(pShmNode->mutex);
  for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){}
  *pp = p->pNext;

  /* Free the connection p */
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
  winShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_OK;

  *pNewSize = 0;
  if( reqSize>=0 ){
    sqlite3_int64 sz;
    rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz);
    if( SQLITE_OK==rc ){
      reqSize = (reqSize + SQLITE_WIN_SHM_INCR - 1)/SQLITE_WIN_SHM_INCR;
      reqSize *= SQLITE_WIN_SHM_INCR;
      if( reqSize>sz ){
        rc = winTruncate((sqlite3_file *)&pShmNode->hFile, reqSize);
      }
    }
  }
  if( SQLITE_OK==rc ){
    sqlite3_int64 sz;
    rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz);
    if( SQLITE_OK==rc ){
      *pNewSize = (int)sz;







|
<
<
<
|
<







1527
1528
1529
1530
1531
1532
1533
1534



1535

1536
1537
1538
1539
1540
1541
1542
  winShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_OK;

  *pNewSize = 0;
  if( reqSize>=0 ){
    sqlite3_int64 sz;
    rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz);
    if( SQLITE_OK==rc && reqSize>sz ){



      rc = winTruncate((sqlite3_file *)&pShmNode->hFile, reqSize);

    }
  }
  if( SQLITE_OK==rc ){
    sqlite3_int64 sz;
    rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz);
    if( SQLITE_OK==rc ){
      *pNewSize = (int)sz;
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
** *ppBuf and *pNewMapSize might be NULL and zero if no space has
** yet been allocated to the underlying storage.
*/
static int winShmGet(
  sqlite3_file *fd,        /* The database file holding the shared memory */
  int reqMapSize,          /* Requested size of mapping. -1 means don't care */
  int *pNewMapSize,        /* Write new size of mapping here */
  void **ppBuf             /* Write mapping buffer origin here */
){
  winFile *pDbFd = (winFile*)fd;
  winShm *p = pDbFd->pShm;
  winShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_OK;

  if( p->lockState!=SQLITE_SHM_CHECKPOINT && p->hasMutexBuf==0 ){
    assert( sqlite3_mutex_notheld(pShmNode->mutex) );
    sqlite3_mutex_enter(pShmNode->mutexBuf);
    p->hasMutexBuf = 1;
  }
  sqlite3_mutex_enter(pShmNode->mutex);
  if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){
    int actualSize;







|






|







1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
** *ppBuf and *pNewMapSize might be NULL and zero if no space has
** yet been allocated to the underlying storage.
*/
static int winShmGet(
  sqlite3_file *fd,        /* The database file holding the shared memory */
  int reqMapSize,          /* Requested size of mapping. -1 means don't care */
  int *pNewMapSize,        /* Write new size of mapping here */
  void volatile **ppBuf    /* Write mapping buffer origin here */
){
  winFile *pDbFd = (winFile*)fd;
  winShm *p = pDbFd->pShm;
  winShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_OK;

  if( p->hasMutexBuf==0 ){
    assert( sqlite3_mutex_notheld(pShmNode->mutex) );
    sqlite3_mutex_enter(pShmNode->mutexBuf);
    p->hasMutexBuf = 1;
  }
  sqlite3_mutex_enter(pShmNode->mutex);
  if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){
    int actualSize;
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953

1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
** If the shared-memory object is in lock state RECOVER, then we do not
** really want to release the lock, so in that case too, this routine
** is a no-op.
*/
static int winShmRelease(sqlite3_file *fd){
  winShm *p = ((winFile*)fd)->pShm;   /* Connection's shared-memory handle */
  winShmNode *pShmNode;               /* Node that owns mutexBuf */

  /* Nothing to do unless mutexBuf is held; and while in the RECOVER
  ** lock state the mutex is deliberately kept. */
  if( !p->hasMutexBuf || p->lockState==SQLITE_SHM_RECOVER ){
    return SQLITE_OK;
  }

  pShmNode = p->pShmNode;
  assert( sqlite3_mutex_notheld(pShmNode->mutex) );
  sqlite3_mutex_leave(pShmNode->mutexBuf);
  p->hasMutexBuf = 0;
  return SQLITE_OK;
}

/*
** Symbolic names for LOCK states used for debugging.
**
** The table is indexed by a lock-state value in trace output, e.g.
** azLkName[p->lockState] and azLkName[desiredLock].
** NOTE(review): the entry order presumably mirrors the numeric values of
** the SQLITE_SHM_xxx lock states - confirm against their definitions.
*/
#ifdef SQLITE_DEBUG
static const char *azLkName[] = {
  "UNLOCK",
  "READ",
  "READ_FULL",
  "WRITE",
  "PENDING",
  "CHECKPOINT",
  "RECOVER"
};
#endif


/*
** Change the lock state for a shared-memory segment.
**
** The connection's current state is tracked in p->lockState.  The caller
** asks for desiredLock; on return *pGotLock (if pGotLock is not NULL) is
** set to the state the connection actually ended up in, which may differ
** from the one requested (for example READ_FULL when READ was requested,
** or PENDING when CHECKPOINT was requested).
**
** Returns SQLITE_OK when the connection is already in the requested state
** or the transition succeeded.  Otherwise the result code of the failed
** low-level lock call is returned.  rc starts out as SQLITE_PROTOCOL, which
** is what is returned if no branch below assigns it (e.g. a CHECKPOINT
** request from a state other than UNLOCK or PENDING in a non-debug build).
*/
static int winShmLock(
  sqlite3_file *fd,          /* Database holding the shared memory */
  int desiredLock,           /* One of SQLITE_SHM_xxxxx locking states */

  int *pGotLock              /* The lock you actually got */
){
  winFile *pDbFd = (winFile*)fd;
  winShm *p = pDbFd->pShm;
  winShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_PROTOCOL;

  /* Note that SQLITE_SHM_READ_FULL and SQLITE_SHM_PENDING are never
  ** directly requested; they are side effects from requesting
  ** SQLITE_SHM_READ and SQLITE_SHM_CHECKPOINT, respectively.
  */
  assert( desiredLock==SQLITE_SHM_UNLOCK
       || desiredLock==SQLITE_SHM_READ
       || desiredLock==SQLITE_SHM_WRITE
       || desiredLock==SQLITE_SHM_CHECKPOINT
       || desiredLock==SQLITE_SHM_RECOVER );

  /* Return directly if this is just a lock state query, or if
  ** the connection is already in the desired locking state.
  ** A READ request is also considered satisfied by READ_FULL.
  */
  if( desiredLock==p->lockState
   || (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL)
  ){
    OSTRACE(("SHM-LOCK %d shmid-%d, pid-%d request %s and got %s\n",
             pShmNode->hFile.h,
             p->id, (int)GetCurrentProcessId(), azLkName[desiredLock],
             azLkName[p->lockState]));
    if( pGotLock ) *pGotLock = p->lockState;
    return SQLITE_OK;
  }

  OSTRACE(("SHM-LOCK %d shmid-%d, pid-%d request %s->%s\n",
           pShmNode->hFile.h,
           p->id, (int)GetCurrentProcessId(), azLkName[p->lockState], 
           azLkName[desiredLock]));
  
  /* The RECOVER branch below asserts that mutexBuf is held, so take it
  ** here if this connection does not already own it.  It is acquired
  ** before pShmNode->mutex.
  */
  if( desiredLock==SQLITE_SHM_RECOVER && !p->hasMutexBuf ){
    assert( sqlite3_mutex_notheld(pShmNode->mutex) );
    sqlite3_mutex_enter(pShmNode->mutexBuf);
    p->hasMutexBuf = 1;
  }
  sqlite3_mutex_enter(pShmNode->mutex);
  switch( desiredLock ){
    case SQLITE_SHM_UNLOCK: {
      /* Drop every lock (A, B, C and D) held by this connection. */
      assert( p->lockState!=SQLITE_SHM_RECOVER );
      winShmUnlock(pShmNode, p, WIN_SHM_A|WIN_SHM_B|WIN_SHM_C|WIN_SHM_D);
      rc = SQLITE_OK;
      p->lockState = SQLITE_SHM_UNLOCK;
      break;
    }
    case SQLITE_SHM_READ: {
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        /* Starting from UNLOCK: make up to 5 attempts.  Each attempt
        ** first tries a shared lock on A|B; if that is busy it falls
        ** back to a shared lock on D, which yields READ_FULL.
        */
        int nAttempt;
        rc = SQLITE_BUSY;
        assert( p->lockState==SQLITE_SHM_UNLOCK );
        for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){
          rc = winShmSharedLock(pShmNode, p, WIN_SHM_A|WIN_SHM_B);
          if( rc==SQLITE_BUSY ){
            rc = winShmSharedLock(pShmNode, p, WIN_SHM_D);
            if( rc==SQLITE_OK ){
              p->lockState = SQLITE_SHM_READ_FULL;
            }
          }else{
            /* NOTE(review): this branch runs for any rc other than
            ** SQLITE_BUSY, not only SQLITE_OK - confirm that
            ** winShmSharedLock() can only return OK or BUSY. */
            winShmUnlock(pShmNode, p, WIN_SHM_B);
            p->lockState = SQLITE_SHM_READ;
          }
        }
      }else{
        /* Downgrade from WRITE or RECOVER: take a shared lock on A and
        ** release C and D.
        ** NOTE(review): lockState becomes READ regardless of rc. */
       assert( p->lockState==SQLITE_SHM_WRITE
               || p->lockState==SQLITE_SHM_RECOVER );
        rc = winShmSharedLock(pShmNode, p, WIN_SHM_A);
        winShmUnlock(pShmNode, p, WIN_SHM_C|WIN_SHM_D);
        p->lockState = SQLITE_SHM_READ;
      }
      break;
    }
    case SQLITE_SHM_WRITE: {
      /* Upgrade from a read state by taking C and D exclusively. */
      assert( p->lockState==SQLITE_SHM_READ 
              || p->lockState==SQLITE_SHM_READ_FULL );
      rc = winShmExclusiveLock(pShmNode, p, WIN_SHM_C|WIN_SHM_D);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_WRITE;
      }
      break;
    }
    case SQLITE_SHM_CHECKPOINT: {
      /* Two steps: UNLOCK -> PENDING (exclusive B|C), then
      ** PENDING -> CHECKPOINT (exclusive A).  If only the first step
      ** succeeds the connection is left in PENDING and the error from
      ** the second step is returned.
      */
      assert( p->lockState==SQLITE_SHM_UNLOCK
           || p->lockState==SQLITE_SHM_PENDING
      );
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        rc = winShmExclusiveLock(pShmNode, p, WIN_SHM_B|WIN_SHM_C);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_PENDING;
        }
      }
      if( p->lockState==SQLITE_SHM_PENDING ){
        rc = winShmExclusiveLock(pShmNode, p, WIN_SHM_A);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_CHECKPOINT;
        }
      }
      break;
    }
    default: {
      /* RECOVER: only reachable from a read state, with mutexBuf held.
      ** Takes C exclusively. */
      assert( desiredLock==SQLITE_SHM_RECOVER );
      assert( p->lockState==SQLITE_SHM_READ
           || p->lockState==SQLITE_SHM_READ_FULL
      );
      assert( sqlite3_mutex_held(pShmNode->mutexBuf) );
      rc = winShmExclusiveLock(pShmNode, p, WIN_SHM_C);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_RECOVER;
      }
      break;
    }
  }
  sqlite3_mutex_leave(pShmNode->mutex);
  OSTRACE(("SHM-LOCK %d shmid-%d, pid-%d got %s\n",
           pShmNode->hFile.h, 
           p->id, (int)GetCurrentProcessId(), azLkName[p->lockState]));
  /* Report the state actually reached, even when rc is an error. */
  if( pGotLock ) *pGotLock = p->lockState;
  return rc;
}

/*
** Implement a memory barrier or memory fence on shared memory.  
**
** All loads and stores begun before the barrier must complete before







|







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<





|
|
>
|






<
<
|
<
|
|
|
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
|
<
<
<
<
|

<
|
<
|
<
<
<
<
|
<
<
<
<
<
<
<
|
<
<
<
|
|
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

|
<
|
<







1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
















1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690


1691

1692
1693
1694


1695













1696




1697




1698
1699

1700

1701




1702







1703



1704
1705

1706


















































1707
1708

1709

1710
1711
1712
1713
1714
1715
1716
** If the shared-memory object is in lock state RECOVER, then we do not
** really want to release the lock, so in that case too, this routine
** is a no-op.
*/
static int winShmRelease(sqlite3_file *fd){
  winShm *pConn = ((winFile*)fd)->pShm;
  winShmNode *pNode;

  /* Nothing to release unless this connection holds the buffer mutex. */
  if( !pConn->hasMutexBuf ){
    return SQLITE_OK;
  }

  pNode = pConn->pShmNode;
  assert( sqlite3_mutex_notheld(pNode->mutex) );
  sqlite3_mutex_leave(pNode->mutexBuf);
  pConn->hasMutexBuf = 0;
  return SQLITE_OK;
}

















/*
** Acquire or release a range of locks on a shared-memory segment.
**
** The range covers n locks starting at index ofst.  flags must be exactly
** one of SQLITE_SHM_LOCK or SQLITE_SHM_UNLOCK combined with exactly one of
** SQLITE_SHM_SHARED or SQLITE_SHM_EXCLUSIVE.  A range of more than one
** lock is only permitted together with SQLITE_SHM_EXCLUSIVE.
**
** The return value is whatever winShmSystemLock() reports for the
** requested operation.
*/
static int winShmLock(
  sqlite3_file *fd,          /* Database file holding the shared memory */
  int ofst,                  /* First lock to acquire or release */
  int n,                     /* Number of locks to acquire or release */
  int flags                  /* What to do with the lock */
){
  winShm *pConn = ((winFile*)fd)->pShm;
  winShmNode *pNode = pConn->pShmNode;
  int lockType;              /* One of _SHM_UNLCK, _SHM_RDLCK, _SHM_WRLCK */
  int rc;                    /* Result of the system lock call */

  assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
  assert( n>=1 );
  assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
  assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );

  /* Unlock takes priority; otherwise SHARED selects a read lock and
  ** anything else is an exclusive (write) lock. */
  if( flags & SQLITE_SHM_UNLOCK ){
    lockType = _SHM_UNLCK;
  }else if( flags & SQLITE_SHM_SHARED ){
    lockType = _SHM_RDLCK;
  }else{
    lockType = _SHM_WRLCK;
  }

  sqlite3_mutex_enter(pNode->mutex);
  rc = winShmSystemLock(pNode, lockType, ofst+WIN_SHM_BASE, n);
  sqlite3_mutex_leave(pNode->mutex);

  OSTRACE(("SHM-LOCK shmid-%d, pid-%d %s\n",
           pConn->id, (int)GetCurrentProcessId(), rc ? "failed" : "ok"));
  return rc;
}

/*
** Implement a memory barrier or memory fence on shared memory.  
**
** All loads and stores begun before the barrier must complete before

Changes to src/pager.c.

1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
  return (pPager->pWal!=0);
}
#else
# define pagerUseWal(x) 0
# define pagerRollbackWal(x) 0
# define pagerWalFrames(v,w,x,y,z) 0
# define pagerOpenWalIfPresent(z) SQLITE_OK
# define pagerOpenSnapshot(z) SQLITE_OK
#endif

/*
** Unlock the database file. This function is a no-op if the pager
** is in exclusive mode.
**
** If the pager is currently in error state, discard the contents of 







|







1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
  return (pPager->pWal!=0);
}
#else
# define pagerUseWal(x) 0
# define pagerRollbackWal(x) 0
# define pagerWalFrames(v,w,x,y,z) 0
# define pagerOpenWalIfPresent(z) SQLITE_OK
# define pagerBeginReadTransaction(z) SQLITE_OK
#endif

/*
** Unlock the database file. This function is a no-op if the pager
** is in exclusive mode.
**
** If the pager is currently in error state, discard the contents of 
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
    ** this happens.  One can argue that this doesn't need to be cleared
    ** until the change-counter check fails in PagerSharedLock().
    ** Clearing the page size cache here is being conservative.
    */
    pPager->dbSizeValid = 0;

    if( pagerUseWal(pPager) ){
      sqlite3WalCloseSnapshot(pPager->pWal);
    }else{
      rc = osUnlock(pPager->fd, NO_LOCK);
    }
    if( rc ){
      pPager->errCode = rc;
    }
    IOTRACE(("UNLOCK %p\n", pPager))







|







1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
    ** this happens.  One can argue that this doesn't need to be cleared
    ** until the change-counter check fails in PagerSharedLock().
    ** Clearing the page size cache here is being conservative.
    */
    pPager->dbSizeValid = 0;

    if( pagerUseWal(pPager) ){
      sqlite3WalEndReadTransaction(pPager->pWal);
    }else{
      rc = osUnlock(pPager->fd, NO_LOCK);
    }
    if( rc ){
      pPager->errCode = rc;
    }
    IOTRACE(("UNLOCK %p\n", pPager))
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
  }
  sqlite3BitvecDestroy(pPager->pInJournal);
  pPager->pInJournal = 0;
  pPager->nRec = 0;
  sqlite3PcacheCleanAll(pPager->pPCache);

  if( pagerUseWal(pPager) ){
    rc2 = sqlite3WalWriteLock(pPager->pWal, 0);
    pPager->state = PAGER_SHARED;

    /* If the connection was in locking_mode=exclusive mode but is no longer,
    ** drop the EXCLUSIVE lock held on the database file.
    */
    if( rc2==SQLITE_OK 
     && !pPager->exclusiveMode 
     && sqlite3WalExclusiveMode(pPager->pWal, -1) 
    ){
      sqlite3WalExclusiveMode(pPager->pWal, 0);
      rc2 = osUnlock(pPager->fd, SHARED_LOCK);
    }
  }else if( !pPager->exclusiveMode ){
    rc2 = osUnlock(pPager->fd, SHARED_LOCK);
    pPager->state = PAGER_SHARED;
    pPager->changeCountDone = 0;
  }else if( pPager->state==PAGER_SYNCED ){







|







|

<







1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449

1450
1451
1452
1453
1454
1455
1456
  }
  sqlite3BitvecDestroy(pPager->pInJournal);
  pPager->pInJournal = 0;
  pPager->nRec = 0;
  sqlite3PcacheCleanAll(pPager->pPCache);

  if( pagerUseWal(pPager) ){
    rc2 = sqlite3WalEndWriteTransaction(pPager->pWal);
    pPager->state = PAGER_SHARED;

    /* If the connection was in locking_mode=exclusive mode but is no longer,
    ** drop the EXCLUSIVE lock held on the database file.
    */
    if( rc2==SQLITE_OK 
     && !pPager->exclusiveMode 
     && sqlite3WalExclusiveMode(pPager->pWal, 0) 
    ){

      rc2 = osUnlock(pPager->fd, SHARED_LOCK);
    }
  }else if( !pPager->exclusiveMode ){
    rc2 = osUnlock(pPager->fd, SHARED_LOCK);
    pPager->state = PAGER_SHARED;
    pPager->changeCountDone = 0;
  }else if( pPager->state==PAGER_SYNCED ){
2358
2359
2360
2361
2362
2363
2364




2365

2366
2367
2368
2369
2370
2371
2372







2373
2374
2375
2376
2377
2378
2379
2380
      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
    }
  }
  return rc;
}

/*




** Open a WAL snapshot on the log file this pager is connected to.

*/
static int pagerOpenSnapshot(Pager *pPager){
  int rc;                         /* Return code */
  int changed = 0;                /* True if cache must be reset */

  assert( pagerUseWal(pPager) );








  rc = sqlite3WalOpenSnapshot(pPager->pWal, &changed);
  if( rc==SQLITE_OK ){
    int dummy;
    if( changed ){
      pager_reset(pPager);
      assert( pPager->errCode || pPager->dbSizeValid==0 );
    }
    rc = sqlite3PagerPagecount(pPager, &dummy);







>
>
>
>
|
>

|





>
>
>
>
>
>
>
|







2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
    }
  }
  return rc;
}

/*
** Begin a read transaction on the WAL.
**
** This routine used to be called "pagerOpenSnapshot()" because it essentially
** makes a snapshot of the database at the current point in time and preserves
** that snapshot for use by the reader in spite of concurrent changes by
** other writers or checkpointers.
*/
static int pagerBeginReadTransaction(Pager *pPager){
  int rc;                         /* Return code */
  int changed = 0;                /* True if cache must be reset */

  assert( pagerUseWal(pPager) );

  /* sqlite3WalEndReadTransaction() was not called for the previous
  ** transaction in locking_mode=EXCLUSIVE.  So call it now.  If we
  ** are in locking_mode=NORMAL and EndRead() was previously called,
  ** the duplicate call is harmless.
  */
  sqlite3WalEndReadTransaction(pPager->pWal);

  rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed);
  if( rc==SQLITE_OK ){
    int dummy;
    if( changed ){
      pager_reset(pPager);
      assert( pPager->errCode || pPager->dbSizeValid==0 );
    }
    rc = sqlite3PagerPagecount(pPager, &dummy);
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
    int isWal;                    /* True if WAL file exists */
    rc = pagerHasWAL(pPager, &isWal);
    if( rc==SQLITE_OK ){
      if( isWal ){
        pager_reset(pPager);
        rc = sqlite3PagerOpenWal(pPager, 0);
        if( rc==SQLITE_OK ){
          rc = pagerOpenSnapshot(pPager);
        }
      }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
        pPager->journalMode = PAGER_JOURNALMODE_DELETE;
      }
    }
  }
  return rc;







|







2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
    int isWal;                    /* True if WAL file exists */
    rc = pagerHasWAL(pPager, &isWal);
    if( rc==SQLITE_OK ){
      if( isWal ){
        pager_reset(pPager);
        rc = sqlite3PagerOpenWal(pPager, 0);
        if( rc==SQLITE_OK ){
          rc = pagerBeginReadTransaction(pPager);
        }
      }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
        pPager->journalMode = PAGER_JOURNALMODE_DELETE;
      }
    }
  }
  return rc;
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
      isErrorReset = 1;
    }
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }

  if( pagerUseWal(pPager) ){
    rc = pagerOpenSnapshot(pPager);
  }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs * const pVfs = pPager->pVfs;
    int isHotJournal = 0;
    assert( !MEMDB );
    assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
    if( pPager->noReadlock ){
      assert( pPager->readOnly );







|







4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
      isErrorReset = 1;
    }
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }

  if( pagerUseWal(pPager) ){
    rc = pagerBeginReadTransaction(pPager);
  }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs * const pVfs = pPager->pVfs;
    int isHotJournal = 0;
    assert( !MEMDB );
    assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
    if( pPager->noReadlock ){
      assert( pPager->readOnly );
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
    assert( pPager->pInJournal==0 );
    assert( !MEMDB && !pPager->tempFile );

    if( pagerUseWal(pPager) ){
      /* If the pager is configured to use locking_mode=exclusive, and an
      ** exclusive lock on the database is not already held, obtain it now.
      */
      if( pPager->exclusiveMode && !sqlite3WalExclusiveMode(pPager->pWal, -1) ){
        rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
        pPager->state = PAGER_SHARED;
        if( rc!=SQLITE_OK ){
          return rc;
        }
        sqlite3WalExclusiveMode(pPager->pWal, 1);
      }

      /* Grab the write lock on the log file. If successful, upgrade to
      ** PAGER_RESERVED state. Otherwise, return an error code to the caller.
      ** The busy-handler is not invoked if another connection already
      ** holds the write-lock. If possible, the upper layer will call it.
      **
      ** WAL mode sets Pager.state to PAGER_RESERVED when it has an open
      ** transaction, but never to PAGER_EXCLUSIVE. This is because in 
      ** PAGER_EXCLUSIVE state the code to roll back savepoint transactions
      ** may copy data from the sub-journal into the database file as well
      ** as into the page cache. Which would be incorrect in WAL mode.
      */
      rc = sqlite3WalWriteLock(pPager->pWal, 1);
      if( rc==SQLITE_OK ){
        pPager->dbOrigSize = pPager->dbSize;
        pPager->state = PAGER_RESERVED;
        pPager->journalOff = 0;
      }

      assert( rc!=SQLITE_OK || pPager->state==PAGER_RESERVED );







|



















|







4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
    assert( pPager->pInJournal==0 );
    assert( !MEMDB && !pPager->tempFile );

    if( pagerUseWal(pPager) ){
      /* If the pager is configured to use locking_mode=exclusive, and an
      ** exclusive lock on the database is not already held, obtain it now.
      */
      if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){
        rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
        pPager->state = PAGER_SHARED;
        if( rc!=SQLITE_OK ){
          return rc;
        }
        sqlite3WalExclusiveMode(pPager->pWal, 1);
      }

      /* Grab the write lock on the log file. If successful, upgrade to
      ** PAGER_RESERVED state. Otherwise, return an error code to the caller.
      ** The busy-handler is not invoked if another connection already
      ** holds the write-lock. If possible, the upper layer will call it.
      **
      ** WAL mode sets Pager.state to PAGER_RESERVED when it has an open
      ** transaction, but never to PAGER_EXCLUSIVE. This is because in 
      ** PAGER_EXCLUSIVE state the code to roll back savepoint transactions
      ** may copy data from the sub-journal into the database file as well
      ** as into the page cache. Which would be incorrect in WAL mode.
      */
      rc = sqlite3WalBeginWriteTransaction(pPager->pWal);
      if( rc==SQLITE_OK ){
        pPager->dbOrigSize = pPager->dbSize;
        pPager->state = PAGER_RESERVED;
        pPager->journalOff = 0;
      }

      assert( rc!=SQLITE_OK || pPager->state==PAGER_RESERVED );
5888
5889
5890
5891
5892
5893
5894
5895
5896
5897
5898
5899
5900
5901
5902
5903
*/
int sqlite3PagerCheckpoint(Pager *pPager){
  u8 *aTmp;                       /* Scratch buffer passed to the WAL layer */

  /* Without an open WAL there is nothing to checkpoint. */
  if( pPager->pWal==0 ){
    return SQLITE_OK;
  }

  aTmp = (u8 *)pPager->pTmpSpace;
  /* Sync flags are forced to 0 when the pager runs with noSync set. */
  return sqlite3WalCheckpoint(pPager->pWal,
      (pPager->noSync ? 0 : pPager->sync_flags),
      pPager->pageSize, aTmp,
      pPager->xBusyHandler, pPager->pBusyHandlerArg
  );
}

int sqlite3PagerWalCallback(Pager *pPager){
  return sqlite3WalCallback(pPager->pWal);







|
<







5899
5900
5901
5902
5903
5904
5905
5906

5907
5908
5909
5910
5911
5912
5913
*/
int sqlite3PagerCheckpoint(Pager *pPager){
  u8 *aTmp;                       /* Scratch buffer passed to the WAL layer */

  /* Without an open WAL there is nothing to checkpoint. */
  if( pPager->pWal==0 ){
    return SQLITE_OK;
  }

  aTmp = (u8 *)pPager->pTmpSpace;
  /* Sync flags are forced to 0 when the pager runs with noSync set. */
  return sqlite3WalCheckpoint(pPager->pWal,
      (pPager->noSync ? 0 : pPager->sync_flags),
      pPager->pageSize, aTmp
  );
}

int sqlite3PagerWalCallback(Pager *pPager){
  return sqlite3WalCallback(pPager->pWal);

Changes to src/sqlite.h.in.

440
441
442
443
444
445
446
447

448
449
450
451
452
453
454
#define SQLITE_IOERR_BLOCKED           (SQLITE_IOERR | (11<<8))
#define SQLITE_IOERR_NOMEM             (SQLITE_IOERR | (12<<8))
#define SQLITE_IOERR_ACCESS            (SQLITE_IOERR | (13<<8))
#define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8))
#define SQLITE_IOERR_LOCK              (SQLITE_IOERR | (15<<8))
#define SQLITE_IOERR_CLOSE             (SQLITE_IOERR | (16<<8))
#define SQLITE_IOERR_DIR_CLOSE         (SQLITE_IOERR | (17<<8))
#define SQLITE_LOCKED_SHAREDCACHE      (SQLITE_LOCKED | (1<<8) )


/*
** CAPI3REF: Flags For File Open Operations
**
** These bit values are intended for use in the
** 3rd parameter to the [sqlite3_open_v2()] interface and
** in the 4th parameter to the xOpen method of the







|
>







440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
#define SQLITE_IOERR_BLOCKED           (SQLITE_IOERR | (11<<8))
#define SQLITE_IOERR_NOMEM             (SQLITE_IOERR | (12<<8))
#define SQLITE_IOERR_ACCESS            (SQLITE_IOERR | (13<<8))
#define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8))
#define SQLITE_IOERR_LOCK              (SQLITE_IOERR | (15<<8))
#define SQLITE_IOERR_CLOSE             (SQLITE_IOERR | (16<<8))
#define SQLITE_IOERR_DIR_CLOSE         (SQLITE_IOERR | (17<<8))
#define SQLITE_LOCKED_SHAREDCACHE      (SQLITE_LOCKED |  (1<<8))
#define SQLITE_BUSY_RECOVERY           (SQLITE_BUSY   |  (1<<8))

/*
** CAPI3REF: Flags For File Open Operations
**
** These bit values are intended for use in the
** 3rd parameter to the [sqlite3_open_v2()] interface and
** in the 4th parameter to the xOpen method of the
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
  int (*xSectorSize)(sqlite3_file*);
  int (*xDeviceCharacteristics)(sqlite3_file*);
  /* Methods above are valid for version 1 */
  int (*xShmOpen)(sqlite3_file*);
  int (*xShmSize)(sqlite3_file*, int reqSize, int *pNewSize);
  int (*xShmGet)(sqlite3_file*, int reqSize, int *pSize, void volatile**);
  int (*xShmRelease)(sqlite3_file*);
  int (*xShmLock)(sqlite3_file*, int desiredLock, int *gotLock);
  void (*xShmBarrier)(sqlite3_file*);
  int (*xShmClose)(sqlite3_file*, int deleteFlag);
  /* Methods above are valid for version 2 */
  /* Additional methods may be added in future releases */
};

/*







|







655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
  int (*xSectorSize)(sqlite3_file*);
  int (*xDeviceCharacteristics)(sqlite3_file*);
  /* Methods above are valid for version 1 */
  int (*xShmOpen)(sqlite3_file*);
  int (*xShmSize)(sqlite3_file*, int reqSize, int *pNewSize);
  int (*xShmGet)(sqlite3_file*, int reqSize, int *pSize, void volatile**);
  int (*xShmRelease)(sqlite3_file*);
  int (*xShmLock)(sqlite3_file*, int offset, int n, int flags);
  void (*xShmBarrier)(sqlite3_file*);
  int (*xShmClose)(sqlite3_file*, int deleteFlag);
  /* Methods above are valid for version 2 */
  /* Additional methods may be added in future releases */
};

/*
884
885
886
887
888
889
890
891
















892
893
894
895
896
897
898








899
900
901
902
903
904
905
906
907
#define SQLITE_ACCESS_EXISTS    0
#define SQLITE_ACCESS_READWRITE 1
#define SQLITE_ACCESS_READ      2

/*
** CAPI3REF: Flags for the xShmLock VFS method
**
** These integer constants define the various locking states that
















** an sqlite3_shm object can be in.
*/
#define SQLITE_SHM_UNLOCK       0
#define SQLITE_SHM_READ         1
#define SQLITE_SHM_READ_FULL    2
#define SQLITE_SHM_WRITE        3
#define SQLITE_SHM_PENDING      4








#define SQLITE_SHM_CHECKPOINT   5
#define SQLITE_SHM_RECOVER      6

/*
** CAPI3REF: Initialize The SQLite Library
**
** ^The sqlite3_initialize() routine initializes the
** SQLite library.  ^The sqlite3_shutdown() routine
** deallocates any resources that were allocated by sqlite3_initialize().







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|

|
|
|
|
|
>
>
>
>
>
>
>
>
|
|







885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
#define SQLITE_ACCESS_EXISTS    0
#define SQLITE_ACCESS_READWRITE 1
#define SQLITE_ACCESS_READ      2

/*
** CAPI3REF: Flags for the xShmLock VFS method
**
** These integer constants define the various locking operations
** allowed by the xShmLock method of [sqlite3_io_methods].  The
** following are the only legal combinations of flags to the
** xShmLock method:
**
** <ul>
** <li>  SQLITE_SHM_LOCK | SQLITE_SHM_SHARED
** <li>  SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE
** <li>  SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED
** <li>  SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE
** </ul>
**
** When unlocking, the same SHARED or EXCLUSIVE flag must be supplied as
** was given on the corresponding lock.  
**
** The xShmLock method can transition between unlocked and SHARED or
** between unlocked and EXCLUSIVE.  It cannot transition between SHARED
** and EXCLUSIVE.
*/
#define SQLITE_SHM_UNLOCK       1
#define SQLITE_SHM_LOCK         2
#define SQLITE_SHM_SHARED       4
#define SQLITE_SHM_EXCLUSIVE    8

/*
** CAPI3REF: Maximum xShmLock index
**
** The xShmLock method on [sqlite3_io_methods] may use values
** between 0 and this upper bound as its "offset" argument.
** The SQLite core will never attempt to acquire or release a
** lock outside of this range.
*/
#define SQLITE_SHM_NLOCK        8


/*
** CAPI3REF: Initialize The SQLite Library
**
** ^The sqlite3_initialize() routine initializes the
** SQLite library.  ^The sqlite3_shutdown() routine
** deallocates any resources that were allocated by sqlite3_initialize().

Changes to src/test1.c.

4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624


4625
4626
4627
4628
4629
4630
4631
4632
4633

4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646







4647
4648
4649
4650
4651
4652
4653
                     Tcl_GetStringFromObj(Tcl_NewIntObj(iArg), 0), " ", 0);
    return TCL_ERROR;
  }
  return TCL_OK;  
}

/*
** tclcmd:   file_control_lockproxy_test DB
**
** This TCL command runs the sqlite3_file_control interface and
** verifies correct operation of the SQLITE_GET_LOCKPROXYFILE and
** SQLITE_SET_LOCKPROXYFILE verbs.
*/
static int file_control_lockproxy_test(
  ClientData clientData, /* Pointer to sqlite3_enable_XXX function */
  Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
  int objc,              /* Number of arguments */
  Tcl_Obj *CONST objv[]  /* Command arguments */
){
  sqlite3 *db;


  
  if( objc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
                     Tcl_GetStringFromObj(objv[0], 0), " DB", 0);
    return TCL_ERROR;
  }
  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){
   return TCL_ERROR;
  }

  
#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
#  if defined(__APPLE__)
#    define SQLITE_ENABLE_LOCKING_STYLE 1
#  else
#    define SQLITE_ENABLE_LOCKING_STYLE 0
#  endif
#endif
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
  {
    char *proxyPath = "test.proxy";
    char *testPath;
    int rc;







    rc = sqlite3_file_control(db, NULL, SQLITE_SET_LOCKPROXYFILE, proxyPath);
    if( rc ){
      Tcl_SetObjResult(interp, Tcl_NewIntObj(rc)); 
      return TCL_ERROR;
    }
    rc = sqlite3_file_control(db, NULL, SQLITE_GET_LOCKPROXYFILE, &testPath);
    if( strncmp(proxyPath,testPath,11) ){







|












>
>

|

|





>










<


>
>
>
>
>
>
>







4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646

4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
                     Tcl_GetStringFromObj(Tcl_NewIntObj(iArg), 0), " ", 0);
    return TCL_ERROR;
  }
  return TCL_OK;  
}

/*
** tclcmd:   file_control_lockproxy_test DB PWD
**
** This TCL command runs the sqlite3_file_control interface and
** verifies correct operation of the SQLITE_GET_LOCKPROXYFILE and
** SQLITE_SET_LOCKPROXYFILE verbs.
*/
static int file_control_lockproxy_test(
  ClientData clientData, /* Pointer to sqlite3_enable_XXX function */
  Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
  int objc,              /* Number of arguments */
  Tcl_Obj *CONST objv[]  /* Command arguments */
){
  sqlite3 *db;
  const char *zPwd;
  int nPwd;
  
  if( objc!=3 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"",
                     Tcl_GetStringFromObj(objv[0], 0), " DB PWD", 0);
    return TCL_ERROR;
  }
  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){
   return TCL_ERROR;
  }
  zPwd = Tcl_GetStringFromObj(objv[2], &nPwd);
  
#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
#  if defined(__APPLE__)
#    define SQLITE_ENABLE_LOCKING_STYLE 1
#  else
#    define SQLITE_ENABLE_LOCKING_STYLE 0
#  endif
#endif
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
  {

    char *testPath;
    int rc;
    char proxyPath[400];
    
    if( sizeof(proxyPath)<nPwd+20 ){
      Tcl_AppendResult(interp, "PWD too big", (void*)0);
      return TCL_ERROR;
    }
    sprintf(proxyPath, "%s/test.proxy", zPwd);
    rc = sqlite3_file_control(db, NULL, SQLITE_SET_LOCKPROXYFILE, proxyPath);
    if( rc ){
      Tcl_SetObjResult(interp, Tcl_NewIntObj(rc)); 
      return TCL_ERROR;
    }
    rc = sqlite3_file_control(db, NULL, SQLITE_GET_LOCKPROXYFILE, &testPath);
    if( strncmp(proxyPath,testPath,11) ){
5132
5133
5134
5135
5136
5137
5138

5139
5140
5141
5142
5143
5144
5145
#if SQLITE_OS_WIN
  extern int sqlite3_os_type;
#endif
#ifdef SQLITE_DEBUG
  extern int sqlite3WhereTrace;
  extern int sqlite3OSTrace;
  extern int sqlite3VdbeAddopTrace;

#endif
#ifdef SQLITE_TEST
  extern char sqlite3_query_plan[];
  static char *query_plan = sqlite3_query_plan;
#ifdef SQLITE_ENABLE_FTS3
  extern int sqlite3_fts3_enable_parentheses;
#endif







>







5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
#if SQLITE_OS_WIN
  extern int sqlite3_os_type;
#endif
#ifdef SQLITE_DEBUG
  extern int sqlite3WhereTrace;
  extern int sqlite3OSTrace;
  extern int sqlite3VdbeAddopTrace;
  extern int sqlite3WalTrace;
#endif
#ifdef SQLITE_TEST
  extern char sqlite3_query_plan[];
  static char *query_plan = sqlite3_query_plan;
#ifdef SQLITE_ENABLE_FTS3
  extern int sqlite3_fts3_enable_parentheses;
#endif
5199
5200
5201
5202
5203
5204
5205


5206
5207
5208
5209
5210
5211
5212
#ifdef SQLITE_DEBUG
  Tcl_LinkVar(interp, "sqlite_addop_trace",
      (char*)&sqlite3VdbeAddopTrace, TCL_LINK_INT);
  Tcl_LinkVar(interp, "sqlite_where_trace",
      (char*)&sqlite3WhereTrace, TCL_LINK_INT);
  Tcl_LinkVar(interp, "sqlite_os_trace",
      (char*)&sqlite3OSTrace, TCL_LINK_INT);


#endif
#ifndef SQLITE_OMIT_DISKIO
  Tcl_LinkVar(interp, "sqlite_opentemp_count",
      (char*)&sqlite3_opentemp_count, TCL_LINK_INT);
#endif
  Tcl_LinkVar(interp, "sqlite_static_bind_value",
      (char*)&sqlite_static_bind_value, TCL_LINK_STRING);







>
>







5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
#ifdef SQLITE_DEBUG
  Tcl_LinkVar(interp, "sqlite_addop_trace",
      (char*)&sqlite3VdbeAddopTrace, TCL_LINK_INT);
  Tcl_LinkVar(interp, "sqlite_where_trace",
      (char*)&sqlite3WhereTrace, TCL_LINK_INT);
  Tcl_LinkVar(interp, "sqlite_os_trace",
      (char*)&sqlite3OSTrace, TCL_LINK_INT);
  Tcl_LinkVar(interp, "sqlite_wal_trace",
      (char*)&sqlite3WalTrace, TCL_LINK_INT);
#endif
#ifndef SQLITE_OMIT_DISKIO
  Tcl_LinkVar(interp, "sqlite_opentemp_count",
      (char*)&sqlite3_opentemp_count, TCL_LINK_INT);
#endif
  Tcl_LinkVar(interp, "sqlite_static_bind_value",
      (char*)&sqlite_static_bind_value, TCL_LINK_STRING);

Changes to src/test6.c.

536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
  void volatile **pp
){
  return sqlite3OsShmGet(((CrashFile*)pFile)->pRealFile, reqSize, pSize, pp);
}
/* Forward xShmRelease to the real file wrapped by this CrashFile. */
static int cfShmRelease(sqlite3_file *pFile){
  CrashFile *pCrash = (CrashFile*)pFile;
  return sqlite3OsShmRelease(pCrash->pRealFile);
}
/* Pass the shared-memory lock request through to the wrapped real file. */
static int cfShmLock(sqlite3_file *pFile, int desired, int *pGot){
  CrashFile *pCrash = (CrashFile*)pFile;
  return sqlite3OsShmLock(pCrash->pRealFile, desired, pGot);
}
/* Pass the shared-memory barrier call through to the wrapped real file. */
static void cfShmBarrier(sqlite3_file *pFile){
  CrashFile *pCrash = (CrashFile*)pFile;
  sqlite3OsShmBarrier(pCrash->pRealFile);
}
/* Pass the shared-memory close call (and delFlag) to the wrapped real file. */
static int cfShmClose(sqlite3_file *pFile, int delFlag){
  CrashFile *pCrash = (CrashFile*)pFile;
  return sqlite3OsShmClose(pCrash->pRealFile, delFlag);
}







|
|







536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
  void volatile **pp
){
  return sqlite3OsShmGet(((CrashFile*)pFile)->pRealFile, reqSize, pSize, pp);
}
/* Pass the shared-memory release call through to the wrapped real file. */
static int cfShmRelease(sqlite3_file *pFile){
  CrashFile *pCrash = (CrashFile*)pFile;
  return sqlite3OsShmRelease(pCrash->pRealFile);
}
/*
** Pass the shared-memory lock request (ofst, n, flags) through to the
** wrapped real file, unchanged.
*/
static int cfShmLock(sqlite3_file *pFile, int ofst, int n, int flags){
  CrashFile *pCrash = (CrashFile*)pFile;
  return sqlite3OsShmLock(pCrash->pRealFile, ofst, n, flags);
}
/* Pass the shared-memory barrier call through to the wrapped real file. */
static void cfShmBarrier(sqlite3_file *pFile){
  CrashFile *pCrash = (CrashFile*)pFile;
  sqlite3OsShmBarrier(pCrash->pRealFile);
}
/* Pass the shared-memory close call (and delFlag) to the wrapped real file. */
static int cfShmClose(sqlite3_file *pFile, int delFlag){
  CrashFile *pCrash = (CrashFile*)pFile;
  return sqlite3OsShmClose(pCrash->pRealFile, delFlag);
}

Changes to src/test_devsym.c.

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
static int devsymFileControl(sqlite3_file*, int op, void *pArg);
static int devsymSectorSize(sqlite3_file*);
static int devsymDeviceCharacteristics(sqlite3_file*);
static int devsymShmOpen(sqlite3_file*);
static int devsymShmSize(sqlite3_file*,int,int*);
static int devsymShmGet(sqlite3_file*,int,int*,volatile void**);
static int devsymShmRelease(sqlite3_file*);
static int devsymShmLock(sqlite3_file*,int,int*);
static void devsymShmBarrier(sqlite3_file*);
static int devsymShmClose(sqlite3_file*,int);

/*
** Method declarations for devsym_vfs.
*/
static int devsymOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *);







|







50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
static int devsymFileControl(sqlite3_file*, int op, void *pArg);
static int devsymSectorSize(sqlite3_file*);
static int devsymDeviceCharacteristics(sqlite3_file*);
static int devsymShmOpen(sqlite3_file*);
static int devsymShmSize(sqlite3_file*,int,int*);
static int devsymShmGet(sqlite3_file*,int,int*,volatile void**);
static int devsymShmRelease(sqlite3_file*);
static int devsymShmLock(sqlite3_file*,int,int,int);
static void devsymShmBarrier(sqlite3_file*);
static int devsymShmClose(sqlite3_file*,int);

/*
** Method declarations for devsym_vfs.
*/
static int devsymOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *);
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
  devsym_file *p = (devsym_file *)pFile;
  return sqlite3OsShmGet(p->pReal, reqSz, pSize, pp);
}
/* Hand the shared-memory release call to the underlying file (pReal). */
static int devsymShmRelease(sqlite3_file *pFile){
  return sqlite3OsShmRelease(((devsym_file *)pFile)->pReal);
}
/* Hand the shared-memory lock request to the underlying file (pReal). */
static int devsymShmLock(sqlite3_file *pFile, int desired, int *pGot){
  return sqlite3OsShmLock(((devsym_file *)pFile)->pReal, desired, pGot);
}
/* Hand the shared-memory barrier call to the underlying file (pReal). */
static void devsymShmBarrier(sqlite3_file *pFile){
  sqlite3OsShmBarrier(((devsym_file *)pFile)->pReal);
}
static int devsymShmClose(sqlite3_file *pFile, int delFlag){
  devsym_file *p = (devsym_file *)pFile;







|

|







259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
  devsym_file *p = (devsym_file *)pFile;
  return sqlite3OsShmGet(p->pReal, reqSz, pSize, pp);
}
/* Hand the shared-memory release call to the underlying file (pReal). */
static int devsymShmRelease(sqlite3_file *pFile){
  return sqlite3OsShmRelease(((devsym_file *)pFile)->pReal);
}
/*
** Hand the shared-memory lock request (ofst, n, flags) to the underlying
** file (pReal), unchanged.
*/
static int devsymShmLock(sqlite3_file *pFile, int ofst, int n, int flags){
  return sqlite3OsShmLock(((devsym_file *)pFile)->pReal, ofst, n, flags);
}
/* Hand the shared-memory barrier call to the underlying file (pReal). */
static void devsymShmBarrier(sqlite3_file *pFile){
  sqlite3OsShmBarrier(((devsym_file *)pFile)->pReal);
}
static int devsymShmClose(sqlite3_file *pFile, int delFlag){
  devsym_file *p = (devsym_file *)pFile;

Changes to src/test_osinst.c.

151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
static int vfslogSectorSize(sqlite3_file*);
static int vfslogDeviceCharacteristics(sqlite3_file*);

static int vfslogShmOpen(sqlite3_file *pFile);
static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize);
static int vfslogShmGet(sqlite3_file *pFile, int,int*,volatile void **);
static int vfslogShmRelease(sqlite3_file *pFile);
static int vfslogShmLock(sqlite3_file *pFile, int desiredLock, int *gotLock);
static void vfslogShmBarrier(sqlite3_file*);
static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag);

/*
** Method declarations for vfslog_vfs.
*/
static int vfslogOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *);







|







151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
static int vfslogSectorSize(sqlite3_file*);
static int vfslogDeviceCharacteristics(sqlite3_file*);

static int vfslogShmOpen(sqlite3_file *pFile);
static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize);
static int vfslogShmGet(sqlite3_file *pFile, int,int*,volatile void **);
static int vfslogShmRelease(sqlite3_file *pFile);
static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags);
static void vfslogShmBarrier(sqlite3_file*);
static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag);

/*
** Method declarations for vfslog_vfs.
*/
static int vfslogOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *);
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
  VfslogFile *p = (VfslogFile *)pFile;
  t = vfslog_time();
  rc = p->pReal->pMethods->xShmRelease(p->pReal);
  t = vfslog_time() - t;
  vfslog_call(p->pVfslog, OS_SHMRELEASE, p->iFileId, t, rc, 0, 0);
  return rc;
}
/*
** Invoke xShmLock on the real file, measure how long the call took with
** vfslog_time(), and record the call (OS_SHMLOCK, elapsed time, result
** code) via vfslog_call().  The result of the real xShmLock is returned.
*/
static int vfslogShmLock(sqlite3_file *pFile, int desiredLock, int *gotLock){
  VfslogFile *pLog = (VfslogFile *)pFile;
  sqlite3_uint64 tStart = vfslog_time();
  int rc = pLog->pReal->pMethods->xShmLock(pLog->pReal, desiredLock, gotLock);
  sqlite3_uint64 tElapsed = vfslog_time() - tStart;

  vfslog_call(pLog->pVfslog, OS_SHMLOCK, pLog->iFileId, tElapsed, rc, 0, 0);
  return rc;
}
static void vfslogShmBarrier(sqlite3_file *pFile){
  sqlite3_uint64 t;
  VfslogFile *p = (VfslogFile *)pFile;







|




|







456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
  VfslogFile *p = (VfslogFile *)pFile;
  t = vfslog_time();
  rc = p->pReal->pMethods->xShmRelease(p->pReal);
  t = vfslog_time() - t;
  vfslog_call(p->pVfslog, OS_SHMRELEASE, p->iFileId, t, rc, 0, 0);
  return rc;
}
/*
** Invoke xShmLock on the real file, measure how long the call took with
** vfslog_time(), and record the call (OS_SHMLOCK, elapsed time, result
** code) via vfslog_call().  The result of the real xShmLock is returned.
*/
static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags){
  VfslogFile *pLog = (VfslogFile *)pFile;
  sqlite3_uint64 tStart = vfslog_time();
  int rc = pLog->pReal->pMethods->xShmLock(pLog->pReal, ofst, n, flags);
  sqlite3_uint64 tElapsed = vfslog_time() - tStart;

  vfslog_call(pLog->pVfslog, OS_SHMLOCK, pLog->iFileId, tElapsed, rc, 0, 0);
  return rc;
}
static void vfslogShmBarrier(sqlite3_file *pFile){
  sqlite3_uint64 t;
  VfslogFile *p = (VfslogFile *)pFile;

Changes to src/test_vfs.c.

98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
static int tvfsSleep(sqlite3_vfs*, int microseconds);
static int tvfsCurrentTime(sqlite3_vfs*, double*);

static int tvfsShmOpen(sqlite3_file*);
static int tvfsShmSize(sqlite3_file*, int , int *);
static int tvfsShmGet(sqlite3_file*, int , int *, volatile void **);
static int tvfsShmRelease(sqlite3_file*);
static int tvfsShmLock(sqlite3_file*, int , int *);
static void tvfsShmBarrier(sqlite3_file*);
static int tvfsShmClose(sqlite3_file*, int);

static sqlite3_io_methods tvfs_io_methods = {
  2,                            /* iVersion */
  tvfsClose,                      /* xClose */
  tvfsRead,                       /* xRead */







|







98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
static int tvfsSleep(sqlite3_vfs*, int microseconds);
static int tvfsCurrentTime(sqlite3_vfs*, double*);

static int tvfsShmOpen(sqlite3_file*);
static int tvfsShmSize(sqlite3_file*, int , int *);
static int tvfsShmGet(sqlite3_file*, int , int *, volatile void **);
static int tvfsShmRelease(sqlite3_file*);
static int tvfsShmLock(sqlite3_file*, int , int, int);
static void tvfsShmBarrier(sqlite3_file*);
static int tvfsShmClose(sqlite3_file*, int);

static sqlite3_io_methods tvfs_io_methods = {
  2,                            /* iVersion */
  tvfsClose,                      /* xClose */
  tvfsRead,                       /* xRead */
540
541
542
543
544
545
546
547
548

549
550
551
552
553

554

555
556
557

558

559
560
561


562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
  tvfsResultCode(p, &rc);

  return rc;
}

/*
** Run the Tcl "xShmLock" callback with the shared-memory file name, the
** handle id and a textual name for desiredLock.  A desiredLock value with
** no entry in the table below is reported as the empty string.  The
** result code produced by the script (via tvfsResultCode()) is returned;
** when it is SQLITE_OK, *gotLock is set to desiredLock.
*/
static int tvfsShmLock(
  sqlite3_file *pFile,
  int desiredLock,
  int *gotLock
){
  static const struct LockName {
    int eLock;                    /* SQLITE_SHM_xxx value */
    const char *zName;            /* Name passed to the Tcl script */
  } aLockName[] = {
    { SQLITE_SHM_READ,       "READ"       },
    { SQLITE_SHM_WRITE,      "WRITE"      },
    { SQLITE_SHM_CHECKPOINT, "CHECKPOINT" },
    { SQLITE_SHM_RECOVER,    "RECOVER"    },
    { SQLITE_SHM_PENDING,    "PENDING"    },
    { SQLITE_SHM_UNLOCK,     "UNLOCK"     },
  };
  int rc = SQLITE_OK;
  TestvfsFile *pFd = (TestvfsFile *)pFile;
  Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
  const char *zLock = "";
  int i;

  for(i=0; i<(int)(sizeof(aLockName)/sizeof(aLockName[0])); i++){
    if( aLockName[i].eLock==desiredLock ){
      zLock = aLockName[i].zName;
      break;
    }
  }

  tvfsExecTcl(p, "xShmLock", 
      Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId,
      Tcl_NewStringObj(zLock, -1)
  );
  tvfsResultCode(p, &rc);
  if( rc==SQLITE_OK ) *gotLock = desiredLock;

  return rc;
}

static void tvfsShmBarrier(sqlite3_file *pFile){
  int rc = SQLITE_OK;
  TestvfsFile *pFd = (TestvfsFile *)pFile;
  Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);







|
|
>




|
>

>
|
|
|
>
|
>
|
|
|
>
>






<
<
<
<







540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574




575
576
577
578
579
580
581
  tvfsResultCode(p, &rc);

  return rc;
}

/*
** Run the Tcl "xShmLock" callback with the shared-memory file name, the
** handle id and a description of the request of the form
**
**     "OFST N lock|unlock shared|exclusive"
**
** "lock" is used when SQLITE_SHM_LOCK is set in flags (otherwise
** "unlock"), and "shared" when SQLITE_SHM_SHARED is set (otherwise
** "exclusive").  The result code produced by the script (via
** tvfsResultCode()) is returned.
*/
static int tvfsShmLock(
  sqlite3_file *pFile,
  int ofst,
  int n,
  int flags
){
  int rc = SQLITE_OK;
  TestvfsFile *pFd = (TestvfsFile *)pFile;
  Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
  const char *zOp = (flags & SQLITE_SHM_LOCK) ? "lock" : "unlock";
  const char *zMode = (flags & SQLITE_SHM_SHARED) ? "shared" : "exclusive";
  char zLock[80];

  /* Two decimal ints plus the longest keywords fit easily in 80 bytes. */
  sqlite3_snprintf(sizeof(zLock), zLock, "%d %d %s %s", ofst, n, zOp, zMode);

  tvfsExecTcl(p, "xShmLock", 
      Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId,
      Tcl_NewStringObj(zLock, -1)
  );
  tvfsResultCode(p, &rc);

  return rc;
}

static void tvfsShmBarrier(sqlite3_file *pFile){
  int rc = SQLITE_OK;
  TestvfsFile *pFd = (TestvfsFile *)pFile;
  Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
**   VFSNAME shm FILENAME ?NEWVALUE?
**
** When the xShmLock method is invoked by SQLite, the following script is
** run:
**
**   SCRIPT xShmLock    FILENAME ID LOCK
**
** where LOCK is one of "UNLOCK", "READ", "READ_FULL", "WRITE", "PENDING",
** "CHECKPOINT" or "RECOVER". The script should return an SQLite error
** code.
*/
static int testvfs_cmd(
  ClientData cd,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){







|
<
<







715
716
717
718
719
720
721
722


723
724
725
726
727
728
729
**   VFSNAME shm FILENAME ?NEWVALUE?
**
** When the xShmLock method is invoked by SQLite, the following script is
** run:
**
**   SCRIPT xShmLock    FILENAME ID LOCK
**
** where LOCK is of the form "OFFSET NBYTE lock/unlock shared/exclusive"


*/
static int testvfs_cmd(
  ClientData cd,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){

Changes to src/vdbe.c.

476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
** implement a loop.  This test used to be on every single instruction,
** but that meant we did more testing than we needed.  By only testing the
** flag on jump instructions, we get a (small) speed improvement.
*/
#define CHECK_FOR_INTERRUPT \
   if( db->u1.isInterrupted ) goto abort_due_to_interrupt;

#ifdef SQLITE_DEBUG
/*
** Return 1 if sqlite3OsAccess() succeeds and reports that zFile exists,
** or 0 otherwise.  The VFS of database handle db is used for the check.
*/
static int fileExists(sqlite3 *db, const char *zFile){
  int exists = 0;
  int rc;
#ifdef SQLITE_TEST
  /* While IO errors are being simulated, skip the OsAccess() call
  ** altogether and report "does not exist".  An IO error raised here
  ** would go unreported and make the test fail.
  */
  extern int sqlite3_io_error_pending;
  if( sqlite3_io_error_pending>0 ){
    return 0;
  }
#endif
  rc = sqlite3OsAccess(db->pVfs, zFile, SQLITE_ACCESS_EXISTS, &exists);
  return (rc==SQLITE_OK && exists);
}
#endif

#ifndef NDEBUG
/*
** This function is only called from within an assert() expression. It
** checks that the sqlite3.nTransaction variable is correctly set to
** the number of non-transaction savepoints currently in the 
** linked list starting at sqlite3.pSavepoint.







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







476
477
478
479
480
481
482
















483
484
485
486
487
488
489
** implement a loop.  This test used to be on every single instruction,
** but that meant we did more testing than we needed.  By only testing the
** flag on jump instructions, we get a (small) speed improvement.
*/
#define CHECK_FOR_INTERRUPT \
   if( db->u1.isInterrupted ) goto abort_due_to_interrupt;


















#ifndef NDEBUG
/*
** This function is only called from within an assert() expression. It
** checks that the sqlite3.nTransaction variable is correctly set to
** the number of non-transaction savepoints currently in the 
** linked list starting at sqlite3.pSavepoint.
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
  CHECK_FOR_INTERRUPT;
  sqlite3VdbeIOTraceSql(p);
#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
  checkProgress = db->xProgress!=0;
#endif
#ifdef SQLITE_DEBUG
  sqlite3BeginBenignMalloc();
  if( p->pc==0 
   && ((p->db->flags & SQLITE_VdbeListing) || fileExists(db, "vdbe_explain"))
  ){
    int i;
    printf("VDBE Program Listing:\n");
    sqlite3VdbePrintSql(p);
    for(i=0; i<p->nOp; i++){
      sqlite3VdbePrintOp(stdout, i, &aOp[i]);
    }
  }
  if( fileExists(db, "vdbe_trace") ){
    p->trace = stdout;
  }
  sqlite3EndBenignMalloc();
#endif
  for(pc=p->pc; rc==SQLITE_OK; pc++){
    assert( pc>=0 && pc<p->nOp );
    if( db->mallocFailed ) goto no_mem;
#ifdef VDBE_PROFILE
    origPc = pc;







|
<
<







<
<
<







574
575
576
577
578
579
580
581


582
583
584
585
586
587
588



589
590
591
592
593
594
595
  CHECK_FOR_INTERRUPT;
  sqlite3VdbeIOTraceSql(p);
#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
  checkProgress = db->xProgress!=0;
#endif
#ifdef SQLITE_DEBUG
  sqlite3BeginBenignMalloc();
  if( p->pc==0  && (p->db->flags & SQLITE_VdbeListing)!=0 ){


    int i;
    printf("VDBE Program Listing:\n");
    sqlite3VdbePrintSql(p);
    for(i=0; i<p->nOp; i++){
      sqlite3VdbePrintOp(stdout, i, &aOp[i]);
    }
  }



  sqlite3EndBenignMalloc();
#endif
  for(pc=p->pc; rc==SQLITE_OK; pc++){
    assert( pc>=0 && pc<p->nOp );
    if( db->mallocFailed ) goto no_mem;
#ifdef VDBE_PROFILE
    origPc = pc;
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
    if( p->trace ){
      if( pc==0 ){
        printf("VDBE Execution Trace:\n");
        sqlite3VdbePrintSql(p);
      }
      sqlite3VdbePrintOp(p->trace, pc, pOp);
    }
    if( p->trace==0 && pc==0 ){
      sqlite3BeginBenignMalloc();
      if( fileExists(db, "vdbe_sqltrace") ){
        sqlite3VdbePrintSql(p);
      }
      sqlite3EndBenignMalloc();
    }
#endif
      

    /* Check to see if we need to simulate an interrupt.  This only happens
    ** if we have a special test build.
    */
#ifdef SQLITE_TEST







<
<
<
<
<
<
<







603
604
605
606
607
608
609







610
611
612
613
614
615
616
    if( p->trace ){
      if( pc==0 ){
        printf("VDBE Execution Trace:\n");
        sqlite3VdbePrintSql(p);
      }
      sqlite3VdbePrintOp(p->trace, pc, pOp);
    }







#endif
      

    /* Check to see if we need to simulate an interrupt.  This only happens
    ** if we have a special test build.
    */
#ifdef SQLITE_TEST

Changes to src/wal.c.

89
90
91
92
93
94
95
96

97
98
99
100









101
102
103
104
105
106
107
108
** being considered valid at the same time and being checkpointed together
** following a crash.
**
** READER ALGORITHM
**
** To read a page from the database (call it page number P), a reader
** first checks the WAL to see if it contains page P.  If so, then the
** last valid instance of page P that is or is followed by a commit frame

** becomes the value read.  If the WAL contains no copies of page P that
** are valid and which are or are followed by a commit frame, then page
** P is read from the database file.
**









** The reader algorithm in the previous paragraph works correctly, but 
** because frames for page P can appear anywhere within the WAL, the
** reader has to scan the entire WAL looking for page P frames.  If the
** WAL is large (multiple megabytes is typical) that scan can be slow,
** and read performance suffers.  To overcome this problem, a separate
** data structure called the wal-index is maintained to expedite the
** search for frames of a particular page.
** 







|
>
|
|
|

>
>
>
>
>
>
>
>
>
|







89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
** being considered valid at the same time and being checkpointed together
** following a crash.
**
** READER ALGORITHM
**
** To read a page from the database (call it page number P), a reader
** first checks the WAL to see if it contains page P.  If so, then the
** last valid instance of page P that is followed by a commit frame
** or is a commit frame itself becomes the value read.  If the WAL
** contains no copies of page P that are valid and which are a commit
** frame or are followed by a commit frame, then page P is read from
** the database file.
**
** To start a read transaction, the reader records the index of the last
** valid frame in the WAL.  The reader uses this recorded "mxFrame" value
** for all subsequent read operations.  New transactions can be appended
** to the WAL, but as long as the reader uses its original mxFrame value
** and ignores the newly appended content, it will see a consistent snapshot
** of the database from a single point in time.  This technique allows
** multiple concurrent readers to view different versions of the database
** content simultaneously.
**
** The reader algorithm in the previous paragraphs works correctly, but 
** because frames for page P can appear anywhere within the WAL, the
** reader has to scan the entire WAL looking for page P frames.  If the
** WAL is large (multiple megabytes is typical) that scan can be slow,
** and read performance suffers.  To overcome this problem, a separate
** data structure called the wal-index is maintained to expedite the
** search for frames of a particular page.
** 
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
** table is never more than half full.  The expected number of collisions 
** prior to finding a match is 1.  Each entry of the hash table is a
** 1-based index of an entry in the mapping section of the same
** index block.   Let K be the 1-based index of the largest entry in
** the mapping section.  (For index blocks other than the last, K will
** always be exactly HASHTABLE_NPAGE (4096) and for the last index block
** K will be (mxFrame%HASHTABLE_NPAGE).)  Unused slots of the hash table
** contain a value greater than K.  Note that no hash table slot ever
** contains a zero value.
**
** To look for page P in the hash table, first compute a hash iKey on
** P as follows:
**
**      iKey = (P * 383) % HASHTABLE_NSLOT
**
** Then start scanning entries of the hash table, starting with iKey







|
<







167
168
169
170
171
172
173
174

175
176
177
178
179
180
181
** table is never more than half full.  The expected number of collisions 
** prior to finding a match is 1.  Each entry of the hash table is a
** 1-based index of an entry in the mapping section of the same
** index block.   Let K be the 1-based index of the largest entry in
** the mapping section.  (For index blocks other than the last, K will
** always be exactly HASHTABLE_NPAGE (4096) and for the last index block
** K will be (mxFrame%HASHTABLE_NPAGE).)  Unused slots of the hash table
** contain a value of 0.

**
** To look for page P in the hash table, first compute a hash iKey on
** P as follows:
**
**      iKey = (P * 383) % HASHTABLE_NSLOT
**
** Then start scanning entries of the hash table, starting with iKey
210
211
212
213
214
215
216






















217
218
219
220

221
222
223
224
225
226
227
228
229
230

231
232
233
234
235
236
237
238
239




















































240
241
242
243
244
245
246
247
248
249
250
251
252
253
** that correspond to frames greater than the new K value are removed
** from the hash table at this point.
*/
#ifndef SQLITE_OMIT_WAL

#include "wal.h"
























/* Object declarations */
typedef struct WalIndexHdr WalIndexHdr;
typedef struct WalIterator WalIterator;



/*
** The following object holds a copy of the wal-index header content.
**
** The actual header in the wal-index consists of two copies of this
** object.
*/
struct WalIndexHdr {
  u32 iChange;                    /* Counter incremented each transaction */

  u16 bigEndCksum;                /* True if checksums in WAL are big-endian */
  u16 szPage;                     /* Database page size in bytes */
  u32 mxFrame;                    /* Index of last valid frame in the WAL */
  u32 nPage;                      /* Size of database in pages */
  u32 aFrameCksum[2];             /* Checksum of last frame in log */
  u32 aSalt[2];                   /* Two salt values copied from WAL header */
  u32 aCksum[2];                  /* Checksum over all prior fields */
};





















































/* A block of WALINDEX_LOCK_RESERVED bytes beginning at
** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems
** only support mandatory file-locks, we do not read or write data
** from the region of the file on which locks are applied.
*/
#define WALINDEX_LOCK_OFFSET   (sizeof(WalIndexHdr)*2)
#define WALINDEX_LOCK_RESERVED 8
#define WALINDEX_HDR_SIZE      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)

/* Size of header before each frame in wal */
#define WAL_FRAME_HDRSIZE 24

/* Size of write ahead log header */
#define WAL_HDRSIZE 24







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




>










>
|








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|
|







219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
** that correspond to frames greater than the new K value are removed
** from the hash table at this point.
*/
#ifndef SQLITE_OMIT_WAL

#include "wal.h"

/*
** Trace output macros
**
** WALTRACE((fmt, ...)) takes a parenthesized sqlite3DebugPrintf()
** argument list.  In builds with both SQLITE_TEST and SQLITE_DEBUG it
** prints only while sqlite3WalTrace is non-zero; in all other builds it
** expands to nothing.
**
** The expansion is wrapped in do{...}while(0) so that the macro behaves
** as a single statement.  With a bare "if(...) stmt" expansion, an
** "else" following a WALTRACE() call would bind to the macro's hidden
** "if" instead of the caller's.
*/
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
int sqlite3WalTrace = 0;
# define WALTRACE(X)  do{ if(sqlite3WalTrace) sqlite3DebugPrintf X; }while(0)
#else
# define WALTRACE(X)
#endif


/*
** Indices of various locking bytes.   WAL_NREADER is the number
** of available reader locks and should be at least 3.
*/
#define WAL_WRITE_LOCK         0
#define WAL_ALL_BUT_WRITE      1
#define WAL_CKPT_LOCK          1
#define WAL_RECOVER_LOCK       2
#define WAL_READ_LOCK(I)       (3+(I))
#define WAL_NREADER            (SQLITE_SHM_NLOCK-3)


/* Object declarations */
typedef struct WalIndexHdr WalIndexHdr;
typedef struct WalIterator WalIterator;
typedef struct WalCkptInfo WalCkptInfo;


/*
** The following object holds a copy of the wal-index header content.
**
** The actual header in the wal-index consists of two copies of this
** object.
**
** walIndexWriteHdr() sets isInit to 1 and recomputes aCksum[] each time
** it writes the header.  That checksum covers the bytes that precede
** aCksum (offsetof(WalIndexHdr, aCksum) bytes), so any field placed after
** aCksum would not be covered by it.
*/
struct WalIndexHdr {
  u32 iChange;                    /* Counter incremented each transaction */
  u8 isInit;                      /* 1 when initialized */
  u8 bigEndCksum;                 /* True if checksums in WAL are big-endian */
  u16 szPage;                     /* Database page size in bytes */
  u32 mxFrame;                    /* Index of last valid frame in the WAL */
  u32 nPage;                      /* Size of database in pages */
  u32 aFrameCksum[2];             /* Checksum of last frame in log */
  u32 aSalt[2];                   /* Two salt values copied from WAL header */
  u32 aCksum[2];                  /* Checksum over all prior fields */
};

/*
** A copy of the following object occurs in the wal-index immediately
** following the second copy of the WalIndexHdr.  This object stores
** information used by checkpoint.
**
** nBackfill is the number of frames in the WAL that have been written
** back into the database. (We call the act of moving content from WAL to
** database "backfilling".)  The nBackfill number is never greater than
** WalIndexHdr.mxFrame.  nBackfill can only be increased by threads
** holding the WAL_CKPT_LOCK lock (which includes a recovery thread).
** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from
** mxFrame back to zero when the WAL is reset.
**
** There is one entry in aReadMark[] for each reader lock.  If a reader
** holds read-lock K, then the value in aReadMark[K] is no greater than
** the mxFrame for that reader.  aReadMark[0] is a special case.  It
** always holds zero.  Readers holding WAL_READ_LOCK(0) always ignore 
** the entire WAL and read all content directly from the database.
**
** The value of aReadMark[K] may only be changed by a thread that
** is holding an exclusive lock on WAL_READ_LOCK(K).  Thus, the value of
** aReadMark[K] cannot change while there is a reader using that mark
** since the reader will be holding a shared lock on WAL_READ_LOCK(K).
**
** The checkpointer may only transfer frames from WAL to database where
** the frame numbers are less than or equal to every aReadMark[] that is
** in use (that is, every aReadMark[j] for which there is a corresponding
** WAL_READ_LOCK(j)).  New readers (usually) pick the aReadMark[] with the
** largest value and will increase an unused aReadMark[] to mxFrame if there
** is not already an aReadMark[] equal to mxFrame.  The exception to the
** previous sentence is when nBackfill equals mxFrame (meaning that everything
** in the WAL has been backfilled into the database) then new readers
** will choose aReadMark[0] which has value 0 and hence such readers will
** get all their content directly from the database file and ignore
** the WAL.
**
** Writers normally append new frames to the end of the WAL.  However,
** if nBackfill equals mxFrame (meaning that all WAL content has been
** written back into the database) and if no readers are using the WAL
** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then
** the writer will first "reset" the WAL back to the beginning and start
** writing new content beginning at frame 1.
**
** We assume that 32-bit loads are atomic and so no locks are needed in
** order to read from any aReadMark[] entries.
*/
struct WalCkptInfo {
  u32 nBackfill;                  /* Number of WAL frames backfilled into DB */
  u32 aReadMark[WAL_NREADER];     /* Reader marks */
};


/* A block of WALINDEX_LOCK_RESERVED bytes beginning at
** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems
** only support mandatory file-locks, we do not read or write data
** from the region of the file on which locks are applied.
*/
#define WALINDEX_LOCK_OFFSET   (sizeof(WalIndexHdr)*2 + sizeof(WalCkptInfo))
#define WALINDEX_LOCK_RESERVED 16
#define WALINDEX_HDR_SIZE      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)

/* Size of header before each frame in wal */
#define WAL_FRAME_HDRSIZE 24

/* Size of write ahead log header */
#define WAL_HDRSIZE 24
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289


290
291
292
293
294








295
296
297
298
299
300
301
)

/*
** An open write-ahead log file is represented by an instance of the
** following object.
*/
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pFd */
  sqlite3_file *pDbFd;       /* File handle for the database file */
  sqlite3_file *pWalFd;      /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0) */
  int szWIndex;              /* Size of the wal-index that is mapped in mem */
  volatile u32 *pWiData;     /* Pointer to wal-index content in memory */
  u8 lockState;              /* SQLITE_SHM_xxxx constant showing lock state */
  u8 readerType;             /* SQLITE_SHM_READ or SQLITE_SHM_READ_FULL */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 isWindexOpen;           /* True if ShmOpen() called on pDbFd */


  WalIndexHdr hdr;           /* Wal-index for current snapshot */
  char *zWalName;            /* Name of WAL file */
  int szPage;                /* Database page size */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
};










/*
** This structure is used to implement an iterator that loops through
** all frames in the WAL in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 
** frame most recently written to the WAL (in other words, the frame with







|





|
|

|
>
>
|

<


>
>
>
>
>
>
>
>







358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378

379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
)

/*
** An open write-ahead log file is represented by an instance of the
** following object.
*/
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
  sqlite3_file *pDbFd;       /* File handle for the database file */
  sqlite3_file *pWalFd;      /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0) */
  int szWIndex;              /* Size of the wal-index that is mapped in mem */
  volatile u32 *pWiData;     /* Pointer to wal-index content in memory */
  u16 szPage;                /* Database page size */
  i16 readLock;              /* Which read lock is being held.  -1 for none */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 isWIndexOpen;           /* True if ShmOpen() called on pDbFd */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  char *zWalName;            /* Name of WAL file */

  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
};

/*
** Return a pointer to the WalCkptInfo structure in the wal-index.
**
** The WalCkptInfo object immediately follows the two copies of the
** WalIndexHdr.  pWiData is an array of u32, so with 4-byte u32 elements
** the two headers occupy 2*sizeof(WalIndexHdr)/4, i.e.
** sizeof(WalIndexHdr)/2, array slots.
*/
static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
  volatile u32 *aWiData = pWal->pWiData;
  assert( aWiData!=0 );
  return (volatile WalCkptInfo*)&aWiData[sizeof(WalIndexHdr)/2];
}


/*
** This structure is used to implement an iterator that loops through
** all frames in the WAL in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 
** frame most recently written to the WAL (in other words, the frame with
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411



412
413
414
415
416
417
418
419
420
421
422
423
424
425
    }while( aData<aEnd );
  }

  aOut[0] = s1;
  aOut[1] = s2;
}

/*
** Try to move the wal-index lock to desiredStatus and record the outcome
** in pWal->lockState.
**
** If the connection is in exclusive mode, or already records the
** requested state, no VFS call is made: the state is simply recorded and
** SQLITE_OK is returned.  Otherwise sqlite3OsShmLock() is invoked and its
** return code is passed back to the caller.
**
** Whenever the lock obtained is a reader lock (SQLITE_SHM_READ or
** SQLITE_SHM_READ_FULL), the exact kind is saved in pWal->readerType and
** pWal->lockState is set to SQLITE_SHM_READ.
*/
static int walSetLock(Wal *pWal, int desiredStatus){
  int got;                        /* Lock state reported by the VFS */
  int rc;                         /* Return code */

  if( pWal->exclusiveMode || pWal->lockState==desiredStatus ){
    pWal->lockState = desiredStatus;
    return SQLITE_OK;
  }

  got = pWal->lockState;
  rc = sqlite3OsShmLock(pWal->pDbFd, desiredStatus, &got);
  if( got==SQLITE_SHM_READ || got==SQLITE_SHM_READ_FULL ){
    pWal->readerType = got;
    pWal->lockState = SQLITE_SHM_READ;
  }else{
    pWal->lockState = got;
  }
  return rc;
}

/*
** Write the header information in pWal->hdr into the wal-index.
**
** The checksum on pWal->hdr is updated before it is written.
*/
static void walIndexWriteHdr(Wal *pWal){
  WalIndexHdr *aHdr;              /* The two header slots in the wal-index */



  /* Checksum every byte of the header except the trailing aCksum field
  ** and store the result in aCksum.  (The size arithmetic assumes aCksum
  ** is the last member of WalIndexHdr - TODO confirm.) */
  walChecksumBytes(1, (u8*)&pWal->hdr,
                   sizeof(pWal->hdr) - sizeof(pWal->hdr.aCksum),
                   0, pWal->hdr.aCksum);
  aHdr = (WalIndexHdr*)pWal->pWiData;
  /* Write the second copy, issue a memory barrier, then write the first
  ** copy.  The order is deliberate; presumably it lets a reader that
  ** compares the two copies detect a partially written header. */
  memcpy(&aHdr[1], &pWal->hdr, sizeof(pWal->hdr));
  sqlite3OsShmBarrier(pWal->pDbFd);
  memcpy(&aHdr[0], &pWal->hdr, sizeof(pWal->hdr));
}

/*
** This function encodes a single frame header and writes it to a buffer
** supplied by the caller. A frame-header is made up of a series of 
** 4-byte big-endian integers, as follows:
**







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







>
>
>
|
<


|

|







469
470
471
472
473
474
475























476
477
478
479
480
481
482
483
484
485
486

487
488
489
490
491
492
493
494
495
496
497
498
    }while( aData<aEnd );
  }

  aOut[0] = s1;
  aOut[1] = s2;
}
























/*
** Write the header information in pWal->hdr into the wal-index.
**
** The checksum on pWal->hdr is updated before it is written.
*/
static void walIndexWriteHdr(Wal *pWal){
  WalIndexHdr *aHdr;              /* The two header slots in the wal-index */

  /* Only a connection holding the write lock may publish a new header. */
  assert( pWal->writeLock );
  pWal->hdr.isInit = 1;
  /* Checksum all header bytes that precede the aCksum field and store the
  ** result in aCksum. */
  walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum),

                   0, pWal->hdr.aCksum);
  aHdr = (WalIndexHdr*)pWal->pWiData;
  /* Write the second copy, issue a memory barrier, then write the first
  ** copy.  The order is deliberate; presumably it lets a reader that
  ** compares the two copies detect a partially written header. */
  memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr));
  sqlite3OsShmBarrier(pWal->pDbFd);
  memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr));
}

/*
** This function encodes a single frame header and writes it to a buffer
** supplied by the caller. A frame-header is made up of a series of 
** 4-byte big-endian integers, as follows:
**
515
516
517
518
519
520
521





























































522
523
524
525
526
527
528
** create incompatibilities.
*/
#define HASHTABLE_NPAGE      4096  /* Must be power of 2 and multiple of 256 */
#define HASHTABLE_DATATYPE   u16
#define HASHTABLE_HASH_1     383                  /* Should be prime */
#define HASHTABLE_NSLOT      (HASHTABLE_NPAGE*2)  /* Must be a power of 2 */
#define HASHTABLE_NBYTE      (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)






























































/*
** Return the index in the Wal.pWiData array that corresponds to 
** frame iFrame.
**
** Wal.pWiData is an array of u32 elements that is the wal-index.
** The array begins with a header and is then followed by alternating







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
** create incompatibilities.
*/
#define HASHTABLE_NPAGE      4096  /* Must be power of 2 and multiple of 256 */
#define HASHTABLE_DATATYPE   u16
#define HASHTABLE_HASH_1     383                  /* Should be prime */
#define HASHTABLE_NSLOT      (HASHTABLE_NPAGE*2)  /* Must be a power of 2 */
#define HASHTABLE_NBYTE      (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
/*
** Translate a wal-index lock index into a human-readable name for use in
** WALTRACE() debugging output.  This routine is compiled only when both
** SQLITE_TEST and SQLITE_DEBUG are defined and is not a part of an
** ordinary build.
**
** The three named locks map to string constants.  Any other index is
** formatted as "READ-LOCK[n]" into a static buffer, so the pointer
** returned for a read lock is overwritten by the next such call.
*/
static const char *walLockName(int lockIdx){
  static char zName[15];          /* Scratch space for "READ-LOCK[n]" */

  if( lockIdx==WAL_WRITE_LOCK ) return "WRITE-LOCK";
  if( lockIdx==WAL_CKPT_LOCK ) return "CKPT-LOCK";
  if( lockIdx==WAL_RECOVER_LOCK ) return "RECOVER-LOCK";

  sqlite3_snprintf(sizeof(zName), zName, "READ-LOCK[%d]",
                   lockIdx-WAL_READ_LOCK(0));
  return zName;
}
#endif /* defined(SQLITE_TEST) && defined(SQLITE_DEBUG) */
    

/*
** Set or release locks on the WAL.  Locks are either shared or exclusive.
** A lock cannot be moved directly between shared and exclusive - it must go
** through the unlocked state first.
**
** In locking_mode=EXCLUSIVE, all of these routines become no-ops.
*/
/* Take a SHARED lock on the single lock byte lockIdx.  Returns the result
** of sqlite3OsShmLock(), or SQLITE_OK without locking in exclusive mode. */
static int walLockShared(Wal *pWal, int lockIdx){
  int rc = SQLITE_OK;
  if( !pWal->exclusiveMode ){
    rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
                          SQLITE_SHM_LOCK | SQLITE_SHM_SHARED);
    WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal,
              walLockName(lockIdx), rc ? "failed" : "ok"));
  }
  return rc;
}

/* Drop a SHARED lock on lock byte lockIdx.  Any error from the VFS is
** deliberately ignored. */
static void walUnlockShared(Wal *pWal, int lockIdx){
  if( !pWal->exclusiveMode ){
    (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
                           SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED);
    WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx)));
  }
}

/* Take an EXCLUSIVE lock on n consecutive lock bytes starting at lockIdx.
** Returns the result of sqlite3OsShmLock(), or SQLITE_OK without locking
** in exclusive mode. */
static int walLockExclusive(Wal *pWal, int lockIdx, int n){
  int rc = SQLITE_OK;
  if( !pWal->exclusiveMode ){
    rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
                          SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE);
    WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal,
              walLockName(lockIdx), n, rc ? "failed" : "ok"));
  }
  return rc;
}

/* Drop an EXCLUSIVE lock on n consecutive lock bytes starting at lockIdx.
** Any error from the VFS is deliberately ignored. */
static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
  if( !pWal->exclusiveMode ){
    (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
                           SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);
    WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal,
               walLockName(lockIdx), n));
  }
}

/*
** Return the index in the Wal.pWiData array that corresponds to 
** frame iFrame.
**
** Wal.pWiData is an array of u32 elements that is the wal-index.
** The array begins with a header and is then followed by alternating
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32)
    + (((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NBYTE)/sizeof(u32)
    + (iFrame-1)
  );
}

/*
** Return the minimum mapping size in bytes that can be used to read the
** wal-index up to and including frame iFrame. If iFrame is the last frame
** in a block of 256 frames, the returned byte-count includes the space
** required by the 256-byte index block.
*/
static int walMappingSize(u32 iFrame){
  const int nByte = (sizeof(u32)*HASHTABLE_NPAGE + HASHTABLE_NBYTE) ;
  return ( WALINDEX_LOCK_OFFSET 
         + WALINDEX_LOCK_RESERVED 
         + nByte * ((iFrame + HASHTABLE_NPAGE - 1)/HASHTABLE_NPAGE)
  );







|
|
|
|







673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32)
    + (((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NBYTE)/sizeof(u32)
    + (iFrame-1)
  );
}

/*
** Return the minimum size of the shared-memory, in bytes, that is needed
** to support a wal-index containing frame iFrame.  The value returned
** includes the wal-index header and the complete "block" containing iFrame,
** including the hash table segment that follows the block.
*/
static int walMappingSize(u32 iFrame){
  const int nByte = (sizeof(u32)*HASHTABLE_NPAGE + HASHTABLE_NBYTE) ;
  return ( WALINDEX_LOCK_OFFSET 
         + WALINDEX_LOCK_RESERVED 
         + nByte * ((iFrame + HASHTABLE_NPAGE - 1)/HASHTABLE_NPAGE)
  );
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615

616
617
618
619
620
621
622
**
** If enlargeTo is non-negative, then increase the size of the underlying
** storage to be at least as big as enlargeTo before remapping.
*/
static int walIndexRemap(Wal *pWal, int enlargeTo){
  int szShm;                      /* Size reported by sqlite3OsShmSize() */
  int rc;                         /* Return code */

  assert( pWal->lockState>=SQLITE_SHM_WRITE );
  rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &szShm);
  if( rc==SQLITE_OK ){
    /* Remap only when the storage is now larger than the current mapping. */
    if( szShm>pWal->szWIndex ){
      walIndexUnmap(pWal);
      rc = walIndexMap(pWal, szShm);
    }
    assert( rc!=SQLITE_OK || pWal->szWIndex>=enlargeTo );
  }
  return rc;
}

/*
** Compute a hash on a page number.  The resulting hash value must land
** between 0 and (HASHTABLE_NSLOT-1).

*/
static int walHash(u32 iPage){
  const u32 mask = HASHTABLE_NSLOT-1;   /* Valid because NSLOT is 2^k */
  assert( iPage>0 );
  assert( (HASHTABLE_NSLOT & mask)==0 );
  /* Multiply by the hash constant (unsigned 32-bit arithmetic), then
  ** reduce the product to a slot index. */
  return (iPage*HASHTABLE_HASH_1) & mask;
}
static int walNextHash(int iPriorHash){







|











|
>







730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
**
** If enlargeTo is non-negative, then increase the size of the underlying
** storage to be at least as big as enlargeTo before remapping.
*/
static int walIndexRemap(Wal *pWal, int enlargeTo){
  int szShm;                      /* Size reported by sqlite3OsShmSize() */
  int rc;                         /* Return code */

  /* The caller must hold the write lock. */
  assert( pWal->writeLock );
  rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &szShm);
  if( rc==SQLITE_OK ){
    /* Remap only when the storage is now larger than the current mapping. */
    if( szShm>pWal->szWIndex ){
      walIndexUnmap(pWal);
      rc = walIndexMap(pWal, szShm);
    }
    assert( rc!=SQLITE_OK || pWal->szWIndex>=enlargeTo );
  }
  return rc;
}

/*
** Compute a hash on a page number.  The resulting hash value must land
** between 0 and (HASHTABLE_NSLOT-1).  The walNextHash() function advances
** the hash to the next value in the event of a collision.
*/
static int walHash(u32 iPage){
  const u32 mask = HASHTABLE_NSLOT-1;   /* Valid because NSLOT is 2^k */
  assert( iPage>0 );
  assert( (HASHTABLE_NSLOT & mask)==0 );
  /* Multiply by the hash constant (unsigned 32-bit arithmetic), then
  ** reduce the product to a slot index. */
  return (iPage*HASHTABLE_HASH_1) & mask;
}
static int walNextHash(int iPriorHash){
671
672
673
674
675
676
677
678
679
680

681
682
683
684
685
686
687
688
689
690

691
692
693




694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712

713
714
715
716
717
718
719
/*
** Remove entries from the hash table that point to WAL slots greater
** than pWal->hdr.mxFrame.
**
** This function is called whenever pWal->hdr.mxFrame is decreased due
** to a rollback or savepoint.
**
** At most only the very last hash table needs to be updated.  Any
** later hash tables will be automatically cleared when pWal->hdr.mxFrame
** advances to the point where those hash tables are actually needed.

*/
static void walCleanupHash(Wal *pWal){
  volatile HASHTABLE_DATATYPE *aHash;  /* Pointer to hash table to clear */
  volatile u32 *aPgno;                 /* Unused return from walHashFind() */
  u32 iZero;                           /* frame == (aHash[x]+iZero) */
  int iLimit;                          /* Zero values greater than this */

  assert( pWal->lockState==SQLITE_SHM_WRITE );
  walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);
  iLimit = pWal->hdr.mxFrame - iZero;

  if( iLimit>0 ){
    int nByte;                    /* Number of bytes to zero in aPgno[] */
    int i;                        /* Used to iterate through aHash[] */




    for(i=0; i<HASHTABLE_NSLOT; i++){
      if( aHash[i]>iLimit ){
        aHash[i] = 0;
      }
    }

    /* Zero the entries in the aPgno array that correspond to frames with
    ** frame numbers greater than pWal->hdr.mxFrame. 
    */
    nByte = sizeof(u32) * (HASHTABLE_NPAGE-iLimit);
    memset((void *)&aPgno[iZero+iLimit+1], 0, nByte);
    assert( &((u8 *)&aPgno[iZero+iLimit+1])[nByte]==(u8 *)aHash );
  }

#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
  /* Verify that the every entry in the mapping region is still reachable
  ** via the hash table even after the cleanup.
  */
  {

    int i;           /* Loop counter */
    int iKey;        /* Hash key */
    for(i=1; i<=iLimit; i++){
      for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){
        if( aHash[iKey]==i ) break;
      }
      assert( aHash[iKey]==i );







|
|
|
>





|

|
|
|
>
|


>
>
>
>


















<
>







806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852

853
854
855
856
857
858
859
860
/*
** Remove entries from the hash table that point to WAL slots greater
** than pWal->hdr.mxFrame.
**
** This function is called whenever pWal->hdr.mxFrame is decreased due
** to a rollback or savepoint.
**
** At most only the hash table containing pWal->hdr.mxFrame needs to be
** updated.  Any later hash tables will be automatically cleared when
** pWal->hdr.mxFrame advances to the point where those hash tables are
** actually needed.
*/
static void walCleanupHash(Wal *pWal){
  volatile HASHTABLE_DATATYPE *aHash;  /* Pointer to hash table to clear */
  volatile u32 *aPgno;                 /* Unused return from walHashFind() */
  u32 iZero;                           /* frame == (aHash[x]+iZero) */
  int iLimit = 0;                      /* Zero values greater than this */

  assert( pWal->writeLock );
  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 );
  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE );
  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 );
  if( (pWal->hdr.mxFrame % HASHTABLE_NPAGE)>0 ){
    int nByte;                    /* Number of bytes to zero in aPgno[] */
    int i;                        /* Used to iterate through aHash[] */

    walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);
    iLimit = pWal->hdr.mxFrame - iZero;
    assert( iLimit>0 );
    for(i=0; i<HASHTABLE_NSLOT; i++){
      if( aHash[i]>iLimit ){
        aHash[i] = 0;
      }
    }

    /* Zero the entries in the aPgno array that correspond to frames with
    ** frame numbers greater than pWal->hdr.mxFrame. 
    */
    nByte = sizeof(u32) * (HASHTABLE_NPAGE-iLimit);
    memset((void *)&aPgno[iZero+iLimit+1], 0, nByte);
    assert( &((u8 *)&aPgno[iZero+iLimit+1])[nByte]==(u8 *)aHash );
  }

#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
  /* Verify that every entry in the mapping region is still reachable
  ** via the hash table even after the cleanup.
  */

  if( iLimit ){
    int i;           /* Loop counter */
    int iKey;        /* Hash key */
    for(i=1; i<=iLimit; i++){
      for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){
        if( aHash[iKey]==i ) break;
      }
      assert( aHash[iKey]==i );
806
807
808
809
810
811
812


813




814
815
816
817
818


819









820








821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848

  return rc;
}


/*
** Recover the wal-index by reading the write-ahead log file. 


** The caller must hold RECOVER lock on the wal-index file.




*/
static int walIndexRecover(Wal *pWal){
  int rc;                         /* Return Code */
  i64 nSize;                      /* Size of log file */
  u32 aFrameCksum[2] = {0, 0};












  assert( pWal->lockState>SQLITE_SHM_READ );








  memset(&pWal->hdr, 0, sizeof(WalIndexHdr));

  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  if( nSize>WAL_HDRSIZE ){
    u8 aBuf[WAL_HDRSIZE];         /* Buffer to load WAL header into */
    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
    int szFrame;                  /* Number of bytes in buffer aFrame[] */
    u8 *aData;                    /* Pointer to data part of aFrame buffer */
    int iFrame;                   /* Index of last frame read */
    i64 iOffset;                  /* Next offset to read from log file */
    int szPage;                   /* Page size according to the log */
    u32 magic;                    /* Magic value read from WAL header */

    /* Read in the WAL header. */
    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* If the database page size is not a power of two, or is greater than
    ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid 
    ** data. Similarly, if the 'magic' value is invalid, ignore the whole
    ** WAL file.
    */







>
>
|
>
>
>
>





>
>

>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>




|















|







947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014

  return rc;
}


/*
** Recover the wal-index by reading the write-ahead log file. 
**
** This routine first tries to establish an exclusive lock on the
** wal-index to prevent other threads/processes from doing anything
** with the WAL or wal-index while recovery is running.  The
** WAL_RECOVER_LOCK is also held so that other threads will know
** that this thread is running recovery.  If unable to establish
** the necessary locks, this routine returns SQLITE_BUSY.
*/
static int walIndexRecover(Wal *pWal){
  int rc;                         /* Return Code */
  i64 nSize;                      /* Size of log file */
  u32 aFrameCksum[2] = {0, 0};
  int iLock;                      /* Lock offset to lock for checkpoint */
  int nLock;                      /* Number of locks to hold */

  /* Obtain an exclusive lock on all bytes in the locking range not already
  ** locked by the caller. The caller is guaranteed to have locked the
  ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte.
  ** If successful, the same bytes that are locked here are unlocked before
  ** this function returns.
  */
  assert( pWal->ckptLock==1 || pWal->ckptLock==0 );
  assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 );
  assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE );
  assert( pWal->writeLock );
  iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock;
  nLock = SQLITE_SHM_NLOCK - iLock;
  rc = walLockExclusive(pWal, iLock, nLock);
  if( rc ){
    return rc;
  }
  WALTRACE(("WAL%p: recovery begin...\n", pWal));

  memset(&pWal->hdr, 0, sizeof(WalIndexHdr));

  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
  if( rc!=SQLITE_OK ){
    goto recovery_error;
  }

  if( nSize>WAL_HDRSIZE ){
    u8 aBuf[WAL_HDRSIZE];         /* Buffer to load WAL header into */
    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
    int szFrame;                  /* Number of bytes in buffer aFrame[] */
    u8 *aData;                    /* Pointer to data part of aFrame buffer */
    int iFrame;                   /* Index of last frame read */
    i64 iOffset;                  /* Next offset to read from log file */
    int szPage;                   /* Page size according to the log */
    u32 magic;                    /* Magic value read from WAL header */

    /* Read in the WAL header. */
    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
    if( rc!=SQLITE_OK ){
      goto recovery_error;
    }

    /* If the database page size is not a power of two, or is greater than
    ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid 
    ** data. Similarly, if the 'magic' value is invalid, ignore the whole
    ** WAL file.
    */
863
864
865
866
867
868
869
870

871
872
873
874
875
876
877
        aBuf, WAL_HDRSIZE, 0, pWal->hdr.aFrameCksum
    );

    /* Malloc a buffer to read frames into. */
    szFrame = szPage + WAL_FRAME_HDRSIZE;
    aFrame = (u8 *)sqlite3_malloc(szFrame);
    if( !aFrame ){
      return SQLITE_NOMEM;

    }
    aData = &aFrame[WAL_FRAME_HDRSIZE];

    /* Read all frames from the log file. */
    iFrame = 0;
    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
      u32 pgno;                   /* Database page number for frame */







|
>







1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
        aBuf, WAL_HDRSIZE, 0, pWal->hdr.aFrameCksum
    );

    /* Malloc a buffer to read frames into. */
    szFrame = szPage + WAL_FRAME_HDRSIZE;
    aFrame = (u8 *)sqlite3_malloc(szFrame);
    if( !aFrame ){
      rc = SQLITE_NOMEM;
      goto recovery_error;
    }
    aData = &aFrame[WAL_FRAME_HDRSIZE];

    /* Read all frames from the log file. */
    iFrame = 0;
    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
      u32 pgno;                   /* Database page number for frame */
903
904
905
906
907
908
909
910










911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
  if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){
    rc = walIndexRemap(pWal, walMappingSize(1));
  }
  if( rc==SQLITE_OK ){
    pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
    pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
    walIndexWriteHdr(pWal);
  }










  return rc;
}

/*
** Release this connection's wal-index, if one is open.
**
** Does nothing unless pWal->isWindexOpen is set.  Otherwise the wal-index
** lock is dropped, the shared memory is closed (isDelete is passed through
** to sqlite3OsShmClose()), and the open flag is cleared.
*/
static void walIndexClose(Wal *pWal, int isDelete){
  int notUsed;                    /* Lock state output; ignored */
  if( !pWal->isWindexOpen ) return;
  sqlite3OsShmLock(pWal->pDbFd, SQLITE_SHM_UNLOCK, &notUsed);
  sqlite3OsShmClose(pWal->pDbFd, isDelete);
  pWal->isWindexOpen = 0;
}

/* 
** Open a connection to the log file associated with database zDb. The
** database file does not actually have to exist. zDb is used only to
** figure out the name of the log file to open. If the log file does not 
** exist it is created by this call.
**
** A SHARED lock should be held on the database file when this function
** is called. The purpose of this SHARED lock is to prevent any other
** client from unlinking the log or wal-index file. If another process
** were to do this just after this client opened one of these files, the
** system would be badly broken.
**
** If the log file is successfully opened, SQLITE_OK is returned and 
** *ppWal is set to point to a new WAL handle. If an error occurs,
** an SQLite error code is returned and *ppWal is left unmodified.
*/







|
>
>
>
>
>
>
>
>
>
>







|
<
<

|




|
|
<
<



|







1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095


1096
1097
1098
1099
1100
1101
1102
1103


1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
  if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){
    rc = walIndexRemap(pWal, walMappingSize(1));
  }
  if( rc==SQLITE_OK ){
    pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
    pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
    walIndexWriteHdr(pWal);

    /* Zero the checkpoint-header. This is safe because this thread is 
    ** currently holding locks that exclude all other readers, writers and
    ** checkpointers.
    */
    memset((void *)walCkptInfo(pWal), 0, sizeof(WalCkptInfo));
  }

recovery_error:
  WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok"));
  walUnlockExclusive(pWal, iLock, nLock);
  return rc;
}

/*
** Release this connection's wal-index, if one is open.
**
** Does nothing unless pWal->isWIndexOpen is set.  Otherwise the shared
** memory is closed (isDelete is passed through to sqlite3OsShmClose())
** and the open flag is cleared.
*/
static void walIndexClose(Wal *pWal, int isDelete){
  if( !pWal->isWIndexOpen ) return;
  sqlite3OsShmClose(pWal->pDbFd, isDelete);
  pWal->isWIndexOpen = 0;
}

/* 
** Open a connection to the WAL file associated with database zDbName.
** The database file must already be opened on connection pDbFd.


**
** A SHARED lock should be held on the database file when this function
** is called. The purpose of this SHARED lock is to prevent any other
** client from unlinking the WAL or wal-index file. If another process
** were to do this just after this client opened one of these files, the
** system would be badly broken.
**
** If the log file is successfully opened, SQLITE_OK is returned and 
** *ppWal is set to point to a new WAL handle. If an error occurs,
** an SQLite error code is returned and *ppWal is left unmodified.
*/
974
975
976
977
978
979
980

981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998

999
1000
1001
1002
1003
1004
1005
    return SQLITE_NOMEM;
  }

  pRet->pVfs = pVfs;
  pRet->pWalFd = (sqlite3_file *)&pRet[1];
  pRet->pDbFd = pDbFd;
  pRet->szWIndex = -1;

  sqlite3_randomness(8, &pRet->hdr.aSalt);
  pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd;
  sqlite3_snprintf(nWal, zWal, "%s-wal", zDbName);
  rc = sqlite3OsShmOpen(pDbFd);

  /* Open file handle on the write-ahead log file. */
  if( rc==SQLITE_OK ){
    pRet->isWindexOpen = 1;
    flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_JOURNAL);
    rc = sqlite3OsOpen(pVfs, zWal, pRet->pWalFd, flags, &flags);
  }

  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);
    sqlite3_free(pRet);
  }else{
    *ppWal = pRet;

  }
  return rc;
}

/*
** Find the smallest page number out of all pages held in the WAL that
** has not been returned by any prior invocation of this method on the







>







|










>







1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
    return SQLITE_NOMEM;
  }

  pRet->pVfs = pVfs;
  pRet->pWalFd = (sqlite3_file *)&pRet[1];
  pRet->pDbFd = pDbFd;
  pRet->szWIndex = -1;
  pRet->readLock = -1;
  sqlite3_randomness(8, &pRet->hdr.aSalt);
  pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd;
  sqlite3_snprintf(nWal, zWal, "%s-wal", zDbName);
  rc = sqlite3OsShmOpen(pDbFd);

  /* Open file handle on the write-ahead log file. */
  if( rc==SQLITE_OK ){
    pRet->isWIndexOpen = 1;
    flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_JOURNAL);
    rc = sqlite3OsOpen(pVfs, zWal, pRet->pWalFd, flags, &flags);
  }

  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);
    sqlite3_free(pRet);
  }else{
    *ppWal = pRet;
    WALTRACE(("WAL%d: opened\n", pRet));
  }
  return rc;
}

/*
** Find the smallest page number out of all pages held in the WAL that
** has not been returned by any prior invocation of this method on the
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
  }

  /* This routine only runs while holding SQLITE_SHM_CHECKPOINT.  No other
  ** thread is able to write to shared memory while this routine is
  ** running (or, indeed, while the WalIterator object exists).  Hence,
  ** we can cast off the volatile qualifacation from shared memory
  */
  assert( pWal->lockState==SQLITE_SHM_CHECKPOINT );
  aData = (u32*)pWal->pWiData;

  /* Allocate space for the WalIterator object */
  iLast = pWal->hdr.mxFrame;
  nSegment = (iLast >> 8) + 1;
  nFinal = (iLast & 0x000000FF);
  nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256);







|







1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
  }

  /* This routine only runs while holding SQLITE_SHM_CHECKPOINT.  No other
  ** thread is able to write to shared memory while this routine is
  ** running (or, indeed, while the WalIterator object exists).  Hence,
  ** we can cast off the volatile qualification from shared memory
  */
  assert( pWal->ckptLock );
  aData = (u32*)pWal->pWiData;

  /* Allocate space for the WalIterator object */
  iLast = pWal->hdr.mxFrame;
  nSegment = (iLast >> 8) + 1;
  nFinal = (iLast & 0x000000FF);
  nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256);
1175
1176
1177
1178
1179
1180
1181
1182

1183



























1184

1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196




1197
1198
1199
1200
1201

1202
1203

1204

1205

1206



















1207
1208







1209
1210
1211
1212
1213
1214
1215
1216

1217
1218
1219
1220
1221
1222
1223
1224


1225

1226
1227
1228
1229
1230
1231
1232
1233


1234
1235
1236
1237
1238
1239



1240
1241
1242
1243
1244
1245
1246
1247
1248

/*
** Free an iterator allocated by walIteratorInit().
**
** The whole iterator is released with a single sqlite3_free() call.
** NOTE(review): callers may pass a NULL pointer here; this relies on
** sqlite3_free() accepting NULL - confirm against the allocator docs.
*/
static void walIteratorFree(WalIterator *p){
  sqlite3_free(p);
}


/*
** Checkpoint the contents of the log file.
**
** Every page held in the WAL is copied into the database file, the
** database file is truncated to pWal->hdr.nPage pages, and the wal-index
** header is reset (mxFrame=0, checkpoint counter and salt advanced).
**
** Parameters:
**   pWal        Wal connection to checkpoint.
**   sync_flags  Flags passed to sqlite3OsSync() for both the WAL and the
**               database file.  If zero, no syncs are performed.
**   nBuf        Size of zBuf in bytes.  Must equal the page size.
**   zBuf        Scratch buffer used to move one page at a time.
**
** Returns SQLITE_OK on success (including when the WAL holds no frames),
** SQLITE_CORRUPT_BKPT if nBuf does not match the page size recorded in
** the wal-index header, or the error code of the first failing step.
*/
static int walCheckpoint(
  Wal *pWal,                      /* Wal connection */
  int sync_flags,                 /* Flags for OsSync() (or 0) */
  int nBuf,                       /* Size of zBuf in bytes */
  u8 *zBuf                        /* Temporary buffer to use */
){
  int rc;                         /* Return code */
  int szPage = pWal->hdr.szPage;  /* Database page-size */
  WalIterator *pIter = 0;         /* Wal iterator context */
  u32 iDbpage = 0;                /* Next database page to write */
  u32 iFrame = 0;                 /* Wal frame containing data for iDbpage */

  /* Allocate the iterator.  Nothing more to do if the WAL is empty. */
  rc = walIteratorInit(pWal, &pIter);
  if( rc!=SQLITE_OK || pWal->hdr.mxFrame==0 ){
    goto out;
  }

  /* The scratch buffer must hold exactly one page. */
  if( pWal->hdr.szPage!=nBuf ){
    rc = SQLITE_CORRUPT_BKPT;
    goto out;
  }

  /* Sync the log file to disk */
  if( sync_flags ){
    rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
    if( rc!=SQLITE_OK ) goto out;
  }

  /* Iterate through the contents of the log, copying data to the db file. */
  while( 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
    rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, 
        walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE
    );
    if( rc!=SQLITE_OK ) goto out;
    /* Compute the destination offset in 64-bit arithmetic.  A 32-bit
    ** product wraps once the offset reaches 4GiB, which would write the
    ** page to the wrong place in a large database. */
    rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage,
        (i64)(iDbpage-1)*(i64)szPage
    );
    if( rc!=SQLITE_OK ) goto out;
  }

  /* Truncate the database file */
  rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage));
  if( rc!=SQLITE_OK ) goto out;

  /* Sync the database file. If successful, update the wal-index. */
  if( sync_flags ){
    rc = sqlite3OsSync(pWal->pDbFd, sync_flags);
    if( rc!=SQLITE_OK ) goto out;
  }

  /* Mark the WAL as empty, bump the checkpoint counter, increment the
  ** first salt value and randomize the second, then publish the header. */
  pWal->hdr.mxFrame = 0;
  pWal->nCkpt++;
  sqlite3Put4byte((u8*)pWal->hdr.aSalt,
                   1 + sqlite3Get4byte((u8*)pWal->hdr.aSalt));
  sqlite3_randomness(4, &pWal->hdr.aSalt[1]);
  walIndexWriteHdr(pWal);

 out:
  walIteratorFree(pIter);
  return rc;
}

/*
** Close a connection to a log file.
*/








>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>












>
>
>
>




|
>


>

>
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
>
>
>
>
>
>
|
|
|
<
|

|
|
>
|
|
|
|
|
|
|

>
>
|
>
|
<
<
<
|
|
<
|
>
>
|
<
|
|
|
|
>
>
>
|
|







1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449

1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467



1468
1469

1470
1471
1472
1473

1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489

/*
** Free an iterator allocated by walIteratorInit().
**
** The whole iterator is released with a single sqlite3_free() call.
** NOTE(review): callers may pass a NULL pointer here; this relies on
** sqlite3_free() accepting NULL - confirm against the allocator docs.
*/
static void walIteratorFree(WalIterator *p){
  sqlite3_free(p);
}


/*
** Copy as much content as we can from the WAL back into the database file
** in response to an sqlite3_wal_checkpoint() request or the equivalent.
**
** The amount of information copied from WAL to database might be limited
** by active readers.  This routine will never overwrite a database page
** that a concurrent reader might be using.
**
** All I/O barrier operations (a.k.a fsyncs) occur in this routine when
** SQLite is in WAL-mode in synchronous=NORMAL.  That means that if 
** checkpoints are always run by a background thread or background 
** process, foreground threads will never block on a lengthy fsync call.
**
** Fsync is called on the WAL before writing content out of the WAL and
** into the database.  This ensures that the new content is persistent
** in the WAL and can be recovered following a power-loss or hard reset.
**
** Fsync is also called on the database file if (and only if) the entire
** WAL content is copied into the database file.  This second fsync makes
** it safe to delete the WAL since the new content will persist in the
** database file.
**
** This routine uses and updates the nBackfill field of the wal-index header.
** This is the only routine that will increase the value of nBackfill.
** (A WAL reset or recovery will revert nBackfill to zero, but not increase
** its value.)
**
** The caller must be holding sufficient locks to ensure that no other
** checkpoint is running (in any other thread or process) at the same
** time.
*/
static int walCheckpoint(
  Wal *pWal,                      /* Wal connection */
  int sync_flags,                 /* Flags for OsSync() (or 0) */
  int nBuf,                       /* Size of zBuf in bytes */
  u8 *zBuf                        /* Temporary buffer to use */
){
  int rc;                         /* Return code */
  int szPage = pWal->hdr.szPage;  /* Database page-size */
  WalIterator *pIter = 0;         /* Wal iterator context */
  u32 iDbpage = 0;                /* Next database page to write */
  u32 iFrame = 0;                 /* Wal frame containing data for iDbpage */
  u32 mxSafeFrame;                /* Max frame that can be backfilled */
  int i;                          /* Loop counter */
  volatile WalIndexHdr *pHdr;     /* The actual wal-index header in SHM */
  volatile WalCkptInfo *pInfo;    /* The checkpoint status information */

  /* Allocate the iterator */
  rc = walIteratorInit(pWal, &pIter);
  if( rc!=SQLITE_OK || pWal->hdr.mxFrame==0 ){
    walIteratorFree(pIter);
    return rc;
  }

  /*** TODO:  Move this test out to the caller.  Make it an assert() here ***/
  if( pWal->hdr.szPage!=nBuf ){
    walIteratorFree(pIter);
    return SQLITE_CORRUPT_BKPT;
  }

  /* Compute in mxSafeFrame the index of the last frame of the WAL that is
  ** safe to write into the database.  Frames beyond mxSafeFrame might
  ** overwrite database pages that are in use by active readers and thus
  ** cannot be backfilled from the WAL.
  */
  mxSafeFrame = pWal->hdr.mxFrame;
  pHdr = (volatile WalIndexHdr*)pWal->pWiData;
  pInfo = (volatile WalCkptInfo*)&pHdr[2];
  assert( pInfo==walCkptInfo(pWal) );
  for(i=1; i<WAL_NREADER; i++){
    u32 y = pInfo->aReadMark[i];
    if( y>0 && (mxSafeFrame==0 || mxSafeFrame>=y) ){
      if( y<=pWal->hdr.mxFrame
       && walLockExclusive(pWal, WAL_READ_LOCK(i), 1)==SQLITE_OK
      ){
        pInfo->aReadMark[i] = 0;
        walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
      }else{
        mxSafeFrame = y-1;
      }
    }
  }

  if( pInfo->nBackfill<mxSafeFrame
   && (rc = walLockExclusive(pWal, WAL_READ_LOCK(0), 1))==SQLITE_OK
  ){
    u32 nBackfill = pInfo->nBackfill;

    /* Sync the WAL to disk */
    if( sync_flags ){
      rc = sqlite3OsSync(pWal->pWalFd, sync_flags);

    }

    /* Iterate through the contents of the WAL, copying data to the db file. */
    while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
      if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue;
      rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, 
          walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE
      );
      if( rc!=SQLITE_OK ) break;
      rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, (iDbpage-1)*szPage);
      if( rc!=SQLITE_OK ) break;
    }

    /* If work was actually accomplished... */
    if( rc==SQLITE_OK && pInfo->nBackfill<mxSafeFrame ){
      pInfo->nBackfill = mxSafeFrame;
      if( mxSafeFrame==pHdr[0].mxFrame && sync_flags ){
        rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage));



        if( rc==SQLITE_OK && sync_flags ){
          rc = sqlite3OsSync(pWal->pDbFd, sync_flags);

        }
      }
    }


    /* Release the reader lock held while backfilling */
    walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
  }else{
    /* Reset the return code so as not to report a checkpoint failure
    ** just because active readers prevent any backfill.
    */
    rc = SQLITE_OK;
  }

  walIteratorFree(pIter);
  return rc;
}

/*
** Close a connection to a log file.
*/
1262
1263
1264
1265
1266
1267
1268

1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280

1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292

1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
    ** the database. In this case checkpoint the database and unlink both
    ** the wal and wal-index files.
    **
    ** The EXCLUSIVE lock is not released before returning.
    */
    rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
    if( rc==SQLITE_OK ){

      rc = sqlite3WalCheckpoint(pWal, sync_flags, nBuf, zBuf, 0, 0);
      if( rc==SQLITE_OK ){
        isDelete = 1;
      }
      walIndexUnmap(pWal);
    }

    walIndexClose(pWal, isDelete);
    sqlite3OsClose(pWal->pWalFd);
    if( isDelete ){
      sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0);
    }

    sqlite3_free(pWal);
  }
  return rc;
}

/*
** Try to read the wal-index header.  Return 0 on success and 1 if
** there is a problem.
**
** The wal-index is in shared memory.  Another thread or process might
** be writing the header at the same time this procedure is trying to
** read it, which might result in inconsistency.  A dirty read is detected

** by verifying a checksum on the header.
**
** If and only if the read is consistent and the header is different from
** pWal->hdr, then pWal->hdr is updated to the content of the new header
** and *pChanged is set to 1.
**
** If the checksum cannot be verified return non-zero. If the header
** is read successfully and the checksum verified, return zero.
*/
int walIndexTryHdr(Wal *pWal, int *pChanged){
  u32 aCksum[2];               /* Checksum on the header content */
  WalIndexHdr h1, h2;          /* Two copies of the header content */
  WalIndexHdr *aHdr;           /* Header in shared memory */

  if( pWal->szWIndex < WALINDEX_HDR_SIZE ){
    /* If the wal-index is not large enough to hold the header, then
    ** assume the header is invalid. */
    return 1;
  }
  assert( pWal->pWiData );

  /* Read the header. The caller may or may not have an exclusive 
  ** (WRITE, PENDING, CHECKPOINT or RECOVER) lock on the wal-index
  ** file, meaning it is possible that an inconsistent snapshot is read
  ** from the file. If this happens, return non-zero.
  **
  ** There are two copies of the header at the beginning of the wal-index.
  ** When reading, read [0] first then [1].  Writes are in the reverse order.
  ** Memory barriers are used to prevent the compiler or the hardware from
  ** reordering the reads and writes.
  */
  aHdr = (WalIndexHdr*)pWal->pWiData;
  memcpy(&h1, &aHdr[0], sizeof(h1));
  sqlite3OsShmBarrier(pWal->pDbFd);
  memcpy(&h2, &aHdr[1], sizeof(h2));

  if( memcmp(&h1, &h2, sizeof(h1))!=0 ){
    return 1;   /* Dirty read */
  }  
  if( h1.szPage==0 ){
    return 1;   /* Malformed header - probably all zeros */
  }
  walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum);
  if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){
    return 1;   /* Checksum does not match */
  }








>
|











>












>
|




















|
|
|















|







1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
    ** the database. In this case checkpoint the database and unlink both
    ** the wal and wal-index files.
    **
    ** The EXCLUSIVE lock is not released before returning.
    */
    rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
    if( rc==SQLITE_OK ){
      pWal->exclusiveMode = 1;
      rc = sqlite3WalCheckpoint(pWal, sync_flags, nBuf, zBuf);
      if( rc==SQLITE_OK ){
        isDelete = 1;
      }
      walIndexUnmap(pWal);
    }

    walIndexClose(pWal, isDelete);
    sqlite3OsClose(pWal->pWalFd);
    if( isDelete ){
      sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0);
    }
    WALTRACE(("WAL%p: closed\n", pWal));
    sqlite3_free(pWal);
  }
  return rc;
}

/*
** Try to read the wal-index header.  Return 0 on success and 1 if
** there is a problem.
**
** The wal-index is in shared memory.  Another thread or process might
** be writing the header at the same time this procedure is trying to
** read it, which might result in inconsistency.  A dirty read is detected
** by verifying that both copies of the header are the same and also by
** a checksum on the header.
**
** If and only if the read is consistent and the header is different from
** pWal->hdr, then pWal->hdr is updated to the content of the new header
** and *pChanged is set to 1.
**
** If the checksum cannot be verified return non-zero. If the header
** is read successfully and the checksum verified, return zero.
*/
int walIndexTryHdr(Wal *pWal, int *pChanged){
  u32 aCksum[2];               /* Checksum on the header content */
  WalIndexHdr h1, h2;          /* Two copies of the header content */
  WalIndexHdr *aHdr;           /* Header in shared memory */

  if( pWal->szWIndex < WALINDEX_HDR_SIZE ){
    /* If the wal-index is not large enough to hold the header, then
    ** assume the header is invalid. */
    return 1;
  }
  assert( pWal->pWiData );

  /* Read the header. This might happen concurrently with a write to the
  ** same area of shared memory on a different CPU in an SMP system,
  ** meaning it is possible that an inconsistent snapshot is read
  ** from the file. If this happens, return non-zero.
  **
  ** There are two copies of the header at the beginning of the wal-index.
  ** When reading, read [0] first then [1].  Writes are in the reverse order.
  ** Memory barriers are used to prevent the compiler or the hardware from
  ** reordering the reads and writes.
  */
  aHdr = (WalIndexHdr*)pWal->pWiData;
  memcpy(&h1, &aHdr[0], sizeof(h1));
  sqlite3OsShmBarrier(pWal->pDbFd);
  memcpy(&h2, &aHdr[1], sizeof(h2));

  if( memcmp(&h1, &h2, sizeof(h1))!=0 ){
    return 1;   /* Dirty read */
  }  
  if( h1.isInit==0 ){
    return 1;   /* Malformed header - probably all zeros */
  }
  walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum);
  if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){
    return 1;   /* Checksum does not match */
  }

1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408

1409




1410
1411

1412

1413
1414
1415

1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430




















































































































































1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446

1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466

1467
1468
1469
1470
1471
1472
1473

1474
1475
1476
1477





1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489



1490
1491
1492
1493


1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
** after this routine returns.
**
** If the wal-index header is successfully read, return SQLITE_OK. 
** Otherwise an SQLite error code.
*/
static int walIndexReadHdr(Wal *pWal, int *pChanged){
  int rc;                         /* Return code */
  int lockState;                  /* pWal->lockState before running recovery */

  /* The caller must already hold at least a READ lock and must supply
  ** a place to report whether the header changed.
  */
  assert( pWal->lockState>=SQLITE_SHM_READ );
  assert( pChanged );

  /* Map just enough of the wal-index to cover the header */
  rc = walIndexMap(pWal, walMappingSize(1));
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* First attempt to read the wal-index header. This may fail for one
  ** of two reasons: (a) the wal-index does not yet exist or has been
  ** corrupted and needs to be constructed by running recovery, or (b)
  ** the caller is only holding a READ lock and made a dirty read of
  ** the wal-index header.
  **
  ** A dirty read of the wal-index header occurs if another thread or
  ** process happens to be writing to the wal-index header at roughly
  ** the same time as this thread is reading it. In this case it is 
  ** possible that an inconsistent header is read (which is detected
  ** using the header checksum mechanism).
  */
  if( walIndexTryHdr(pWal, pChanged)!=0 ){

    /* If the first attempt to read the header failed, lock the wal-index
    ** file with an exclusive lock and try again. If the header checksum 
    ** verification fails again, we can be sure that it is not simply a
    ** dirty read, but that the wal-index really does need to be 
    ** reconstructed by running log recovery.
    **
    ** In the paragraph above, an "exclusive lock" may be any of WRITE,
    ** PENDING, CHECKPOINT or RECOVER. If any of these are already held,
    ** no locking operations are required. If the caller currently holds
    ** a READ lock, then upgrade to a RECOVER lock before re-reading the
    ** wal-index header and revert to a READ lock before returning.
    */
    lockState = pWal->lockState;
    /* The || short-circuits: walSetLock() is only called (and rc only
    ** overwritten) when nothing stronger than READ is held.  If the
    ** upgrade fails, rc carries the error out through the final return.
    */
    if( lockState>SQLITE_SHM_READ
     || SQLITE_OK==(rc = walSetLock(pWal, SQLITE_SHM_RECOVER)) 
    ){

      /* Second attempt, this time under an exclusive lock */
      if( walIndexTryHdr(pWal, pChanged) ){




        /* Still unreadable: rebuild the wal-index and report a change */
        *pChanged = 1;
        rc = walIndexRecover(pWal);

      }

      /* Undo the upgrade performed above.  The result of this downgrade
      ** is not checked.
      */
      if( lockState==SQLITE_SHM_READ ){
        walSetLock(pWal, SQLITE_SHM_READ);
      }

    }
  }

  /* Make sure the mapping is large enough to cover the entire wal-index */
  if( rc==SQLITE_OK ){
    int szWanted = walMappingSize(pWal->hdr.mxFrame);
    if( pWal->szWIndex<szWanted ){
      rc = walIndexMap(pWal, szWanted);
    }
  }

  return rc;
}

/*




















































































































































** Take a snapshot of the state of the WAL and wal-index for the current
** instant in time.  The current thread will continue to use this snapshot.
** Other threads might continue appending to the WAL and wal-index but
** the extra content appended will be ignored by the current thread.
**
** A snapshot is like a read transaction.
**
** No other threads are allowed to run a checkpoint while this thread is
** holding the snapshot since a checkpoint would remove data out from under
** this thread.
**
** If this call obtains a new read-lock and the database contents have been
** modified since the most recent call to WalCloseSnapshot() on this Wal
** connection, then *pChanged is set to 1 before returning. Otherwise, it 
** is left unmodified. This is used by the pager layer to determine whether 
** or not any cached pages may be safely reused.

*/
int sqlite3WalOpenSnapshot(Wal *pWal, int *pChanged){
  /* Step 1: take the READ lock.  Without it there is no snapshot to open,
  ** so just drop any mapping and hand the error back.
  */
  int rc = walSetLock(pWal, SQLITE_SHM_READ);
  if( rc!=SQLITE_OK ){
    walIndexUnmap(pWal);
    return rc;
  }
  assert( pWal->lockState==SQLITE_SHM_READ );

  /* Step 2: load the wal-index header (running recovery if required).
  ** If that fails, give the READ lock back before reporting the error.
  */
  rc = walIndexReadHdr(pWal, pChanged);
  if( rc!=SQLITE_OK ){
    sqlite3WalCloseSnapshot(pWal);
  }

  /* The mapping is never kept past the end of this call */
  walIndexUnmap(pWal);
  return rc;
}

/*

** Unlock the current snapshot.
*/
void sqlite3WalCloseSnapshot(Wal *pWal){
  /* Legal only while a snapshot is open (READ) or when no lock is held
  ** at all (UNLOCK).  It must not be called while a stronger lock is held.
  */
  assert( pWal->lockState==SQLITE_SHM_READ
       || pWal->lockState==SQLITE_SHM_UNLOCK
  );
  /* Move to the UNLOCK state.  The return code of walSetLock() is
  ** not checked here.
  */
  walSetLock(pWal, SQLITE_SHM_UNLOCK);

}

/*
** Read a page from the log, if it is present. 





*/
int sqlite3WalRead(
  Wal *pWal,                      /* WAL handle */
  Pgno pgno,                      /* Database page number to read data for */
  int *pInWal,                    /* OUT: True if data is read from WAL */
  int nOut,                       /* Size of buffer pOut in bytes */
  u8 *pOut                        /* Buffer to write page data to */
){
  int rc;                         /* Return code */
  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
  u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
  int iHash;                      /* Used to loop through N hash tables */




  /* If the "last page" field of the wal-index header snapshot is 0, then
  ** no data will be read from the wal under any circumstances. Return early
  ** in this case to avoid the walIndexMap/Unmap overhead.


  */
  if( iLast==0 ){
    *pInWal = 0;
    return SQLITE_OK;
  }

  /* Ensure the wal-index is mapped. */
  assert( pWal->lockState==SQLITE_SHM_READ||pWal->lockState==SQLITE_SHM_WRITE );
  rc = walIndexMap(pWal, walMappingSize(iLast));
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* Search the hash table or tables for an entry matching page number
  ** pgno. Each iteration of the following for() loop searches one







|

<






|
<
<
<
<
|
<
<
<
<
<

|

|
|
<
<
<
<
<
<
<
<
<
|
|
|
|
|
>
|
>
>
>
>
|

>

>
|
|
<
>















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|

|
|

<
<
<
<
<
<
|
<
|
|
<
>

|


<
<
|
<
|
<
<
<
<
<
|





>
|

|
|
|
<
|
>



|
>
>
>
>
>












>
>
>



|
>
>

|





<







1607
1608
1609
1610
1611
1612
1613
1614
1615

1616
1617
1618
1619
1620
1621
1622




1623





1624
1625
1626
1627
1628









1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646

1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815






1816

1817
1818

1819
1820
1821
1822
1823


1824

1825





1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837

1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876

1877
1878
1879
1880
1881
1882
1883
** after this routine returns.
**
** If the wal-index header is successfully read, return SQLITE_OK. 
** Otherwise an SQLite error code.
*/
static int walIndexReadHdr(Wal *pWal, int *pChanged){
  int rc;                         /* Return code */
  int badHdr;                     /* True if a header read failed */


  assert( pChanged );

  /* Map just enough of the wal-index to cover the header */
  rc = walIndexMap(pWal, walMappingSize(1));
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* Try once to read the header straight out.  This works most of the
  ** time.
  */
  badHdr = walIndexTryHdr(pWal, pChanged);

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  **
  ** The assert() records that a failed read is only expected when this
  ** connection does not itself hold the WRITE lock.
  */
  assert( badHdr==0 || pWal->writeLock==0 );
  if( badHdr ){
    rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
    if( rc==SQLITE_OK ){
      /* Flag the lock as held for the duration of the retry/recovery */
      pWal->writeLock = 1;
      badHdr = walIndexTryHdr(pWal, pChanged);
      if( badHdr ){
        /* If the wal-index header is still malformed even while holding
        ** a WRITE lock, it can only mean that the header is corrupted and
        ** needs to be reconstructed.  So run recovery to do exactly that.
        */
        rc = walIndexRecover(pWal);
        *pChanged = 1;
      }
      walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
      pWal->writeLock = 0;
    }else if( rc!=SQLITE_BUSY ){

      /* Hard locking error: report it immediately.  SQLITE_BUSY is not
      ** returned here; it falls through, skips the remap below because
      ** rc!=SQLITE_OK, and reaches the caller via the final return.
      */
      return rc;
    }
  }

  /* Make sure the mapping is large enough to cover the entire wal-index */
  if( rc==SQLITE_OK ){
    int szWanted = walMappingSize(pWal->hdr.mxFrame);
    if( pWal->szWIndex<szWanted ){
      rc = walIndexMap(pWal, szWanted);
    }
  }

  return rc;
}

/*
** This is the value that walTryBeginRead returns when it needs to
** be retried.
*/
#define WAL_RETRY  (-1)

/*
** Attempt to start a read transaction.  This might fail due to a race or
** other transient condition.  When that happens, it returns WAL_RETRY to
** indicate to the caller that it is safe to retry immediately.
**
** On success return SQLITE_OK.  On a permanent failure (such as an
** I/O error or an SQLITE_BUSY because another process is running
** recovery) return a positive error code.
**
** On success, this routine obtains a read lock on
** WAL_READ_LOCK(pWal->readLock).  The pWal->readLock integer is
** in the range 0 <= pWal->readLock < WAL_NREADER.  If pWal->readLock==(-1)
** that means the Wal does not hold any read lock.  The reader must not
** access any database page that is modified by a WAL frame up to and
** including frame number aReadMark[pWal->readLock].  The reader will
** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0
** Or if pWal->readLock==0, then the reader will ignore the WAL
** completely and get all content directly from the database file.
** When the read transaction is completed, the caller must release the
** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1.
**
** This routine uses the nBackfill and aReadMark[] fields of the header
** to select a particular WAL_READ_LOCK() that strives to let the
** checkpoint process do as much work as possible.  This routine might
** update values of the aReadMark[] array in the header, but if it does
** so it takes care to hold an exclusive lock on the corresponding
** WAL_READ_LOCK() while changing values.
**
** If useWal is zero the wal-index header is (re)read with
** walIndexReadHdr() and read-lock 0 may be selected.  If useWal is
** non-zero the header already in pWal->hdr is used as-is, only the
** mapping is established, and read-lock 0 is never selected.
*/
static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal){
  volatile WalIndexHdr *pHdr;     /* Header of the wal-index */
  volatile WalCkptInfo *pInfo;    /* Checkpoint information in wal-index */
  u32 mxReadMark;                 /* Largest aReadMark[] value */
  int mxI;                        /* Index of largest aReadMark[] value */
  int i;                          /* Loop counter */
  int rc;                         /* Return code  */

  assert( pWal->readLock<0 );     /* Not currently locked */

  if( !useWal ){
    rc = walIndexReadHdr(pWal, pChanged);
    if( rc==SQLITE_BUSY ){
      /* If there is not a recovery running in another thread or process
      ** then convert BUSY errors to WAL_RETRY.  If recovery is known to
      ** be running, convert BUSY to BUSY_RECOVERY.  There is a race here
      ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY
      ** would be technically correct.  But the race is benign since with
      ** WAL_RETRY this routine will be called again and will probably be
      ** right on the second iteration.
      */
      rc = walLockShared(pWal, WAL_RECOVER_LOCK);
      if( rc==SQLITE_OK ){
        walUnlockShared(pWal, WAL_RECOVER_LOCK);
        rc = WAL_RETRY;
      }else if( rc==SQLITE_BUSY ){
        rc = SQLITE_BUSY_RECOVERY;
      }
    }
  }else{
    rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
  }
  /* WAL_RETRY is non-zero, so retries and hard errors both exit here */
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* The checkpoint info is located immediately after the two copies of
  ** the header in the mapped wal-index.
  */
  pHdr = (volatile WalIndexHdr*)pWal->pWiData;
  pInfo = (volatile WalCkptInfo*)&pHdr[2];
  assert( pInfo==walCkptInfo(pWal) );
  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
    /* The WAL has been completely backfilled (or it is empty).
    ** and can be safely ignored.
    */
    rc = walLockShared(pWal, WAL_READ_LOCK(0));
    if( rc==SQLITE_OK ){
      /* Re-check the shared header now that the lock is held.  If frames
      ** were appended in the meantime the snapshot is stale: start over.
      */
      if( pHdr->mxFrame!=pWal->hdr.mxFrame ){
        walUnlockShared(pWal, WAL_READ_LOCK(0));
        return WAL_RETRY;
      }
      pWal->readLock = 0;
      return SQLITE_OK;
    }else if( rc!=SQLITE_BUSY ){
      return rc;
    }
    /* SQLITE_BUSY on lock 0: fall through and use a read-mark slot */
  }

  /* If we get this far, it means that the reader will want to use
  ** the WAL to get at content from recent commits.  The job now is
  ** to select one of the aReadMark[] entries that is closest to
  ** but not exceeding pWal->hdr.mxFrame and lock that entry.
  */
  mxReadMark = 0;
  mxI = 0;
  /* Find the largest read-mark among slots 1..WAL_NREADER-1 */
  for(i=1; i<WAL_NREADER; i++){
    u32 thisMark = pInfo->aReadMark[i];
    if( mxReadMark<thisMark ){
      mxReadMark = thisMark;
      mxI = i;
    }
  }
  if( mxI==0 ){
    /* If we get here, it means that all of the aReadMark[] entries between
    ** 1 and WAL_NREADER-1 are zero.  Try to initialize aReadMark[1] to
    ** be mxFrame, then retry.
    **
    ** The value stored is mxFrame+1; walCheckpoint() in this file treats
    ** aReadMark[i]-1 as the last frame it may backfill.  WAL_RETRY is
    ** returned whether or not the exclusive lock was obtained.
    */
    rc = walLockExclusive(pWal, WAL_READ_LOCK(1), 1);
    if( rc==SQLITE_OK ){
      pInfo->aReadMark[1] = pWal->hdr.mxFrame+1;
      walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1);
    }
    return WAL_RETRY;
  }else{
    /* If the best existing mark lags behind this snapshot, try to claim
    ** the first slot that can be locked exclusively and advance it to
    ** match the snapshot.  If no slot can be claimed, the best existing
    ** mark (mxI) is used unchanged.
    */
    if( mxReadMark < pWal->hdr.mxFrame ){
      for(i=1; i<WAL_NREADER; i++){
        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
        if( rc==SQLITE_OK ){
          mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame+1;
          mxI = i;
          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
          break;
        }
      }
    }

    /* Take the shared lock on the chosen slot.  A busy lock is treated
    ** as transient; any other failure is returned as-is.
    */
    rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
    if( rc ){
      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
    }
    /* Between choosing the slot and locking it, the mark or the header
    ** may have changed.  Verify the mark and both header copies (with a
    ** memory barrier between the two header reads); on any mismatch give
    ** the lock back and ask the caller to retry.
    */
    if( pInfo->aReadMark[mxI]!=mxReadMark
     || pHdr[0].mxFrame!=pWal->hdr.mxFrame
     || (sqlite3OsShmBarrier(pWal->pDbFd), pHdr[1].mxFrame!=pWal->hdr.mxFrame)
    ){
      walUnlockShared(pWal, WAL_READ_LOCK(mxI));
      return WAL_RETRY;
    }else{
      pWal->readLock = mxI;
    }
  }
  /* rc is SQLITE_OK here: the shared lock above succeeded */
  return rc;
}

/*
** Begin a read transaction on the database.
**
** Historically named sqlite3OpenSnapshot(), which describes it well: the
** routine captures the state of the WAL and wal-index as of right now.
** The calling thread keeps using that captured state; anything other
** threads append to the WAL or wal-index afterwards is invisible to it.
**
** *pChanged is set to 1 before returning if the database content has
** changed since the previous read transaction.  The Pager layer uses
** this to learn that its cache is stale and must be flushed.
**
** The attempt is repeated for as long as walTryBeginRead() reports the
** transient WAL_RETRY condition.  The final result code is returned and
** the wal-index is unmapped before returning.
*/
int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
  int rc = WAL_RETRY;             /* Result of the most recent attempt */

  while( rc==WAL_RETRY ){
    rc = walTryBeginRead(pWal, pChanged, 0);
  }
  walIndexUnmap(pWal);
  return rc;
}

/*
** End the current read transaction, if there is one.
**
** The only work involved is dropping the shared read-lock recorded in
** pWal->readLock and marking the connection as holding no read-lock
** (readLock==-1).  Calling this with no read transaction open is a no-op.
*/
void sqlite3WalEndReadTransaction(Wal *pWal){
  int iLock = pWal->readLock;     /* Index of the read-lock held, or <0 */

  if( iLock<0 ) return;
  walUnlockShared(pWal, WAL_READ_LOCK(iLock));
  pWal->readLock = -1;
}

/*
** Read a page from the WAL, if it is present in the WAL and if the 
** current read transaction is configured to use the WAL.  
**
** The *pInWal is set to 1 if the requested page is in the WAL and
** has been loaded.  Or *pInWal is set to 0 if the page was not in 
** the WAL and needs to be read out of the database.
*/
int sqlite3WalRead(
  Wal *pWal,                      /* WAL handle */
  Pgno pgno,                      /* Database page number to read data for */
  int *pInWal,                    /* OUT: True if data is read from WAL */
  int nOut,                       /* Size of buffer pOut in bytes */
  u8 *pOut                        /* Buffer to write page data to */
){
  int rc;                         /* Return code */
  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
  u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
  int iHash;                      /* Used to loop through N hash tables */

  /* This routine is only called from within a read transaction */
  assert( pWal->readLock>=0 );

  /* If the "last page" field of the wal-index header snapshot is 0, then
  ** no data will be read from the wal under any circumstances. Return early
  ** in this case to avoid the walIndexMap/Unmap overhead.  Likewise, if
  ** pWal->readLock==0, then the WAL is ignored by the reader so
  ** return early, as if the WAL were empty.
  */
  if( iLast==0 || pWal->readLock==0 ){
    *pInWal = 0;
    return SQLITE_OK;
  }

  /* Ensure the wal-index is mapped. */

  rc = walIndexMap(pWal, walMappingSize(iLast));
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* Search the hash table or tables for an entry matching page number
  ** pgno. Each iteration of the following for() loop searches one
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614

1615
1616
1617







1618


1619
1620
1621

1622


1623
1624






1625

1626
1627

1628
1629
1630
1631
1632
1633

1634
1635
1636
1637



1638








1639







1640






1641
1642

1643


1644

1645

1646










1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
}


/* 
** Set *pPgno to the size of the database file (or zero, if unknown).
**
** The value is taken from the nPage field of this connection's private
** copy of the wal-index header (pWal->hdr); nothing is read from shared
** memory or from disk.  The connection must be in the READ or WRITE lock
** state when this is called.
*/
void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){
  assert( pWal->lockState==SQLITE_SHM_READ
       || pWal->lockState==SQLITE_SHM_WRITE );
  *pPgno = pWal->hdr.nPage;
}


/* 
** This function returns SQLITE_OK if the caller may write to the database.
** Otherwise, if the caller is operating on a snapshot that has already







** been overwritten by another writer, SQLITE_BUSY is returned.


*/
int sqlite3WalWriteLock(Wal *pWal, int op){
  int rc;                         /* Return code */

  /* op==0: give up the WRITE lock, if held, by dropping back to READ.
  ** When no WRITE lock is held there is nothing to do.
  */
  if( !op ){
    if( pWal->lockState!=SQLITE_SHM_WRITE ){
      return SQLITE_OK;
    }
    return walSetLock(pWal, SQLITE_SHM_READ);
  }

  /* op!=0: upgrade from READ to WRITE. */
  assert( pWal->lockState==SQLITE_SHM_READ );
  rc = walSetLock(pWal, SQLITE_SHM_WRITE);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* A writer must be working from the most recent snapshot.  Compare the
  ** private header copy against the one in the wal-index; any difference
  ** means another writer got in first and this connection is SQLITE_BUSY.
  */
  rc = walIndexMap(pWal, walMappingSize(1));
  assert( pWal->szWIndex>=WALINDEX_HDR_SIZE || rc!=SQLITE_OK );
  if( rc==SQLITE_OK ){
    if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){
      rc = SQLITE_BUSY;
    }
  }
  walIndexUnmap(pWal);

  /* On any failure fall back to the READ lock before returning */
  if( rc!=SQLITE_OK ){
    walSetLock(pWal, SQLITE_SHM_READ);
  }
  return rc;
}

/*
** If any data has been written (but not committed) to the log file, this
** function moves the write-pointer back to the start of the transaction.
**
** Additionally, the callback function is invoked for each frame written
** to the log since the start of the transaction. If the callback returns
** other than SQLITE_OK, it is not invoked again and the error code is
** returned to the caller.
**
** Otherwise, if the callback function does not return an error, this
** function returns SQLITE_OK.
**
** Parameters:
**   pWal      WAL handle.  Nothing is done (and SQLITE_OK is returned)
**             unless it currently holds the WRITE lock.
**   xUndo     Callback invoked as xUndo(pUndoCtx, pgno) for the page
**             number recorded in the wal-index for each undone frame.
**   pUndoCtx  Opaque first argument passed through to xUndo.
**
** An error from walIndexReadHdr() or walIndexMap() is also returned to
** the caller, in which case xUndo is never invoked.
*/
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
  int rc = SQLITE_OK;
  if( pWal->lockState==SQLITE_SHM_WRITE ){
    int unused;
    /* Remember how far this transaction has written BEFORE the header
    ** is re-read below; the re-read may replace pWal->hdr.
    */
    Pgno iMax = pWal->hdr.mxFrame;
    Pgno iFrame;
  
    assert( pWal->pWiData==0 );
    /* Reload pWal->hdr from the wal-index.  NOTE(review): presumably this
    ** restores the last committed mxFrame, which is what moves the
    ** write-pointer back - confirm against walIndexTryHdr().
    */
    rc = walIndexReadHdr(pWal, &unused);
    if( rc==SQLITE_OK ){
      /* The mapping must cover the frames that are about to be undone */
      rc = walIndexMap(pWal, walMappingSize(iMax));
    }
    if( rc==SQLITE_OK ){
      /* Visit every frame between the reloaded mxFrame and iMax, stopping
      ** at the first callback error.
      */
      for(iFrame=pWal->hdr.mxFrame+1; rc==SQLITE_OK && iFrame<=iMax; iFrame++){
        assert( pWal->lockState==SQLITE_SHM_WRITE );
        rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]);
      }
      /* Runs even if xUndo reported an error */
      walCleanupHash(pWal);
    }
    walIndexUnmap(pWal);
  }
  return rc;
}

/* 
** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 
** values. This function populates the array with values required to 
** "rollback" the write position of the WAL handle back to the current 
** point in the event of a savepoint rollback (via WalSavepointUndo()).
*/
void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){
  int i;                  /* Index into hdr.aFrameCksum[] */

  assert( pWal->lockState==SQLITE_SHM_WRITE );

  /* Slot 0 holds the frame count; slots 1 and 2 hold the two running
  ** frame-checksum values. */
  aWalData[0] = pWal->hdr.mxFrame;
  for(i=0; i<2; i++){
    aWalData[i+1] = pWal->hdr.aFrameCksum[i];
  }
}

/* 
** Move the write position of the WAL back to the point identified by
** the values in the aWalData[] array. aWalData must point to an array
** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated
** by a call to WalSavepoint().
*/
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
  int rc = SQLITE_OK;
  assert( pWal->lockState==SQLITE_SHM_WRITE );

  assert( aWalData[0]<=pWal->hdr.mxFrame );
  if( aWalData[0]<pWal->hdr.mxFrame ){
    rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
    pWal->hdr.mxFrame = aWalData[0];
    pWal->hdr.aFrameCksum[0] = aWalData[1];
    pWal->hdr.aFrameCksum[1] = aWalData[2];







|
<



>

|
<
>
>
>
>
>
>
>
|
>
>

|
|
>
|
>
>
|
|
>
>
>
>
>
>
|
>
|
|
>
|
|
<
|
<
|
>
|
<
|
|
>
>
>
|
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
|
>
>
>
>
>
>


>
|
>
>
|
>

>

>
>
>
>
>
>
>
>
>
>







|








|











|
















|













|







1978
1979
1980
1981
1982
1983
1984
1985

1986
1987
1988
1989
1990
1991

1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023

2024

2025
2026
2027

2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
}


/* 
** Set *pPgno to the size of the database file (or zero, if unknown).
*/
void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){
  /* The caller must already hold a read lock (readLock>=0). */
  assert( pWal->readLock>=0 );

  /* Report the page count stored in this connection's copy of the
  ** wal-index header. */
  *pPgno = pWal->hdr.nPage;
}


/* 
** This function starts a write transaction on the WAL.

**
** A read transaction must have already been started by a prior call
** to sqlite3WalBeginReadTransaction().
**
** If another thread or process has written into the database since
** the read transaction was started, then it is not possible for this
** thread to write as doing so would cause a fork.  So this routine
** returns SQLITE_BUSY in that case and no write transaction is started.
**
** There can only be a single writer active at a time.
*/
int sqlite3WalBeginWriteTransaction(Wal *pWal){
  int rc;
  volatile WalCkptInfo *pInfo;

  /* Cannot start a write transaction without first holding a read
  ** transaction. */
  assert( pWal->readLock>=0 );

  /* Only one writer allowed at a time.  Get the write lock.  Return
  ** SQLITE_BUSY if unable.
  */
  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
  if( rc ){
    return rc;
  }
  pWal->writeLock = 1;

  /* If another connection has written to the database file since the
  ** time the read transaction on this connection was started, then
  ** the write is disallowed.
  */

  rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));

  if( rc ){
    /* Mapping failed: give the write lock back before reporting the error. */
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
    pWal->writeLock = 0;

    return rc;
  }
  /* The local header copy differs from the mapped wal-index: release the
  ** write lock and the mapping, then report SQLITE_BUSY. */
  if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
    pWal->writeLock = 0;
    walIndexUnmap(pWal);
    return SQLITE_BUSY;
  }

  /* When this connection reads through read-lock 0, the assert below
  ** requires that nBackfill equals mxFrame.  If the WAL is non-empty, try
  ** to take every other read lock exclusively; on success reset the WAL
  ** header and checkpoint info, then truncate the database file to
  ** nPage pages of szPage bytes. */
  pInfo = walCkptInfo(pWal);
  if( pWal->readLock==0 ){
    assert( pInfo->nBackfill==pWal->hdr.mxFrame );
    if( pInfo->nBackfill>0 ){
      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      if( rc==SQLITE_OK ){
        /* If all readers are using WAL_READ_LOCK(0) (in other words if no
        ** readers are currently using the WAL) then reset mxFrame to zero,
        ** increment the first salt value, randomize the second, and clear
        ** nBackfill and read-marks 1 and up. */
        pWal->nCkpt++;
        pWal->hdr.mxFrame = 0;
        sqlite3Put4byte((u8*)pWal->hdr.aSalt,
                         1 + sqlite3Get4byte((u8*)pWal->hdr.aSalt));
        sqlite3_randomness(4, &pWal->hdr.aSalt[1]);
        walIndexWriteHdr(pWal);
        pInfo->nBackfill = 0;
        memset((void*)&pInfo->aReadMark[1], 0,
               sizeof(pInfo->aReadMark)-sizeof(u32));
        rc = sqlite3OsTruncate(pWal->pDbFd, 
                               ((i64)pWal->hdr.nPage*(i64)pWal->szPage));
        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      }
    }
    /* NOTE(review): the rc values assigned above (from walLockExclusive()
    ** and sqlite3OsTruncate()) are overwritten unconditionally by the
    ** loop below, so a truncate failure is never reported - confirm that
    ** this is intended. */

    /* Drop read-lock 0 and obtain a read lock afresh, retrying for as
    ** long as walTryBeginRead() returns WAL_RETRY. */
    walUnlockShared(pWal, WAL_READ_LOCK(0));
    pWal->readLock = -1;
    do{
      int notUsed;
      rc = walTryBeginRead(pWal, &notUsed, 1);
    }while( rc==WAL_RETRY );
  }
  /* NOTE(review): if rc is not SQLITE_OK here, the function returns with
  ** WAL_WRITE_LOCK still held and writeLock==1; presumably the caller
  ** ends the write transaction - confirm. */
  walIndexUnmap(pWal);
  return rc;
}

/*
** End a write transaction.  The commit has already been done.  This
** routine merely releases the lock.
*/
int sqlite3WalEndWriteTransaction(Wal *pWal){
  /* Release the exclusive WAL_WRITE_LOCK and clear the flag that records
  ** it.  This routine always returns SQLITE_OK. */
  walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
  pWal->writeLock = 0;
  return SQLITE_OK;
}

/*
** If any data has been written (but not committed) to the log file, this
** function moves the write-pointer back to the start of the transaction.
**
** Additionally, the callback function is invoked for each frame written
** to the WAL since the start of the transaction. If the callback returns
** other than SQLITE_OK, it is not invoked again and the error code is
** returned to the caller.
**
** Otherwise, if the callback function does not return an error, this
** function returns SQLITE_OK.
*/
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
  int rc;                 /* Return code */
  int notUsed;            /* Output of walIndexReadHdr(); ignored here */
  Pgno iLast;             /* pWal->hdr.mxFrame as it was on entry */
  Pgno iCur;              /* Frame currently being handed to xUndo */

  /* There is nothing to do unless this connection holds the write lock. */
  if( !pWal->writeLock ){
    return SQLITE_OK;
  }

  /* Capture mxFrame before walIndexReadHdr() is called; the loop below
  ** starts from the value of pWal->hdr.mxFrame after that call. */
  iLast = pWal->hdr.mxFrame;

  assert( pWal->pWiData==0 );
  rc = walIndexReadHdr(pWal, &notUsed);
  if( rc==SQLITE_OK ){
    rc = walIndexMap(pWal, walMappingSize(iLast));
  }
  if( rc==SQLITE_OK ){
    /* Invoke the callback once per frame in (mxFrame, iLast], stopping
    ** at the first callback error. */
    iCur = pWal->hdr.mxFrame+1;
    while( rc==SQLITE_OK && iCur<=iLast ){
      assert( pWal->writeLock );
      rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iCur)]);
      iCur++;
    }
    walCleanupHash(pWal);
  }

  /* The mapping is released whether or not the steps above succeeded. */
  walIndexUnmap(pWal);
  return rc;
}

/* 
** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 
** values. This function populates the array with values required to 
** "rollback" the write position of the WAL handle back to the current 
** point in the event of a savepoint rollback (via WalSavepointUndo()).
*/
void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){
  int i;                  /* Index into hdr.aFrameCksum[] */

  assert( pWal->writeLock );

  /* Slot 0 holds the frame count; slots 1 and 2 hold the two running
  ** frame-checksum values. */
  aWalData[0] = pWal->hdr.mxFrame;
  for(i=0; i<2; i++){
    aWalData[i+1] = pWal->hdr.aFrameCksum[i];
  }
}

/* 
** Move the write position of the WAL back to the point identified by
** the values in the aWalData[] array. aWalData must point to an array
** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated
** by a call to WalSavepoint().
*/
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
  int rc = SQLITE_OK;
  assert( pWal->writeLock );

  assert( aWalData[0]<=pWal->hdr.mxFrame );
  if( aWalData[0]<pWal->hdr.mxFrame ){
    rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
    pWal->hdr.mxFrame = aWalData[0];
    pWal->hdr.aFrameCksum[0] = aWalData[1];
    pWal->hdr.aFrameCksum[1] = aWalData[2];
1735
1736
1737
1738
1739
1740
1741
1742
1743







1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759

1760
1761
1762
1763
1764
1765
1766
  u32 iFrame;                     /* Next frame address */
  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */
  PgHdr *p;                       /* Iterator to run through pList with. */
  PgHdr *pLast = 0;               /* Last frame in list */
  int nLast = 0;                  /* Number of extra copies of last page */

  assert( pList );
  assert( pWal->lockState==SQLITE_SHM_WRITE );
  assert( pWal->pWiData==0 );








  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */
  iFrame = pWal->hdr.mxFrame;
  if( iFrame==0 ){
    u8 aWalHdr[WAL_HDRSIZE];        /* Buffer to assembly wal-header in */
    sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN));
    sqlite3Put4byte(&aWalHdr[4], 3007000);
    sqlite3Put4byte(&aWalHdr[8], szPage);
    pWal->szPage = szPage;
    pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN;
    sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt);
    memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8);
    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);

    if( rc!=SQLITE_OK ){
      return rc;
    }
    walChecksumBytes(1, aWalHdr, sizeof(aWalHdr), 0, pWal->hdr.aFrameCksum);
  }
  assert( pWal->szPage==szPage );








|

>
>
>
>
>
>
>
















>







2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
  u32 iFrame;                     /* Next frame address */
  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */
  PgHdr *p;                       /* Iterator to run through pList with. */
  PgHdr *pLast = 0;               /* Last frame in list */
  int nLast = 0;                  /* Number of extra copies of last page */

  assert( pList );
  assert( pWal->writeLock );
  assert( pWal->pWiData==0 );

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
    WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n",
              pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill"));
  }
#endif

  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */
  iFrame = pWal->hdr.mxFrame;
  if( iFrame==0 ){
    u8 aWalHdr[WAL_HDRSIZE];        /* Buffer to assembly wal-header in */
    sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN));
    sqlite3Put4byte(&aWalHdr[4], 3007000);
    sqlite3Put4byte(&aWalHdr[8], szPage);
    pWal->szPage = szPage;
    pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN;
    sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt);
    memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8);
    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
    WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
    if( rc!=SQLITE_OK ){
      return rc;
    }
    walChecksumBytes(1, aWalHdr, sizeof(aWalHdr), 0, pWal->hdr.aFrameCksum);
  }
  assert( pWal->szPage==szPage );

1844
1845
1846
1847
1848
1849
1850

1851
1852
1853
1854
1855

1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873

1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884

1885
1886
1887
1888
1889


1890
1891
1892

1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911


1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936




1937

1938
1939
1940
1941



1942
1943
1944
1945
1946
1947


1948
1949
1950
1951


1952













1953

1954



1955
1956
1957
1958
1959
    if( isCommit ){
      walIndexWriteHdr(pWal);
      pWal->iCallback = iFrame;
    }
  }

  walIndexUnmap(pWal);

  return rc;
}

/* 
** Checkpoint the database:

**
**   1. Acquire a CHECKPOINT lock
**   2. Copy the contents of the log into the database file.
**   3. Zero the wal-index header (so new readers will ignore the log).
**   4. Drop the CHECKPOINT lock.
*/
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Wal connection */
  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of temporary buffer */
  u8 *zBuf,                       /* Temporary buffer to use */
  int (*xBusyHandler)(void *),    /* Pointer to busy-handler function */
  void *pBusyHandlerArg           /* Argument to pass to xBusyHandler */
){
  int rc;                         /* Return code */
  int isChanged = 0;              /* True if a new wal-index header is loaded */

  assert( pWal->pWiData==0 );


  /* Get the CHECKPOINT lock. 
  **
  ** Normally, the connection will be in UNLOCK state at this point. But
  ** if the connection is in exclusive-mode it may still be in READ state
  ** even though the upper layer has no active read-transaction (because
  ** WalCloseSnapshot() is not called in exclusive mode). The state will
  ** be set to UNLOCK when this function returns. This is Ok.
  */
  assert( (pWal->lockState==SQLITE_SHM_UNLOCK)
       || (pWal->lockState==SQLITE_SHM_READ) );

  /* Retry the CHECKPOINT lock for as long as it reports SQLITE_BUSY and
  ** the busy-handler returns non-zero.
  ** NOTE(review): xBusyHandler is called without a NULL check; presumably
  ** callers always supply one - confirm. */
  walSetLock(pWal, SQLITE_SHM_UNLOCK);
  do {
    rc = walSetLock(pWal, SQLITE_SHM_CHECKPOINT);
  }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) );
  if( rc!=SQLITE_OK ){


    /* Could not get the lock: return to UNLOCK state and report the error. */
    walSetLock(pWal, SQLITE_SHM_UNLOCK);
    return rc;
  }


  /* Copy data from the log to the database file. */
  rc = walIndexReadHdr(pWal, &isChanged);
  if( rc==SQLITE_OK ){
    rc = walCheckpoint(pWal, sync_flags, nBuf, zBuf);
  }
  if( isChanged ){
    /* If a new wal-index header was loaded before the checkpoint was 
    ** performed, then the pager-cache associated with pWal is now
    ** out of date. So zero the cached wal-index header to ensure that
    ** next time the pager opens a snapshot on this database it knows that
    ** the cache needs to be reset.
    */
    memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  }

  /* Release the locks. */
  walIndexUnmap(pWal);
  walSetLock(pWal, SQLITE_SHM_UNLOCK);


  return rc;
}

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called.  If no commits have occurred since
** the last call, then return 0.
*/
int sqlite3WalCallback(Wal *pWal){
  u32 nFrame;             /* Saved value of pWal->iCallback */

  /* With no WAL connection there is nothing to report. */
  if( pWal==0 ){
    return 0;
  }

  /* Fetch the pending value and clear it so that it is reported once. */
  nFrame = pWal->iCallback;
  pWal->iCallback = 0;
  return (int)nFrame;
}

/*
** This function is called to set or query the exclusive-mode flag 
** associated with the WAL connection passed as the first argument. The
** exclusive-mode flag should be set to indicate that the caller is
** holding an EXCLUSIVE lock on the database file (it does this in
** locking_mode=exclusive mode). If the EXCLUSIVE lock is to be dropped,
** the flag set by this function should be cleared before doing so.
**




** The value of the exclusive-mode flag may only be modified when

** the WAL connection is in READ state.
**
** When the flag is set, this module does not call the VFS xShmLock()
** method to obtain any locks on the wal-index (as it assumes it



** has exclusive access to the wal and wal-index files anyhow). It
** continues to hold (and does not drop) the existing READ lock on
** the wal-index.
**
** To set or clear the flag, the "op" parameter is passed 1 or 0,
** respectively. To query the flag, pass -1. In all cases, the value


** returned is the value of the exclusive-mode flag (after its value
** has been modified, if applicable).
*/
int sqlite3WalExclusiveMode(Wal *pWal, int op){
  /* A negative op is a pure query: report the flag without touching it. */
  if( op<0 ){
    return pWal->exclusiveMode;
  }

  /* The flag may only be changed while in READ state. */
  assert( pWal->lockState==SQLITE_SHM_READ );
  pWal->exclusiveMode = (u8)op;
  return pWal->exclusiveMode;
}

#endif /* #ifndef SQLITE_OMIT_WAL */







>




|
>

|
|
<
<





|
<
<





>

<
<
<
<
<
<
<
<
<
<
>
|
<
<
<
|
>
>
|


>


















|
>
>


















|
<
<
<
|
<

>
>
>
>
|
>
|

<
|
>
>
>
|
<
|

<
<
>
>
|
|


>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
|
>
>
>

|



2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299


2300
2301
2302
2303
2304
2305


2306
2307
2308
2309
2310
2311
2312










2313
2314



2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361



2362

2363
2364
2365
2366
2367
2368
2369
2370
2371

2372
2373
2374
2375
2376

2377
2378


2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
    if( isCommit ){
      walIndexWriteHdr(pWal);
      pWal->iCallback = iFrame;
    }
  }

  walIndexUnmap(pWal);
  WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok"));
  return rc;
}

/* 
** This routine is called to implement sqlite3_wal_checkpoint() and
** related interfaces.
**
** Obtain a CHECKPOINT lock and then backfill as much information as
** we can from WAL into the database.


*/
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Wal connection */
  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of temporary buffer */
  u8 *zBuf                        /* Temporary buffer to use */


){
  int rc;                         /* Return code */
  int isChanged = 0;              /* True if a new wal-index header is loaded */

  /* On entry the wal-index must be unmapped and this connection must not
  ** already hold the checkpoint lock. */
  assert( pWal->pWiData==0 );
  assert( pWal->ckptLock==0 );











  /* Take WAL_CKPT_LOCK exclusively.  There is no retry here: any failure
  ** is returned to the caller immediately. */
  WALTRACE(("WAL%p: checkpoint begins\n", pWal));
  rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);



  if( rc ){
    /* Usually this is SQLITE_BUSY meaning that another thread or process
    ** is already running a checkpoint, or maybe a recovery.  But it might
    ** also be SQLITE_IOERR. */
    return rc;
  }
  pWal->ckptLock = 1;

  /* Copy data from the log to the database file. */
  rc = walIndexReadHdr(pWal, &isChanged);
  if( rc==SQLITE_OK ){
    rc = walCheckpoint(pWal, sync_flags, nBuf, zBuf);
  }
  if( isChanged ){
    /* If a new wal-index header was loaded before the checkpoint was 
    ** performed, then the pager-cache associated with pWal is now
    ** out of date. So zero the cached wal-index header to ensure that
    ** next time the pager opens a snapshot on this database it knows that
    ** the cache needs to be reset.
    */
    memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  }

  /* Release the locks. */
  walIndexUnmap(pWal);
  walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
  pWal->ckptLock = 0;
  WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));
  return rc;
}

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called.  If no commits have occurred since
** the last call, then return 0.
*/
int sqlite3WalCallback(Wal *pWal){
  u32 nFrame;             /* Saved value of pWal->iCallback */

  /* With no WAL connection there is nothing to report. */
  if( pWal==0 ){
    return 0;
  }

  /* Fetch the pending value and clear it so that it is reported once. */
  nFrame = pWal->iCallback;
  pWal->iCallback = 0;
  return (int)nFrame;
}

/*
** This function is called to change the WAL subsystem into or out



** of locking_mode=EXCLUSIVE.

**
** If op is zero, then attempt to change from locking_mode=EXCLUSIVE
** into locking_mode=NORMAL.  This means that we must acquire a lock
** on the pWal->readLock byte.  If the WAL is already in locking_mode=NORMAL
** or if the acquisition of the lock fails, then return 0.  If the
** transition out of exclusive-mode is successful, return 1.  This
** operation must occur while the pager is still holding the exclusive
** lock on the main database file.
**

** If op is one, then change from locking_mode=NORMAL into 
** locking_mode=EXCLUSIVE.  This means that the pWal->readLock must
** be released.  Return 1 if the transition is made and 0 if the
** WAL is already in exclusive-locking mode - meaning that this
** routine is a no-op.  The pager must already hold the exclusive lock

** on the main database file before invoking this operation.
**


** If op is negative, then do a dry-run of the op==1 case but do
** not actually change anything.  The pager uses this to see if it
** should acquire the database exclusive lock prior to invoking
** the op==1 case.
*/
int sqlite3WalExclusiveMode(Wal *pWal, int op){
  assert( pWal->writeLock==0 && pWal->readLock>=0 );

  /* op<0: dry run.  Report whether an op==1 call would make a change. */
  if( op<0 ){
    return pWal->exclusiveMode==0;
  }

  /* op>0: enter exclusive mode by releasing the shared read lock. */
  if( op>0 ){
    assert( pWal->exclusiveMode==0 );
    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
    pWal->exclusiveMode = 1;
    return 1;
  }

  /* op==0: leave exclusive mode.  Nothing changes if the connection is
  ** not in exclusive mode to begin with. */
  if( !pWal->exclusiveMode ){
    return 0;
  }

  /* The flag is cleared before the lock call is made and restored if the
  ** shared read lock cannot be obtained. */
  pWal->exclusiveMode = 0;
  if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){
    pWal->exclusiveMode = 1;
    return 0;
  }
  return 1;
}

#endif /* #ifndef SQLITE_OMIT_WAL */

Changes to src/wal.h.

16
17
18
19
20
21
22
23
24
25
26
27
28
29

30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

#ifndef _WAL_H_
#define _WAL_H_

#include "sqliteInt.h"

#ifdef SQLITE_OMIT_WAL
# define sqlite3WalOpen(x,y,z)             0
# define sqlite3WalClose(w,x,y,z)          0
# define sqlite3WalOpenSnapshot(y,z)       0
# define sqlite3WalCloseSnapshot(z) 
# define sqlite3WalRead(v,w,x,y,z)         0
# define sqlite3WalDbsize(y,z)
# define sqlite3WalWriteLock(y,z)          0

# define sqlite3WalUndo(x,y,z)             0
# define sqlite3WalSavepoint(y,z)
# define sqlite3WalSavepointUndo(y,z)      0
# define sqlite3WalFrames(u,v,w,x,y,z)     0
# define sqlite3WalCheckpoint(u,v,w,x,y,z) 0
# define sqlite3WalCallback(z)             0
#else

#define WAL_SAVEPOINT_NDATA 3

/* Connection to a write-ahead log (WAL) file. 
** There is one object of this type for each pager. 
*/
typedef struct Wal Wal;

/* Open and close a connection to a write-ahead log. */
int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *zName, Wal**);
int sqlite3WalClose(Wal *pWal, int sync_flags, int, u8 *);

/* Used by readers to open (lock) and close (unlock) a snapshot.  A 
** snapshot is like a read-transaction.  It is the state of the database
** at an instant in time.  sqlite3WalOpenSnapshot gets a read lock and
** preserves the current state even if the other threads or processes
** write to or checkpoint the WAL.  sqlite3WalCloseSnapshot() closes the
** transaction and releases the lock.
*/
int sqlite3WalOpenSnapshot(Wal *pWal, int *);
void sqlite3WalCloseSnapshot(Wal *pWal);

/* Read a page from the write-ahead log, if it is present. */
int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut);

/* Return the size of the database as it existed at the beginning
** of the snapshot */
void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno);

/* Obtain or release the WRITER lock. */

int sqlite3WalWriteLock(Wal *pWal, int op);

/* Undo any frames written (but not committed) to the log */
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx);

/* Return an integer that records the current (uncommitted) write
** position in the WAL */
void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData);

/* Move the write position of the WAL back to iFrame.  Called in
** response to a ROLLBACK TO command. */
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData);

/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);

/* Copy pages from the log to the database file */ 
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Write-ahead log connection */
  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of temporary buffer zBuf */
  u8 *zBuf,                       /* Temporary buffer to use */
  int (*xBusyHandler)(void *),    /* Pointer to busy-handler function */
  void *pBusyHandlerArg           /* Argument to pass to xBusyHandler */
);

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called.  If no commits have occurred since
** the last call, then return 0.
*/







|
|
|
|
|

|
>
|

|
|
|
|




















|
|









>
|




















|
<
<







16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90


91
92
93
94
95
96
97

#ifndef _WAL_H_
#define _WAL_H_

#include "sqliteInt.h"

#ifdef SQLITE_OMIT_WAL
# define sqlite3WalOpen(x,y,z)                 0
# define sqlite3WalClose(w,x,y,z)              0
# define sqlite3WalBeginReadTransaction(y,z)   0
# define sqlite3WalEndReadTransaction(z)
# define sqlite3WalRead(v,w,x,y,z)             0
# define sqlite3WalDbsize(y,z)
# define sqlite3WalBeginWriteTransaction(y)    0
/* No-op stub used when SQLITE_OMIT_WAL is defined; always evaluates to 0. */
# define sqlite3WalEndWriteTransaction(x)      0
# define sqlite3WalUndo(x,y,z)                 0
# define sqlite3WalSavepoint(y,z)
# define sqlite3WalSavepointUndo(y,z)          0
# define sqlite3WalFrames(u,v,w,x,y,z)         0
# define sqlite3WalCheckpoint(u,v,w,x)         0
# define sqlite3WalCallback(z)                 0
#else

#define WAL_SAVEPOINT_NDATA 3

/* Connection to a write-ahead log (WAL) file. 
** There is one object of this type for each pager. 
*/
typedef struct Wal Wal;

/* Open and close a connection to a write-ahead log. */
int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *zName, Wal**);
int sqlite3WalClose(Wal *pWal, int sync_flags, int, u8 *);

/* Used by readers to open (lock) and close (unlock) a snapshot.  A 
** snapshot is like a read-transaction.  It is the state of the database
** at an instant in time.  sqlite3WalBeginReadTransaction() gets a read
** lock and preserves the current state even if other threads or
** processes write to or checkpoint the WAL.
** sqlite3WalEndReadTransaction() closes the transaction and releases
** the lock.
*/
int sqlite3WalBeginReadTransaction(Wal *pWal, int *);
void sqlite3WalEndReadTransaction(Wal *pWal);

/* Read a page from the write-ahead log, if it is present. */
int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut);

/* Return the size of the database as it existed at the beginning
** of the snapshot */
void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno);

/* Obtain or release the WRITER lock. */
int sqlite3WalBeginWriteTransaction(Wal *pWal);
int sqlite3WalEndWriteTransaction(Wal *pWal);

/* Undo any frames written (but not committed) to the log */
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx);

/* Record the current (uncommitted) write position of the WAL in the
** aWalData[] array, which must hold WAL_SAVEPOINT_NDATA u32 values. */
void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData);

/* Move the write position of the WAL back to the position recorded in
** aWalData[].  Called in response to a ROLLBACK TO command. */
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData);

/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);

/* Copy pages from the log to the database file */ 
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Write-ahead log connection */
  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of temporary buffer zBuf */
  u8 *zBuf                        /* Temporary buffer to use */


);

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called.  If no commits have occurred since
** the last call, then return 0.
*/

Changes to test/filectrl.test.

30
31
32
33
34
35
36
37
38
39
40
41
do_test filectrl-1.4 {
  sqlite3 db test.db
  file_control_lasterrno_test db
} {}
do_test filectrl-1.5 {
  db close
  sqlite3 db test_control_lockproxy.db
  file_control_lockproxy_test db
} {}
db close
file delete -force .test_control_lockproxy.db-conch test.proxy
finish_test







|




30
31
32
33
34
35
36
37
38
39
40
41
do_test filectrl-1.4 {
  sqlite3 db test.db
  file_control_lasterrno_test db
} {}
do_test filectrl-1.5 {
  db close
  sqlite3 db test_control_lockproxy.db
  file_control_lockproxy_test db [pwd]
} {}
db close
file delete -force .test_control_lockproxy.db-conch test.proxy
finish_test

Changes to test/lock_common.tcl.

43
44
45
46
47
48
49




50


51
52
53
54
55
56
57
      return "ERROR: Child process hung up"
    }
    append r $line
  }
}

proc testfixture_nb_cb {varname chan} {




  set line [gets $chan]


  if { $line == "OVER" } {
    set $varname $::tfnb($chan)
    unset ::tfnb($chan)
    close $chan
  } else {
    append ::tfnb($chan) $line
  }







>
>
>
>
|
>
>







43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
      return "ERROR: Child process hung up"
    }
    append r $line
  }
}

proc testfixture_nb_cb {varname chan} {
  if {[eof $chan]} {
    append ::tfnb($chan) "ERROR: Child process hung up"
    set line "OVER"
  } else {
    set line [gets $chan]
  }

  if { $line == "OVER" } {
    set $varname $::tfnb($chan)
    unset ::tfnb($chan)
    close $chan
  } else {
    append ::tfnb($chan) $line
  }

Changes to test/wal.test.

567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599


600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628

629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676

677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
  # checkpointing the database. But not from writing to it.
  #
  do_test wal-10.$tn.11 {
    sql2 { BEGIN; SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10}
  do_test wal-10.$tn.12 {
    catchsql { PRAGMA wal_checkpoint } 
  } {1 {database is locked}}
  do_test wal-10.$tn.13 {
    execsql { INSERT INTO t1 VALUES(11, 12) }
    sql2 {SELECT * FROM t1}
  } {1 2 3 4 5 6 7 8 9 10}

  # Connection [db2] is holding a lock on a snapshot, preventing [db] from
  # checkpointing the database. Add a busy-handler to [db]. If [db2] completes
  # its transaction from within the busy-handler, [db] is able to complete
  # the checkpoint operation.
  #
  proc busyhandler x {
    if {$x==4} { sql2 COMMIT }
    if {$x<5} { return 0 }
    return 1
  }
  db busy busyhandler
  do_test wal-10.$tn.14 {
    execsql { PRAGMA wal_checkpoint } 
  } {}

  # Similar to the test above. Except this time, a new read transaction is
  # started (db3) while the checkpointer is waiting for an old one (db2) to 
  # finish. The checkpointer can finish, but any subsequent write operations 
  # must wait until after db3 has closed the read transaction, as db3 is a
  # "region D" writer.


  #
  db busy {}
  do_test wal-10.$tn.15 {
    sql2 { BEGIN; SELECT * FROM t1; }
  } {1 2 3 4 5 6 7 8 9 10 11 12}
  do_test wal-10.$tn.16 {
    catchsql { PRAGMA wal_checkpoint } 
  } {1 {database is locked}}
  proc busyhandler x {
    if {$x==3} { sql3 { BEGIN; SELECT * FROM t1 } }
    if {$x==4} { sql2 COMMIT }
    if {$x<5}  { return 0 }
    return 1
  }
  db busy busyhandler
  do_test wal-10.$tn.17 {
    execsql { PRAGMA wal_checkpoint } 
  } {}
  do_test wal-10.$tn.18 {
    sql3 { SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10 11 12}
  do_test wal-10.$tn.19 {
    catchsql { INSERT INTO t1 VALUES(13, 14) }
  } {1 {database is locked}}
  do_test wal-10.$tn.20 {
    execsql { SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10 11 12}
  do_test wal-10.$tn.21 {
    sql3 COMMIT

  } {}
  do_test wal-10.$tn.22 {
    execsql { INSERT INTO t1 VALUES(13, 14) }
    execsql { SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14}

  # Set [db3] up as a "region D" reader again. Then upgrade it to a writer
  # and back down to a reader. Then, check that a checkpoint is not possible
  # (as [db3] still has a snapshot locked).
  #
  do_test wal-10.$tn.23 {
    execsql { PRAGMA wal_checkpoint }
  } {}
  do_test wal-10.$tn.24 {
    sql2 { BEGIN; SELECT * FROM t1; }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14}
  do_test wal-10.$tn.25 {
    execsql { PRAGMA wal_checkpoint }
  } {}
  do_test wal-10.$tn.26 {
    catchsql { INSERT INTO t1 VALUES(15, 16) }
  } {1 {database is locked}}
  do_test wal-10.$tn.27 {
    sql3 { INSERT INTO t1 VALUES(15, 16) }
  } {}
  do_test wal-10.$tn.28 {
    code3 {
      set ::STMT [sqlite3_prepare db3 "SELECT * FROM t1" -1 TAIL]
      sqlite3_step $::STMT
    }
    sql3 COMMIT
    execsql { SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16}
  db busy {}
  do_test wal-10.$tn.29 {
    execsql { INSERT INTO t1 VALUES(17, 18) }
    catchsql { PRAGMA wal_checkpoint }
  } {1 {database is locked}}
  do_test wal-10.$tn.30 {
    code3 { sqlite3_finalize $::STMT }
    execsql { PRAGMA wal_checkpoint }
  } {}

  # At one point, if a reader failed to upgrade to a writer because it
  # was reading an old snapshot, the write-locks were not being released.
  # Test that this bug has been fixed.
  #
  do_test wal-10.$tn.31 {

    execsql { BEGIN ; SELECT * FROM t1 }
    sql2 { INSERT INTO t1 VALUES(19, 20) }
    catchsql { INSERT INTO t1 VALUES(21, 22) }
  } {1 {database is locked}}
  do_test wal-10.$tn.32 {
    # This statement would fail when the bug was present.
    sql2 { INSERT INTO t1 VALUES(21, 22) }
  } {}
  do_test wal-10.$tn.33 {
    execsql { SELECT * FROM t1 ; COMMIT }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18}
  do_test wal-10.$tn.34 {
    execsql { SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22}

  # Test that if a checkpointer cannot obtain the required locks, it
  # releases all locks before returning a busy error.
  #
  do_test wal-10.$tn.35 {
    execsql { 
      DELETE FROM t1;
      INSERT INTO t1 VALUES('a', 'b');
      INSERT INTO t1 VALUES('c', 'd');
    }
    sql2 {
      BEGIN;
        SELECT * FROM t1;
    }
  } {a b c d}
  proc busyhandler x { return 1 }
  db busy busyhandler
  do_test wal-10.$tn.36 {
    catchsql { PRAGMA wal_checkpoint }
  } {1 {database is locked}}
  do_test wal-10.$tn.36 {
    sql3 { INSERT INTO t1 VALUES('e', 'f') }
    sql2 { SELECT * FROM t1 }
  } {a b c d}
  do_test wal-10.$tn.37 {
    sql2 COMMIT
    execsql { PRAGMA wal_checkpoint }







|





<
<
<
|

<
<
<
<
<
<

|
|

<
<
<
<
<
>
>

<

|



<
<
<
<
<
<
|
<




|



|


|


>


<



|
|
<












|

|






<

|
<

|

|










>

|
|



|



|


|















<
<


|







567
568
569
570
571
572
573
574
575
576
577
578
579



580
581






582
583
584
585





586
587
588

589
590
591
592
593






594

595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611

612
613
614
615
616

617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637

638
639

640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683


684
685
686
687
688
689
690
691
692
693
  # checkpointing the database. But not from writing to it.
  #
  do_test wal-10.$tn.11 {
    sql2 { BEGIN; SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10}
  do_test wal-10.$tn.12 {
    catchsql { PRAGMA wal_checkpoint } 
  } {0 {}}   ;# Readers no longer block checkpoints
  do_test wal-10.$tn.13 {
    execsql { INSERT INTO t1 VALUES(11, 12) }
    sql2 {SELECT * FROM t1}
  } {1 2 3 4 5 6 7 8 9 10}




  # Writers do not block checkpoints any more either.
  #






  do_test wal-10.$tn.14 {
    catchsql { PRAGMA wal_checkpoint } 
  } {0 {}}






  # The following series of test cases used to verify another blocking
  # case in WAL - a case which no longer blocks.
  #

  do_test wal-10.$tn.15 {
    sql2 { COMMIT; BEGIN; SELECT * FROM t1; }
  } {1 2 3 4 5 6 7 8 9 10 11 12}
  do_test wal-10.$tn.16 {
    catchsql { PRAGMA wal_checkpoint } 






  } {0 {}}

  do_test wal-10.$tn.17 {
    execsql { PRAGMA wal_checkpoint } 
  } {}
  do_test wal-10.$tn.18 {
    sql3 { BEGIN; SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10 11 12}
  do_test wal-10.$tn.19 {
    catchsql { INSERT INTO t1 VALUES(13, 14) }
  } {0 {}}
  do_test wal-10.$tn.20 {
    execsql { SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14}
  do_test wal-10.$tn.21 {
    sql3 COMMIT
    sql2 COMMIT
  } {}
  do_test wal-10.$tn.22 {

    execsql { SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14}

  # Another series of tests that used to demonstrate blocking behavior
  # but which now work.

  #
  do_test wal-10.$tn.23 {
    execsql { PRAGMA wal_checkpoint }
  } {}
  do_test wal-10.$tn.24 {
    sql2 { BEGIN; SELECT * FROM t1; }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14}
  do_test wal-10.$tn.25 {
    execsql { PRAGMA wal_checkpoint }
  } {}
  do_test wal-10.$tn.26 {
    catchsql { INSERT INTO t1 VALUES(15, 16) }
  } {0 {}}
  do_test wal-10.$tn.27 {
    sql3 { INSERT INTO t1 VALUES(17, 18) }
  } {}
  do_test wal-10.$tn.28 {
    code3 {
      set ::STMT [sqlite3_prepare db3 "SELECT * FROM t1" -1 TAIL]
      sqlite3_step $::STMT
    }

    execsql { SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18}

  do_test wal-10.$tn.29 {
    execsql { INSERT INTO t1 VALUES(19, 20) }
    catchsql { PRAGMA wal_checkpoint }
  } {0 {}}
  do_test wal-10.$tn.30 {
    code3 { sqlite3_finalize $::STMT }
    execsql { PRAGMA wal_checkpoint }
  } {}

  # At one point, if a reader failed to upgrade to a writer because it
  # was reading an old snapshot, the write-locks were not being released.
  # Test that this bug has been fixed.
  #
  do_test wal-10.$tn.31 {
    sql2 COMMIT
    execsql { BEGIN ; SELECT * FROM t1 }
    sql2 { INSERT INTO t1 VALUES(21, 22) }
    catchsql { INSERT INTO t1 VALUES(23, 24) }
  } {1 {database is locked}}
  do_test wal-10.$tn.32 {
    # This statement would fail when the bug was present.
    sql2 { INSERT INTO t1 VALUES(23, 24) }
  } {}
  do_test wal-10.$tn.33 {
    execsql { SELECT * FROM t1 ; COMMIT }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
  do_test wal-10.$tn.34 {
    execsql { SELECT * FROM t1 }
  } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24}

  # Test that if a checkpointer cannot obtain the required locks, it
  # releases all locks before returning a busy error.
  #
  do_test wal-10.$tn.35 {
    execsql { 
      DELETE FROM t1;
      INSERT INTO t1 VALUES('a', 'b');
      INSERT INTO t1 VALUES('c', 'd');
    }
    sql2 {
      BEGIN;
        SELECT * FROM t1;
    }
  } {a b c d}


  do_test wal-10.$tn.36 {
    catchsql { PRAGMA wal_checkpoint }
  } {0 {}}
  # db2 still holds the read transaction it opened before the checkpoint,
  # so it must not see the row that db3 inserts here. This case has its
  # own identifier so that a failure can be told apart from the preceding
  # checkpoint test.
  do_test wal-10.$tn.36b {
    sql3 { INSERT INTO t1 VALUES('e', 'f') }
    sql2 { SELECT * FROM t1 }
  } {a b c d}
  do_test wal-10.$tn.37 {
    sql2 COMMIT
    execsql { PRAGMA wal_checkpoint }
1055
1056
1057
1058
1059
1060
1061
1062
1063

1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
do_test wal-15.3.2 {
  sqlite3_errcode db
} {SQLITE_LOCKED}
do_test wal-15.3.3 {
  sqlite3_errmsg db
} {database table is locked}

# Also test that an error is returned if the db cannot be checkpointed
# because of locks held by another connection.

#
sqlite3 db2 test.db
do_test wal-15.4.1 {
  execsql {
    BEGIN;
    SELECT * FROM t1;
  } db2
} {1 2}
do_test wal-15.4.2 {
  execsql { COMMIT }
  sqlite3_wal_checkpoint db
} {SQLITE_BUSY}
do_test wal-15.4.3 {
  sqlite3_errmsg db
} {database is locked}

# After [db2] drops its lock, [db] may checkpoint the db.
#
do_test wal-15.4.4 {
  execsql { COMMIT } db2
  sqlite3_wal_checkpoint db
} {SQLITE_OK}







|
|
>











|


|







1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
do_test wal-15.3.2 {
  sqlite3_errcode db
} {SQLITE_LOCKED}
do_test wal-15.3.3 {
  sqlite3_errmsg db
} {database table is locked}

# Earlier versions returned an error if the db could not be 
# checkpointed because of locks held by another connection. Check that
# this is no longer the case.
#
sqlite3 db2 test.db
do_test wal-15.4.1 {
  execsql {
    BEGIN;
    SELECT * FROM t1;
  } db2
} {1 2}
do_test wal-15.4.2 {
  execsql { COMMIT }
  sqlite3_wal_checkpoint db
} {SQLITE_OK}
do_test wal-15.4.3 {
  sqlite3_errmsg db
} {not an error}

# After [db2] drops its lock, [db] may checkpoint the db.
#
do_test wal-15.4.4 {
  execsql { COMMIT } db2
  sqlite3_wal_checkpoint db
} {SQLITE_OK}

Changes to test/wal2.test.

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
39
40
41
source $testdir/tester.tcl
source $testdir/lock_common.tcl
ifcapable !wal {finish_test ; return }

# Read, and optionally overwrite, the leading integers of the blob that
# [tvfs shm $file] returns (presumably the wal-index shared memory for
# $file - confirm against the testvfs implementation).
#
#   set_tvfs_hdr fileName ?val?
#
# With no ?val?, the blob is left untouched. With ?val? (a list of
# $nInt integers), two consecutive copies of it are written at the start
# of the blob and everything from offset 2*$nHdr onwards is kept.
#
# Returns the first $nInt 32-bit little-endian integers of the resulting
# blob as a list. Raises an error if more than one value is supplied.
#
proc set_tvfs_hdr {file args} {

  # Set $nHdr to the number of bytes in the wal-index header:
  set nHdr 80
  set nInt [expr {$nHdr/4}]

  if {[llength $args]>1} {
    return -code error {wrong # args: should be "set_tvfs_hdr fileName ?val?"}
  }

  set blob [tvfs shm $file]

  if {[llength $args]} {
    set ia [lindex $args 0]
    # Keep whatever follows the two header copies, then rebuild the blob
    # as <hdr><hdr><tail> and store it back.
    set tail [string range $blob [expr $nHdr*2] end]
    set blob [binary format i${nInt}i${nInt}a* $ia $ia $tail]
    tvfs shm $file $blob
  }

  # Decode only the first header copy for the return value.
  binary scan $blob i${nInt} ints
  return $ints
}







|







>


|







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
source $testdir/tester.tcl
source $testdir/lock_common.tcl
ifcapable !wal {finish_test ; return }

# Read, and optionally overwrite, the leading integers of the blob that
# [tvfs shm $file] returns (presumably the wal-index shared memory for
# $file - confirm against the testvfs implementation).
#
#   set_tvfs_hdr fileName ?val?
#
# With no ?val?, the blob is left untouched. With ?val? (a list of
# $nInt integers), two consecutive copies of it are written at the start
# of the blob and everything after the first 2*$nHdr bytes is kept.
#
# Returns the first $nInt 32-bit little-endian integers of the resulting
# blob as a list. Raises an error if more than one value is supplied.
#
proc set_tvfs_hdr {file args} {

  # Set $nHdr to the number of bytes in the wal-index header:
  set nHdr 40
  set nInt [expr {$nHdr/4}]

  if {[llength $args]>1} {
    return -code error {wrong # args: should be "set_tvfs_hdr fileName ?val?"}
  }

  set blob [tvfs shm $file]

  if {[llength $args]} {
    set ia [lindex $args 0]
    # Split the blob into the first 2*$nHdr bytes (discarded into $dummy)
    # and the remainder ($tail), then rebuild it as <hdr><hdr><tail> and
    # store it back.
    binary scan $blob a[expr $nHdr*2]a* dummy tail
    set blob [binary format i${nInt}i${nInt}a* $ia $ia $tail]
    tvfs shm $file $blob
  }

  # Decode only the first header copy for the return value.
  binary scan $blob i${nInt} ints
  return $ints
}
88
89
90
91
92
93
94









95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
    SELECT count(a), sum(a) FROM t1;
  }
} {4 10}
do_test wal2-1.1 {
  execsql { SELECT count(a), sum(a) FROM t1 } db2
} {4 10}










foreach {tn iInsert res wal_index_hdr_mod wal_locks} {
         2    5   {5 15}    0             {READ RECOVER READ UNLOCK}
         3    6   {6 21}    1             {READ RECOVER READ UNLOCK}
         4    7   {7 28}    2             {READ RECOVER READ UNLOCK}
         5    8   {8 36}    3             {READ RECOVER READ UNLOCK}
         6    9   {9 45}    4             {READ RECOVER READ UNLOCK}
         7   10   {10 55}   5             {READ RECOVER READ UNLOCK}
         8   11   {11 66}   6             {READ RECOVER READ UNLOCK}
         9   12   {12 78}   7             {READ RECOVER READ UNLOCK}
        10   13   {13 91}   8             {READ RECOVER READ UNLOCK}
        11   14   {14 105}  9             {READ RECOVER READ UNLOCK}
        12   15   {15 120}  -1            {READ UNLOCK}
} {

  do_test wal2-1.$tn.1 {
    execsql { INSERT INTO t1 VALUES($iInsert) }

    set ::locks [list]
    set ::cb_done 0

    proc tvfs_cb {method args} {
      if {$::cb_done == 0 && $method == "xShmGet"} {
        set ::cb_done 1
        if {$::wal_index_hdr_mod >= 0} {
          incr_tvfs_hdr [lindex $args 0] $::wal_index_hdr_mod 1
        }
      }

      if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] }
      return SQLITE_OK
    }

    execsql { SELECT count(a), sum(a) FROM t1 } db2
  } $res








>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|














<







89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

132
133
134
135
136
137
138
    SELECT count(a), sum(a) FROM t1;
  }
} {4 10}
do_test wal2-1.1 {
  execsql { SELECT count(a), sum(a) FROM t1 } db2
} {4 10}

set RECOVER [list                                      \
  {0 1 lock exclusive}   {1 7 lock exclusive}          \
  {1 7 unlock exclusive} {0 1 unlock exclusive}        \
]
set READ [list                                         \
  {4 1 lock exclusive} {4 1 unlock exclusive}          \
  {4 1 lock shared}    {4 1 unlock shared}             \
]

foreach {tn iInsert res wal_index_hdr_mod wal_locks} "
         2    5   {5 15}    0             {$RECOVER $READ}
         3    6   {6 21}    1             {$RECOVER $READ}
         4    7   {7 28}    2             {$RECOVER $READ}
         5    8   {8 36}    3             {$RECOVER $READ}
         6    9   {9 45}    4             {$RECOVER $READ}
         7   10   {10 55}   5             {$RECOVER $READ}
         8   11   {11 66}   6             {$RECOVER $READ}
         9   12   {12 78}   7             {$RECOVER $READ}
        10   13   {13 91}   8             {$RECOVER $READ}
        11   14   {14 105}  9             {$RECOVER $READ}
        12   15   {15 120}  -1            {$READ}
" {

  do_test wal2-1.$tn.1 {
    execsql { INSERT INTO t1 VALUES($iInsert) }

    set ::locks [list]
    set ::cb_done 0

    proc tvfs_cb {method args} {
      if {$::cb_done == 0 && $method == "xShmGet"} {
        set ::cb_done 1
        if {$::wal_index_hdr_mod >= 0} {
          incr_tvfs_hdr [lindex $args 0] $::wal_index_hdr_mod 1
        }
      }

      if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] }
      return SQLITE_OK
    }

    execsql { SELECT count(a), sum(a) FROM t1 } db2
  } $res

146
147
148
149
150
151
152





153
154
155
156
157
158
159
# it simply drops back to a READ lock and proceeds. But because the
# header is out-of-date, the reader reads the out-of-date snapshot.
#
# After this, the header is corrupted again and the reader is allowed
# to run recovery. This time, it sees an up-to-date snapshot of the
# database file.
#





do_test wal2-2.0 {

  testvfs tvfs tvfs_cb
  proc tvfs_cb {method args} {
    if {$method == "xShmOpen"} { set ::shm_file [lindex $args 0] }
    return SQLITE_OK
  }







>
>
>
>
>







155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# it simply drops back to a READ lock and proceeds. But because the
# header is out-of-date, the reader reads the out-of-date snapshot.
#
# After this, the header is corrupted again and the reader is allowed
# to run recovery. This time, it sees an up-to-date snapshot of the
# database file.
#
set WRITER [list 0 1 lock exclusive]
set LOCKS  [list \
  {0 1 lock exclusive} {0 1 unlock exclusive} \
  {4 1 lock shared}    {4 1 unlock shared}    \
]
do_test wal2-2.0 {

  testvfs tvfs tvfs_cb
  proc tvfs_cb {method args} {
    if {$method == "xShmOpen"} { set ::shm_file [lindex $args 0] }
    return SQLITE_OK
  }
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
        if {$::wal_index_hdr_mod >= 0} {
          incr_tvfs_hdr $::shm_file $::wal_index_hdr_mod 1
        }
      }
      if {$method == "xShmLock"} {
        set lock [lindex $args 2]
        lappend ::locks $lock
        if {$lock == "RECOVER"} {
          set_tvfs_hdr $::shm_file $::oldhdr
        }
      }
      return SQLITE_OK
    }

    execsql { SELECT count(a), sum(a) FROM t1 } db2
  } $res0

  do_test wal2-2.$tn.3 {
    set ::locks
  } {READ RECOVER READ UNLOCK}

  do_test wal2-2.$tn.4 {
    set ::locks [list]
    set ::cb_done 0
    proc tvfs_cb {method args} {
      if {$::cb_done == 0 && $method == "xShmGet"} {
        set ::cb_done 1







|











|







216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
        if {$::wal_index_hdr_mod >= 0} {
          incr_tvfs_hdr $::shm_file $::wal_index_hdr_mod 1
        }
      }
      if {$method == "xShmLock"} {
        set lock [lindex $args 2]
        lappend ::locks $lock
        if {$lock == $::WRITER} {
          set_tvfs_hdr $::shm_file $::oldhdr
        }
      }
      return SQLITE_OK
    }

    execsql { SELECT count(a), sum(a) FROM t1 } db2
  } $res0

  do_test wal2-2.$tn.3 {
    set ::locks
  } $LOCKS

  do_test wal2-2.$tn.4 {
    set ::locks [list]
    set ::cb_done 0
    proc tvfs_cb {method args} {
      if {$::cb_done == 0 && $method == "xShmGet"} {
        set ::cb_done 1
241
242
243
244
245
246
247


248
249
250
251
252
253
254
  } $res1
}
db close
db2 close
tvfs delete
file delete -force test.db test.db-wal test.db-journal



#-------------------------------------------------------------------------
# This test case - wal2-3.* - tests the response of the library to an
# SQLITE_BUSY when attempting to obtain a READ or RECOVER lock.
#
#   wal2-3.0 - 2: SQLITE_BUSY when obtaining a READ lock
#   wal2-3.3 - 6: SQLITE_BUSY when obtaining a RECOVER lock
#







>
>







255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
  } $res1
}
db close
db2 close
tvfs delete
file delete -force test.db test.db-wal test.db-journal


if 0 {
#-------------------------------------------------------------------------
# This test case - wal2-3.* - tests the response of the library to an
# SQLITE_BUSY when attempting to obtain a READ or RECOVER lock.
#
#   wal2-3.0 - 2: SQLITE_BUSY when obtaining a READ lock
#   wal2-3.3 - 6: SQLITE_BUSY when obtaining a RECOVER lock
#
310
311
312
313
314
315
316


317
318
319
320
321
322
323
} {4 10}
do_test wal2-3.5 {
  list [info exists ::sabotage] [info exists ::locked]
} {0 0}
db close
tvfs delete
file delete -force test.db test.db-wal test.db-journal



#-------------------------------------------------------------------------
# Test that a database connection using a VFS that does not support the
# xShmXXX interfaces cannot open a WAL database.
#
do_test wal2-4.1 {
  sqlite3 db test.db







>
>







326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
} {4 10}
do_test wal2-3.5 {
  list [info exists ::sabotage] [info exists ::locked]
} {0 0}
db close
tvfs delete
file delete -force test.db test.db-wal test.db-journal

}

#-------------------------------------------------------------------------
# Test that a database connection using a VFS that does not support the
# xShmXXX interfaces cannot open a WAL database.
#
do_test wal2-4.1 {
  sqlite3 db test.db
345
346
347
348
349
350
351













352
353
354
355
356
357
358
db close
tvfs delete

#-------------------------------------------------------------------------
# Test that if a database connection is forced to run recovery before it
# can perform a checkpoint, it does not transition into RECOVER state.
#













do_test wal2-5.1 {
  proc tvfs_cb {method args} {
    set ::shm_file [lindex $args 0]
    if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] }
    return $::tvfs_cb_return
  }
  set tvfs_cb_return SQLITE_OK







>
>
>
>
>
>
>
>
>
>
>
>
>







363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
db close
tvfs delete

#-------------------------------------------------------------------------
# Test that if a database connection is forced to run recovery before it
# can perform a checkpoint, it does not transition into RECOVER state.
#
# UPDATE: This has now changed. When running a checkpoint, if recovery is
# required the client grabs all exclusive locks (just as it would for a
# recovery performed as a precursor to a normal database transaction).
#
set expected_locks [list]
lappend expected_locks {1 1 lock exclusive}   ;# Lock checkpoint
lappend expected_locks {0 1 lock exclusive}   ;# Lock writer
lappend expected_locks {2 6 lock exclusive}   ;# Lock recovery & all aReadMark[]
lappend expected_locks {2 6 unlock exclusive} ;# Unlock recovery & aReadMark[]
lappend expected_locks {0 1 unlock exclusive} ;# Unlock writer
lappend expected_locks {3 1 lock exclusive}   ;# Lock aReadMark[0]
lappend expected_locks {3 1 unlock exclusive} ;# Unlock aReadMark[0]
lappend expected_locks {1 1 unlock exclusive} ;# Unlock checkpoint
do_test wal2-5.1 {
  proc tvfs_cb {method args} {
    set ::shm_file [lindex $args 0]
    if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] }
    return $::tvfs_cb_return
  }
  set tvfs_cb_return SQLITE_OK
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
    INSERT INTO x VALUES(1);
  }

  incr_tvfs_hdr $::shm_file 1 1
  set ::locks [list]
  execsql { PRAGMA wal_checkpoint }
  set ::locks
} {CHECKPOINT UNLOCK}
db close
tvfs delete

#-------------------------------------------------------------------------
# This block, test cases wal2-6.*, tests the operation of WAL with
# "PRAGMA locking_mode=EXCLUSIVE" set.
#







|







397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
    INSERT INTO x VALUES(1);
  }

  incr_tvfs_hdr $::shm_file 1 1
  set ::locks [list]
  execsql { PRAGMA wal_checkpoint }
  set ::locks
} $expected_locks
db close
tvfs delete

#-------------------------------------------------------------------------
# This block, test cases wal2-6.*, tests the operation of WAL with
# "PRAGMA locking_mode=EXCLUSIVE" set.
#
531
532
533
534
535
536
537





538
539
540
541
542
543
544
545
546

547



















548


549

550
551
552





553
554
555
556
557
558
559



560
561
562
563
564


565
566
567



568


569








570


571
572
573



574
575
576







577


578









579

580





581
582
583
584

585
586
587
588
589
590
591
592
  list [file exists test.db-wal] [file exists test.db-journal]
} {0 1}
do_test wal2-6.3.7 {
  execsql { PRAGMA lock_status }
} {main exclusive temp closed}
db close






do_test wal2-6.4.1 {
  file delete -force test.db test.db-wal test.db-journal
  proc tvfs_cb {method args} {
    set ::shm_file [lindex $args 0]
    if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] }
    return "SQLITE_OK"
  }
  testvfs tvfs tvfs_cb
  sqlite3 db test.db -vfs tvfs





















  execsql {


    PRAGMA journal_mode = WAL;

    CREATE TABLE t1(x);
    INSERT INTO t1 VALUES('Leonard');
    INSERT INTO t1 VALUES('Arthur');





  }

  set ::locks [list]
  execsql { PRAGMA locking_mode = exclusive }
  set ::locks
} {}
do_test wal2-6.4.2 {



  execsql { SELECT * FROM t1 }
} {Leonard Arthur}
do_test wal2-6.4.3 {
  set ::locks
} {READ}


do_test wal2-6.4.4 {
  execsql { 
    INSERT INTO t1 VALUES('Julius Henry');



    SELECT * FROM t1;


  }








} {Leonard Arthur {Julius Henry}}


do_test wal2-6.4.5 {
  set ::locks
} {READ}



do_test wal2-6.4.6 {
  execsql {
    PRAGMA locking_mode = NORMAL;







    DELETE FROM t1;


  }









  set ::locks

} {READ UNLOCK}





do_test wal2-6.4.7 {
  set ::locks [list]
  execsql { INSERT INTO t1 VALUES('Karl') }
  set ::locks

} {READ WRITE READ UNLOCK}
db close
tvfs delete

do_test wal2-6.5.1 {
  sqlite3 db test.db
  execsql {
    PRAGMA journal_mode = wal;







>
>
>
>
>









>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>

>
|
|
|
>
>
>
>
>


<
<
<
|
|
>
>
>
|
|
<
<
|
>
>
|
|
<
>
>
>
|
>
>

>
>
>
>
>
>
>
>
|
>
>
|
<
|
>
>
>
|
|
|
>
>
>
>
>
>
>
|
>
>

>
>
>
>
>
>
>
>
>
|
>
|
>
>
>
>
>
|

|
|
>
|







562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618



619
620
621
622
623
624
625


626
627
628
629
630

631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649

650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
  list [file exists test.db-wal] [file exists test.db-journal]
} {0 1}
do_test wal2-6.3.7 {
  execsql { PRAGMA lock_status }
} {main exclusive temp closed}
db close


# This test - wal2-6.4.* - uses a single database connection and the
# [testvfs] instrumentation to test that xShmLock() is being called
# as expected when a WAL database is used with locking_mode=exclusive.
#
do_test wal2-6.4.1 {
  file delete -force test.db test.db-wal test.db-journal
  proc tvfs_cb {method args} {
    set ::shm_file [lindex $args 0]
    if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] }
    return "SQLITE_OK"
  }
  testvfs tvfs tvfs_cb
  sqlite3 db test.db -vfs tvfs
} {}

set RECOVERY {
  {0 1 lock exclusive} {1 7 lock exclusive} 
  {1 7 unlock exclusive} {0 1 unlock exclusive}
}
set READMARK0_READ {
  {3 1 lock shared} {3 1 unlock shared}
}
set READMARK0_WRITE {
  {3 1 lock shared} 
  {0 1 lock exclusive} {3 1 unlock shared} 
  {4 1 lock exclusive} {4 1 unlock exclusive} {4 1 lock shared} 
  {0 1 unlock exclusive} {4 1 unlock shared}
}
set READMARK1_SET {
  {4 1 lock exclusive} {4 1 unlock exclusive}
}
set READMARK1_READ {
  {4 1 lock shared} {4 1 unlock shared}
}

foreach {tn sql res expected_locks} {
  2 {
    PRAGMA journal_mode = WAL;
    BEGIN;
      CREATE TABLE t1(x);
      INSERT INTO t1 VALUES('Leonard');
      INSERT INTO t1 VALUES('Arthur');
    COMMIT;
  } {wal} {
    $RECOVERY 
    $READMARK0_READ 
    $READMARK0_WRITE
  }




  3 {
    # This test should do the READMARK1_SET locking to populate the 
    # aReadMark[1] slot with the current mxFrame value. Followed by
    # READMARK1_READ to read the database.
    #
    SELECT * FROM t1
  } {Leonard Arthur} {


    $READMARK1_SET
    $READMARK1_READ
  }

  4 {

    # aReadMark[1] is already set to mxFrame. So just READMARK1_READ
    # this time, not READMARK1_SET.
    #
    SELECT * FROM t1 ORDER BY x
  } {Arthur Leonard} { 
    $READMARK1_READ 
  }

  5 {
    PRAGMA locking_mode = exclusive
  } {exclusive} { } 

  6 {
    INSERT INTO t1 VALUES('Julius Henry');
    SELECT * FROM t1;
  } {Leonard Arthur {Julius Henry}} {
    $READMARK1_READ
  }


  7 {
    INSERT INTO t1 VALUES('Karl');
    SELECT * FROM t1;
  } {Leonard Arthur {Julius Henry} Karl} { }

  8 {
    PRAGMA locking_mode = normal
  } {normal} { }

  9 {
    SELECT * FROM t1 ORDER BY x
  } {Arthur {Julius Henry} Karl Leonard} { }

  10 {
    DELETE FROM t1
  } {} {
    $READMARK1_READ
  }

  11 {
    SELECT * FROM t1
  } {} {
    $READMARK1_SET
    $READMARK1_READ
  }
} {

  set L [list]
  foreach el [subst $expected_locks] { lappend L $el }

  set S ""
  foreach sq [split $sql "\n"] { 
    set sq [string trim $sq]
    if {[string match {#*} $sq]==0} {append S "$sq\n"}
  }

  set ::locks [list]
  do_test wal2-6.4.$tn.1 { execsql $S } $res
  do_test wal2-6.4.$tn.2 { set ::locks  } $L
}

db close
tvfs delete

do_test wal2-6.5.1 {
  sqlite3 db test.db
  execsql {
    PRAGMA journal_mode = wal;

Changes to test/walthread.test.

280
281
282
283
284
285
286






287
288
289
290
291
292
293
        INSERT INTO t1 VALUES(randomblob(100));
        INSERT INTO t1 VALUES(randomblob(100));
        INSERT INTO t1 SELECT md5sum(x) FROM t1;
      COMMIT;
    }
  }







  set nRun 0
  while {[tt_continue]} {
    read_transaction
    write_transaction 
    incr nRun
  }
  set nRun







>
>
>
>
>
>







280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
        INSERT INTO t1 VALUES(randomblob(100));
        INSERT INTO t1 VALUES(randomblob(100));
        INSERT INTO t1 SELECT md5sum(x) FROM t1;
      COMMIT;
    }
  }

  # Turn off auto-checkpoint. Otherwise, an auto-checkpoint run by a
  # writer may cause the dedicated checkpoint thread to return an
  # SQLITE_BUSY error.
  #
  db eval { PRAGMA wal_autocheckpoint = 0 }

  set nRun 0
  while {[tt_continue]} {
    read_transaction
    write_transaction 
    incr nRun
  }
  set nRun
385
386
387
388
389
390
391
392



393
394
395
396
397
398
399
    INSERT INTO t1 VALUES(0, 0, 0);
  }
} -thread t 10 {

  set nextwrite $E(pid)

  proc wal_hook {zDb nEntry} {
    if {$nEntry>10} {db eval {PRAGMA wal_checkpoint}}



    return 0
  }
  db wal_hook wal_hook

  while {[tt_continue]} {
    set max 0
    while { $max != ($nextwrite-1) && [tt_continue] } {







|
>
>
>







391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
    INSERT INTO t1 VALUES(0, 0, 0);
  }
} -thread t 10 {

  set nextwrite $E(pid)

  proc wal_hook {zDb nEntry} {
    if {$nEntry>10} { 
      set rc [catch { db eval {PRAGMA wal_checkpoint} } msg]
      if {$rc && $msg != "database is locked"} { error $msg }
    }
    return 0
  }
  db wal_hook wal_hook

  while {[tt_continue]} {
    set max 0
    while { $max != ($nextwrite-1) && [tt_continue] } {