SQLite

Check-in [ef3ba7a17f]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Initial code for incremental checkpoint in WAL mode. This check-in compiles on unix and runs as long as you do not engage WAL mode. WAL mode crashes and burns. Consider this check-in a baseline implementation for getting the new capability up and running.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | wal-incr-ckpt
Files: files | file ages | folders
SHA1: ef3ba7a17ff90674d702e5694b9e792851ab6998
User & Date: drh 2010-05-30 19:55:16.000
Context
2010-05-31
01:41
WAL runs but quickly deadlocks. (check-in: ace58acbf1 user: drh tags: wal-incr-ckpt)
2010-05-30
19:55
Initial code for incremental checkpoint in WAL mode. This check-in compiles on unix and runs as long as you do not engage WAL mode. WAL mode crashes and burns. Consider this check-in a baseline implementation for getting the new capability up and running. (check-in: ef3ba7a17f user: drh tags: wal-incr-ckpt)
2010-05-29
08:40
Add tests to fkey2.test to check that ON CONFLICT clauses do not affect SQLite's behaviour when an FK constraint is violated. (check-in: e9e5b10019 user: dan tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/os.c.
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
}
int sqlite3OsShmGet(sqlite3_file *id,int reqSize,int *pSize,void volatile **pp){
  return id->pMethods->xShmGet(id, reqSize, pSize, pp);
}
int sqlite3OsShmRelease(sqlite3_file *id){
  return id->pMethods->xShmRelease(id);
}
int sqlite3OsShmLock(sqlite3_file *id, int desiredLock, int *pGotLock){
  return id->pMethods->xShmLock(id, desiredLock, pGotLock);
}
void sqlite3OsShmBarrier(sqlite3_file *id){
  id->pMethods->xShmBarrier(id);
}
int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){
  return id->pMethods->xShmClose(id, deleteFlag);
}







|
|







106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
}
int sqlite3OsShmGet(sqlite3_file *id,int reqSize,int *pSize,void volatile **pp){
  return id->pMethods->xShmGet(id, reqSize, pSize, pp);
}
int sqlite3OsShmRelease(sqlite3_file *id){
  return id->pMethods->xShmRelease(id);
}
int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){
  return id->pMethods->xShmLock(id, offset, n, flags);
}
void sqlite3OsShmBarrier(sqlite3_file *id){
  id->pMethods->xShmBarrier(id);
}
int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){
  return id->pMethods->xShmClose(id, deleteFlag);
}
Changes to src/os.h.
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0
int sqlite3OsSectorSize(sqlite3_file *id);
int sqlite3OsDeviceCharacteristics(sqlite3_file *id);
int sqlite3OsShmOpen(sqlite3_file *id);
int sqlite3OsShmSize(sqlite3_file *id, int, int*);
int sqlite3OsShmGet(sqlite3_file *id, int, int*, void volatile**);
int sqlite3OsShmRelease(sqlite3_file *id);
int sqlite3OsShmLock(sqlite3_file *id, int, int*);
void sqlite3OsShmBarrier(sqlite3_file *id);
int sqlite3OsShmClose(sqlite3_file *id, int);

/* 
** Functions for accessing sqlite3_vfs methods 
*/
int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *);







|







243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0
int sqlite3OsSectorSize(sqlite3_file *id);
int sqlite3OsDeviceCharacteristics(sqlite3_file *id);
int sqlite3OsShmOpen(sqlite3_file *id);
int sqlite3OsShmSize(sqlite3_file *id, int, int*);
int sqlite3OsShmGet(sqlite3_file *id, int, int*, void volatile**);
int sqlite3OsShmRelease(sqlite3_file *id);
int sqlite3OsShmLock(sqlite3_file *id, int, int, int);
void sqlite3OsShmBarrier(sqlite3_file *id);
int sqlite3OsShmClose(sqlite3_file *id, int);

/* 
** Functions for accessing sqlite3_vfs methods 
*/
int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *);
Changes to src/os_unix.c.
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210


3211
3212
3213
3214
3215
3216
3217
3218
3219
3220

3221


3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238

3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249






3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282

3283

3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312

3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
**
** All other fields are read/write.  The unixShm.pFile->mutex must be held
** while accessing any read/write fields.
*/
struct unixShm {
  unixShmNode *pShmNode;     /* The underlying unixShmNode object */
  unixShm *pNext;            /* Next unixShm with the same unixShmNode */
  u8 lockState;              /* Current lock state */
  u8 hasMutex;               /* True if holding the unixShmNode mutex */
  u8 hasMutexBuf;            /* True if holding pFile->mutexBuf */
  u8 sharedMask;             /* Mask of shared locks held */
  u8 exclMask;               /* Mask of exclusive locks held */
#ifdef SQLITE_DEBUG
  u8 id;                     /* Id of this connection within its unixShmNode */
#endif
};

/*
** Size increment by which shared memory grows
*/
#define SQLITE_UNIX_SHM_INCR  4096

/*
** Constants used for locking
*/
#define UNIX_SHM_BASE      80        /* Byte offset of the first lock byte */
#define UNIX_SHM_DMS       0x01      /* Mask for Dead-Man-Switch lock */
#define UNIX_SHM_A         0x10      /* Mask for region locks... */
#define UNIX_SHM_B         0x20
#define UNIX_SHM_C         0x40
#define UNIX_SHM_D         0x80

#ifdef SQLITE_DEBUG
/*
** Return a pointer to a nul-terminated string in static memory that
** describes a locking mask.  The string is of the form "MSABCD" with
** each character representing a lock.  "M" for MUTEX, "S" for DMS, 
** and "A" through "D" for the region locks.  If a lock is held, the
** letter is shown.  If the lock is not held, the letter is converted
** to ".".
**
** This routine is for debugging purposes only and does not appear
** in a production build.
*/
static const char *unixShmLockString(u8 mask){
  static char zBuf[48];
  static int iBuf = 0;


  char *z;

  z = &zBuf[iBuf];
  iBuf += 8;
  if( iBuf>=sizeof(zBuf) ) iBuf = 0;

  z[0] = (mask & UNIX_SHM_DMS)   ? 'S' : '.';
  z[1] = (mask & UNIX_SHM_A)     ? 'A' : '.';
  z[2] = (mask & UNIX_SHM_B)     ? 'B' : '.';
  z[3] = (mask & UNIX_SHM_C)     ? 'C' : '.';

  z[4] = (mask & UNIX_SHM_D)     ? 'D' : '.';


  z[5] = 0;
  return z;
}
#endif /* SQLITE_DEBUG */

/*
** Apply posix advisory locks for all bytes identified in lockMask.
**
** lockMask might contain multiple bits but all bits are guaranteed
** to be contiguous.
**
** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking
** otherwise.
*/
static int unixShmSystemLock(
  unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */
  int lockType,          /* F_UNLCK, F_RDLCK, or F_WRLCK */

  u8 lockMask            /* Which bytes to lock or unlock */
){
  struct flock f;       /* The posix advisory locking structure */
  int lockOp;           /* The opcode for fcntl() */
  int i;                /* Offset into the locking byte range */
  int rc;               /* Result code form fcntl() */
  u8 mask;              /* Mask of bits in lockMask */

  /* Access to the unixShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 );







  /* Initialize the locking parameters */
  memset(&f, 0, sizeof(f));
  f.l_type = lockType;
  f.l_whence = SEEK_SET;
  if( lockMask==UNIX_SHM_C && lockType!=F_UNLCK ){
    lockOp = F_SETLKW;
    OSTRACE(("SHM-LOCK requesting blocking lock\n"));
  }else{
    lockOp = F_SETLK;
  }

  /* Find the first bit in lockMask that is set */
  for(i=0, mask=0x01; mask!=0 && (lockMask&mask)==0; mask <<= 1, i++){}
  assert( mask!=0 );
  f.l_start = i+UNIX_SHM_BASE;
  f.l_len = 1;

  /* Extend the locking range for each additional bit that is set */
  mask <<= 1;
  while( mask!=0 && (lockMask & mask)!=0 ){
    f.l_len++;
    mask <<= 1;
  }

  /* Verify that all bits set in lockMask are contiguous */
  assert( mask==0 || (lockMask & ~(mask | (mask-1)))==0 );

  /* Acquire the system-level lock */
  rc = fcntl(pShmNode->h, lockOp, &f);
  rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;

  /* Update the global lock state and do debug tracing */
#ifdef SQLITE_DEBUG

  OSTRACE(("SHM-LOCK "));

  if( rc==SQLITE_OK ){
    if( lockType==F_UNLCK ){
      OSTRACE(("unlock ok"));
      pShmNode->exclMask &= ~lockMask;
      pShmNode->sharedMask &= ~lockMask;
    }else if( lockType==F_RDLCK ){
      OSTRACE(("read-lock ok"));
      pShmNode->exclMask &= ~lockMask;
      pShmNode->sharedMask |= lockMask;
    }else{
      assert( lockType==F_WRLCK );
      OSTRACE(("write-lock ok"));
      pShmNode->exclMask |= lockMask;
      pShmNode->sharedMask &= ~lockMask;
    }
  }else{
    if( lockType==F_UNLCK ){
      OSTRACE(("unlock failed"));
    }else if( lockType==F_RDLCK ){
      OSTRACE(("read-lock failed"));
    }else{
      assert( lockType==F_WRLCK );
      OSTRACE(("write-lock failed"));
    }
  }
  OSTRACE((" - change requested %s - afterwards %s:%s\n",
           unixShmLockString(lockMask),
           unixShmLockString(pShmNode->sharedMask),
           unixShmLockString(pShmNode->exclMask)));

#endif

  return rc;        
}

/*
** For connection p, unlock all of the locks identified by the unlockMask
** parameter.
*/
static int unixShmUnlock(
  unixShmNode *pShmNode,   /* The underlying shared-memory file */
  unixShm *p,              /* The connection to be unlocked */
  u8 unlockMask            /* Mask of locks to be unlocked */
){
  int rc;      /* Result code */
  unixShm *pX; /* For looping over all sibling connections */
  u8 allMask;  /* Union of locks held by connections other than "p" */

  /* Access to the unixShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pShmNode->mutex) );

  /* Compute locks held by sibling connections */
  allMask = 0;
  for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
    if( pX==p ) continue;
    assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 );
    allMask |= pX->sharedMask;
  }

  /* Unlock the system-level locks */
  if( (unlockMask & allMask)!=unlockMask ){
    rc = unixShmSystemLock(pShmNode, F_UNLCK, unlockMask & ~allMask);
  }else{
    rc = SQLITE_OK;
  }

  /* Undo the local locks */
  if( rc==SQLITE_OK ){
    p->exclMask &= ~unlockMask;
    p->sharedMask &= ~unlockMask;
  } 
  return rc;
}

/*
** Get reader locks for connection p on all locks in the readMask parameter.
*/
static int unixShmSharedLock(
  unixShmNode *pShmNode,   /* The underlying shared-memory file */
  unixShm *p,              /* The connection to get the shared locks */
  u8 readMask              /* Mask of shared locks to be acquired */
){
  int rc;        /* Result code */
  unixShm *pX;   /* For looping over all sibling connections */
  u8 allShared;  /* Union of locks held by connections other than "p" */

  /* Access to the unixShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pShmNode->mutex) );

  /* Find out which shared locks are already held by sibling connections.
  ** If any sibling already holds an exclusive lock, go ahead and return
  ** SQLITE_BUSY.
  */
  allShared = 0;
  for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
    if( pX==p ) continue;
    if( (pX->exclMask & readMask)!=0 ) return SQLITE_BUSY;
    allShared |= pX->sharedMask;
  }

  /* Get shared locks at the system level, if necessary */
  if( (~allShared) & readMask ){
    rc = unixShmSystemLock(pShmNode, F_RDLCK, readMask);
  }else{
    rc = SQLITE_OK;
  }

  /* Get the local shared locks */
  if( rc==SQLITE_OK ){
    p->sharedMask |= readMask;
  }
  return rc;
}

/*
** For connection p, get an exclusive lock on all locks identified in
** the writeMask parameter.
*/
static int unixShmExclusiveLock(
  unixShmNode *pShmNode,    /* The underlying shared-memory file */
  unixShm *p,               /* The connection to get the exclusive locks */
  u8 writeMask              /* Mask of exclusive locks to be acquired */
){
  int rc;        /* Result code */
  unixShm *pX;   /* For looping over all sibling connections */

  /* Access to the unixShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pShmNode->mutex) );

  /* Make sure no sibling connections hold locks that will block this
  ** lock.  If any do, return SQLITE_BUSY right away.
  */
  for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
    if( pX==p ) continue;
    if( (pX->exclMask & writeMask)!=0 ) return SQLITE_BUSY;
    if( (pX->sharedMask & writeMask)!=0 ) return SQLITE_BUSY;
  }

  /* Get the exclusive locks at the system level.  Then if successful
  ** also mark the local connection as being locked.
  */
  rc = unixShmSystemLock(pShmNode, F_WRLCK, writeMask);
  if( rc==SQLITE_OK ){
    p->sharedMask &= ~writeMask;
    p->exclMask |= writeMask;
  }
  return rc;
}

/*
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
**
** This is not a VFS shared-memory method; it is a utility function called
** by VFS shared-memory methods.
*/







<


|
|





<
<
<
<
<




<
<
<
<
|













|
|

>
>



|

|
|
|
|
|
>
|
>
>
|





|
<
<
<







>
|


<
<
|
<




>
>
>
>
>
>




<
<
<
<
<
<
<
<
<
<
|
|

<
<
<
<
<
<
<
<
<
<
<
|




>

>


|
|
|

|
|
|


|
|
|



|




|


|
<
|
<
>





<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







3164
3165
3166
3167
3168
3169
3170

3171
3172
3173
3174
3175
3176
3177
3178
3179





3180
3181
3182
3183




3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223



3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234


3235

3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249










3250
3251
3252











3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286

3287

3288
3289
3290
3291
3292
3293

















































































































3294
3295
3296
3297
3298
3299
3300
**
** All other fields are read/write.  The unixShm.pFile->mutex must be held
** while accessing any read/write fields.
*/
struct unixShm {
  unixShmNode *pShmNode;     /* The underlying unixShmNode object */
  unixShm *pNext;            /* Next unixShm with the same unixShmNode */

  u8 hasMutex;               /* True if holding the unixShmNode mutex */
  u8 hasMutexBuf;            /* True if holding pFile->mutexBuf */
  u16 sharedMask;            /* Mask of shared locks held */
  u16 exclMask;              /* Mask of exclusive locks held */
#ifdef SQLITE_DEBUG
  u8 id;                     /* Id of this connection within its unixShmNode */
#endif
};






/*
** Constants used for locking
*/
#define UNIX_SHM_BASE      80        /* Byte offset of the first lock byte */




#define UNIX_SHM_DMS       80        /* The deadman switch lock */

#ifdef SQLITE_DEBUG
/*
** Return a pointer to a nul-terminated string in static memory that
** describes a locking mask.  The string is of the form "MSABCD" with
** each character representing a lock.  "M" for MUTEX, "S" for DMS, 
** and "A" through "D" for the region locks.  If a lock is held, the
** letter is shown.  If the lock is not held, the letter is converted
** to ".".
**
** This routine is for debugging purposes only and does not appear
** in a production build.
*/
static const char *unixShmLockString(u16 maskShared, u16 maskExclusive){
  static char zBuf[52];
  static int iBuf = 0;
  int i;
  u16 mask;
  char *z;

  z = &zBuf[iBuf];
  iBuf += 16;
  if( iBuf>=sizeof(zBuf) ) iBuf = 0;
  for(i=0, mask=1; i<SQLITE_SHM_NLOCK; i++, mask += mask){
    if( mask & maskShared ){
      z[i] = 's';
    }else if( mask & maskExclusive ){
      z[i] = 'E';
    }else{
      z[i] = '.';
    }
  }
  z[i] = 0;
  return z;
}
#endif /* SQLITE_DEBUG */

/*
** Apply posix advisory locks for all bytes from ofst through ofst+n-1.



**
** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking
** otherwise.
*/
static int unixShmSystemLock(
  unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */
  int lockType,          /* F_UNLCK, F_RDLCK, or F_WRLCK */
  int ofst,              /* First byte of the locking range */
  int n                  /* Number of bytes to lock */
){
  struct flock f;       /* The posix advisory locking structure */


  int rc = SQLITE_OK;   /* Result code form fcntl() */


  /* Access to the unixShmNode object is serialized by the caller */
  assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 );

  /* Shared locks never span more than one byte */
  assert( n==1 || lockType!=F_RDLCK );

  /* Locks are within range */
  assert( n>=1 && ofst>=0 && ofst+n<SQLITE_SHM_NLOCK );

  /* Initialize the locking parameters */
  memset(&f, 0, sizeof(f));
  f.l_type = lockType;
  f.l_whence = SEEK_SET;










  f.l_start = ofst+UNIX_SHM_BASE;
  f.l_len = n;












  rc = fcntl(pShmNode->h, F_SETLK, &f);
  rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;

  /* Update the global lock state and do debug tracing */
#ifdef SQLITE_DEBUG
  { u16 mask;
  OSTRACE(("SHM-LOCK "));
  mask = (1<<(ofst+n)) - (1<<ofst);
  if( rc==SQLITE_OK ){
    if( lockType==F_UNLCK ){
      OSTRACE(("unlock %d ok", ofst));
      pShmNode->exclMask &= ~mask;
      pShmNode->sharedMask &= ~mask;
    }else if( lockType==F_RDLCK ){
      OSTRACE(("read-lock %d ok", ofst));
      pShmNode->exclMask &= ~mask;
      pShmNode->sharedMask |= mask;
    }else{
      assert( lockType==F_WRLCK );
      OSTRACE(("write-lock %d ok", ofst));
      pShmNode->exclMask |= mask;
      pShmNode->sharedMask &= ~mask;
    }
  }else{
    if( lockType==F_UNLCK ){
      OSTRACE(("unlock %d failed", ofst));
    }else if( lockType==F_RDLCK ){
      OSTRACE(("read-lock failed"));
    }else{
      assert( lockType==F_WRLCK );
      OSTRACE(("write-lock %d failed", ofst));
    }
  }
  OSTRACE((" - afterwards %s\n",

           unixShmLockString(pShmNode->sharedMask, pShmNode->exclMask)));

  }
#endif

  return rc;        
}



















































































































/*
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
**
** This is not a VFS shared-memory method; it is a utility function called
** by VFS shared-memory methods.
*/
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
      goto shm_open_err;
    }

    /* Check to see if another process is holding the dead-man switch.
    ** If not, truncate the file to zero length. 
    */
    rc = SQLITE_OK;
    if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS)==SQLITE_OK ){
      if( ftruncate(pShmNode->h, 0) ){
        rc = SQLITE_IOERR;
      }
    }
    if( rc==SQLITE_OK ){
      rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS);
    }
    if( rc ) goto shm_open_err;
  }

  /* Make the new connection a child of the unixShmNode */
  p->pShmNode = pShmNode;
  p->pNext = pShmNode->pFirst;







|





|







3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
      goto shm_open_err;
    }

    /* Check to see if another process is holding the dead-man switch.
    ** If not, truncate the file to zero length. 
    */
    rc = SQLITE_OK;
    if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
      if( ftruncate(pShmNode->h, 0) ){
        rc = SQLITE_IOERR;
      }
    }
    if( rc==SQLITE_OK ){
      rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1);
    }
    if( rc ) goto shm_open_err;
  }

  /* Make the new connection a child of the unixShmNode */
  p->pShmNode = pShmNode;
  p->pNext = pShmNode->pFirst;
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
  unixShm *p = pDbFd->pShm;
  unixShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_OK;

  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );

  if( p->lockState!=SQLITE_SHM_CHECKPOINT && p->hasMutexBuf==0 ){
    assert( sqlite3_mutex_notheld(pShmNode->mutex) );
    sqlite3_mutex_enter(pShmNode->mutexBuf);
    p->hasMutexBuf = 1;
  }
  sqlite3_mutex_enter(pShmNode->mutex);
  if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){
    int actualSize;







|







3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
  unixShm *p = pDbFd->pShm;
  unixShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_OK;

  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );

  if( p->hasMutexBuf==0 ){
    assert( sqlite3_mutex_notheld(pShmNode->mutex) );
    sqlite3_mutex_enter(pShmNode->mutexBuf);
    p->hasMutexBuf = 1;
  }
  sqlite3_mutex_enter(pShmNode->mutex);
  if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){
    int actualSize;
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763

3764
3765
3766
3767

3768
3769

3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781

3782
3783
3784
3785
3786

3787

3788

3789
3790
3791


3792

3793
3794
3795


3796
3797

3798
3799
3800
3801
3802

3803


3804
3805


3806
3807

3808


3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819

3820
3821
3822
3823
3824

3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845








3846
3847
3848
3849
3850

3851
3852
3853
3854
3855
3856
3857
3858
3859


3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874

3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
** really want to release the lock, so in that case too, this routine
** is a no-op.
*/
static int unixShmRelease(sqlite3_file *fd){
  unixFile *pDbFd = (unixFile*)fd;
  unixShm *p = pDbFd->pShm;

  if( p->hasMutexBuf && p->lockState!=SQLITE_SHM_RECOVER ){
    assert( sqlite3_mutex_notheld(p->pShmNode->mutex) );
    sqlite3_mutex_leave(p->pShmNode->mutexBuf);
    p->hasMutexBuf = 0;
  }
  return SQLITE_OK;
}

/*
** Symbolic names for LOCK states used for debugging.
*/
#ifdef SQLITE_DEBUG
static const char *azLkName[] = {
  "UNLOCK",
  "READ",
  "READ_FULL",
  "WRITE",
  "PENDING",
  "CHECKPOINT",
  "RECOVER"
};
#endif


/*
** Change the lock state for a shared-memory segment.
*/
static int unixShmLock(
  sqlite3_file *fd,          /* Database file holding the shared memory */
  int desiredLock,           /* One of SQLITE_SHM_xxxxx locking states */

  int *pGotLock              /* The lock you actually got */
){
  unixFile *pDbFd = (unixFile*)fd;
  unixShm *p = pDbFd->pShm;

  unixShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_PROTOCOL;


  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );

  /* Note that SQLITE_SHM_READ_FULL and SQLITE_SHM_PENDING are never
  ** directly requested; they are side effects from requesting
  ** SQLITE_SHM_READ and SQLITE_SHM_CHECKPOINT, respectively.
  */
  assert( desiredLock==SQLITE_SHM_UNLOCK
       || desiredLock==SQLITE_SHM_READ
       || desiredLock==SQLITE_SHM_WRITE
       || desiredLock==SQLITE_SHM_CHECKPOINT

       || desiredLock==SQLITE_SHM_RECOVER );

  /* Return directly if this is just a lock state query, or if
  ** the connection is already in the desired locking state.
  */

  if( desiredLock==p->lockState

   || (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL)

  ){
    OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s and got %s\n",
             p->id, getpid(), azLkName[desiredLock], azLkName[p->lockState]));


    if( pGotLock ) *pGotLock = p->lockState;

    return SQLITE_OK;
  }



  OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s->%s\n",
            p->id, getpid(), azLkName[p->lockState], azLkName[desiredLock]));

  
  if( desiredLock==SQLITE_SHM_RECOVER && !p->hasMutexBuf ){
    assert( sqlite3_mutex_notheld(pShmNode->mutex) );
    sqlite3_mutex_enter(pShmNode->mutexBuf);
    p->hasMutexBuf = 1;

  }


  sqlite3_mutex_enter(pShmNode->mutex);
  switch( desiredLock ){


    case SQLITE_SHM_UNLOCK: {
      assert( p->lockState!=SQLITE_SHM_RECOVER );

      unixShmUnlock(pShmNode, p, UNIX_SHM_A|UNIX_SHM_B|UNIX_SHM_C|UNIX_SHM_D);


      rc = SQLITE_OK;
      p->lockState = SQLITE_SHM_UNLOCK;
      break;
    }
    case SQLITE_SHM_READ: {
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        int nAttempt;
        rc = SQLITE_BUSY;
        assert( p->lockState==SQLITE_SHM_UNLOCK );
        for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){
          rc = unixShmSharedLock(pShmNode, p, UNIX_SHM_A|UNIX_SHM_B);

          if( rc==SQLITE_BUSY ){
            rc = unixShmSharedLock(pShmNode, p, UNIX_SHM_D);
            if( rc==SQLITE_OK ){
              p->lockState = SQLITE_SHM_READ_FULL;
            }

          }else{
            unixShmUnlock(pShmNode, p, UNIX_SHM_B);
            p->lockState = SQLITE_SHM_READ;
          }
        }
      }else{
       assert( p->lockState==SQLITE_SHM_WRITE
               || p->lockState==SQLITE_SHM_RECOVER );
        rc = unixShmSharedLock(pShmNode, p, UNIX_SHM_A);
        unixShmUnlock(pShmNode, p, UNIX_SHM_C|UNIX_SHM_D);
        p->lockState = SQLITE_SHM_READ;
      }
      break;
    }
    case SQLITE_SHM_WRITE: {
      assert( p->lockState==SQLITE_SHM_READ 
              || p->lockState==SQLITE_SHM_READ_FULL );
      rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_C|UNIX_SHM_D);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_WRITE;
      }








      break;
    }
    case SQLITE_SHM_CHECKPOINT: {
      assert( p->lockState==SQLITE_SHM_UNLOCK
           || p->lockState==SQLITE_SHM_PENDING

      );
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_B|UNIX_SHM_C);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_PENDING;
        }
      }
      if( p->lockState==SQLITE_SHM_PENDING ){
        rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_A);


        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_CHECKPOINT;
        }
      }
      break;
    }
    default: {
      assert( desiredLock==SQLITE_SHM_RECOVER );
      assert( p->lockState==SQLITE_SHM_READ
           || p->lockState==SQLITE_SHM_READ_FULL
      );
      assert( sqlite3_mutex_held(pShmNode->mutexBuf) );
      rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_C);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_RECOVER;

      }
      break;
    }
  }
  sqlite3_mutex_leave(pShmNode->mutex);
  OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %s\n",
           p->id, getpid(), azLkName[p->lockState]));
  if( pGotLock ) *pGotLock = p->lockState;
  return rc;
}

/*
** Implement a memory barrier or memory fence on shared memory.  
**
** All loads and stores begun before the barrier must complete before







|







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<






|
>
|

|
|
>
|
|
>



|
<
<
<
<
|
|
|
|
>
|

<
<
<
>
|
>
|
>
|
<
<
>
>
|
>
|
|

>
>
|
|
>
|
|
|
|
|
>
|
>
>
|
<
>
>
|
<
>
|
>
>
|
<
|
|
<
<
<
<
<
<
|
>
|
|
|
|
<
>
|
<
|
|
|
<
<
<
<
<
<
|
|
<
<
<
<
<
|
|
|
>
>
>
>
>
>
>
>
|
|
<
<
<
>
<
<
<
<
<
|
<
<
|
>
>
|
<
<
<
<
<
<
<
<
<
<
<
|

|
>

<




|
<







3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604















3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624




3625
3626
3627
3628
3629
3630
3631



3632
3633
3634
3635
3636
3637


3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659

3660
3661
3662

3663
3664
3665
3666
3667

3668
3669






3670
3671
3672
3673
3674
3675

3676
3677

3678
3679
3680






3681
3682





3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695



3696





3697


3698
3699
3700
3701











3702
3703
3704
3705
3706

3707
3708
3709
3710
3711

3712
3713
3714
3715
3716
3717
3718
** really want to release the lock, so in that case too, this routine
** is a no-op.
*/
static int unixShmRelease(sqlite3_file *fd){
  unixFile *pDbFd = (unixFile*)fd;
  unixShm *p = pDbFd->pShm;

  if( p->hasMutexBuf ){
    assert( sqlite3_mutex_notheld(p->pShmNode->mutex) );
    sqlite3_mutex_leave(p->pShmNode->mutexBuf);
    p->hasMutexBuf = 0;
  }
  return SQLITE_OK;
}

















/*
** Change the lock state for a shared-memory segment.
*/
static int unixShmLock(
  sqlite3_file *fd,          /* Database file holding the shared memory */
  int ofst,                  /* First lock to acquire or release */
  int n,                     /* Number of locks to acquire or release */
  int flags                  /* What to do with the lock */
){
  unixFile *pDbFd = (unixFile*)fd;      /* Connection holding shared memory */
  unixShm *p = pDbFd->pShm;             /* The shared memory being locked */
  unixShm *pX;                          /* For looping over all siblings */
  unixShmNode *pShmNode = p->pShmNode;  /* The underlying file iNode */
  int rc = SQLITE_OK;                   /* Result code */
  u16 mask;                             /* Mask of locks to take or release */

  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );
  assert( ofst>=0 && ofst+n<SQLITE_SHM_NLOCK );




  assert( n>=1 );
  assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
  assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );




  mask = (1<<(ofst+n+1)) - (1<<(ofst+1));
  assert( n>1 || mask==(1<<ofst) );
  sqlite3_mutex_enter(pShmNode->mutex);
  if( flags & SQLITE_SHM_UNLOCK ){
    u16 allMask = 0; /* Mask of locks held by siblings */



    /* See if any siblings hold this same lock */
    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
      if( pX==p ) continue;
      assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 );
      allMask |= pX->sharedMask;
    }

    /* Unlock the system-level locks */
    if( (mask & allMask)==0 ){
      rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+1, n);
    }else{
      rc = SQLITE_OK;
    }

    /* Undo the local locks */
    if( rc==SQLITE_OK ){
      p->exclMask &= ~mask;
      p->sharedMask &= ~mask;
    } 
  }else if( flags & SQLITE_SHM_SHARED ){
    u16 allShared = 0;  /* Union of locks held by connections other than "p" */


    /* Find out which shared locks are already held by sibling connections.
    ** If any sibling already holds an exclusive lock, go ahead and return
    ** SQLITE_BUSY.

    */
    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
      if( pX==p ) continue;
      if( (pX->exclMask & mask)!=0 ){
        rc = SQLITE_BUSY;

        break;
      }






      allShared |= pX->sharedMask;
    }

    /* Get shared locks at the system level, if necessary */
    if( rc==SQLITE_OK ){
      if( (allShared & mask)==0 ){

        rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+1, n);
      }else{

        rc = SQLITE_OK;
      }
    }







    /* Get the local shared locks */





    if( rc==SQLITE_OK ){
      p->sharedMask |= mask;
    }
  }else{
    /* Make sure no sibling connections hold locks that will block this
    ** lock.  If any do, return SQLITE_BUSY right away.
    */
    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
      if( pX==p ) continue;
      if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){
        rc = SQLITE_BUSY;
        break;
      }



    }





  


    /* Get the exclusive locks at the system level.  Then if successful
    ** also mark the local connection as being locked.
    */
    if( rc==SQLITE_OK ){











      rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+1, n);
      if( rc==SQLITE_OK ){
        p->sharedMask &= ~mask;
        p->exclMask |= mask;
      }

    }
  }
  sqlite3_mutex_leave(pShmNode->mutex);
  OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %s\n",
           p->id, getpid(), unixShmLockString(p->sharedMask, p->exclMask)));

  return rc;
}

/*
** Implement a memory barrier or memory fence on shared memory.  
**
** All loads and stores begun before the barrier must complete before
Changes to src/pager.c.
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
  return (pPager->pWal!=0);
}
#else
# define pagerUseWal(x) 0
# define pagerRollbackWal(x) 0
# define pagerWalFrames(v,w,x,y,z) 0
# define pagerOpenWalIfPresent(z) SQLITE_OK
# define pagerOpenSnapshot(z) SQLITE_OK
#endif

/*
** Unlock the database file. This function is a no-op if the pager
** is in exclusive mode.
**
** If the pager is currently in error state, discard the contents of 







|







1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
  return (pPager->pWal!=0);
}
#else
# define pagerUseWal(x) 0
# define pagerRollbackWal(x) 0
# define pagerWalFrames(v,w,x,y,z) 0
# define pagerOpenWalIfPresent(z) SQLITE_OK
# define pagerBeginReadTransaction(z) SQLITE_OK
#endif

/*
** Unlock the database file. This function is a no-op if the pager
** is in exclusive mode.
**
** If the pager is currently in error state, discard the contents of 
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
    ** this happens.  One can argue that this doesn't need to be cleared
    ** until the change-counter check fails in PagerSharedLock().
    ** Clearing the page size cache here is being conservative.
    */
    pPager->dbSizeValid = 0;

    if( pagerUseWal(pPager) ){
      sqlite3WalCloseSnapshot(pPager->pWal);
    }else{
      rc = osUnlock(pPager->fd, NO_LOCK);
    }
    if( rc ){
      pPager->errCode = rc;
    }
    IOTRACE(("UNLOCK %p\n", pPager))







|







1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
    ** this happens.  One can argue that this doesn't need to be cleared
    ** until the change-counter check fails in PagerSharedLock().
    ** Clearing the page size cache here is being conservative.
    */
    pPager->dbSizeValid = 0;

    if( pagerUseWal(pPager) ){
      sqlite3WalEndReadTransaction(pPager->pWal);
    }else{
      rc = osUnlock(pPager->fd, NO_LOCK);
    }
    if( rc ){
      pPager->errCode = rc;
    }
    IOTRACE(("UNLOCK %p\n", pPager))
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
  }
  sqlite3BitvecDestroy(pPager->pInJournal);
  pPager->pInJournal = 0;
  pPager->nRec = 0;
  sqlite3PcacheCleanAll(pPager->pPCache);

  if( pagerUseWal(pPager) ){
    rc2 = sqlite3WalWriteLock(pPager->pWal, 0);
    pPager->state = PAGER_SHARED;

    /* If the connection was in locking_mode=exclusive mode but is no longer,
    ** drop the EXCLUSIVE lock held on the database file.
    */
    if( rc2==SQLITE_OK 
     && !pPager->exclusiveMode 







|







1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
  }
  sqlite3BitvecDestroy(pPager->pInJournal);
  pPager->pInJournal = 0;
  pPager->nRec = 0;
  sqlite3PcacheCleanAll(pPager->pPCache);

  if( pagerUseWal(pPager) ){
    rc2 = sqlite3WalEndWriteTransaction(pPager->pWal);
    pPager->state = PAGER_SHARED;

    /* If the connection was in locking_mode=exclusive mode but is no longer,
    ** drop the EXCLUSIVE lock held on the database file.
    */
    if( rc2==SQLITE_OK 
     && !pPager->exclusiveMode 
2358
2359
2360
2361
2362
2363
2364




2365

2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
    }
  }
  return rc;
}

/*




** Open a WAL snapshot on the log file this pager is connected to.

*/
static int pagerOpenSnapshot(Pager *pPager){
  int rc;                         /* Return code */
  int changed = 0;                /* True if cache must be reset */

  assert( pagerUseWal(pPager) );

  rc = sqlite3WalOpenSnapshot(pPager->pWal, &changed);
  if( rc==SQLITE_OK ){
    int dummy;
    if( changed ){
      pager_reset(pPager);
      assert( pPager->errCode || pPager->dbSizeValid==0 );
    }
    rc = sqlite3PagerPagecount(pPager, &dummy);







>
>
>
>
|
>

|





|







2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
    }
  }
  return rc;
}

/*
** Begin a read transaction on the WAL.
**
** This routine used to be called "pagerOpenSnapshot()" because it essentially
** makes a snapshot of the database at the current point in time and preserves
** that snapshot for use by the reader in spite of concurrently changes by
** other writers or checkpointers.
*/
static int pagerBeginReadTransaction(Pager *pPager){
  int rc;                         /* Return code */
  int changed = 0;                /* True if cache must be reset */

  assert( pagerUseWal(pPager) );

  rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed);
  if( rc==SQLITE_OK ){
    int dummy;
    if( changed ){
      pager_reset(pPager);
      assert( pPager->errCode || pPager->dbSizeValid==0 );
    }
    rc = sqlite3PagerPagecount(pPager, &dummy);
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
    int isWal;                    /* True if WAL file exists */
    rc = pagerHasWAL(pPager, &isWal);
    if( rc==SQLITE_OK ){
      if( isWal ){
        pager_reset(pPager);
        rc = sqlite3PagerOpenWal(pPager, 0);
        if( rc==SQLITE_OK ){
          rc = pagerOpenSnapshot(pPager);
        }
      }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
        pPager->journalMode = PAGER_JOURNALMODE_DELETE;
      }
    }
  }
  return rc;







|







2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
    int isWal;                    /* True if WAL file exists */
    rc = pagerHasWAL(pPager, &isWal);
    if( rc==SQLITE_OK ){
      if( isWal ){
        pager_reset(pPager);
        rc = sqlite3PagerOpenWal(pPager, 0);
        if( rc==SQLITE_OK ){
          rc = pagerBeginReadTransaction(pPager);
        }
      }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
        pPager->journalMode = PAGER_JOURNALMODE_DELETE;
      }
    }
  }
  return rc;
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
      isErrorReset = 1;
    }
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }

  if( pagerUseWal(pPager) ){
    rc = pagerOpenSnapshot(pPager);
  }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs * const pVfs = pPager->pVfs;
    int isHotJournal = 0;
    assert( !MEMDB );
    assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
    if( pPager->noReadlock ){
      assert( pPager->readOnly );







|







4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
      isErrorReset = 1;
    }
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }

  if( pagerUseWal(pPager) ){
    rc = pagerBeginReadTransaction(pPager);
  }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs * const pVfs = pPager->pVfs;
    int isHotJournal = 0;
    assert( !MEMDB );
    assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
    if( pPager->noReadlock ){
      assert( pPager->readOnly );
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
      **
      ** WAL mode sets Pager.state to PAGER_RESERVED when it has an open
      ** transaction, but never to PAGER_EXCLUSIVE. This is because in 
      ** PAGER_EXCLUSIVE state the code to roll back savepoint transactions
      ** may copy data from the sub-journal into the database file as well
      ** as into the page cache. Which would be incorrect in WAL mode.
      */
      rc = sqlite3WalWriteLock(pPager->pWal, 1);
      if( rc==SQLITE_OK ){
        pPager->dbOrigSize = pPager->dbSize;
        pPager->state = PAGER_RESERVED;
        pPager->journalOff = 0;
      }

      assert( rc!=SQLITE_OK || pPager->state==PAGER_RESERVED );







|







4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
      **
      ** WAL mode sets Pager.state to PAGER_RESERVED when it has an open
      ** transaction, but never to PAGER_EXCLUSIVE. This is because in 
      ** PAGER_EXCLUSIVE state the code to roll back savepoint transactions
      ** may copy data from the sub-journal into the database file as well
      ** as into the page cache. Which would be incorrect in WAL mode.
      */
      rc = sqlite3WalBeginWriteTransaction(pPager->pWal);
      if( rc==SQLITE_OK ){
        pPager->dbOrigSize = pPager->dbSize;
        pPager->state = PAGER_RESERVED;
        pPager->journalOff = 0;
      }

      assert( rc!=SQLITE_OK || pPager->state==PAGER_RESERVED );
5888
5889
5890
5891
5892
5893
5894
5895
5896
5897
5898
5899
5900
5901
5902
5903
*/
int sqlite3PagerCheckpoint(Pager *pPager){
  int rc = SQLITE_OK;
  if( pPager->pWal ){
    u8 *zBuf = (u8 *)pPager->pTmpSpace;
    rc = sqlite3WalCheckpoint(pPager->pWal,
        (pPager->noSync ? 0 : pPager->sync_flags),
        pPager->pageSize, zBuf, 
        pPager->xBusyHandler, pPager->pBusyHandlerArg
    );
  }
  return rc;
}

int sqlite3PagerWalCallback(Pager *pPager){
  return sqlite3WalCallback(pPager->pWal);







|
<







5893
5894
5895
5896
5897
5898
5899
5900

5901
5902
5903
5904
5905
5906
5907
*/
int sqlite3PagerCheckpoint(Pager *pPager){
  int rc = SQLITE_OK;
  if( pPager->pWal ){
    u8 *zBuf = (u8 *)pPager->pTmpSpace;
    rc = sqlite3WalCheckpoint(pPager->pWal,
        (pPager->noSync ? 0 : pPager->sync_flags),
        pPager->pageSize, zBuf

    );
  }
  return rc;
}

int sqlite3PagerWalCallback(Pager *pPager){
  return sqlite3WalCallback(pPager->pWal);
Changes to src/sqlite.h.in.
440
441
442
443
444
445
446
447

448
449
450
451
452
453
454
#define SQLITE_IOERR_BLOCKED           (SQLITE_IOERR | (11<<8))
#define SQLITE_IOERR_NOMEM             (SQLITE_IOERR | (12<<8))
#define SQLITE_IOERR_ACCESS            (SQLITE_IOERR | (13<<8))
#define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8))
#define SQLITE_IOERR_LOCK              (SQLITE_IOERR | (15<<8))
#define SQLITE_IOERR_CLOSE             (SQLITE_IOERR | (16<<8))
#define SQLITE_IOERR_DIR_CLOSE         (SQLITE_IOERR | (17<<8))
#define SQLITE_LOCKED_SHAREDCACHE      (SQLITE_LOCKED | (1<<8) )


/*
** CAPI3REF: Flags For File Open Operations
**
** These bit values are intended for use in the
** 3rd parameter to the [sqlite3_open_v2()] interface and
** in the 4th parameter to the xOpen method of the







|
>







440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
#define SQLITE_IOERR_BLOCKED           (SQLITE_IOERR | (11<<8))
#define SQLITE_IOERR_NOMEM             (SQLITE_IOERR | (12<<8))
#define SQLITE_IOERR_ACCESS            (SQLITE_IOERR | (13<<8))
#define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8))
#define SQLITE_IOERR_LOCK              (SQLITE_IOERR | (15<<8))
#define SQLITE_IOERR_CLOSE             (SQLITE_IOERR | (16<<8))
#define SQLITE_IOERR_DIR_CLOSE         (SQLITE_IOERR | (17<<8))
#define SQLITE_LOCKED_SHAREDCACHE      (SQLITE_LOCKED |  (1<<8))
#define SQLITE_BUSY_RECOVERY           (SQLITE_BUSY   |  (1<<8))

/*
** CAPI3REF: Flags For File Open Operations
**
** These bit values are intended for use in the
** 3rd parameter to the [sqlite3_open_v2()] interface and
** in the 4th parameter to the xOpen method of the
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
  int (*xSectorSize)(sqlite3_file*);
  int (*xDeviceCharacteristics)(sqlite3_file*);
  /* Methods above are valid for version 1 */
  int (*xShmOpen)(sqlite3_file*);
  int (*xShmSize)(sqlite3_file*, int reqSize, int *pNewSize);
  int (*xShmGet)(sqlite3_file*, int reqSize, int *pSize, void volatile**);
  int (*xShmRelease)(sqlite3_file*);
  int (*xShmLock)(sqlite3_file*, int desiredLock, int *gotLock);
  void (*xShmBarrier)(sqlite3_file*);
  int (*xShmClose)(sqlite3_file*, int deleteFlag);
  /* Methods above are valid for version 2 */
  /* Additional methods may be added in future releases */
};

/*







|







655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
  int (*xSectorSize)(sqlite3_file*);
  int (*xDeviceCharacteristics)(sqlite3_file*);
  /* Methods above are valid for version 1 */
  int (*xShmOpen)(sqlite3_file*);
  int (*xShmSize)(sqlite3_file*, int reqSize, int *pNewSize);
  int (*xShmGet)(sqlite3_file*, int reqSize, int *pSize, void volatile**);
  int (*xShmRelease)(sqlite3_file*);
  int (*xShmLock)(sqlite3_file*, int offset, int n, int flags);
  void (*xShmBarrier)(sqlite3_file*);
  int (*xShmClose)(sqlite3_file*, int deleteFlag);
  /* Methods above are valid for version 2 */
  /* Additional methods may be added in future releases */
};

/*
884
885
886
887
888
889
890
891
















892
893
894
895
896
897
898








899
900
901
902
903
904
905
906
907
#define SQLITE_ACCESS_EXISTS    0
#define SQLITE_ACCESS_READWRITE 1
#define SQLITE_ACCESS_READ      2

/*
** CAPI3REF: Flags for the xShmLock VFS method
**
** These integer constants define the various locking states that
















** an sqlite3_shm object can be in.
*/
#define SQLITE_SHM_UNLOCK       0
#define SQLITE_SHM_READ         1
#define SQLITE_SHM_READ_FULL    2
#define SQLITE_SHM_WRITE        3
#define SQLITE_SHM_PENDING      4








#define SQLITE_SHM_CHECKPOINT   5
#define SQLITE_SHM_RECOVER      6

/*
** CAPI3REF: Initialize The SQLite Library
**
** ^The sqlite3_initialize() routine initializes the
** SQLite library.  ^The sqlite3_shutdown() routine
** deallocates any resources that were allocated by sqlite3_initialize().







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|

|
|
|
|
|
>
>
>
>
>
>
>
>
|
|







885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
#define SQLITE_ACCESS_EXISTS    0
#define SQLITE_ACCESS_READWRITE 1
#define SQLITE_ACCESS_READ      2

/*
** CAPI3REF: Flags for the xShmLock VFS method
**
** These integer constants define the various locking operations
** allowed by the xShmLock method of [sqlite3_io_methods].  The
** following are the only legal combinations of flags to the
** xShmLock method:
**
** <ul>
** <li>  SQLITE_SHM_LOCK | SQLITE_SHM_SHARED
** <li>  SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE
** <li>  SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED
** <li>  SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE
** </ul>
**
** When unlocking, the same SHARED or EXCLUSIVE flag must be supplied as
** was given no the corresponding lock.  
**
** The xShmLock method can transition between unlocked and SHARED or
** between unlocked and EXCLUSIVE.  It cannot transition between SHARED
** and EXCLUSIVE.
*/
#define SQLITE_SHM_UNLOCK       1
#define SQLITE_SHM_LOCK         2
#define SQLITE_SHM_SHARED       4
#define SQLITE_SHM_EXCLUSIVE    8

/*
** CAPI3REF: Maximum xShmLock index
**
** The xShmLock method on [sqlite3_io_methods] may use values
** between 0 and this upper bound as its "offset" argument.
** The SQLite core will never attempt to acquire or release a
** lock outside of this range
*/
#define SQLITE_SHM_NLOCK        8


/*
** CAPI3REF: Initialize The SQLite Library
**
** ^The sqlite3_initialize() routine initializes the
** SQLite library.  ^The sqlite3_shutdown() routine
** deallocates any resources that were allocated by sqlite3_initialize().
Changes to src/test6.c.
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
  void volatile **pp
){
  return sqlite3OsShmGet(((CrashFile*)pFile)->pRealFile, reqSize, pSize, pp);
}
static int cfShmRelease(sqlite3_file *pFile){
  return sqlite3OsShmRelease(((CrashFile*)pFile)->pRealFile);
}
static int cfShmLock(sqlite3_file *pFile, int desired, int *pGot){
  return sqlite3OsShmLock(((CrashFile*)pFile)->pRealFile, desired, pGot);
}
static void cfShmBarrier(sqlite3_file *pFile){
  sqlite3OsShmBarrier(((CrashFile*)pFile)->pRealFile);
}
static int cfShmClose(sqlite3_file *pFile, int delFlag){
  return sqlite3OsShmClose(((CrashFile*)pFile)->pRealFile, delFlag);
}







|
|







536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
  void volatile **pp
){
  return sqlite3OsShmGet(((CrashFile*)pFile)->pRealFile, reqSize, pSize, pp);
}
static int cfShmRelease(sqlite3_file *pFile){
  return sqlite3OsShmRelease(((CrashFile*)pFile)->pRealFile);
}
static int cfShmLock(sqlite3_file *pFile, int ofst, int n, int flags){
  return sqlite3OsShmLock(((CrashFile*)pFile)->pRealFile, ofst, n, flags);
}
static void cfShmBarrier(sqlite3_file *pFile){
  sqlite3OsShmBarrier(((CrashFile*)pFile)->pRealFile);
}
static int cfShmClose(sqlite3_file *pFile, int delFlag){
  return sqlite3OsShmClose(((CrashFile*)pFile)->pRealFile, delFlag);
}
Changes to src/test_devsym.c.
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
static int devsymFileControl(sqlite3_file*, int op, void *pArg);
static int devsymSectorSize(sqlite3_file*);
static int devsymDeviceCharacteristics(sqlite3_file*);
static int devsymShmOpen(sqlite3_file*);
static int devsymShmSize(sqlite3_file*,int,int*);
static int devsymShmGet(sqlite3_file*,int,int*,volatile void**);
static int devsymShmRelease(sqlite3_file*);
static int devsymShmLock(sqlite3_file*,int,int*);
static void devsymShmBarrier(sqlite3_file*);
static int devsymShmClose(sqlite3_file*,int);

/*
** Method declarations for devsym_vfs.
*/
static int devsymOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *);







|







50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
static int devsymFileControl(sqlite3_file*, int op, void *pArg);
static int devsymSectorSize(sqlite3_file*);
static int devsymDeviceCharacteristics(sqlite3_file*);
static int devsymShmOpen(sqlite3_file*);
static int devsymShmSize(sqlite3_file*,int,int*);
static int devsymShmGet(sqlite3_file*,int,int*,volatile void**);
static int devsymShmRelease(sqlite3_file*);
static int devsymShmLock(sqlite3_file*,int,int,int);
static void devsymShmBarrier(sqlite3_file*);
static int devsymShmClose(sqlite3_file*,int);

/*
** Method declarations for devsym_vfs.
*/
static int devsymOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *);
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
  devsym_file *p = (devsym_file *)pFile;
  return sqlite3OsShmGet(p->pReal, reqSz, pSize, pp);
}
static int devsymShmRelease(sqlite3_file *pFile){
  devsym_file *p = (devsym_file *)pFile;
  return sqlite3OsShmRelease(p->pReal);
}
static int devsymShmLock(sqlite3_file *pFile, int desired, int *pGot){
  devsym_file *p = (devsym_file *)pFile;
  return sqlite3OsShmLock(p->pReal, desired, pGot);
}
static void devsymShmBarrier(sqlite3_file *pFile){
  devsym_file *p = (devsym_file *)pFile;
  sqlite3OsShmBarrier(p->pReal);
}
static int devsymShmClose(sqlite3_file *pFile, int delFlag){
  devsym_file *p = (devsym_file *)pFile;







|

|







259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
  devsym_file *p = (devsym_file *)pFile;
  return sqlite3OsShmGet(p->pReal, reqSz, pSize, pp);
}
static int devsymShmRelease(sqlite3_file *pFile){
  devsym_file *p = (devsym_file *)pFile;
  return sqlite3OsShmRelease(p->pReal);
}
static int devsymShmLock(sqlite3_file *pFile, int ofst, int n, int flags){
  devsym_file *p = (devsym_file *)pFile;
  return sqlite3OsShmLock(p->pReal, ofst, n, flags);
}
static void devsymShmBarrier(sqlite3_file *pFile){
  devsym_file *p = (devsym_file *)pFile;
  sqlite3OsShmBarrier(p->pReal);
}
static int devsymShmClose(sqlite3_file *pFile, int delFlag){
  devsym_file *p = (devsym_file *)pFile;
Changes to src/test_osinst.c.
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
static int vfslogSectorSize(sqlite3_file*);
static int vfslogDeviceCharacteristics(sqlite3_file*);

static int vfslogShmOpen(sqlite3_file *pFile);
static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize);
static int vfslogShmGet(sqlite3_file *pFile, int,int*,volatile void **);
static int vfslogShmRelease(sqlite3_file *pFile);
static int vfslogShmLock(sqlite3_file *pFile, int desiredLock, int *gotLock);
static void vfslogShmBarrier(sqlite3_file*);
static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag);

/*
** Method declarations for vfslog_vfs.
*/
static int vfslogOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *);







|







151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
static int vfslogSectorSize(sqlite3_file*);
static int vfslogDeviceCharacteristics(sqlite3_file*);

static int vfslogShmOpen(sqlite3_file *pFile);
static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize);
static int vfslogShmGet(sqlite3_file *pFile, int,int*,volatile void **);
static int vfslogShmRelease(sqlite3_file *pFile);
static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags);
static void vfslogShmBarrier(sqlite3_file*);
static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag);

/*
** Method declarations for vfslog_vfs.
*/
static int vfslogOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *);
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
  VfslogFile *p = (VfslogFile *)pFile;
  t = vfslog_time();
  rc = p->pReal->pMethods->xShmRelease(p->pReal);
  t = vfslog_time() - t;
  vfslog_call(p->pVfslog, OS_SHMRELEASE, p->iFileId, t, rc, 0, 0);
  return rc;
}
static int vfslogShmLock(sqlite3_file *pFile, int desiredLock, int *gotLock){
  int rc;
  sqlite3_uint64 t;
  VfslogFile *p = (VfslogFile *)pFile;
  t = vfslog_time();
  rc = p->pReal->pMethods->xShmLock(p->pReal, desiredLock, gotLock);
  t = vfslog_time() - t;
  vfslog_call(p->pVfslog, OS_SHMLOCK, p->iFileId, t, rc, 0, 0);
  return rc;
}
static void vfslogShmBarrier(sqlite3_file *pFile){
  sqlite3_uint64 t;
  VfslogFile *p = (VfslogFile *)pFile;







|




|







456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
  VfslogFile *p = (VfslogFile *)pFile;
  t = vfslog_time();
  rc = p->pReal->pMethods->xShmRelease(p->pReal);
  t = vfslog_time() - t;
  vfslog_call(p->pVfslog, OS_SHMRELEASE, p->iFileId, t, rc, 0, 0);
  return rc;
}
static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags){
  int rc;
  sqlite3_uint64 t;
  VfslogFile *p = (VfslogFile *)pFile;
  t = vfslog_time();
  rc = p->pReal->pMethods->xShmLock(p->pReal, ofst, n, flags);
  t = vfslog_time() - t;
  vfslog_call(p->pVfslog, OS_SHMLOCK, p->iFileId, t, rc, 0, 0);
  return rc;
}
static void vfslogShmBarrier(sqlite3_file *pFile){
  sqlite3_uint64 t;
  VfslogFile *p = (VfslogFile *)pFile;
Changes to src/test_vfs.c.
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
static int tvfsSleep(sqlite3_vfs*, int microseconds);
static int tvfsCurrentTime(sqlite3_vfs*, double*);

static int tvfsShmOpen(sqlite3_file*);
static int tvfsShmSize(sqlite3_file*, int , int *);
static int tvfsShmGet(sqlite3_file*, int , int *, volatile void **);
static int tvfsShmRelease(sqlite3_file*);
static int tvfsShmLock(sqlite3_file*, int , int *);
static void tvfsShmBarrier(sqlite3_file*);
static int tvfsShmClose(sqlite3_file*, int);

static sqlite3_io_methods tvfs_io_methods = {
  2,                            /* iVersion */
  tvfsClose,                      /* xClose */
  tvfsRead,                       /* xRead */







|







98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
static int tvfsSleep(sqlite3_vfs*, int microseconds);
static int tvfsCurrentTime(sqlite3_vfs*, double*);

static int tvfsShmOpen(sqlite3_file*);
static int tvfsShmSize(sqlite3_file*, int , int *);
static int tvfsShmGet(sqlite3_file*, int , int *, volatile void **);
static int tvfsShmRelease(sqlite3_file*);
static int tvfsShmLock(sqlite3_file*, int , int, int);
static void tvfsShmBarrier(sqlite3_file*);
static int tvfsShmClose(sqlite3_file*, int);

static sqlite3_io_methods tvfs_io_methods = {
  2,                            /* iVersion */
  tvfsClose,                      /* xClose */
  tvfsRead,                       /* xRead */
540
541
542
543
544
545
546
547
548

549
550
551
552
553

554

555
556
557

558

559
560
561


562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
  tvfsResultCode(p, &rc);

  return rc;
}

static int tvfsShmLock(
  sqlite3_file *pFile,
  int desiredLock,
  int *gotLock

){
  int rc = SQLITE_OK;
  TestvfsFile *pFd = (TestvfsFile *)pFile;
  Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
  char *zLock = "";



  switch( desiredLock ){
    case SQLITE_SHM_READ:         zLock = "READ";       break;
    case SQLITE_SHM_WRITE:        zLock = "WRITE";      break;

    case SQLITE_SHM_CHECKPOINT:   zLock = "CHECKPOINT"; break;

    case SQLITE_SHM_RECOVER:      zLock = "RECOVER";    break;
    case SQLITE_SHM_PENDING:      zLock = "PENDING";    break;
    case SQLITE_SHM_UNLOCK:       zLock = "UNLOCK";     break;


  }
  tvfsExecTcl(p, "xShmLock", 
      Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId,
      Tcl_NewStringObj(zLock, -1)
  );
  tvfsResultCode(p, &rc);
  if( rc==SQLITE_OK ){
    *gotLock = desiredLock;
  }

  return rc;
}

static void tvfsShmBarrier(sqlite3_file *pFile){
  int rc = SQLITE_OK;
  TestvfsFile *pFd = (TestvfsFile *)pFile;
  Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);







|
|
>




|
>

>
|
|
|
>
|
>
|
|
|
>
>






<
<
<
<







540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574




575
576
577
578
579
580
581
  tvfsResultCode(p, &rc);

  return rc;
}

static int tvfsShmLock(
  sqlite3_file *pFile,
  int ofst,
  int n,
  int flags
){
  int rc = SQLITE_OK;
  TestvfsFile *pFd = (TestvfsFile *)pFile;
  Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
  int nLock;
  char zLock[80];

  sqlite3_snprintf(sizeof(zLock), zLock, "%d %d", ofst, n);
  nLock = strlen(zLock);
  if( flags & SQLITE_SHM_LOCK ){
    strcpy(&zLock[nLock], " lock");
  }else{
    strcpy(&zLock[nLock], " unlock");
  }
  nLock += strlen(&zLock[nLock]);
  if( flags & SQLITE_SHM_SHARED ){
    strcpy(&zLock[nLock], " shared");
  }else{
    strcpy(&zLock[nLock], " exclusive");
  }
  tvfsExecTcl(p, "xShmLock", 
      Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId,
      Tcl_NewStringObj(zLock, -1)
  );
  tvfsResultCode(p, &rc);




  return rc;
}

static void tvfsShmBarrier(sqlite3_file *pFile){
  int rc = SQLITE_OK;
  TestvfsFile *pFd = (TestvfsFile *)pFile;
  Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
**   VFSNAME shm FILENAME ?NEWVALUE?
**
** When the xShmLock method is invoked by SQLite, the following script is
** run:
**
**   SCRIPT xShmLock    FILENAME ID LOCK
**
** where LOCK is one of "UNLOCK", "READ", "READ_FULL", "WRITE", "PENDING",
** "CHECKPOINT" or "RECOVER". The script should return an SQLite error
** code.
*/
static int testvfs_cmd(
  ClientData cd,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){







|
<
<







715
716
717
718
719
720
721
722


723
724
725
726
727
728
729
**   VFSNAME shm FILENAME ?NEWVALUE?
**
** When the xShmLock method is invoked by SQLite, the following script is
** run:
**
**   SCRIPT xShmLock    FILENAME ID LOCK
**
** where LOCK is of the form "OFFSET NBYTE lock/unlock shared/exclusive"


*/
static int testvfs_cmd(
  ClientData cd,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
Changes to src/wal.c.
89
90
91
92
93
94
95
96

97
98
99
100









101
102
103
104
105
106
107
108
** being considered valid at the same time and being checkpointing together
** following a crash.
**
** READER ALGORITHM
**
** To read a page from the database (call it page number P), a reader
** first checks the WAL to see if it contains page P.  If so, then the
** last valid instance of page P that is or is followed by a commit frame

** become the value read.  If the WAL contains no copies of page P that
** are valid and which are or are followed by a commit frame, then page
** P is read from the database file.
**









** The reader algorithm in the previous paragraph works correctly, but 
** because frames for page P can appear anywhere within the WAL, the
** reader has to scan the entire WAL looking for page P frames.  If the
** WAL is large (multiple megabytes is typical) that scan can be slow,
** and read performance suffers.  To overcome this problem, a separate
** data structure called the wal-index is maintained to expedite the
** search for frames of a particular page.
** 







|
>
|
|
|

>
>
>
>
>
>
>
>
>
|







89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
** being considered valid at the same time and being checkpointing together
** following a crash.
**
** READER ALGORITHM
**
** To read a page from the database (call it page number P), a reader
** first checks the WAL to see if it contains page P.  If so, then the
** last valid instance of page P that is a followed by a commit frame
** or is a commit frame itself becomes the value read.  If the WAL
** contains no copies of page P that are valid and which are a commit
** frame or are followed by a commit frame, then page P is read from
** the database file.
**
** To start a read transaction, the reader records the index of the last
** valid frame in the WAL.  The reader uses this recorded "mxFrame" value
** for all subsequent read operations.  New transactions can be appended
** to the WAL, but as long as the reader uses its original mxFrame value
** and ignores the newly appended content, it will see a consistent snapshot
** of the database from a single point in time.  This technique allows
** multiple concurrent readers to view different versions of the database
** content simultaneously.
**
** The reader algorithm in the previous paragraphs works correctly, but 
** because frames for page P can appear anywhere within the WAL, the
** reader has to scan the entire WAL looking for page P frames.  If the
** WAL is large (multiple megabytes is typical) that scan can be slow,
** and read performance suffers.  To overcome this problem, a separate
** data structure called the wal-index is maintained to expedite the
** search for frames of a particular page.
** 
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
** table is never more than half full.  The expected number of collisions 
** prior to finding a match is 1.  Each entry of the hash table is an
** 1-based index of an entry in the mapping section of the same
** index block.   Let K be the 1-based index of the largest entry in
** the mapping section.  (For index blocks other than the last, K will
** always be exactly HASHTABLE_NPAGE (4096) and for the last index block
** K will be (mxFrame%HASHTABLE_NPAGE).)  Unused slots of the hash table
** contain a value greater than K.  Note that no hash table slot ever
** contains a zero value.
**
** To look for page P in the hash table, first compute a hash iKey on
** P as follows:
**
**      iKey = (P * 383) % HASHTABLE_NSLOT
**
** Then start scanning entries of the hash table, starting with iKey







|
<







167
168
169
170
171
172
173
174

175
176
177
178
179
180
181
** table is never more than half full.  The expected number of collisions 
** prior to finding a match is 1.  Each entry of the hash table is an
** 1-based index of an entry in the mapping section of the same
** index block.   Let K be the 1-based index of the largest entry in
** the mapping section.  (For index blocks other than the last, K will
** always be exactly HASHTABLE_NPAGE (4096) and for the last index block
** K will be (mxFrame%HASHTABLE_NPAGE).)  Unused slots of the hash table
** contain a value of 0.

**
** To look for page P in the hash table, first compute a hash iKey on
** P as follows:
**
**      iKey = (P * 383) % HASHTABLE_NSLOT
**
** Then start scanning entries of the hash table, starting with iKey
210
211
212
213
214
215
216











217
218
219
220

221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239




















































240
241
242
243
244
245
246
247
248
249
250
251
252
253
** that correspond to frames greater than the new K value are removed
** from the hash table at this point.
*/
#ifndef SQLITE_OMIT_WAL

#include "wal.h"













/* Object declarations */
typedef struct WalIndexHdr WalIndexHdr;
typedef struct WalIterator WalIterator;



/*
** The following object holds a copy of the wal-index header content.
**
** The actual header in the wal-index consists of two copies of this
** object.
*/
struct WalIndexHdr {
  u32 iChange;                    /* Counter incremented each transaction */
  u16 bigEndCksum;                /* True if checksums in WAL are big-endian */
  u16 szPage;                     /* Database page size in bytes */
  u32 mxFrame;                    /* Index of last valid frame in the WAL */
  u32 nPage;                      /* Size of database in pages */
  u32 aFrameCksum[2];             /* Checksum of last frame in log */
  u32 aSalt[2];                   /* Two salt values copied from WAL header */
  u32 aCksum[2];                  /* Checksum over all prior fields */
};





















































/* A block of WALINDEX_LOCK_RESERVED bytes beginning at
** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems
** only support mandatory file-locks, we do not read or write data
** from the region of the file on which locks are applied.
*/
#define WALINDEX_LOCK_OFFSET   (sizeof(WalIndexHdr)*2)
#define WALINDEX_LOCK_RESERVED 8
#define WALINDEX_HDR_SIZE      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)

/* Size of header before each frame in wal */
#define WAL_FRAME_HDRSIZE 24

/* Size of write ahead log header */
#define WAL_HDRSIZE 24







>
>
>
>
>
>
>
>
>
>
>




>



















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|
|







219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
** that correspond to frames greater than the new K value are removed
** from the hash table at this point.
*/
#ifndef SQLITE_OMIT_WAL

#include "wal.h"

/*
** Indices of various locking bytes.   WAL_NREADER is the number
** of available reader locks and should be at least 3.
*/
#define WAL_WRITE_LOCK         0
#define WAL_ALL_BUT_WRITE      1
#define WAL_CKPT_LOCK          1
#define WAL_RECOVER_LOCK       2
#define WAL_READ_LOCK(I)       (3+(I))
#define WAL_NREADER            (SQLITE_SHM_NLOCK-3)


/* Object declarations */
typedef struct WalIndexHdr WalIndexHdr;
typedef struct WalIterator WalIterator;
typedef struct WalCkptInfo WalCkptInfo;


/*
** The following object holds a copy of the wal-index header content.
**
** The actual header in the wal-index consists of two copies of this
** object.
*/
struct WalIndexHdr {
  u32 iChange;                    /* Counter incremented each transaction */
  u16 bigEndCksum;                /* True if checksums in WAL are big-endian */
  u16 szPage;                     /* Database page size in bytes */
  u32 mxFrame;                    /* Index of last valid frame in the WAL */
  u32 nPage;                      /* Size of database in pages */
  u32 aFrameCksum[2];             /* Checksum of last frame in log */
  u32 aSalt[2];                   /* Two salt values copied from WAL header */
  u32 aCksum[2];                  /* Checksum over all prior fields */
};

/*
** A copy of the following object occurs in the wal-index immediately
** following the second copy of the WalIndexHdr.  This object stores
** information used by checkpoint.
**
** nBackfill is the number of frames in the WAL that have been written
** back into the database. (We call the act of moving content from WAL to
** database "backfilling".)  The nBackfill number is never greater than
** WalIndexHdr.mxFrame.  nBackfill can only be increased by threads
** holding the WAL_CKPT_LOCK lock (which includes a recovery thread).
** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from
** mxFrame back to zero when the WAL is reset.
**
** There is one entry in aReadMark[] for each reader lock.  If a reader
** holds read-lock K, then the value in aReadMark[K] is no greater than
** the mxFrame for that reader.  aReadMark[0] is a special case.  It
** always holds zero.  Readers holding WAL_READ_LOCK(0) always ignore 
** the entire WAL and read all content directly from the database.
**
** The value of aReadMark[K] may only be changed by a thread that
** is holding an exclusive lock on WAL_READ_LOCK(K).  Thus, the value of
** aReadMark[K] cannot changed while there is a reader is using that mark
** since the reader will be holding a shared lock on WAL_READ_LOCK(K).
**
** The checkpointer may only transfer frames from WAL to database where
** the frame numbers are less than or equal to every aReadMark[] that is
** in use (that is, every aReadMark[j] for which there is a corresponding
** WAL_READ_LOCK(j)).  New readers (usually) pick the aReadMark[] with the
** largest value and will increase an unused aReadMark[] to mxFrame if there
** is not already an aReadMark[] equal to mxFrame.  The exception to the
** previous sentence is when nBackfill equals mxFrame (meaning that everything
** in the WAL has been backfilled into the database) then new readers
** will choose aReadMark[0] which has value 0 and hence such reader will
** get all their all content directly from the database file and ignore 
** the WAL.
**
** Writers normally append new frames to the end of the WAL.  However,
** if nBackfill equals mxFrame (meaning that all WAL content has been
** written back into the database) and if no readers are using the WAL
** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then
** the writer will first "reset" the WAL back to the beginning and start
** writing new content beginning at frame 1.
**
** We assume that 32-bit loads are atomic and so no locks are needed in
** order to read from any aReadMark[] entries.
*/
struct WalCkptInfo {
  u32 nBackfill;                  /* Number of WAL frames backfilled into DB */
  u32 aReadMark[WAL_NREADER];     /* Reader marks */
};


/* A block of WALINDEX_LOCK_RESERVED bytes beginning at
** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems
** only support mandatory file-locks, we do not read or write data
** from the region of the file on which locks are applied.
*/
#define WALINDEX_LOCK_OFFSET   (sizeof(WalIndexHdr)*2 + sizeof(WalCkptInfo))
#define WALINDEX_LOCK_RESERVED 16
#define WALINDEX_HDR_SIZE      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)

/* Size of header before each frame in wal */
#define WAL_FRAME_HDRSIZE 24

/* Size of write ahead log header */
#define WAL_HDRSIZE 24
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289


290
291
292
293
294








295
296
297
298
299
300
301
)

/*
** An open write-ahead log file is represented by an instance of the
** following object.
*/
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pFd */
  sqlite3_file *pDbFd;       /* File handle for the database file */
  sqlite3_file *pWalFd;      /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0) */
  int szWIndex;              /* Size of the wal-index that is mapped in mem */
  volatile u32 *pWiData;     /* Pointer to wal-index content in memory */
  u8 lockState;              /* SQLITE_SHM_xxxx constant showing lock state */
  u8 readerType;             /* SQLITE_SHM_READ or SQLITE_SHM_READ_FULL */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 isWindexOpen;           /* True if ShmOpen() called on pDbFd */


  WalIndexHdr hdr;           /* Wal-index for current snapshot */
  char *zWalName;            /* Name of WAL file */
  int szPage;                /* Database page size */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
};










/*
** This structure is used to implement an iterator that loops through
** all frames in the WAL in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 
** frame most recently written to the WAL (in other words, the frame with







|





|
|

|
>
>
|

<


>
>
>
>
>
>
>
>







346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366

367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
)

/*
** An open write-ahead log file is represented by an instance of the
** following object.
*/
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
  sqlite3_file *pDbFd;       /* File handle for the database file */
  sqlite3_file *pWalFd;      /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0) */
  int szWIndex;              /* Size of the wal-index that is mapped in mem */
  volatile u32 *pWiData;     /* Pointer to wal-index content in memory */
  u16 szPage;                /* Database page size */
  i16 readLock;              /* Which read lock is being held.  -1 for none */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 isWIndexOpen;           /* True if ShmOpen() called on pDbFd */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  char *zWalName;            /* Name of WAL file */

  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
};

/*
** Return a pointer to the WalCkptInfo structure in the wal-index.
*/
static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
  assert( pWal->pWiData!=0 );
  return (volatile WalCkptInfo*)&pWal->pWiData[sizeof(WalIndexHdr)/2];
}


/*
** This structure is used to implement an iterator that loops through
** all frames in the WAL in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 
** frame most recently written to the WAL (in other words, the frame with
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411


412
413
414
415
416
417
418
419
420
421
422
423
424
425
    }while( aData<aEnd );
  }

  aOut[0] = s1;
  aOut[1] = s2;
}

/*
** Attempt to change the lock status.
**
** When changing the lock status to SQLITE_SHM_READ, store the
** type of reader lock (either SQLITE_SHM_READ or SQLITE_SHM_READ_FULL)
** in pWal->readerType.
*/
static int walSetLock(Wal *pWal, int desiredStatus){
  int rc = SQLITE_OK;             /* Return code */
  if( pWal->exclusiveMode || pWal->lockState==desiredStatus ){
    pWal->lockState = desiredStatus;
  }else{
    int got = pWal->lockState;
    rc = sqlite3OsShmLock(pWal->pDbFd, desiredStatus, &got);
    pWal->lockState = got;
    if( got==SQLITE_SHM_READ_FULL || got==SQLITE_SHM_READ ){
      pWal->readerType = got;
      pWal->lockState = SQLITE_SHM_READ;
    }
  }
  return rc;
}

/*
** Write the header information in pWal->hdr into the wal-index.
**
** The checksum on pWal->hdr is updated before it is written.
*/
static void walIndexWriteHdr(Wal *pWal){
  WalIndexHdr *aHdr;


  walChecksumBytes(1, (u8*)&pWal->hdr,
                   sizeof(pWal->hdr) - sizeof(pWal->hdr.aCksum),
                   0, pWal->hdr.aCksum);
  aHdr = (WalIndexHdr*)pWal->pWiData;
  memcpy(&aHdr[1], &pWal->hdr, sizeof(pWal->hdr));
  sqlite3OsShmBarrier(pWal->pDbFd);
  memcpy(&aHdr[0], &pWal->hdr, sizeof(pWal->hdr));
}

/*
** This function encodes a single frame header and writes it to a buffer
** supplied by the caller. A frame-header is made up of a series of 
** 4-byte big-endian integers, as follows:
**







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







>
>
|
<


|

|







457
458
459
460
461
462
463























464
465
466
467
468
469
470
471
472
473

474
475
476
477
478
479
480
481
482
483
484
485
    }while( aData<aEnd );
  }

  aOut[0] = s1;
  aOut[1] = s2;
}
























/*
** Write the header information in pWal->hdr into the wal-index.
**
** The checksum on pWal->hdr is updated before it is written.
*/
static void walIndexWriteHdr(Wal *pWal){
  WalIndexHdr *aHdr;

  assert( pWal->writeLock );
  walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum),

                   0, pWal->hdr.aCksum);
  aHdr = (WalIndexHdr*)pWal->pWiData;
  memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr));
  sqlite3OsShmBarrier(pWal->pDbFd);
  memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr));
}

/*
** This function encodes a single frame header and writes it to a buffer
** supplied by the caller. A frame-header is made up of a series of 
** 4-byte big-endian integers, as follows:
**
515
516
517
518
519
520
521


























522
523
524
525
526
527
528
** create incompatibilities.
*/
#define HASHTABLE_NPAGE      4096  /* Must be power of 2 and multiple of 256 */
#define HASHTABLE_DATATYPE   u16
#define HASHTABLE_HASH_1     383                  /* Should be prime */
#define HASHTABLE_NSLOT      (HASHTABLE_NPAGE*2)  /* Must be a power of 2 */
#define HASHTABLE_NBYTE      (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)



























/*
** Return the index in the Wal.pWiData array that corresponds to 
** frame iFrame.
**
** Wal.pWiData is an array of u32 elements that is the wal-index.
** The array begins with a header and is then followed by alternating







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
** create incompatibilities.
*/
#define HASHTABLE_NPAGE      4096  /* Must be power of 2 and multiple of 256 */
#define HASHTABLE_DATATYPE   u16
#define HASHTABLE_HASH_1     383                  /* Should be prime */
#define HASHTABLE_NSLOT      (HASHTABLE_NPAGE*2)  /* Must be a power of 2 */
#define HASHTABLE_NBYTE      (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)

/*
** Set or release locks.
**
** In locking_mode=EXCLUSIVE, all of these routines become no-ops.
*/
static int walLockShared(Wal *pWal, int lockIdx){
  if( pWal->exclusiveMode ) return SQLITE_OK;
  return sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
                          SQLITE_SHM_LOCK | SQLITE_SHM_SHARED);
}
static void walUnlockShared(Wal *pWal, int lockIdx){
  if( pWal->exclusiveMode ) return;
  (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
                         SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED);
}
static int walLockExclusive(Wal *pWal, int lockIdx, int n){
  if( pWal->exclusiveMode ) return SQLITE_OK;
  return sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
                          SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE);
}
static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
  if( pWal->exclusiveMode ) return;
  (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
                         SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);
}

/*
** Return the index in the Wal.pWiData array that corresponds to 
** frame iFrame.
**
** Wal.pWiData is an array of u32 elements that is the wal-index.
** The array begins with a header and is then followed by alternating
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
**
** If enlargeTo is non-negative, then increase the size of the underlying
** storage to be at least as big as enlargeTo before remapping.
*/
static int walIndexRemap(Wal *pWal, int enlargeTo){
  int rc;
  int sz;
  assert( pWal->lockState>=SQLITE_SHM_WRITE );
  rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &sz);
  if( rc==SQLITE_OK && sz>pWal->szWIndex ){
    walIndexUnmap(pWal);
    rc = walIndexMap(pWal, sz);
  }
  assert( pWal->szWIndex>=enlargeTo || rc!=SQLITE_OK );
  return rc;







|







682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
**
** If enlargeTo is non-negative, then increase the size of the underlying
** storage to be at least as big as enlargeTo before remapping.
*/
static int walIndexRemap(Wal *pWal, int enlargeTo){
  int rc;
  int sz;
  assert( pWal->writeLock );
  rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &sz);
  if( rc==SQLITE_OK && sz>pWal->szWIndex ){
    walIndexUnmap(pWal);
    rc = walIndexMap(pWal, sz);
  }
  assert( pWal->szWIndex>=enlargeTo || rc!=SQLITE_OK );
  return rc;
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
*/
static void walCleanupHash(Wal *pWal){
  volatile HASHTABLE_DATATYPE *aHash;  /* Pointer to hash table to clear */
  volatile u32 *aPgno;                 /* Unused return from walHashFind() */
  u32 iZero;                           /* frame == (aHash[x]+iZero) */
  int iLimit;                          /* Zero values greater than this */

  assert( pWal->lockState==SQLITE_SHM_WRITE );
  walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);
  iLimit = pWal->hdr.mxFrame - iZero;
  if( iLimit>0 ){
    int nByte;                    /* Number of bytes to zero in aPgno[] */
    int i;                        /* Used to iterate through aHash[] */
    for(i=0; i<HASHTABLE_NSLOT; i++){
      if( aHash[i]>iLimit ){







|







767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
*/
static void walCleanupHash(Wal *pWal){
  volatile HASHTABLE_DATATYPE *aHash;  /* Pointer to hash table to clear */
  volatile u32 *aPgno;                 /* Unused return from walHashFind() */
  u32 iZero;                           /* frame == (aHash[x]+iZero) */
  int iLimit;                          /* Zero values greater than this */

  assert( pWal->writeLock );
  walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);
  iLimit = pWal->hdr.mxFrame - iZero;
  if( iLimit>0 ){
    int nByte;                    /* Number of bytes to zero in aPgno[] */
    int i;                        /* Used to iterate through aHash[] */
    for(i=0; i<HASHTABLE_NSLOT; i++){
      if( aHash[i]>iLimit ){
806
807
808
809
810
811
812


813




814
815
816
817
818
819




820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848

  return rc;
}


/*
** Recover the wal-index by reading the write-ahead log file. 


** The caller must hold RECOVER lock on the wal-index file.




*/
static int walIndexRecover(Wal *pWal){
  int rc;                         /* Return Code */
  i64 nSize;                      /* Size of log file */
  u32 aFrameCksum[2] = {0, 0};





  assert( pWal->lockState>SQLITE_SHM_READ );
  memset(&pWal->hdr, 0, sizeof(WalIndexHdr));

  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  if( nSize>WAL_HDRSIZE ){
    u8 aBuf[WAL_HDRSIZE];         /* Buffer to load WAL header into */
    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
    int szFrame;                  /* Number of bytes in buffer aFrame[] */
    u8 *aData;                    /* Pointer to data part of aFrame buffer */
    int iFrame;                   /* Index of last frame read */
    i64 iOffset;                  /* Next offset to read from log file */
    int szPage;                   /* Page size according to the log */
    u32 magic;                    /* Magic value read from WAL header */

    /* Read in the WAL header. */
    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* If the database page size is not a power of two, or is greater than
    ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid 
    ** data. Similarly, if the 'magic' value is invalid, ignore the whole
    ** WAL file.
    */







>
>
|
>
>
>
>






>
>
>
>
|




|















|







892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944

  return rc;
}


/*
** Recover the wal-index by reading the write-ahead log file. 
**
** This routine first tries to establish an exclusive lock on the
** wal-index to prevent other threads/processes from doing anything
** with the WAL or wal-index while recovery is running.  The
** WAL_RECOVER_LOCK is also held so that other threads will know
** that this thread is running recovery.  If unable to establish
** the necessary locks, this routine returns SQLITE_BUSY.
*/
static int walIndexRecover(Wal *pWal){
  int rc;                         /* Return Code */
  i64 nSize;                      /* Size of log file */
  u32 aFrameCksum[2] = {0, 0};

  rc = walLockExclusive(pWal, WAL_ALL_BUT_WRITE, SQLITE_SHM_NLOCK-1);
  if( rc ){
    return rc;
  }

  memset(&pWal->hdr, 0, sizeof(WalIndexHdr));

  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
  if( rc!=SQLITE_OK ){
    goto recovery_error;
  }

  if( nSize>WAL_HDRSIZE ){
    u8 aBuf[WAL_HDRSIZE];         /* Buffer to load WAL header into */
    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
    int szFrame;                  /* Number of bytes in buffer aFrame[] */
    u8 *aData;                    /* Pointer to data part of aFrame buffer */
    int iFrame;                   /* Index of last frame read */
    i64 iOffset;                  /* Next offset to read from log file */
    int szPage;                   /* Page size according to the log */
    u32 magic;                    /* Magic value read from WAL header */

    /* Read in the WAL header. */
    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
    if( rc!=SQLITE_OK ){
      goto recovery_error;
    }

    /* If the database page size is not a power of two, or is greater than
    ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid 
    ** data. Similarly, if the 'magic' value is invalid, ignore the whole
    ** WAL file.
    */
863
864
865
866
867
868
869
870

871
872
873
874
875
876
877
        aBuf, WAL_HDRSIZE, 0, pWal->hdr.aFrameCksum
    );

    /* Malloc a buffer to read frames into. */
    szFrame = szPage + WAL_FRAME_HDRSIZE;
    aFrame = (u8 *)sqlite3_malloc(szFrame);
    if( !aFrame ){
      return SQLITE_NOMEM;

    }
    aData = &aFrame[WAL_FRAME_HDRSIZE];

    /* Read all frames from the log file. */
    iFrame = 0;
    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
      u32 pgno;                   /* Database page number for frame */







|
>







959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
        aBuf, WAL_HDRSIZE, 0, pWal->hdr.aFrameCksum
    );

    /* Malloc a buffer to read frames into. */
    szFrame = szPage + WAL_FRAME_HDRSIZE;
    aFrame = (u8 *)sqlite3_malloc(szFrame);
    if( !aFrame ){
      rc = SQLITE_NOMEM;
      goto recovery_error;
    }
    aData = &aFrame[WAL_FRAME_HDRSIZE];

    /* Read all frames from the log file. */
    iFrame = 0;
    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
      u32 pgno;                   /* Database page number for frame */
904
905
906
907
908
909
910



911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
    rc = walIndexRemap(pWal, walMappingSize(1));
  }
  if( rc==SQLITE_OK ){
    pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
    pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
    walIndexWriteHdr(pWal);
  }



  return rc;
}

/*
** Close an open wal-index.
*/
static void walIndexClose(Wal *pWal, int isDelete){
  if( pWal->isWindexOpen ){
    int notUsed;
    sqlite3OsShmLock(pWal->pDbFd, SQLITE_SHM_UNLOCK, &notUsed);
    sqlite3OsShmClose(pWal->pDbFd, isDelete);
    pWal->isWindexOpen = 0;
  }
}

/* 
** Open a connection to the log file associated with database zDb. The
** database file does not actually have to exist. zDb is used only to
** figure out the name of the log file to open. If the log file does not 







>
>
>







|
<
<

|







1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018


1019
1020
1021
1022
1023
1024
1025
1026
1027
    rc = walIndexRemap(pWal, walMappingSize(1));
  }
  if( rc==SQLITE_OK ){
    pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
    pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
    walIndexWriteHdr(pWal);
  }

recovery_error:
  walUnlockExclusive(pWal, WAL_ALL_BUT_WRITE, SQLITE_SHM_NLOCK-1);
  return rc;
}

/*
** Close an open wal-index.
*/
static void walIndexClose(Wal *pWal, int isDelete){
  if( pWal->isWIndexOpen ){


    sqlite3OsShmClose(pWal->pDbFd, isDelete);
    pWal->isWIndexOpen = 0;
  }
}

/* 
** Open a connection to the log file associated with database zDb. The
** database file does not actually have to exist. zDb is used only to
** figure out the name of the log file to open. If the log file does not 
974
975
976
977
978
979
980

981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
    return SQLITE_NOMEM;
  }

  pRet->pVfs = pVfs;
  pRet->pWalFd = (sqlite3_file *)&pRet[1];
  pRet->pDbFd = pDbFd;
  pRet->szWIndex = -1;

  sqlite3_randomness(8, &pRet->hdr.aSalt);
  pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd;
  sqlite3_snprintf(nWal, zWal, "%s-wal", zDbName);
  rc = sqlite3OsShmOpen(pDbFd);

  /* Open file handle on the write-ahead log file. */
  if( rc==SQLITE_OK ){
    pRet->isWindexOpen = 1;
    flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_JOURNAL);
    rc = sqlite3OsOpen(pVfs, zWal, pRet->pWalFd, flags, &flags);
  }

  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);







>







|







1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
    return SQLITE_NOMEM;
  }

  pRet->pVfs = pVfs;
  pRet->pWalFd = (sqlite3_file *)&pRet[1];
  pRet->pDbFd = pDbFd;
  pRet->szWIndex = -1;
  pRet->readLock = -1;
  sqlite3_randomness(8, &pRet->hdr.aSalt);
  pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd;
  sqlite3_snprintf(nWal, zWal, "%s-wal", zDbName);
  rc = sqlite3OsShmOpen(pDbFd);

  /* Open file handle on the write-ahead log file. */
  if( rc==SQLITE_OK ){
    pRet->isWIndexOpen = 1;
    flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_JOURNAL);
    rc = sqlite3OsOpen(pVfs, zWal, pRet->pWalFd, flags, &flags);
  }

  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
  }

  /* This routine only runs while holding SQLITE_SHM_CHECKPOINT.  No other
  ** thread is able to write to shared memory while this routine is
  ** running (or, indeed, while the WalIterator object exists).  Hence,
  ** we can cast off the volatile qualifacation from shared memory
  */
  assert( pWal->lockState==SQLITE_SHM_CHECKPOINT );
  aData = (u32*)pWal->pWiData;

  /* Allocate space for the WalIterator object */
  iLast = pWal->hdr.mxFrame;
  nSegment = (iLast >> 8) + 1;
  nFinal = (iLast & 0x000000FF);
  nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256);







|







1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
  }

  /* This routine only runs while holding SQLITE_SHM_CHECKPOINT.  No other
  ** thread is able to write to shared memory while this routine is
  ** running (or, indeed, while the WalIterator object exists).  Hence,
  ** we can cast off the volatile qualifacation from shared memory
  */
  assert( pWal->ckptLock );
  aData = (u32*)pWal->pWiData;

  /* Allocate space for the WalIterator object */
  iLast = pWal->hdr.mxFrame;
  nSegment = (iLast >> 8) + 1;
  nFinal = (iLast & 0x000000FF);
  nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256);
1175
1176
1177
1178
1179
1180
1181
1182

1183



























1184

1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196




1197
1198
1199
1200
1201

1202
1203

1204

1205
1206
1207
1208






1209




1210
1211
1212





1213
1214

1215
1216
1217
1218

1219
1220
1221
1222

1223



1224
1225



1226


1227


1228
1229





1230
1231
1232
1233


1234
1235
1236
1237
1238

1239
1240
1241
1242
1243
1244
1245
1246
1247
1248

/* 
** Free an iterator allocated by walIteratorInit().
*/
static void walIteratorFree(WalIterator *p){
  sqlite3_free(p);
}


/*



























** Checkpoint the contents of the log file.

*/
static int walCheckpoint(
  Wal *pWal,                      /* Wal connection */
  int sync_flags,                 /* Flags for OsSync() (or 0) */
  int nBuf,                       /* Size of zBuf in bytes */
  u8 *zBuf                        /* Temporary buffer to use */
){
  int rc;                         /* Return code */
  int szPage = pWal->hdr.szPage;  /* Database page-size */
  WalIterator *pIter = 0;         /* Wal iterator context */
  u32 iDbpage = 0;                /* Next database page to write */
  u32 iFrame = 0;                 /* Wal frame containing data for iDbpage */





  /* Allocate the iterator */
  rc = walIteratorInit(pWal, &pIter);
  if( rc!=SQLITE_OK || pWal->hdr.mxFrame==0 ){
    goto out;

  }


  if( pWal->hdr.szPage!=nBuf ){

    rc = SQLITE_CORRUPT_BKPT;
    goto out;
  }







  /* Sync the log file to disk */




  if( sync_flags ){
    rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
    if( rc!=SQLITE_OK ) goto out;





  }


  /* Iterate through the contents of the log, copying data to the db file. */
  while( 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
    rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, 
        walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE

    );
    if( rc!=SQLITE_OK ) goto out;
    rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, (iDbpage-1)*szPage);
    if( rc!=SQLITE_OK ) goto out;

  }




  /* Truncate the database file */



  rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage));


  if( rc!=SQLITE_OK ) goto out;



  /* Sync the database file. If successful, update the wal-index. */





  if( sync_flags ){
    rc = sqlite3OsSync(pWal->pDbFd, sync_flags);
    if( rc!=SQLITE_OK ) goto out;
  }


  pWal->hdr.mxFrame = 0;
  pWal->nCkpt++;
  sqlite3Put4byte((u8*)pWal->hdr.aSalt,
                   1 + sqlite3Get4byte((u8*)pWal->hdr.aSalt));
  sqlite3_randomness(4, &pWal->hdr.aSalt[1]);

  walIndexWriteHdr(pWal);

 out:
  walIteratorFree(pIter);
  return rc;
}

/*
** Close a connection to a log file.
*/








>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>












>
>
>
>




|
>


>

>
|
<


>
>
>
>
>
>
|
>
>
>
>
|
|
|
>
>
>
>
>
|
|
>
|
|
<
<
>
|
<
<
<
>
|
>
>
>
|
|
>
>
>
|
>
>
|
>
>
|
|
>
>
>
>
>
|
|
<
|
>
>
|
<
<
<
<
>
|
|
|







1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340

1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366


1367
1368



1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393

1394
1395
1396
1397




1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408

/* 
** Free an iterator allocated by walIteratorInit().
*/
static void walIteratorFree(WalIterator *p){
  sqlite3_free(p);
}


/*
** Copy as much content as we can from the WAL back into the database file
** in response to an sqlite3_wal_checkpoint() request or the equivalent.
**
** The amount of information copies from WAL to database might be limited
** by active readers.  This routine will never overwrite a database page
** that a concurrent reader might be using.
**
** All I/O barrier operations (a.k.a fsyncs) occur in this routine when
** SQLite is in WAL-mode in synchronous=NORMAL.  That means that if 
** checkpoints are always run by a background thread or background 
** process, foreground threads will never block on a lengthy fsync call.
**
** Fsync is called on the WAL before writing content out of the WAL and
** into the database.  This ensures that if the new content is persistent
** in the WAL and can be recovered following a power-loss or hard reset.
**
** Fsync is also called on the database file if (and only if) the entire
** WAL content is copied into the database file.  This second fsync makes
** it safe to delete the WAL since the new content will persist in the
** database file.
**
** This routine uses and updates the nBackfill field of the wal-index header.
** This is the only routine tha will increase the value of nBackfill.  
** (A WAL reset or recovery will revert nBackfill to zero, but not increase
** its value.)
**
** The caller must be holding sufficient locks to ensure that no other
** checkpoint is running (in any other thread or process) at the same
** time.
*/
static int walCheckpoint(
  Wal *pWal,                      /* Wal connection */
  int sync_flags,                 /* Flags for OsSync() (or 0) */
  int nBuf,                       /* Size of zBuf in bytes */
  u8 *zBuf                        /* Temporary buffer to use */
){
  int rc;                         /* Return code */
  int szPage = pWal->hdr.szPage;  /* Database page-size */
  WalIterator *pIter = 0;         /* Wal iterator context */
  u32 iDbpage = 0;                /* Next database page to write */
  u32 iFrame = 0;                 /* Wal frame containing data for iDbpage */
  u32 mxSafeFrame;                /* Max frame that can be backfilled */
  int i;                          /* Loop counter */
  volatile WalIndexHdr *pHdr;     /* The actual wal-index header in SHM */
  volatile WalCkptInfo *pInfo;    /* The checkpoint status information */

  /* Allocate the iterator */
  rc = walIteratorInit(pWal, &pIter);
  if( rc!=SQLITE_OK || pWal->hdr.mxFrame==0 ){
    walIteratorFree(pIter);
    return rc;
  }

  /*** TODO:  Move this test out to the caller.  Make it an assert() here ***/
  if( pWal->hdr.szPage!=nBuf ){
    walIteratorFree(pIter);
    return SQLITE_CORRUPT_BKPT;

  }

  /* Compute in mxSafeFrame the index of the last frame of the WAL that is
  ** safe to write into the database.  Frames beyond mxSafeFrame might
  ** overwrite database pages that are in use by active readers and thus
  ** cannot be backfilled from the WAL.
  */
  mxSafeFrame = 0;
  pHdr = (volatile WalIndexHdr*)pWal->pWiData;
  pInfo = (volatile WalCkptInfo*)&pHdr[2];
  assert( pInfo==walCkptInfo(pWal) );
  for(i=1; i<WAL_NREADER; i++){
    u32 y = pInfo->aReadMark[i];
    if( y>0 && (mxSafeFrame==0 || mxSafeFrame<y) ){
      if( y<pWal->hdr.mxFrame
       && (rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1))==SQLITE_OK
      ){
        pInfo->aReadMark[i] = 0;
        walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
      }else{
        mxSafeFrame = y;
      }
    }
  }

  if( pInfo->nBackfill<mxSafeFrame


   && (rc = walLockExclusive(pWal, WAL_READ_LOCK(0), 1))==SQLITE_OK
  ){



    u32 nBackfill = pInfo->nBackfill;

    /* Sync the WAL to disk */
    if( sync_flags ){
      rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
    }

    /* Iterate through the contents of the WAL, copying data to the db file. */
    while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
      if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue;
      rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, 
          walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE
      );
      if( rc!=SQLITE_OK ) break;
      rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, (iDbpage-1)*szPage);
      if( rc!=SQLITE_OK ) break;
    }

    /* If work was actually accomplished... */
    if( rc==SQLITE_OK && pInfo->nBackfill<mxSafeFrame ){
      pInfo->nBackfill = mxSafeFrame;
      if( mxSafeFrame==pHdr[0].mxFrame && sync_flags ){
        rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage));
        if( rc==SQLITE_OK && sync_flags ){
          rc = sqlite3OsSync(pWal->pDbFd, sync_flags);

        }
      }
    }





    /* Release the reader lock held while backfilling */
    walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
  }

  walIteratorFree(pIter);
  return rc;
}

/*
** Close a connection to a log file.
*/
1262
1263
1264
1265
1266
1267
1268

1269
1270
1271
1272
1273
1274
1275
1276
    ** the database. In this case checkpoint the database and unlink both
    ** the wal and wal-index files.
    **
    ** The EXCLUSIVE lock is not released before returning.
    */
    rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
    if( rc==SQLITE_OK ){

      rc = sqlite3WalCheckpoint(pWal, sync_flags, nBuf, zBuf, 0, 0);
      if( rc==SQLITE_OK ){
        isDelete = 1;
      }
      walIndexUnmap(pWal);
    }

    walIndexClose(pWal, isDelete);







>
|







1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
    ** the database. In this case checkpoint the database and unlink both
    ** the wal and wal-index files.
    **
    ** The EXCLUSIVE lock is not released before returning.
    */
    rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
    if( rc==SQLITE_OK ){
      pWal->exclusiveMode = 1;
      rc = walCheckpoint(pWal, sync_flags, nBuf, zBuf);
      if( rc==SQLITE_OK ){
        isDelete = 1;
      }
      walIndexUnmap(pWal);
    }

    walIndexClose(pWal, isDelete);
1286
1287
1288
1289
1290
1291
1292

1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
/*
** Try to read the wal-index header.  Return 0 on success and 1 if
** there is a problem.
**
** The wal-index is in shared memory.  Another thread or process might
** be writing the header at the same time this procedure is trying to
** read it, which might result in inconsistency.  A dirty read is detected

** by verifying a checksum on the header.
**
** If and only if the read is consistent and the header is different from
** pWal->hdr, then pWal->hdr is updated to the content of the new header
** and *pChanged is set to 1.
**
** If the checksum cannot be verified return non-zero. If the header
** is read successfully and the checksum verified, return zero.
*/
int walIndexTryHdr(Wal *pWal, int *pChanged){
  u32 aCksum[2];               /* Checksum on the header content */
  WalIndexHdr h1, h2;          /* Two copies of the header content */
  WalIndexHdr *aHdr;           /* Header in shared memory */

  if( pWal->szWIndex < WALINDEX_HDR_SIZE ){
    /* The wal-index is not large enough to hold the header, then assume
    ** header is invalid. */
    return 1;
  }
  assert( pWal->pWiData );

  /* Read the header. The caller may or may not have an exclusive 
  ** (WRITE, PENDING, CHECKPOINT or RECOVER) lock on the wal-index
  ** file, meaning it is possible that an inconsistent snapshot is read
  ** from the file. If this happens, return non-zero.
  **
  ** There are two copies of the header at the beginning of the wal-index.
  ** When reading, read [0] first then [1].  Writes are in the reverse order.
  ** Memory barriers are used to prevent the compiler or the hardware from
  ** reordering the reads and writes.
  */







>
|




















|
|
|







1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
/*
** Try to read the wal-index header.  Return 0 on success and 1 if
** there is a problem.
**
** The wal-index is in shared memory.  Another thread or process might
** be writing the header at the same time this procedure is trying to
** read it, which might result in inconsistency.  A dirty read is detected
** by verifying that both copies of the header are the same and also by
** a checksum on the header.
**
** If and only if the read is consistent and the header is different from
** pWal->hdr, then pWal->hdr is updated to the content of the new header
** and *pChanged is set to 1.
**
** If the checksum cannot be verified return non-zero. If the header
** is read successfully and the checksum verified, return zero.
*/
int walIndexTryHdr(Wal *pWal, int *pChanged){
  u32 aCksum[2];               /* Checksum on the header content */
  WalIndexHdr h1, h2;          /* Two copies of the header content */
  WalIndexHdr *aHdr;           /* Header in shared memory */

  if( pWal->szWIndex < WALINDEX_HDR_SIZE ){
    /* The wal-index is not large enough to hold the header, then assume
    ** header is invalid. */
    return 1;
  }
  assert( pWal->pWiData );

  /* Read the header. This might happen currently with a write to the
  ** same area of shared memory on a different CPU in a SMP,
  ** meaning it is possible that an inconsistent snapshot is read
  ** from the file. If this happens, return non-zero.
  **
  ** There are two copies of the header at the beginning of the wal-index.
  ** When reading, read [0] first then [1].  Writes are in the reverse order.
  ** Memory barriers are used to prevent the compiler or the hardware from
  ** reordering the reads and writes.
  */
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408

1409




1410
1411
1412

1413
1414
1415

1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430





















































































































































1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446

1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466

1467
1468
1469
1470
1471
1472
1473

1474
1475
1476
1477





1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489



1490
1491
1492
1493


1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
** after this routine returns.
**
** If the wal-index header is successfully read, return SQLITE_OK. 
** Otherwise an SQLite error code.
*/
static int walIndexReadHdr(Wal *pWal, int *pChanged){
  int rc;                         /* Return code */
  int lockState;                  /* pWal->lockState before running recovery */

  assert( pWal->lockState>=SQLITE_SHM_READ );
  assert( pChanged );
  rc = walIndexMap(pWal, walMappingSize(1));
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* First attempt to read the wal-index header. This may fail for one
  ** of two reasons: (a) the wal-index does not yet exist or has been
  ** corrupted and needs to be constructed by running recovery, or (b)
  ** the caller is only holding a READ lock and made a dirty read of
  ** the wal-index header.
  **
  ** A dirty read of the wal-index header occurs if another thread or
  ** process happens to be writing to the wal-index header at roughly
  ** the same time as this thread is reading it. In this case it is 
  ** possible that an inconsistent header is read (which is detected
  ** using the header checksum mechanism).
  */
  if( walIndexTryHdr(pWal, pChanged)!=0 ){

    /* If the first attempt to read the header failed, lock the wal-index
    ** file with an exclusive lock and try again. If the header checksum 
    ** verification fails again, we can be sure that it is not simply a
    ** dirty read, but that the wal-index really does need to be 
    ** reconstructed by running log recovery.
    **
    ** In the paragraph above, an "exclusive lock" may be any of WRITE,
    ** PENDING, CHECKPOINT or RECOVER. If any of these are already held,
    ** no locking operations are required. If the caller currently holds
    ** a READ lock, then upgrade to a RECOVER lock before re-reading the
    ** wal-index header and revert to a READ lock before returning.
    */
    lockState = pWal->lockState;
    if( lockState>SQLITE_SHM_READ
     || SQLITE_OK==(rc = walSetLock(pWal, SQLITE_SHM_RECOVER)) 
    ){

      if( walIndexTryHdr(pWal, pChanged) ){




        *pChanged = 1;
        rc = walIndexRecover(pWal);
      }

      if( lockState==SQLITE_SHM_READ ){
        walSetLock(pWal, SQLITE_SHM_READ);
      }

    }
  }

  /* Make sure the mapping is large enough to cover the entire wal-index */
  if( rc==SQLITE_OK ){
    int szWanted = walMappingSize(pWal->hdr.mxFrame);
    if( pWal->szWIndex<szWanted ){
      rc = walIndexMap(pWal, szWanted);
    }
  }

  return rc;
}

/*





















































































































































** Take a snapshot of the state of the WAL and wal-index for the current
** instant in time.  The current thread will continue to use this snapshot.
** Other threads might containing appending to the WAL and wal-index but
** the extra content appended will be ignored by the current thread.
**
** A snapshot is like a read transaction.
**
** No other threads are allowed to run a checkpoint while this thread is
** holding the snapshot since a checkpoint would remove data out from under
** this thread.
**
** If this call obtains a new read-lock and the database contents have been
** modified since the most recent call to WalCloseSnapshot() on this Wal
** connection, then *pChanged is set to 1 before returning. Otherwise, it 
** is left unmodified. This is used by the pager layer to determine whether 
** or not any cached pages may be safely reused.

*/
int sqlite3WalOpenSnapshot(Wal *pWal, int *pChanged){
  int rc;                         /* Return code */

  rc = walSetLock(pWal, SQLITE_SHM_READ);
  assert( rc!=SQLITE_OK || pWal->lockState==SQLITE_SHM_READ );

  if( rc==SQLITE_OK ){
    rc = walIndexReadHdr(pWal, pChanged);
    if( rc!=SQLITE_OK ){
      /* An error occured while attempting log recovery. */
      sqlite3WalCloseSnapshot(pWal);
    }
  }

  walIndexUnmap(pWal);
  return rc;
}

/*

** Unlock the current snapshot.
*/
void sqlite3WalCloseSnapshot(Wal *pWal){
  assert( pWal->lockState==SQLITE_SHM_READ
       || pWal->lockState==SQLITE_SHM_UNLOCK
  );
  walSetLock(pWal, SQLITE_SHM_UNLOCK);

}

/*
** Read a page from the log, if it is present. 





*/
int sqlite3WalRead(
  Wal *pWal,                      /* WAL handle */
  Pgno pgno,                      /* Database page number to read data for */
  int *pInWal,                    /* OUT: True if data is read from WAL */
  int nOut,                       /* Size of buffer pOut in bytes */
  u8 *pOut                        /* Buffer to write page data to */
){
  int rc;                         /* Return code */
  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
  u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
  int iHash;                      /* Used to loop through N hash tables */




  /* If the "last page" field of the wal-index header snapshot is 0, then
  ** no data will be read from the wal under any circumstances. Return early
  ** in this case to avoid the walIndexMap/Unmap overhead.


  */
  if( iLast==0 ){
    *pInWal = 0;
    return SQLITE_OK;
  }

  /* Ensure the wal-index is mapped. */
  assert( pWal->lockState==SQLITE_SHM_READ||pWal->lockState==SQLITE_SHM_WRITE );
  rc = walIndexMap(pWal, walMappingSize(iLast));
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* Search the hash table or tables for an entry matching page number
  ** pgno. Each iteration of the following for() loop searches one







|

<






|
<
<
<
<
|
<
<
<
<
<

|

|
|
<
<
<
<
<
<
<
<
<
|
|
|
|
|
>
|
>
>
>
>
|


>
|
|
<
>















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|

|
|

<
<
<
<
<
<
|
<
|
|
<
>

|


<
<
|
<
|
<
<
<
<
<
|





>
|

|
|
|
<
|
>



|
>
>
>
>
>












>
>
>



|
>
>

|





<







1525
1526
1527
1528
1529
1530
1531
1532
1533

1534
1535
1536
1537
1538
1539
1540




1541





1542
1543
1544
1545
1546









1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563

1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733






1734

1735
1736

1737
1738
1739
1740
1741


1742

1743





1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755

1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794

1795
1796
1797
1798
1799
1800
1801
** after this routine returns.
**
** If the wal-index header is successfully read, return SQLITE_OK. 
** Otherwise an SQLite error code.
*/
static int walIndexReadHdr(Wal *pWal, int *pChanged){
  int rc;                         /* Return code */
  int badHdr;                     /* True if a header read failed */


  assert( pChanged );
  rc = walIndexMap(pWal, walMappingSize(1));
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* Try once to read the header straight out.  This works most of the




  ** time.





  */
  badHdr = walIndexTryHdr(pWal, pChanged);

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.









  */
  assert( pWal->writeLock==0 );
  if( badHdr ){
    rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
    if( rc==SQLITE_OK ){
      pWal->writeLock = 1;
      badHdr = walIndexTryHdr(pWal, pChanged);
      if( badHdr ){
        /* If the wal-index header is still malformed even while holding
        ** a WRITE lock, it can only mean that the header is corrupted and
        ** needs to be reconstructed.  So run recovery to do exactly that.
        */
        rc = walIndexRecover(pWal);
      }
      walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
      pWal->writeLock = 0;
    }else if( rc!=SQLITE_BUSY ){

      return rc;
    }
  }

  /* Make sure the mapping is large enough to cover the entire wal-index */
  if( rc==SQLITE_OK ){
    int szWanted = walMappingSize(pWal->hdr.mxFrame);
    if( pWal->szWIndex<szWanted ){
      rc = walIndexMap(pWal, szWanted);
    }
  }

  return rc;
}

/*
** This is the value that walTryBeginRead returns when it needs to
** be retried.
*/
#define WAL_RETRY  (-1)

/*
** Attempt to start a read transaction.  This might fail due to a race or
** other transient condition.  When that happens, it returns WAL_RETRY to
** indicate to the caller that it is safe to retry immediately.
**
** On success return SQLITE_OK.  On a permantent failure (such an
** I/O error or an SQLITE_BUSY because another process is running
** recovery) return a positive error code.
**
** On success, this routine obtains a read lock on 
** WAL_READ_LOCK(pWal->readLock).  The pWal->readLock integer is
** in the range 0 <= pWal->readLock < WAL_NREADER.  If pWal->readLock==(-1)
** that means the Wal does not hold any read lock.  The reader must not
** access any database page that is modified by a WAL frame up to and
** including frame number aReadMark[pWal->readLock].  The reader will
** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0
** Or if pWal->readLock==0, then the reader will ignore the WAL
** completely and get all content directly from the database file.
** When the read transaction is completed, the caller must release the
** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1.
**
** This routine uses the nBackfill and aReadMark[] fields of the header
** to select a particular WAL_READ_LOCK() that strives to let the
** checkpoint process do as much work as possible.  This routine might
** update values of the aReadMark[] array in the header, but if it does
** so it takes care to hold an exclusive lock on the corresponding
** WAL_READ_LOCK() while changing values.
*/
static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal){
  volatile WalIndexHdr *pHdr;     /* Header of the wal-index */
  volatile WalCkptInfo *pInfo;    /* Checkpoint information in wal-index */
  u32 mxReadMark;                 /* Largest aReadMark[] value */
  int mxI;                        /* Index of largest aReadMark[] value */
  int i;                          /* Loop counter */
  int rc;                         /* Return code  */

  assert( pWal->readLock<0 );  /* No read lock held on entry */

  if( !useWal ){
    rc = walIndexReadHdr(pWal, pChanged);
    if( rc==SQLITE_BUSY ){
      /* If there is not a recovery running in another thread or process
      ** then convert BUSY errors to WAL_RETRY.  If recovery is known to
      ** be running, convert BUSY to BUSY_RECOVERY.  There is a race here
      ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY
      ** would be technically correct.  But the race is benign since with
      ** WAL_RETRY this routine will be called again and will probably be
      ** right on the second iteration.
      */
      rc = walLockShared(pWal, WAL_RECOVER_LOCK);
      if( rc==SQLITE_OK ){
        walUnlockShared(pWal, WAL_RECOVER_LOCK);
        rc = WAL_RETRY;
      }else if( rc==SQLITE_BUSY ){
        rc = SQLITE_BUSY_RECOVERY;
      }
    }
  }else{
    rc = walIndexMap(pWal, pWal->hdr.mxFrame);
  }
  if( rc!=SQLITE_OK ){
    return rc;
  }

  pHdr = (volatile WalIndexHdr*)pWal->pWiData;
  pInfo = (volatile WalCkptInfo*)&pHdr[2];
  assert( pInfo==walCkptInfo(pWal) );
  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
    /* The WAL has been completely backfilled (or it is empty).
    ** and can be safely ignored.
    */
    rc = walLockShared(pWal, WAL_READ_LOCK(0));
    if( rc==SQLITE_OK ){
      if( pHdr->mxFrame!=pWal->hdr.mxFrame ){
        walUnlockShared(pWal, WAL_READ_LOCK(0));
        return WAL_RETRY;
      }
      pWal->readLock = 0;
      return SQLITE_OK;
    }else if( rc!=SQLITE_BUSY ){
      return rc;
    }
  }

  /* If we get this far, it means that the reader will want to use
  ** the WAL to get at content from recent commits.  The job now is
  ** to select one of the aReadMark[] entries that is closest to
  ** but not exceeding pWal->hdr.mxFrame and lock that entry.
  */
  mxReadMark = 0;
  mxI = 0;
  for(i=1; i<WAL_NREADER; i++){
    u32 thisMark = pInfo->aReadMark[i];
    if( mxReadMark<thisMark ){
      mxReadMark = thisMark;
      mxI = i;
    }
  }
  if( mxI==0 ){
    /* If we get here, it means that all of the aReadMark[] entries between
    ** 1 and WAL_NREADER-1 are zero.  Try to initialize aReadMark[1] to
    ** be mxFrame, then retry.
    */
    rc = walLockExclusive(pWal, WAL_READ_LOCK(1), 1);
    if( rc==SQLITE_OK ){
      pInfo->aReadMark[1] = pWal->hdr.mxFrame;
      walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1);
    }
    return WAL_RETRY;
  }else{
    if( mxReadMark < pWal->hdr.mxFrame ){
      for(i=0; i<WAL_NREADER; i++){
        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
        if( rc==SQLITE_OK ){
          pInfo->aReadMark[i] = pWal->hdr.mxFrame;
          mxReadMark = pWal->hdr.mxFrame;
          mxI = i;
          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
          break;
        }
      }
    }

    rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
    if( rc ){
      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
    }
    if( pInfo->aReadMark[mxI]!=mxReadMark
     || pHdr[0].mxFrame!=pWal->hdr.mxFrame
     || (sqlite3OsShmBarrier(pWal->pDbFd), pHdr[1].mxFrame!=pWal->hdr.mxFrame)
    ){
      walUnlockShared(pWal, WAL_READ_LOCK(mxI));
      return WAL_RETRY;
    }else{
      pWal->readLock = mxI;
    }
  }
  return rc;
}

/*
** Begin a read transaction on the database.
**
** This routine used to be called sqlite3OpenSnapshot() and with good reason:
** it takes a snapshot of the state of the WAL and wal-index for the current
** instant in time.  The current thread will continue to use this snapshot.
** Other threads might append new content to the WAL and wal-index but
** that extra content is ignored by the current thread.
**






** If the database contents have changes since the previous read

** transaction, then *pChanged is set to 1 before returning.  The
** Pager layer will use this to know that is cache is stale and

** needs to be flushed.
*/
int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
  int rc;                         /* Return code */



  do{

    rc = walTryBeginRead(pWal, pChanged, 0);





  }while( rc==WAL_RETRY );
  walIndexUnmap(pWal);
  return rc;
}

/*
** Finish with a read transaction.  All this does is release the
** read-lock.
*/
void sqlite3WalEndReadTransaction(Wal *pWal){
  if( pWal->readLock>=0 ){
    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));

    pWal->readLock = -1;
  }
}

/*
** Read a page from the WAL, if it is present in the WAL and if the 
** current read transaction is configured to use the WAL.  
**
** The *pInWal is set to 1 if the requested page is in the WAL and
** has been loaded.  Or *pInWal is set to 0 if the page was not in 
** the WAL and needs to be read out of the database.
*/
int sqlite3WalRead(
  Wal *pWal,                      /* WAL handle */
  Pgno pgno,                      /* Database page number to read data for */
  int *pInWal,                    /* OUT: True if data is read from WAL */
  int nOut,                       /* Size of buffer pOut in bytes */
  u8 *pOut                        /* Buffer to write page data to */
){
  int rc;                         /* Return code */
  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
  u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
  int iHash;                      /* Used to loop through N hash tables */

  /* This routine is only called from within a read transaction */
  assert( pWal->readLock>=0 );

  /* If the "last page" field of the wal-index header snapshot is 0, then
  ** no data will be read from the wal under any circumstances. Return early
  ** in this case to avoid the walIndexMap/Unmap overhead.  Likewise, if
  ** pWal->readLock==0, then the WAL is ignored by the reader so
  ** return early, as if the WAL were empty.
  */
  if( iLast==0 || pWal->readLock==0 ){
    *pInWal = 0;
    return SQLITE_OK;
  }

  /* Ensure the wal-index is mapped. */

  rc = walIndexMap(pWal, walMappingSize(iLast));
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* Search the hash table or tables for an entry matching page number
  ** pgno. Each iteration of the following for() loop searches one
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614

1615
1616
1617






1618


1619
1620
1621

1622


1623
1624






1625
1626
1627

1628
1629














1630
1631

1632
1633
1634
1635





1636


1637
1638
1639


1640

1641

1642
1643
1644
1645






1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
}


/* 
** Set *pPgno to the size of the database file (or zero, if unknown).
*/
void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){
  assert( pWal->lockState==SQLITE_SHM_READ
       || pWal->lockState==SQLITE_SHM_WRITE );
  *pPgno = pWal->hdr.nPage;
}


/* 
** This function returns SQLITE_OK if the caller may write to the database.
** Otherwise, if the caller is operating on a snapshot that has already






** been overwritten by another writer, SQLITE_BUSY is returned.


*/
int sqlite3WalWriteLock(Wal *pWal, int op){
  int rc = SQLITE_OK;

  if( op ){


    assert( pWal->lockState==SQLITE_SHM_READ );
    rc = walSetLock(pWal, SQLITE_SHM_WRITE);







    /* If this connection is not reading the most recent database snapshot,
    ** it is not possible to write to the database. In this case release

    ** the write locks and return SQLITE_BUSY.
    */














    if( rc==SQLITE_OK ){
      rc = walIndexMap(pWal, walMappingSize(1));

      assert( pWal->szWIndex>=WALINDEX_HDR_SIZE || rc!=SQLITE_OK );
      if( rc==SQLITE_OK
       && memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))
      ){





        rc = SQLITE_BUSY;


      }
      walIndexUnmap(pWal);
      if( rc!=SQLITE_OK ){


        walSetLock(pWal, SQLITE_SHM_READ);

      }

    }
  }else if( pWal->lockState==SQLITE_SHM_WRITE ){
    rc = walSetLock(pWal, SQLITE_SHM_READ);
  }






  return rc;
}

/*
** If any data has been written (but not committed) to the log file, this
** function moves the write-pointer back to the start of the transaction.
**
** Additionally, the callback function is invoked for each frame written
** to the log since the start of the transaction. If the callback returns
** other than SQLITE_OK, it is not invoked again and the error code is
** returned to the caller.
**
** Otherwise, if the callback function does not return an error, this
** function returns SQLITE_OK.
*/
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
  int rc = SQLITE_OK;
  if( pWal->lockState==SQLITE_SHM_WRITE ){
    int unused;
    Pgno iMax = pWal->hdr.mxFrame;
    Pgno iFrame;
  
    assert( pWal->pWiData==0 );
    rc = walIndexReadHdr(pWal, &unused);
    if( rc==SQLITE_OK ){
      rc = walIndexMap(pWal, walMappingSize(iMax));
    }
    if( rc==SQLITE_OK ){
      for(iFrame=pWal->hdr.mxFrame+1; rc==SQLITE_OK && iFrame<=iMax; iFrame++){
        assert( pWal->lockState==SQLITE_SHM_WRITE );
        rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]);
      }
      walCleanupHash(pWal);
    }
    walIndexUnmap(pWal);
  }
  return rc;
}

/* 
** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 
** values. This function populates the array with values required to 
** "rollback" the write position of the WAL handle back to the current 
** point in the event of a savepoint rollback (via WalSavepointUndo()).
*/
void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){
  assert( pWal->lockState==SQLITE_SHM_WRITE );
  aWalData[0] = pWal->hdr.mxFrame;
  aWalData[1] = pWal->hdr.aFrameCksum[0];
  aWalData[2] = pWal->hdr.aFrameCksum[1];
}

/* 
** Move the write position of the WAL back to the point identified by
** the values in the aWalData[] array. aWalData must point to an array
** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated
** by a call to WalSavepoint().
*/
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
  int rc = SQLITE_OK;
  assert( pWal->lockState==SQLITE_SHM_WRITE );

  assert( aWalData[0]<=pWal->hdr.mxFrame );
  if( aWalData[0]<pWal->hdr.mxFrame ){
    rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
    pWal->hdr.mxFrame = aWalData[0];
    pWal->hdr.aFrameCksum[0] = aWalData[1];
    pWal->hdr.aFrameCksum[1] = aWalData[2];







|
<



>

|
|
>
>
>
>
>
>
|
>
>

|
|
>
|
>
>
|
|
>
>
>
>
>
>
|
|
|
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
>
|
<
|
|
>
>
>
>
>
|
>
>
|
|
<
>
>
|
>
|
>
|
<
<
|
>
>
>
>
>
>
|







|








|











|
















|













|







1896
1897
1898
1899
1900
1901
1902
1903

1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958

1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970

1971
1972
1973
1974
1975
1976
1977


1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
}


/* 
** Set *pPgno to the size of the database file (or zero, if unknown).
*/
void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){
  assert( pWal->readLock>=0 );

  *pPgno = pWal->hdr.nPage;
}


/* 
** This function starts a write transaction on the WAL.
**
** A read transaction must have already been started by a prior call
** to sqlite3WalBeginReadTransaction().
**
** If another thread or process has written into the database since
** the read transaction was started, then it is not possible for this
** thread to write as doing so would cause a fork.  So this routine
** returns SQLITE_BUSY in that case and no write transaction is started.
**
** There can only be a single writer active at a time.
*/
int sqlite3WalBeginWriteTransaction(Wal *pWal){
  int rc;
  volatile WalCkptInfo *pInfo;

  /* Cannot start a write transaction without first holding a read
  ** transaction. */
  assert( pWal->readLock>=0 );

  /* Only one writer allowed at a time.  Get the write lock.  Return
  ** SQLITE_BUSY if unable.
  */
  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
  if( rc ){
    return rc;
  }

  /* If another connection has written to the database file since the
  ** time the read transaction on this connection was started, then
  ** the write is disallowed.
  */
  rc = walIndexMap(pWal, pWal->hdr.mxFrame);
  if( rc ){
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
    return rc;
  }
  if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
    walIndexUnmap(pWal);
    return SQLITE_BUSY;
  }

  pInfo = walCkptInfo(pWal);
  if( pWal->readLock==0 && pInfo->nBackfill==pWal->hdr.mxFrame ){
    rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
    if( rc==SQLITE_OK ){
      /* If all readers are using WAL_READ_LOCK(0) (in other words if no
      ** readers are currently using the WAL) */
      pWal->nCkpt++;

      pWal->hdr.mxFrame = 0;
      sqlite3Put4byte((u8*)pWal->hdr.aSalt,
                       1 + sqlite3Get4byte((u8*)pWal->hdr.aSalt));
      sqlite3_randomness(4, &pWal->hdr.aSalt[1]);
      walIndexWriteHdr(pWal);
      pInfo->nBackfill = 0;
      memset(&pInfo->aReadMark[1], 0, sizeof(pInfo->aReadMark)-sizeof(u32));
      rc = sqlite3OsTruncate(pWal->pDbFd, 
                             ((i64)pWal->hdr.nPage*(i64)pWal->szPage));
      walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
    }
    walUnlockShared(pWal, WAL_READ_LOCK(0));

    do{
      int notUsed;
      rc = walTryBeginRead(pWal, &notUsed, 1);
    }while( rc==WAL_RETRY );
  }
  return rc;
}



/*
** End a write transaction.  The commit has already been done.  This
** routine merely releases the lock.
*/
int sqlite3WalEndWriteTransaction(Wal *pWal){
  walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
  return SQLITE_OK;
}

/*
** If any data has been written (but not committed) to the log file, this
** function moves the write-pointer back to the start of the transaction.
**
** Additionally, the callback function is invoked for each frame written
** to the WAL since the start of the transaction. If the callback returns
** other than SQLITE_OK, it is not invoked again and the error code is
** returned to the caller.
**
** Otherwise, if the callback function does not return an error, this
** function returns SQLITE_OK.
*/
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
  int rc = SQLITE_OK;
  if( pWal->writeLock ){
    int unused;
    Pgno iMax = pWal->hdr.mxFrame;
    Pgno iFrame;
  
    assert( pWal->pWiData==0 );
    rc = walIndexReadHdr(pWal, &unused);
    if( rc==SQLITE_OK ){
      rc = walIndexMap(pWal, walMappingSize(iMax));
    }
    if( rc==SQLITE_OK ){
      for(iFrame=pWal->hdr.mxFrame+1; rc==SQLITE_OK && iFrame<=iMax; iFrame++){
        assert( pWal->writeLock );
        rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]);
      }
      walCleanupHash(pWal);
    }
    walIndexUnmap(pWal);
  }
  return rc;
}

/* 
** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 
** values. This function populates the array with values required to 
** "rollback" the write position of the WAL handle back to the current 
** point in the event of a savepoint rollback (via WalSavepointUndo()).
*/
void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){
  assert( pWal->writeLock );
  aWalData[0] = pWal->hdr.mxFrame;
  aWalData[1] = pWal->hdr.aFrameCksum[0];
  aWalData[2] = pWal->hdr.aFrameCksum[1];
}

/* 
** Move the write position of the WAL back to the point identified by
** the values in the aWalData[] array. aWalData must point to an array
** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated
** by a call to WalSavepoint().
*/
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
  int rc = SQLITE_OK;
  assert( pWal->writeLock );

  assert( aWalData[0]<=pWal->hdr.mxFrame );
  if( aWalData[0]<pWal->hdr.mxFrame ){
    rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
    pWal->hdr.mxFrame = aWalData[0];
    pWal->hdr.aFrameCksum[0] = aWalData[1];
    pWal->hdr.aFrameCksum[1] = aWalData[2];
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
  u32 iFrame;                     /* Next frame address */
  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */
  PgHdr *p;                       /* Iterator to run through pList with. */
  PgHdr *pLast = 0;               /* Last frame in list */
  int nLast = 0;                  /* Number of extra copies of last page */

  assert( pList );
  assert( pWal->lockState==SQLITE_SHM_WRITE );
  assert( pWal->pWiData==0 );

  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */
  iFrame = pWal->hdr.mxFrame;







|







2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
  u32 iFrame;                     /* Next frame address */
  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */
  PgHdr *p;                       /* Iterator to run through pList with. */
  PgHdr *pLast = 0;               /* Last frame in list */
  int nLast = 0;                  /* Number of extra copies of last page */

  assert( pList );
  assert( pWal->writeLock );
  assert( pWal->pWiData==0 );

  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */
  iFrame = pWal->hdr.mxFrame;
1848
1849
1850
1851
1852
1853
1854
1855

1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889


1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
  }

  walIndexUnmap(pWal);
  return rc;
}

/* 
** Checkpoint the database:

**
**   1. Acquire a CHECKPOINT lock
**   2. Copy the contents of the log into the database file.
**   3. Zero the wal-index header (so new readers will ignore the log).
**   4. Drop the CHECKPOINT lock.
*/
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Wal connection */
  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of temporary buffer */
  u8 *zBuf,                       /* Temporary buffer to use */
  int (*xBusyHandler)(void *),    /* Pointer to busy-handler function */
  void *pBusyHandlerArg           /* Argument to pass to xBusyHandler */
){
  int rc;                         /* Return code */
  int isChanged = 0;              /* True if a new wal-index header is loaded */

  assert( pWal->pWiData==0 );

  /* Get the CHECKPOINT lock. 
  **
  ** Normally, the connection will be in UNLOCK state at this point. But
  ** if the connection is in exclusive-mode it may still be in READ state
  ** even though the upper layer has no active read-transaction (because
  ** WalCloseSnapshot() is not called in exclusive mode). The state will
  ** be set to UNLOCK when this function returns. This is Ok.
  */
  assert( (pWal->lockState==SQLITE_SHM_UNLOCK)
       || (pWal->lockState==SQLITE_SHM_READ) );
  walSetLock(pWal, SQLITE_SHM_UNLOCK);
  do {
    rc = walSetLock(pWal, SQLITE_SHM_CHECKPOINT);
  }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) );
  if( rc!=SQLITE_OK ){


    walSetLock(pWal, SQLITE_SHM_UNLOCK);
    return rc;
  }

  /* Copy data from the log to the database file. */
  rc = walIndexReadHdr(pWal, &isChanged);
  if( rc==SQLITE_OK ){
    rc = walCheckpoint(pWal, sync_flags, nBuf, zBuf);
  }
  if( isChanged ){
    /* If a new wal-index header was loaded before the checkpoint was 
    ** performed, then the pager-cache associated with pWal is now
    ** out of date. So zero the cached wal-index header to ensure that
    ** next time the pager opens a snapshot on this database it knows that
    ** the cache needs to be reset.
    */
    memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  }

  /* Release the locks. */
  walIndexUnmap(pWal);
  walSetLock(pWal, SQLITE_SHM_UNLOCK);
  return rc;
}

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called.  If no commits have occurred since
** the last call, then return 0.







|
>

|
|
<
<





|
<
<






<
<
<
<
<
<
<
<
<
<
|
<
<
<
|
>
>
|




















|







2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198


2199
2200
2201
2202
2203
2204


2205
2206
2207
2208
2209
2210










2211



2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
  }

  walIndexUnmap(pWal);
  return rc;
}

/* 
** This routine is called to implement sqlite3_wal_checkpoint() and
** related interfaces.
**
** Obtain a CHECKPOINT lock and then backfill as much information as
** we can from WAL into the database.


*/
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Wal connection */
  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of temporary buffer */
  u8 *zBuf                        /* Temporary buffer to use */


){
  int rc;                         /* Return code */
  int isChanged = 0;              /* True if a new wal-index header is loaded */

  assert( pWal->pWiData==0 );











  rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);



  if( rc ){
    /* Usually this is SQLITE_BUSY meaning that another thread or process
    ** is already running a checkpoint, or maybe a recovery.  But it might
    ** also be SQLITE_IOERR. */
    return rc;
  }

  /* Copy data from the log to the database file. */
  rc = walIndexReadHdr(pWal, &isChanged);
  if( rc==SQLITE_OK ){
    rc = walCheckpoint(pWal, sync_flags, nBuf, zBuf);
  }
  if( isChanged ){
    /* If a new wal-index header was loaded before the checkpoint was 
    ** performed, then the pager-cache associated with pWal is now
    ** out of date. So zero the cached wal-index header to ensure that
    ** next time the pager opens a snapshot on this database it knows that
    ** the cache needs to be reset.
    */
    memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  }

  /* Release the locks. */
  walIndexUnmap(pWal);
  walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
  return rc;
}

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called.  If no commits have occurred since
** the last call, then return 0.
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
** This function is called to set or query the exclusive-mode flag 
** associated with the WAL connection passed as the first argument. The
** exclusive-mode flag should be set to indicate that the caller is
** holding an EXCLUSIVE lock on the database file (it does this in
** locking_mode=exclusive mode). If the EXCLUSIVE lock is to be dropped,
** the flag set by this function should be cleared before doing so.
**
** The value of the exclusive-mode flag may only be modified when
** the WAL connection is in READ state.
**
** When the flag is set, this module does not call the VFS xShmLock()
** method to obtain any locks on the wal-index (as it assumes it
** has exclusive access to the wal and wal-index files anyhow). It
** continues to hold (and does not drop) the existing READ lock on
** the wal-index.
**
** To set or clear the flag, the "op" parameter is passed 1 or 0,
** respectively. To query the flag, pass -1. In all cases, the value
** returned is the value of the exclusive-mode flag (after its value
** has been modified, if applicable).
*/
int sqlite3WalExclusiveMode(Wal *pWal, int op){
  if( op>=0 ){
    assert( pWal->lockState==SQLITE_SHM_READ );
    pWal->exclusiveMode = (u8)op;
  }
  return pWal->exclusiveMode;
}

#endif /* #ifndef SQLITE_OMIT_WAL */







<
<
<













<






2255
2256
2257
2258
2259
2260
2261



2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274

2275
2276
2277
2278
2279
2280
** This function is called to set or query the exclusive-mode flag 
** associated with the WAL connection passed as the first argument. The
** exclusive-mode flag should be set to indicate that the caller is
** holding an EXCLUSIVE lock on the database file (it does this in
** locking_mode=exclusive mode). If the EXCLUSIVE lock is to be dropped,
** the flag set by this function should be cleared before doing so.
**



** When the flag is set, this module does not call the VFS xShmLock()
** method to obtain any locks on the wal-index (as it assumes it
** has exclusive access to the wal and wal-index files anyhow). It
** continues to hold (and does not drop) the existing READ lock on
** the wal-index.
**
** To set or clear the flag, the "op" parameter is passed 1 or 0,
** respectively. To query the flag, pass -1. In all cases, the value
** returned is the value of the exclusive-mode flag (after its value
** has been modified, if applicable).
*/
int sqlite3WalExclusiveMode(Wal *pWal, int op){
  if( op>=0 ){

    pWal->exclusiveMode = (u8)op;
  }
  return pWal->exclusiveMode;
}

#endif /* #ifndef SQLITE_OMIT_WAL */
Changes to src/wal.h.
16
17
18
19
20
21
22
23
24
25
26
27
28
29

30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

#ifndef _WAL_H_
#define _WAL_H_

#include "sqliteInt.h"

#ifdef SQLITE_OMIT_WAL
# define sqlite3WalOpen(x,y,z)             0
# define sqlite3WalClose(w,x,y,z)          0
# define sqlite3WalOpenSnapshot(y,z)       0
# define sqlite3WalCloseSnapshot(z) 
# define sqlite3WalRead(v,w,x,y,z)         0
# define sqlite3WalDbsize(y,z)
# define sqlite3WalWriteLock(y,z)          0

# define sqlite3WalUndo(x,y,z)             0
# define sqlite3WalSavepoint(y,z)
# define sqlite3WalSavepointUndo(y,z)      0
# define sqlite3WalFrames(u,v,w,x,y,z)     0
# define sqlite3WalCheckpoint(u,v,w,x,y,z) 0
# define sqlite3WalCallback(z)             0
#else

#define WAL_SAVEPOINT_NDATA 3

/* Connection to a write-ahead log (WAL) file. 
** There is one object of this type for each pager. 
*/
typedef struct Wal Wal;

/* Open and close a connection to a write-ahead log. */
int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *zName, Wal**);
int sqlite3WalClose(Wal *pWal, int sync_flags, int, u8 *);

/* Used by readers to open (lock) and close (unlock) a snapshot.  A 
** snapshot is like a read-transaction.  It is the state of the database
** at an instant in time.  sqlite3WalOpenSnapshot gets a read lock and
** preserves the current state even if the other threads or processes
** write to or checkpoint the WAL.  sqlite3WalCloseSnapshot() closes the
** transaction and releases the lock.
*/
int sqlite3WalOpenSnapshot(Wal *pWal, int *);
void sqlite3WalCloseSnapshot(Wal *pWal);

/* Read a page from the write-ahead log, if it is present. */
int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut);

/* Return the size of the database as it existed at the beginning
** of the snapshot */
void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno);

/* Obtain or release the WRITER lock. */

int sqlite3WalWriteLock(Wal *pWal, int op);

/* Undo any frames written (but not committed) to the log */
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx);

/* Return an integer that records the current (uncommitted) write
** position in the WAL */
void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData);

/* Move the write position of the WAL back to iFrame.  Called in
** response to a ROLLBACK TO command. */
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData);

/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);

/* Copy pages from the log to the database file */ 
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Write-ahead log connection */
  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of buffer nBuf */
  u8 *zBuf,                       /* Temporary buffer to use */
  int (*xBusyHandler)(void *),    /* Pointer to busy-handler function */
  void *pBusyHandlerArg           /* Argument to pass to xBusyHandler */
);

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called.  If no commits have occurred since
** the last call, then return 0.
*/







|
|
|
|
|

|
>
|

|
|
|
|




















|
|









>
|




















|
<
<







16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90


91
92
93
94
95
96
97

#ifndef _WAL_H_
#define _WAL_H_

#include "sqliteInt.h"

#ifdef SQLITE_OMIT_WAL
# define sqlite3WalOpen(x,y,z)                 0
# define sqlite3WalClose(w,x,y,z)              0
# define sqlite3WalBeginReadTransaction(y,z)   0
# define sqlite3WalEndReadTransaction(z)
# define sqlite3WalRead(v,w,x,y,z)             0
# define sqlite3WalDbsize(y,z)
# define sqlite3WalBeginWriteTransaction(y)    0
# define sqlite3WalEndWRiteTransaction(x)      0
# define sqlite3WalUndo(x,y,z)                 0
# define sqlite3WalSavepoint(y,z)
# define sqlite3WalSavepointUndo(y,z)          0
# define sqlite3WalFrames(u,v,w,x,y,z)         0
# define sqlite3WalCheckpoint(u,v,w,x)         0
# define sqlite3WalCallback(z)                 0
#else

#define WAL_SAVEPOINT_NDATA 3

/* Connection to a write-ahead log (WAL) file. 
** There is one object of this type for each pager. 
*/
typedef struct Wal Wal;

/* Open and close a connection to a write-ahead log. */
int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *zName, Wal**);
int sqlite3WalClose(Wal *pWal, int sync_flags, int, u8 *);

/* Used by readers to open (lock) and close (unlock) a snapshot.  A 
** snapshot is like a read-transaction.  It is the state of the database
** at an instant in time.  sqlite3WalOpenSnapshot gets a read lock and
** preserves the current state even if the other threads or processes
** write to or checkpoint the WAL.  sqlite3WalCloseSnapshot() closes the
** transaction and releases the lock.
*/
int sqlite3WalBeginReadTransaction(Wal *pWal, int *);
void sqlite3WalEndReadTransaction(Wal *pWal);

/* Read a page from the write-ahead log, if it is present. */
int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut);

/* Return the size of the database as it existed at the beginning
** of the snapshot */
void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno);

/* Obtain or release the WRITER lock. */
int sqlite3WalBeginWriteTransaction(Wal *pWal);
int sqlite3WalEndWriteTransaction(Wal *pWal);

/* Undo any frames written (but not committed) to the log */
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx);

/* Return an integer that records the current (uncommitted) write
** position in the WAL */
void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData);

/* Move the write position of the WAL back to iFrame.  Called in
** response to a ROLLBACK TO command. */
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData);

/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);

/* Copy pages from the log to the database file */ 
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Write-ahead log connection */
  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of buffer nBuf */
  u8 *zBuf                        /* Temporary buffer to use */


);

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since
** sqlite3WalCallback() was called.  If no commits have occurred since
** the last call, then return 0.
*/