/ Check-in [18b2c23a]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the way wal2 locks work to ensure a reader only ever has to lock a single slot.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | wal2
Files: files | file ages | folders
SHA3-256: 18b2c23ac53d985ccc5798ea2d92fb75644b857c373fb490e0d04d5d0194a3d5
User & Date: dan 2018-12-11 17:56:23
Wiki:wal2
Context
2018-12-12
19:04
Add tests to ensure that each of the 4 wal read-locks does what it is supposed to. check-in: 4d5779f3 user: dan tags: wal2
2018-12-11
17:56
Change the way wal2 locks work to ensure a reader only ever has to lock a single slot. check-in: 18b2c23a user: dan tags: wal2
13:44
Merge latest trunk changes into this branch. check-in: d8dd98a3 user: dan tags: wal2
Changes
Hide Diffs Unified Diffs Show Whitespace Changes Patch

Changes to src/wal.c.

358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
...
469
470
471
472
473
474
475
476
477

478
479




480
481
482
483
484
485
486
487
488
489
490
491
492
....
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
....
2246
2247
2248
2249
2250
2251
2252






























































2253
2254
2255
2256
2257
2258
2259
....
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
....
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
....
2430
2431
2432
2433
2434
2435
2436

2437

2438
2439
2440
2441
2442
2443
2444

2445
2446
2447
2448
2449
2450
2451
....
3056
3057
3058
3059
3060
3061
3062



3063
3064
3065

3066
3067








3068
3069
3070
3071
3072
3073
3074

3075
3076
3077
3078
3079
3080
3081
....
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
....
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
....
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861

3862
3863

3864
3865
3866
3867
3868
3869
3870
....
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
** it is held, but does not prevent a checkpointer from checkpointing 
** it.
**
** There is still a single WRITER and a single CHECKPOINTER lock. The
** recovery procedure still takes the same exclusive lock on the entire
** range of SQLITE_SHM_NLOCK shm-locks. This works because the read-locks
** above use four of the six read-locking slots used by legacy wal mode.
** See the header comment for function walLockReader() for details.
**
** STARTUP/RECOVERY
**
** The read and write version fields of the database header in a wal2
** database are set to 0x03, instead of 0x02 as in legacy wal mode.
**
** The wal file format used in wal2 mode is the same as the format used
................................................................................
/*
** Values that may be stored in Wal.readLock in wal2 mode.
**
** In wal mode, the Wal.readLock member is set to -1 when no read-lock
** is held, or else is the index of the read-mark on which a lock is
** held.
**
** In wal2 mode, Wal.readLock must be set to one of the following values.
** A value of -1 still indicates that no read-lock is held, but the other

** values are symbolic. See the implementation of walLockReader() for
** details of how the symbols map to OS level locks.




*/
#define WAL_LOCK_NONE        -1   /* No read-lock is held */
#define WAL_LOCK_PART1        1   /* walLockReader(): read-lock slot 1 only */
#define WAL_LOCK_PART1_FULL2  2   /* walLockReader(): read-lock slots 1 and 2 */
#define WAL_LOCK_PART2        3   /* walLockReader(): read-lock slot 3 only */
#define WAL_LOCK_PART2_FULL1  4   /* walLockReader(): read-lock slots 3 and 4 */

/* 
** This constant is used in wal2 mode only.
**
** In wal2 mode, when committing a transaction, if the current wal file 
** is sufficiently large and there are no conflicting locks held, the
** writer writes the new transaction into the start of the other wal
................................................................................
  if( pWal->exclusiveMode ) return;
  (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
                         SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);
  WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal,
             walLockName(lockIdx), n));
}

/*
** Take (bLock!=0) or release (bLock==0) the SHARED shm-locks that
** correspond to wal2 read-lock eLock. This is a no-op that returns
** SQLITE_OK when the connection is in exclusive mode.
**
** Parameter eLock must be one of the symbolic WAL_LOCK_PARTx values. Each
** maps to a contiguous run of WAL_READ_LOCK(x) slots, (1<=x<=4):
**
**   Slot 1: Partial read of *-wal-1  (blocks checkpointer from checkpointing)
**   Slot 2: Full read of *-wal-2     (blocks writer from writing)
**   Slot 3: Partial read of *-wal-2  (blocks checkpointer from checkpointing)
**   Slot 4: Full read of *-wal-1     (blocks writer from writing)
**
**   WAL_LOCK_PART1        -> slot 1
**   WAL_LOCK_PART1_FULL2  -> slots 1 and 2
**   WAL_LOCK_PART2        -> slot 3
**   WAL_LOCK_PART2_FULL1  -> slots 3 and 4
**
** The value returned is whatever sqlite3OsShmLock() returns.
*/
static int walLockReader(Wal *pWal, int eLock, int bLock){
  int iFirst;                     /* First read-lock slot in the range */
  int nSlot;                      /* Number of consecutive slots in the range */
  int flags;                      /* Flags to pass to sqlite3OsShmLock() */

  assert( pWal->hdr.iVersion==WAL_VERSION2 );
  if( pWal->exclusiveMode ){
    return SQLITE_OK;
  }

  if( eLock==WAL_LOCK_PART1 ){
    iFirst = 1;
    nSlot = 1;
  }else if( eLock==WAL_LOCK_PART1_FULL2 ){
    iFirst = 1;
    nSlot = 2;
  }else if( eLock==WAL_LOCK_PART2 ){
    iFirst = 3;
    nSlot = 1;
  }else if( eLock==WAL_LOCK_PART2_FULL1 ){
    iFirst = 3;
    nSlot = 2;
  }else{
    /* Callers only ever pass one of the four symbolic values above */
    assert( !"cannot happen" );
  }

  if( bLock ){
    flags = SQLITE_SHM_SHARED | SQLITE_SHM_LOCK;
  }else{
    flags = SQLITE_SHM_SHARED | SQLITE_SHM_UNLOCK;
  }
  return sqlite3OsShmLock(pWal->pDbFd, WAL_READ_LOCK(iFirst), nSlot, flags);
}

/*
** Compute a hash on a page number.  The resulting hash value must land
** between 0 and (HASHTABLE_NSLOT-1).  The walHashNext() function advances
** the hash to the next value in the event of a collision.
*/
static int walHash(u32 iPage){
  assert( iPage>0 );
................................................................................
  walIndexWriteHdr(pWal);
  pInfo->nBackfill = 0;
  pInfo->nBackfillAttempted = 0;
  pInfo->aReadMark[1] = 0;
  for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
  assert( pInfo->aReadMark[0]==0 );
}































































/*
** Copy as much content as we can from the WAL back into the database file
** in response to an sqlite3_wal_checkpoint() request or the equivalent.
**
** The amount of information copied from WAL to database might be limited
** by active readers.  This routine will never overwrite a database page
................................................................................
    sqlite3_file *pWalFd = pWal->apWalFd[iCkpt];
    mxPage = pWal->hdr.nPage;

    /* If this is a wal2 system, check for a reader holding a lock 
    ** preventing this checkpoint operation. If one is found, return
    ** early.  */
    if( bWal2 ){
      rc = walLockExclusive(pWal, WAL_READ_LOCK(1 + iCkpt*2), 1);
      if( rc!=SQLITE_OK ) return rc;
    }

    /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked
    ** in the SQLITE_CHECKPOINT_PASSIVE mode. */
    assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 );

................................................................................
    /* Allocate the iterator */
    if( bWal2 || pInfo->nBackfill<mxSafeFrame ){
      assert( bWal2==0 || pInfo->nBackfill==0 );
      rc = walIteratorInit(pWal, iCkpt, pInfo->nBackfill, &pIter);
      assert( rc==SQLITE_OK || pIter==0 );
    }

    if( pIter
     && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK
    ){
      u32 nBackfill = pInfo->nBackfill;

      assert( bWal2==0 || nBackfill==0 );
      pInfo->nBackfillAttempted = mxSafeFrame;

      /* Sync the wal file being checkpointed to disk */
      rc = sqlite3OsSync(pWalFd, CKPT_SYNC_FLAGS(sync_flags));
................................................................................
        }
      }
      if( rc==SQLITE_OK ){
        pInfo->nBackfill = bWal2 ? 1 : mxSafeFrame;
      }

      /* Release the reader lock held while backfilling */

      walUnlockExclusive(pWal, WAL_READ_LOCK(bWal2 ? 1 + iCkpt*2 : 0), 1);

    }

    if( rc==SQLITE_BUSY ){
      /* Reset the return code so as not to report a checkpoint failure
      ** just because there are active readers.  */
      rc = SQLITE_OK;
    }

  }

  /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the
  ** entire wal file has been copied into the database file, then block 
  ** until all readers have finished using the wal file. This ensures that 
  ** the next process to write to the database restarts the wal file.
  */
................................................................................
    }
  }

  assert( pWal->nWiData>0 );
  assert( pWal->apWiData[0]!=0 );
  pInfo = walCkptInfo(pWal);
  if( isWalMode2(pWal) ){



    int eLock = 1 + (walidxGetFile(&pWal->hdr)*2);
    if( pInfo->nBackfill==0 ){
      eLock += walidxGetMxFrame(&pWal->hdr, !walidxGetFile(&pWal->hdr))>0;

    }
    rc = walLockReader(pWal, eLock, 1);








    if( rc!=SQLITE_OK ){
      return rc;
    }

    walShmBarrier(pWal);
    if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
      walLockReader(pWal, eLock, 0);

      return WAL_RETRY;
    }else{
      pWal->readLock = eLock;
    }
    assert( pWal->minFrame==0 && walFramePage(pWal->minFrame)==0 );
  }else{
    u32 mxReadMark;               /* Largest aReadMark[] value */
................................................................................
/*
** End the current read transaction on pWal. Any open write transaction
** is ended first. Then, if a read-lock is held, it is released - via
** walLockReader() in wal2 mode, or walUnlockShared() otherwise - and
** Wal.readLock is reset to WAL_LOCK_NONE.
*/
void sqlite3WalEndReadTransaction(Wal *pWal){
  sqlite3WalEndWriteTransaction(pWal);

  /* Nothing more to do if no read-lock is currently held */
  if( pWal->readLock==WAL_LOCK_NONE ) return;

  if( !isWalMode2(pWal) ){
    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
  }else{
    /* The result of the unlock operation is deliberately ignored */
    (void)walLockReader(pWal, pWal->readLock, 0);
  }
  pWal->readLock = WAL_LOCK_NONE;
}

/* Search hash table iHash for an entry matching page number
** pgno. Each call to this function searches a single hash table
** (each hash table indexes up to HASHTABLE_NPAGE frames).
................................................................................
    pWal->hdr.aFrameCksum[1] = aWalData[2];
    walCleanupHash(pWal);
  }

  return rc;
}

/*
** This function is used in wal2 mode.
**
** It is called by a writer that is about to start writing frames, once
** the "other" wal file (the one that is not the current wal file) has
** been fully checkpointed. It tests whether or not any reader holds a
** lock that prevents the writer from switching to the other wal file by
** briefly taking, then releasing, an EXCLUSIVE lock on one read-lock slot.
**
** SQLITE_OK is returned if the lock could be obtained (no blocking
** readers). Otherwise, the error code returned by walLockExclusive()
** (SQLITE_BUSY if there are blocking readers) is returned.
*/
static int walRestartOk(Wal *pWal){
  int iSlot;                      /* Read-lock slot to probe */
  int rc;                         /* Return code */

  /* A "partial" reader of the other wal file would have prevented it
  ** from being checkpointed, so only "full" readers need to be tested
  ** for. A full read of the other file uses slot 2 if the current wal
  ** file is file 0, or slot 4 otherwise. See the comment above the
  ** walLockReader() function for details.  */
  if( walidxGetFile(&pWal->hdr)==0 ){
    iSlot = 2;
  }else{
    iSlot = 4;
  }

  rc = walLockExclusive(pWal, WAL_READ_LOCK(iSlot), 1);
  if( rc!=SQLITE_OK ) return rc;

  /* The lock was only a probe. Release it immediately. */
  walUnlockExclusive(pWal, WAL_READ_LOCK(iSlot), 1);
  return rc;
}

/*
** This function is called just before writing a set of frames to the log
** file (see sqlite3WalFrames()). It checks to see if, instead of appending
** to the current log file, it is possible and desirable to switch to the
** other log file and write the new transaction to the start of it.
** If so, the wal-index header is updated accordingly - both in heap memory
** and in the *-shm file.
................................................................................
        / (pWal->szPage+WAL_FRAME_HDRSIZE);
      nWalSize = MAX(nWalSize, 1);
    }

    if( walidxGetMxFrame(&pWal->hdr, iApp)>=nWalSize ){
      volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
      if( walidxGetMxFrame(&pWal->hdr, !iApp)==0 || pInfo->nBackfill ){
        rc = walRestartOk(pWal);
        if( rc==SQLITE_OK ){
          iApp = !iApp;
          pWal->nCkpt++;
          walidxSetFile(&pWal->hdr, iApp);
          walidxSetMxFrame(&pWal->hdr, iApp, 0);
          sqlite3Put4byte((u8*)&pWal->hdr.aSalt[0], pWal->hdr.aFrameCksum[0]);
          sqlite3Put4byte((u8*)&pWal->hdr.aSalt[1], pWal->hdr.aFrameCksum[1]);
          walIndexWriteHdr(pWal);
          pInfo->nBackfill = 0;
          walLockReader(pWal, pWal->readLock, 0);

          pWal->readLock = iApp ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2;
          rc = walLockReader(pWal, pWal->readLock, 1);

        }else if( rc==SQLITE_BUSY ){
          rc = SQLITE_OK;
        }
      }
    }
  }else if( pWal->readLock==0 ){
    int cnt;
................................................................................
  */
  assert( pWal->readLock!=WAL_LOCK_NONE || pWal->lockError );
  assert( pWal->readLock!=WAL_LOCK_NONE || (op<=0 && pWal->exclusiveMode==0) );

  if( op==0 ){
    if( pWal->exclusiveMode ){
      pWal->exclusiveMode = WAL_NORMAL_MODE;
      if( isWalMode2(pWal) ){
        rc = walLockReader(pWal, pWal->readLock, 1);
      }else{
        rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock));
      }
      if( rc!=SQLITE_OK ){
        pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
      }
      rc = pWal->exclusiveMode==WAL_NORMAL_MODE;
    }else{
      /* Already in locking_mode=NORMAL */
      rc = 0;
    }
  }else if( op>0 ){
    assert( pWal->exclusiveMode==WAL_NORMAL_MODE );
    assert( pWal->readLock>=0 );
    if( isWalMode2(pWal) ){
      walLockReader(pWal, pWal->readLock, 0);
    }else{
      walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
    }
    pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
    rc = 1;
  }else{
    rc = pWal->exclusiveMode==WAL_NORMAL_MODE;
  }
  return rc;
}







<







 







<
|
>
|
|
>
>
>
>




|
|







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|







 







|
|
|







 







>
|
>







>







 







>
>
>
|
<
|
>
|
<
>
>
>
>
>
>
>
>



<


<
>







 







<
<
<
|
<







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|

|

|
|




|
>
|
<
>







 







<
<
<
|
<











<
<
<
|
<







358
359
360
361
362
363
364

365
366
367
368
369
370
371
...
468
469
470
471
472
473
474

475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
....
1110
1111
1112
1113
1114
1115
1116






























1117
1118
1119
1120
1121
1122
1123
....
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
....
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
....
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
....
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
....
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104

3105
3106
3107

3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118

3119
3120

3121
3122
3123
3124
3125
3126
3127
3128
....
3446
3447
3448
3449
3450
3451
3452



3453

3454
3455
3456
3457
3458
3459
3460
....
3833
3834
3835
3836
3837
3838
3839



























3840
3841
3842
3843
3844
3845
3846
....
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879

3880
3881
3882
3883
3884
3885
3886
3887
....
4503
4504
4505
4506
4507
4508
4509



4510

4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521



4522

4523
4524
4525
4526
4527
4528
4529
** it is held, but does not prevent a checkpointer from checkpointing 
** it.
**
** There is still a single WRITER and a single CHECKPOINTER lock. The
** recovery procedure still takes the same exclusive lock on the entire
** range of SQLITE_SHM_NLOCK shm-locks. This works because the read-locks
** above use four of the six read-locking slots used by legacy wal mode.

**
** STARTUP/RECOVERY
**
** The read and write version fields of the database header in a wal2
** database are set to 0x03, instead of 0x02 as in legacy wal mode.
**
** The wal file format used in wal2 mode is the same as the format used
................................................................................
/*
** Values that may be stored in Wal.readLock in wal2 mode.
**
** In wal mode, the Wal.readLock member is set to -1 when no read-lock
** is held, or else is the index of the read-mark on which a lock is
** held.
**

** In wal2 mode, a value of -1 still indicates that no read-lock is held.
** And a non-zero value still represents the index of the read-mark on
** which a lock is held. There are two differences:
**
**   1. wal2 mode never uses read-mark 0.
**
**   2. locks on each read-mark have a different interpretation, as 
**      indicated by the symbolic names below.
*/
#define WAL_LOCK_NONE        -1
#define WAL_LOCK_PART1        1
#define WAL_LOCK_PART1_FULL2  2
#define WAL_LOCK_PART2_FULL1  3
#define WAL_LOCK_PART2        4

/* 
** This constant is used in wal2 mode only.
**
** In wal2 mode, when committing a transaction, if the current wal file 
** is sufficiently large and there are no conflicting locks held, the
** writer writes the new transaction into the start of the other wal
................................................................................
  if( pWal->exclusiveMode ) return;
  (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
                         SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);
  WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal,
             walLockName(lockIdx), n));
}































/*
** Compute a hash on a page number.  The resulting hash value must land
** between 0 and (HASHTABLE_NSLOT-1).  The walHashNext() function advances
** the hash to the next value in the event of a collision.
*/
static int walHash(u32 iPage){
  assert( iPage>0 );
................................................................................
  walIndexWriteHdr(pWal);
  pInfo->nBackfill = 0;
  pInfo->nBackfillAttempted = 0;
  pInfo->aReadMark[1] = 0;
  for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
  assert( pInfo->aReadMark[0]==0 );
}

/*
** This function is used in wal2 mode.
**
** A writer calls this function just before it starts writing frames.
** Parameter iApp identifies the current wal file; the "other" wal file
** (wal file !iApp) has already been fully checkpointed. The function
** attempts to take an EXCLUSIVE lock on the three read-lock slots that
** a reader of the other wal file might hold.
**
** The value returned is that of walLockExclusive(): SQLITE_OK if no
** reader prevents the writer from switching to the other wal file, or
** SQLITE_BUSY if one does. If SQLITE_OK is returned, the locks remain
** held; wal2RestartFinished() releases them.
*/
static int wal2RestartOk(Wal *pWal, int iApp){
  /* Wal file !iApp may be overwritten only if nobody is reading it -
  ** that is, if no "full" or "part" lock is held on it. Strictly, a
  ** "part" lock cannot be held, since it would have stopped the file
  ** from being checkpointed, but including it in the test is harmless.
  ** The range of three slots tested begins at:
  **
  **   WAL_LOCK_PART1_FULL2   if iApp==0
  **   WAL_LOCK_PART1         if iApp==1
  */
  const int nSlot = 3;
  int eLock = (iApp==0) ? 2 : 1;

  /* The slot arithmetic relies on these exact numeric values */
  assert( WAL_LOCK_PART1==1 );
  assert( WAL_LOCK_PART1_FULL2==2 );
  assert( WAL_LOCK_PART2_FULL1==3 );
  assert( WAL_LOCK_PART2==4 );

  assert( iApp!=0 || eLock==WAL_LOCK_PART1_FULL2 );
  assert( iApp!=1 || eLock==WAL_LOCK_PART1 );

  return walLockExclusive(pWal, WAL_READ_LOCK(eLock), nSlot);
}
/*
** Release the EXCLUSIVE lock on the three read-lock slots that start at
** slot 2 (if iApp==0) or slot 1 (otherwise). This is the same range that
** wal2RestartOk() locks when passed the same iApp value.
*/
static void wal2RestartFinished(Wal *pWal, int iApp){
  int iFirst = (iApp==0) ? 2 : 1;
  walUnlockExclusive(pWal, WAL_READ_LOCK(iFirst), 3);
}

/*
** This function is used in wal2 mode.
**
** A checkpointer calls this function before checkpointing wal file
** iCkpt. It attempts to take an EXCLUSIVE lock on the two read-lock
** slots starting at slot (1 + iCkpt*2) and returns the result of
** walLockExclusive(): SQLITE_OK on success, or an SQLite error code
** (e.g. SQLITE_BUSY) otherwise.
**
** If SQLITE_OK is returned, the caller is responsible for releasing the
** lock by invoking wal2CheckpointFinished().
*/
static int wal2CheckpointOk(Wal *pWal, int iCkpt){
  const int nSlot = 2;
  int eLock = (2*iCkpt) + 1;

  /* The slot arithmetic relies on these exact numeric values */
  assert( WAL_LOCK_PART1==1 );
  assert( WAL_LOCK_PART1_FULL2==2 );
  assert( WAL_LOCK_PART2_FULL1==3 );
  assert( WAL_LOCK_PART2==4 );

  assert( iCkpt!=0 || eLock==WAL_LOCK_PART1 );
  assert( iCkpt!=1 || eLock==WAL_LOCK_PART2_FULL1 );

  return walLockExclusive(pWal, WAL_READ_LOCK(eLock), nSlot);
}
/*
** Release the EXCLUSIVE lock on the two read-lock slots that start at
** slot (1 + iCkpt*2). This is the same range that wal2CheckpointOk()
** locks when passed the same iCkpt value.
*/
static void wal2CheckpointFinished(Wal *pWal, int iCkpt){
  int iFirst = (2*iCkpt) + 1;
  walUnlockExclusive(pWal, WAL_READ_LOCK(iFirst), 2);
}

/*
** Copy as much content as we can from the WAL back into the database file
** in response to an sqlite3_wal_checkpoint() request or the equivalent.
**
** The amount of information copied from WAL to database might be limited
** by active readers.  This routine will never overwrite a database page
................................................................................
    sqlite3_file *pWalFd = pWal->apWalFd[iCkpt];
    mxPage = pWal->hdr.nPage;

    /* If this is a wal2 system, check for a reader holding a lock 
    ** preventing this checkpoint operation. If one is found, return
    ** early.  */
    if( bWal2 ){
      rc = wal2CheckpointOk(pWal, iCkpt);
      if( rc!=SQLITE_OK ) return rc;
    }

    /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked
    ** in the SQLITE_CHECKPOINT_PASSIVE mode. */
    assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 );

................................................................................
    /* Allocate the iterator */
    if( bWal2 || pInfo->nBackfill<mxSafeFrame ){
      assert( bWal2==0 || pInfo->nBackfill==0 );
      rc = walIteratorInit(pWal, iCkpt, pInfo->nBackfill, &pIter);
      assert( rc==SQLITE_OK || pIter==0 );
    }

    if( pIter && (bWal2 
     || (rc = walBusyLock(pWal, xBusy, pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK
    )){
      u32 nBackfill = pInfo->nBackfill;

      assert( bWal2==0 || nBackfill==0 );
      pInfo->nBackfillAttempted = mxSafeFrame;

      /* Sync the wal file being checkpointed to disk */
      rc = sqlite3OsSync(pWalFd, CKPT_SYNC_FLAGS(sync_flags));
................................................................................
        }
      }
      if( rc==SQLITE_OK ){
        pInfo->nBackfill = bWal2 ? 1 : mxSafeFrame;
      }

      /* Release the reader lock held while backfilling */
      if( bWal2==0 ){
        walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
      }
    }

    if( rc==SQLITE_BUSY ){
      /* Reset the return code so as not to report a checkpoint failure
      ** just because there are active readers.  */
      rc = SQLITE_OK;
    }
    if( bWal2 ) wal2CheckpointFinished(pWal, iCkpt);
  }

  /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the
  ** entire wal file has been copied into the database file, then block 
  ** until all readers have finished using the wal file. This ensures that 
  ** the next process to write to the database restarts the wal file.
  */
................................................................................
    }
  }

  assert( pWal->nWiData>0 );
  assert( pWal->apWiData[0]!=0 );
  pInfo = walCkptInfo(pWal);
  if( isWalMode2(pWal) ){
    /* This connection needs a "part" lock on the current wal file and, 
    ** unless pInfo->nBackfill is set to indicate that it has already been
    ** checkpointed, a "full" lock on the other wal file.  */
    int iWal = walidxGetFile(&pWal->hdr);

    int nBackfill = pInfo->nBackfill || walidxGetMxFrame(&pWal->hdr, !iWal)==0;
    int eLock = 1 + (iWal*2) + (nBackfill==iWal);


    assert( nBackfill==0 || nBackfill==1 );
    assert( iWal==0 || iWal==1 );
    assert( iWal!=0 || nBackfill!=1 || eLock==WAL_LOCK_PART1 );
    assert( iWal!=0 || nBackfill!=0 || eLock==WAL_LOCK_PART1_FULL2 );
    assert( iWal!=1 || nBackfill!=1 || eLock==WAL_LOCK_PART2 );
    assert( iWal!=1 || nBackfill!=0 || eLock==WAL_LOCK_PART2_FULL1 );

    rc = walLockShared(pWal, WAL_READ_LOCK(eLock));
    if( rc!=SQLITE_OK ){
      return rc;
    }

    walShmBarrier(pWal);
    if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){

      walUnlockShared(pWal, WAL_READ_LOCK(eLock));
      return WAL_RETRY;
    }else{
      pWal->readLock = eLock;
    }
    assert( pWal->minFrame==0 && walFramePage(pWal->minFrame)==0 );
  }else{
    u32 mxReadMark;               /* Largest aReadMark[] value */
................................................................................
/*
** End the current read transaction on pWal. Any open write transaction
** is ended first. Then, if a read-lock is held, the SHARED lock on
** read-lock slot Wal.readLock is released and Wal.readLock is reset to
** WAL_LOCK_NONE.
*/
void sqlite3WalEndReadTransaction(Wal *pWal){
  sqlite3WalEndWriteTransaction(pWal);

  /* Nothing more to do if no read-lock is currently held */
  if( pWal->readLock==WAL_LOCK_NONE ) return;

  walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
  pWal->readLock = WAL_LOCK_NONE;
}

/* Search hash table iHash for an entry matching page number
** pgno. Each call to this function searches a single hash table
** (each hash table indexes up to HASHTABLE_NPAGE frames).
................................................................................
    pWal->hdr.aFrameCksum[1] = aWalData[2];
    walCleanupHash(pWal);
  }

  return rc;
}




























/*
** This function is called just before writing a set of frames to the log
** file (see sqlite3WalFrames()). It checks to see if, instead of appending
** to the current log file, it is possible and desirable to switch to the
** other log file and write the new transaction to the start of it.
** If so, the wal-index header is updated accordingly - both in heap memory
** and in the *-shm file.
................................................................................
        / (pWal->szPage+WAL_FRAME_HDRSIZE);
      nWalSize = MAX(nWalSize, 1);
    }

    if( walidxGetMxFrame(&pWal->hdr, iApp)>=nWalSize ){
      volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
      if( walidxGetMxFrame(&pWal->hdr, !iApp)==0 || pInfo->nBackfill ){
        rc = wal2RestartOk(pWal, iApp);
        if( rc==SQLITE_OK ){
          int iNew = !iApp;
          pWal->nCkpt++;
          walidxSetFile(&pWal->hdr, iNew);
          walidxSetMxFrame(&pWal->hdr, iNew, 0);
          sqlite3Put4byte((u8*)&pWal->hdr.aSalt[0], pWal->hdr.aFrameCksum[0]);
          sqlite3Put4byte((u8*)&pWal->hdr.aSalt[1], pWal->hdr.aFrameCksum[1]);
          walIndexWriteHdr(pWal);
          pInfo->nBackfill = 0;
          wal2RestartFinished(pWal, iApp);
          walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
          pWal->readLock = iNew ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2;

          rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock));
        }else if( rc==SQLITE_BUSY ){
          rc = SQLITE_OK;
        }
      }
    }
  }else if( pWal->readLock==0 ){
    int cnt;
................................................................................
  */
  assert( pWal->readLock!=WAL_LOCK_NONE || pWal->lockError );
  assert( pWal->readLock!=WAL_LOCK_NONE || (op<=0 && pWal->exclusiveMode==0) );

  if( op==0 ){
    if( pWal->exclusiveMode ){
      pWal->exclusiveMode = WAL_NORMAL_MODE;



      rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock));

      if( rc!=SQLITE_OK ){
        pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
      }
      rc = pWal->exclusiveMode==WAL_NORMAL_MODE;
    }else{
      /* Already in locking_mode=NORMAL */
      rc = 0;
    }
  }else if( op>0 ){
    assert( pWal->exclusiveMode==WAL_NORMAL_MODE );
    assert( pWal->readLock>=0 );



    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));

    pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
    rc = 1;
  }else{
    rc = pWal->exclusiveMode==WAL_NORMAL_MODE;
  }
  return rc;
}

Changes to test/wal2simple.test.

239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254

do_test 6.1 {
  for {set i 0} {$i < 10} {incr i} {
    execsql "CREATE TABLE t$i (x);"
  }
} {}

puts "[file size test.db-wal] [file size test.db-wal2]"

do_test 6.2.1 {
  foreach f [glob -nocomplain test.db2*] { forcedelete $f }
  forcecopy test.db-wal2 test.db2-wal2
  sqlite3 db2 test.db2
  db2 eval { SELECT * FROM sqlite_master }
} {}
do_test 6.2.2 {







<
<







239
240
241
242
243
244
245


246
247
248
249
250
251
252

do_test 6.1 {
  for {set i 0} {$i < 10} {incr i} {
    execsql "CREATE TABLE t$i (x);"
  }
} {}



do_test 6.2.1 {
  foreach f [glob -nocomplain test.db2*] { forcedelete $f }
  forcecopy test.db-wal2 test.db2-wal2
  sqlite3 db2 test.db2
  db2 eval { SELECT * FROM sqlite_master }
} {}
do_test 6.2.2 {