SQLite

Check-in [669706431f]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Convert the wal-header and frame-header to 24 bytes. Extra information in both headers is designed to enhance robustness after crashes, though the extra information is currently unused. This is a snapshot of a work in progress.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 669706431f186f92fdc0856a6206419a1e843f46
User & Date: drh 2010-05-20 16:45:59.000
Context
2010-05-20
21:21
Make use of the extra information in the WAL header and frame header to enhance robustness. (check-in: 9580ecb7e3 user: drh tags: trunk)
16:45
Convert the wal-header and frame-header to 24 bytes. Extra information in both headers is designed to enhance robustness after crashes, though the extra information is currently unused. This is a snapshot of a work in progress. (check-in: 669706431f user: drh tags: trunk)
2010-05-19
23:41
Merge WIN32 WAL support into trunk. Still some issues with locking to resolve. (check-in: 43377663fc user: shaneh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/wal.c.
27
28
29
30
31
32
33
34
35
36


37
38
39

40
41
42
43
44
45
46
47
48
49

50

51
52
53
54
55
56
57
** A single WAL file can be used multiple times.  In other words, the
** WAL can fill up with frames and then be checkpointed and then new
** frames can overwrite the old ones.  A WAL always grows from beginning
** toward the end.  Checksums and counters attached to each frame are
** used to determine which frames within the WAL are valid and which
** are leftovers from prior checkpoints.
**
** The WAL header is 12 bytes in size and consists of the following three
** big-endian 32-bit unsigned integer values:
**


**     0: Database page size,
**     4: Randomly selected salt value 1,
**     8: Randomly selected salt value 2.

**
** Immediately following the header are zero or more frames. Each
** frame consists of a 16-byte header followed by a <page-size> bytes
** of page data. The header is broken into 4 big-endian 32-bit unsigned 
** integer values, as follows:
**
**     0: Page number.
**     4: For commit records, the size of the database image in pages 
**        after the commit. For all other records, zero.
**     8: Checksum value 1.

**    12: Checksum value 2.

**
** READER ALGORITHM
**
** To read a page from the database (call it page number P), a reader
** first checks the WAL to see if it contains page P.  If so, then the
** last valid instance of page P that is or is followed by a commit frame
** become the value read.  If the WAL contains no copies of page P that







|


>
>
|
|
|
>

|
|
|





|
>
|
>







27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
** A single WAL file can be used multiple times.  In other words, the
** WAL can fill up with frames and then be checkpointed and then new
** frames can overwrite the old ones.  A WAL always grows from beginning
** toward the end.  Checksums and counters attached to each frame are
** used to determine which frames within the WAL are valid and which
** are leftovers from prior checkpoints.
**
** The WAL header is 24 bytes in size and consists of the following six
** big-endian 32-bit unsigned integer values:
**
**     0: Magic number.  0x377f0682 (big endian)
**     4: File format version.  Currently 3007000
**     8: Database page size.  Example: 1024
**    12: Checkpoint sequence number
**    16: Salt-1, random integer that changes with each checkpoint
**    20: Salt-2, a different random integer changing with salt-1
**
** Immediately following the wal-header are zero or more frames. Each
** frame consists of a 24-byte frame-header followed by <page-size> bytes
** of page data. The frame-header is broken into 6 big-endian 32-bit unsigned 
** integer values, as follows:
**
**     0: Page number.
**     4: For commit records, the size of the database image in pages 
**        after the commit. For all other records, zero.
**     8: Checkpoint sequence number (copied from the header)
**    12: Salt-1 (copied from the header)
**    16: Checksum-1.
**    20: Checksum-2.
**
** READER ALGORITHM
**
** To read a page from the database (call it page number P), a reader
** first checks the WAL to see if it contains page P.  If so, then the
** last valid instance of page P that is or is followed by a commit frame
** becomes the value read.  If the WAL contains no copies of page P that
176
177
178
179
180
181
182


183
184
185
186
187
188
189
190

/* Object declarations */
typedef struct WalIndexHdr WalIndexHdr;
typedef struct WalIterator WalIterator;


/*


** The following object stores a copy of the wal-index header.
**
** Member variables iCheck1 and iCheck2 contain the checksum for the
** last frame written to the wal, or 2 and 3 respectively if the log 
** is currently empty.
*/
struct WalIndexHdr {
  u32 iChange;          /* Counter incremented each transaction */







>
>
|







181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197

/* Object declarations */
typedef struct WalIndexHdr WalIndexHdr;
typedef struct WalIterator WalIterator;


/*
** The following object stores information from the wal-index header.
**
** This object is *not* a copy of the wal-index header.
**
** Member variables iCheck1 and iCheck2 contain the checksum for the
** last frame written to the wal, or 2 and 3 respectively if the log 
** is currently empty.
*/
struct WalIndexHdr {
  u32 iChange;          /* Counter incremented each transaction */
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
** do not read or write data from the region of the file on which locks
** are applied.
*/
#define WALINDEX_LOCK_OFFSET   ((sizeof(WalIndexHdr))+2*sizeof(u32))
#define WALINDEX_LOCK_RESERVED 8

/* Size of header before each frame in wal */
#define WAL_FRAME_HDRSIZE 16

/* Size of write ahead log header */
#define WAL_HDRSIZE 12

/*
** Return the offset of frame iFrame in the write-ahead log file, 
** assuming a database page size of szPage bytes. The offset returned
** is to the start of the write-ahead log frame-header.
*/
#define walFrameOffset(iFrame, szPage) (                               \







|


|







210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
** do not read or write data from the region of the file on which locks
** are applied.
*/
#define WALINDEX_LOCK_OFFSET   ((sizeof(WalIndexHdr))+2*sizeof(u32))
#define WALINDEX_LOCK_RESERVED 8

/* Size of header before each frame in wal */
#define WAL_FRAME_HDRSIZE 24

/* Size of write ahead log header */
#define WAL_HDRSIZE 24

/*
** Return the offset of frame iFrame in the write-ahead log file, 
** assuming a database page size of szPage bytes. The offset returned
** is to the start of the write-ahead log frame-header.
*/
#define walFrameOffset(iFrame, szPage) (                               \
234
235
236
237
238
239
240


241
242
243
244
245
246
247
  volatile u32 *pWiData;     /* Pointer to wal-index content in memory */
  u8 lockState;              /* SQLITE_SHM_xxxx constant showing lock state */
  u8 readerType;             /* SQLITE_SHM_READ or SQLITE_SHM_READ_FULL */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 isWindexOpen;           /* True if ShmOpen() called on pDbFd */
  WalIndexHdr hdr;           /* Wal-index for current snapshot */
  char *zWalName;            /* Name of WAL file */


};


/*
** This structure is used to implement an iterator that loops through
** all frames in the WAL in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 







>
>







241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
  volatile u32 *pWiData;     /* Pointer to wal-index content in memory */
  u8 lockState;              /* SQLITE_SHM_xxxx constant showing lock state */
  u8 readerType;             /* SQLITE_SHM_READ or SQLITE_SHM_READ_FULL */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 isWindexOpen;           /* True if ShmOpen() called on pDbFd */
  WalIndexHdr hdr;           /* Wal-index for current snapshot */
  char *zWalName;            /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence number */
  u32 iSalt1, iSalt2;        /* Two random salt values */
};


/*
** This structure is used to implement an iterator that loops through
** all frames in the WAL in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 
328
329
330
331
332
333
334
335
336


337
338
339

340
341

342
343
344
345
346
347
348
349
350
351
352


353
354
355
356
357
358
359
360
361
362
363
364
365

366
367
368
369
370
371
372
373









374
375
376
377
378
379
380
381
382
383
384
385
386
}

/*
** Build the 16-byte frame-header for one WAL frame in the caller's
** buffer aFrame[].  The header holds four big-endian 32-bit fields:
**
**     0: Page number.
**     4: New database size (for commit frames, otherwise zero).
**     8: Frame checksum 1.
**    12: Frame checksum 2.
**
** The running checksum in aCksum[] is advanced over the leading 8 bytes
** of the header and then over the nData bytes of page content at aData.
** The updated pair is what gets stored in the two checksum fields.
*/
static void walEncodeFrame(
  u32 *aCksum,                    /* IN/OUT: Running checksum pair */
  u32 iPage,                      /* Database page number for frame */
  u32 nTruncate,                  /* New db size (or 0 for non-commit frames) */
  int nData,                      /* Database page size (size of aData[]) */
  u8 *aData,                      /* Pointer to page data (for checksum) */
  u8 *aFrame                      /* OUT: Write encoded frame here */
){
  u8 *zOut = aFrame;              /* Next header field to be written */

  assert( WAL_FRAME_HDRSIZE==16 );

  /* Page number and database size: the checksummed part of the header. */
  sqlite3Put4byte(zOut, iPage);
  zOut += 4;
  sqlite3Put4byte(zOut, nTruncate);
  zOut += 4;

  /* Advance the running checksum over the header prefix, then the page. */
  walChecksumBytes(aFrame, 8, aCksum);
  walChecksumBytes(aData, nData, aCksum);

  /* Store the updated checksum pair in the final two fields. */
  sqlite3Put4byte(zOut, aCksum[0]);
  sqlite3Put4byte(zOut+4, aCksum[1]);
}

/*
** Return 1 and populate *piPage, *pnTruncate and aCksum if the 
** frame checksum looks Ok. Otherwise return 0.
*/
static int walDecodeFrame(

  u32 *aCksum,                    /* IN/OUT: Checksum values */
  u32 *piPage,                    /* OUT: Database page number for frame */
  u32 *pnTruncate,                /* OUT: New db size (or 0 if not commit) */
  int nData,                      /* Database page size (size of aData[]) */
  u8 *aData,                      /* Pointer to page data (for checksum) */
  u8 *aFrame                      /* Frame data */
){
  assert( WAL_FRAME_HDRSIZE==16 );










  walChecksumBytes(aFrame, 8, aCksum);
  walChecksumBytes(aData, nData, aCksum);

  if( aCksum[0]!=sqlite3Get4byte(&aFrame[8]) 
   || aCksum[1]!=sqlite3Get4byte(&aFrame[12]) 
  ){
    /* Checksum failed. */
    return 0;
  }

  *piPage = sqlite3Get4byte(&aFrame[0]);
  *pnTruncate = sqlite3Get4byte(&aFrame[4]);







<
|
>
>
|
|
|
>


>







|



>
>




|
|







>







|
>
>
>
>
>
>
>
>
>



<
|
|







337
338
339
340
341
342
343

344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400

401
402
403
404
405
406
407
408
409
}

/*
** Build the 24-byte frame-header for one WAL frame in the caller's
** buffer aFrame[].  The header holds six big-endian 32-bit fields:
**
**     0: Page number.
**     4: For commit records, the size of the database image in pages
**        after the commit. For all other records, zero.
**     8: Checkpoint sequence number (pWal->nCkpt).
**    12: Salt-1 (pWal->iSalt1).
**    16: Checksum-1.
**    20: Checksum-2.
**
** The running checksum in aCksum[] is advanced over the leading 8 bytes
** of the header and then over the nData bytes of page content at aData.
** The updated pair is what gets stored in the two checksum fields.
*/
static void walEncodeFrame(
  Wal *pWal,                      /* The write-ahead log */
  u32 *aCksum,                    /* IN/OUT: Running checksum pair */
  u32 iPage,                      /* Database page number for frame */
  u32 nTruncate,                  /* New db size (or 0 for non-commit frames) */
  int nData,                      /* Database page size (size of aData[]) */
  u8 *aData,                      /* Pointer to page data (for checksum) */
  u8 *aFrame                      /* OUT: Write encoded frame here */
){
  u8 *zOut = aFrame;              /* Next header field to be written */

  assert( WAL_FRAME_HDRSIZE==24 );

  /* Page number and database size: the checksummed part of the header. */
  sqlite3Put4byte(zOut, iPage);
  zOut += 4;
  sqlite3Put4byte(zOut, nTruncate);
  zOut += 4;

  /* Checkpoint counter and salt, both taken from the Wal object. */
  sqlite3Put4byte(zOut, pWal->nCkpt);
  zOut += 4;
  sqlite3Put4byte(zOut, pWal->iSalt1);
  zOut += 4;

  /* Advance the running checksum over the header prefix, then the page. */
  walChecksumBytes(aFrame, 8, aCksum);
  walChecksumBytes(aData, nData, aCksum);

  /* Store the updated checksum pair in the final two fields. */
  sqlite3Put4byte(zOut, aCksum[0]);
  sqlite3Put4byte(zOut+4, aCksum[1]);
}

/*
** Return 1 and populate *piPage, *pnTruncate and aCksum if the 
** frame checksum looks Ok. Otherwise return 0.
*/
static int walDecodeFrame(
  Wal *pWal,                      /* The write-ahead log */
  u32 *aCksum,                    /* IN/OUT: Checksum values */
  u32 *piPage,                    /* OUT: Database page number for frame */
  u32 *pnTruncate,                /* OUT: New db size (or 0 if not commit) */
  int nData,                      /* Database page size (size of aData[]) */
  u8 *aData,                      /* Pointer to page data (for checksum) */
  u8 *aFrame                      /* Frame data */
){
  assert( WAL_FRAME_HDRSIZE==24 );

#if 0
  if( pWal->nCkpt!=sqlite3Get4byte(&aFrame[8]) ){
    return 0;
  }
  if( pWal->iSalt1!=sqlite3Get4byte(&aFrame[12]) ){
    return 0;
  }
#endif

  walChecksumBytes(aFrame, 8, aCksum);
  walChecksumBytes(aData, nData, aCksum);

  if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) 
   || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) 
  ){
    /* Checksum failed. */
    return 0;
  }

  *piPage = sqlite3Get4byte(&aFrame[0]);
  *pnTruncate = sqlite3Get4byte(&aFrame[4]);
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644

645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666

667
668
669
670
671
672
673

  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  if( nSize>WAL_FRAME_HDRSIZE ){
    u8 aBuf[WAL_FRAME_HDRSIZE];   /* Buffer to load first frame header into */
    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
    int szFrame;                  /* Number of bytes in buffer aFrame[] */
    u8 *aData;                    /* Pointer to data part of aFrame buffer */
    int iFrame;                   /* Index of last frame read */
    i64 iOffset;                  /* Next offset to read from log file */
    int szPage;                   /* Page size according to the log */
    u32 aCksum[2];                /* Running checksum */

    /* Read in the first frame header in the file (to determine the 
    ** database page size).
    */
    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* If the database page size is not a power of two, or is greater than
    ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid data.
    */
    szPage = sqlite3Get4byte(&aBuf[0]);
    if( szPage&(szPage-1) || szPage>SQLITE_MAX_PAGE_SIZE || szPage<512 ){
      goto finished;
    }

    aCksum[0] = sqlite3Get4byte(&aBuf[4]);
    aCksum[1] = sqlite3Get4byte(&aBuf[8]);

    /* Malloc a buffer to read frames into. */
    szFrame = szPage + WAL_FRAME_HDRSIZE;
    aFrame = (u8 *)sqlite3_malloc(szFrame);
    if( !aFrame ){
      return SQLITE_NOMEM;
    }
    aData = &aFrame[WAL_FRAME_HDRSIZE];

    /* Read all frames from the log file. */
    iFrame = 0;
    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
      u32 pgno;                   /* Database page number for frame */
      u32 nTruncate;              /* dbsize field from frame header */
      int isValid;                /* True if this frame is valid */

      /* Read and decode the next log frame. */
      rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
      if( rc!=SQLITE_OK ) break;
      isValid = walDecodeFrame(aCksum, &pgno, &nTruncate, szPage, aData, aFrame);

      if( !isValid ) break;
      rc = walIndexAppend(pWal, ++iFrame, pgno);
      if( rc!=SQLITE_OK ) break;

      /* If nTruncate is non-zero, this is a commit record. */
      if( nTruncate ){
        hdr.iCheck1 = aCksum[0];







|



















|



>
|
|



















|
>







637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698

  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  if( nSize>WAL_FRAME_HDRSIZE ){
    u8 aBuf[WAL_HDRSIZE];         /* Buffer to load first frame header into */
    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
    int szFrame;                  /* Number of bytes in buffer aFrame[] */
    u8 *aData;                    /* Pointer to data part of aFrame buffer */
    int iFrame;                   /* Index of last frame read */
    i64 iOffset;                  /* Next offset to read from log file */
    int szPage;                   /* Page size according to the log */
    u32 aCksum[2];                /* Running checksum */

    /* Read in the wal-header at the start of the file (to determine the
    ** database page size).
    */
    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* If the database page size is not a power of two, is less than 512,
    ** or is greater than SQLITE_MAX_PAGE_SIZE, conclude that the WAL file
    ** contains no valid data.
    */
    szPage = sqlite3Get4byte(&aBuf[8]);
    if( szPage&(szPage-1) || szPage>SQLITE_MAX_PAGE_SIZE || szPage<512 ){
      goto finished;
    }
    pWal->nCkpt = sqlite3Get4byte(&aBuf[12]);
    aCksum[0] = sqlite3Get4byte(&aBuf[16]);
    aCksum[1] = sqlite3Get4byte(&aBuf[20]);

    /* Malloc a buffer to read frames into. */
    szFrame = szPage + WAL_FRAME_HDRSIZE;
    aFrame = (u8 *)sqlite3_malloc(szFrame);
    if( !aFrame ){
      return SQLITE_NOMEM;
    }
    aData = &aFrame[WAL_FRAME_HDRSIZE];

    /* Read all frames from the log file. */
    iFrame = 0;
    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
      u32 pgno;                   /* Database page number for frame */
      u32 nTruncate;              /* dbsize field from frame header */
      int isValid;                /* True if this frame is valid */

      /* Read and decode the next log frame. */
      rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
      if( rc!=SQLITE_OK ) break;
      isValid = walDecodeFrame(pWal, aCksum, &pgno, &nTruncate, szPage,
                               aData, aFrame);
      if( !isValid ) break;
      rc = walIndexAppend(pWal, ++iFrame, pgno);
      if( rc!=SQLITE_OK ) break;

      /* If nTruncate is non-zero, this is a commit record. */
      if( nTruncate ){
        hdr.iCheck1 = aCksum[0];
1001
1002
1003
1004
1005
1006
1007

1008
1009
1010
1011
1012
1013
1014
    rc = sqlite3OsSync(pWal->pDbFd, sync_flags);
    if( rc!=SQLITE_OK ) goto out;
  }
  pWal->hdr.mxFrame = 0;
  pWal->hdr.iCheck1 = 2;
  pWal->hdr.iCheck2 = 3;
  walIndexWriteHdr(pWal, &pWal->hdr);


  /* TODO: If a crash occurs and the current log is copied into the 
  ** database there is no problem. However, if a crash occurs while
  ** writing the next transaction into the start of the log, such that:
  **
  **   * The first transaction currently in the log is left intact, but
  **   * The second (or subsequent) transaction is damaged,







>







1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
    rc = sqlite3OsSync(pWal->pDbFd, sync_flags);
    if( rc!=SQLITE_OK ) goto out;
  }
  pWal->hdr.mxFrame = 0;
  pWal->hdr.iCheck1 = 2;
  pWal->hdr.iCheck2 = 3;
  walIndexWriteHdr(pWal, &pWal->hdr);
  pWal->nCkpt++;

  /* TODO: If a crash occurs and the current log is copied into the 
  ** database there is no problem. However, if a crash occurs while
  ** writing the next transaction into the start of the log, such that:
  **
  **   * The first transaction currently in the log is left intact, but
  **   * The second (or subsequent) transaction is damaged,
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
/*
** Reset the in-memory wal-index header so that frame iFrame is the last
** frame: hdr.mxFrame is set to iFrame.
**
** When iFrame is greater than zero, the 8 bytes that follow the first two
** 32-bit fields of frame iFrame's header (the two checksum fields of the
** 16-byte frame-header) are read back from the WAL file and stored in
** hdr.iCheck1 and hdr.iCheck2.  If the read fails, the checksum fields are
** left untouched, so the uninitialised local buffer is never decoded.
**
** The caller must hold the write lock (asserted below).  Returns SQLITE_OK
** or the error code reported by sqlite3OsRead().
*/
int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame){
  int rc = SQLITE_OK;
  u8 aCksum[8];                   /* Raw bytes of the two checksum fields */
  assert( pWal->lockState==SQLITE_SHM_WRITE );

  pWal->hdr.mxFrame = iFrame;
  if( iFrame>0 ){
    /* Skip the page-number and db-size fields to reach the checksums. */
    i64 iOffset = walFrameOffset(iFrame, pWal->hdr.szPage) + sizeof(u32)*2;
    rc = sqlite3OsRead(pWal->pWalFd, aCksum, sizeof(aCksum), iOffset);
    if( rc==SQLITE_OK ){
      /* Only trust aCksum[] once the read is known to have succeeded. */
      pWal->hdr.iCheck1 = sqlite3Get4byte(&aCksum[0]);
      pWal->hdr.iCheck2 = sqlite3Get4byte(&aCksum[4]);
    }
  }

  return rc;
}







|







1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
/*
** Reset the in-memory wal-index header so that frame iFrame is the last
** frame: hdr.mxFrame is set to iFrame.
**
** When iFrame is greater than zero, the 8 bytes that follow the first four
** 32-bit fields of frame iFrame's header (Checksum-1 and Checksum-2 of the
** 24-byte frame-header) are read back from the WAL file and stored in
** hdr.iCheck1 and hdr.iCheck2.  If the read fails, the checksum fields are
** left untouched, so the uninitialised local buffer is never decoded.
**
** The caller must hold the write lock (asserted below).  Returns SQLITE_OK
** or the error code reported by sqlite3OsRead().
*/
int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame){
  int rc = SQLITE_OK;
  u8 aCksum[8];                   /* Raw bytes of the two checksum fields */
  assert( pWal->lockState==SQLITE_SHM_WRITE );

  pWal->hdr.mxFrame = iFrame;
  if( iFrame>0 ){
    /* Skip page number, db size, checkpoint counter and salt-1. */
    i64 iOffset = walFrameOffset(iFrame, pWal->hdr.szPage) + sizeof(u32)*4;
    rc = sqlite3OsRead(pWal->pWalFd, aCksum, sizeof(aCksum), iOffset);
    if( rc==SQLITE_OK ){
      /* Only trust aCksum[] once the read is known to have succeeded. */
      pWal->hdr.iCheck1 = sqlite3Get4byte(&aCksum[0]);
      pWal->hdr.iCheck2 = sqlite3Get4byte(&aCksum[4]);
    }
  }

  return rc;
}
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509



1510

1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
  u32 iFrame;                     /* Next frame address */
  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */
  PgHdr *p;                       /* Iterator to run through pList with. */
  u32 aCksum[2];                  /* Checksums */
  PgHdr *pLast = 0;               /* Last frame in list */
  int nLast = 0;                  /* Number of extra copies of last page */

  assert( WAL_FRAME_HDRSIZE==(4 * 2 + 2*sizeof(u32)) );
  assert( pList );
  assert( pWal->lockState==SQLITE_SHM_WRITE );
  assert( pWal->pWiData==0 );

  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */
  assert( WAL_FRAME_HDRSIZE>=WAL_HDRSIZE );
  iFrame = pWal->hdr.mxFrame;
  if( iFrame==0 ){



    sqlite3Put4byte(aFrame, szPage);

    sqlite3_randomness(8, &aFrame[4]);
    pWal->hdr.iCheck1 = sqlite3Get4byte(&aFrame[4]);
    pWal->hdr.iCheck2 = sqlite3Get4byte(&aFrame[8]);
    rc = sqlite3OsWrite(pWal->pWalFd, aFrame, WAL_HDRSIZE, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }

  aCksum[0] = pWal->hdr.iCheck1;
  aCksum[1] = pWal->hdr.iCheck2;

  /* Write the log file. */
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */

    iOffset = walFrameOffset(++iFrame, szPage);
    
    /* Populate and write the frame header */
    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
    walEncodeFrame(aCksum, p->pgno, nDbsize, szPage, p->pData, aFrame);
    rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* Write the page data */
    rc = sqlite3OsWrite(pWal->pWalFd, p->pData, szPage, iOffset+sizeof(aFrame));







<








<


>
>
>
|
>
|
|
|
|

















|







1517
1518
1519
1520
1521
1522
1523

1524
1525
1526
1527
1528
1529
1530
1531

1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
  u32 iFrame;                     /* Next frame address */
  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */
  PgHdr *p;                       /* Iterator to run through pList with. */
  u32 aCksum[2];                  /* Checksums */
  PgHdr *pLast = 0;               /* Last frame in list */
  int nLast = 0;                  /* Number of extra copies of last page */


  assert( pList );
  assert( pWal->lockState==SQLITE_SHM_WRITE );
  assert( pWal->pWiData==0 );

  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */

  iFrame = pWal->hdr.mxFrame;
  if( iFrame==0 ){
    u8 aWalHdr[WAL_HDRSIZE];        /* Buffer to assembly wal-header in */
    sqlite3Put4byte(&aWalHdr[0], 0x377f0682);
    sqlite3Put4byte(&aWalHdr[4], 3007000);
    sqlite3Put4byte(&aWalHdr[8], szPage);
    sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt);
    sqlite3_randomness(8, &aWalHdr[16]);
    pWal->hdr.iCheck1 = pWal->iSalt1 = sqlite3Get4byte(&aWalHdr[16]);
    pWal->hdr.iCheck2 = pWal->iSalt2 = sqlite3Get4byte(&aWalHdr[20]);
    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }

  aCksum[0] = pWal->hdr.iCheck1;
  aCksum[1] = pWal->hdr.iCheck2;

  /* Write the log file. */
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */

    iOffset = walFrameOffset(++iFrame, szPage);
    
    /* Populate and write the frame header */
    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
    walEncodeFrame(pWal, aCksum, p->pgno, nDbsize, szPage, p->pData, aFrame);
    rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* Write the page data */
    rc = sqlite3OsWrite(pWal->pWalFd, p->pData, szPage, iOffset+sizeof(aFrame));
1549
1550
1551
1552
1553
1554
1555

1556
1557
1558
1559
1560
1561
1562
1563
    i64 iOffset = walFrameOffset(iFrame+1, szPage);

    assert( isCommit );
    assert( iSegment>0 );

    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
    while( iOffset<iSegment ){

      walEncodeFrame(aCksum,pLast->pgno,nTruncate,szPage,pLast->pData,aFrame);
      rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }

      iOffset += WAL_FRAME_HDRSIZE;
      rc = sqlite3OsWrite(pWal->pWalFd, pLast->pData, szPage, iOffset); 







>
|







1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
    i64 iOffset = walFrameOffset(iFrame+1, szPage);

    assert( isCommit );
    assert( iSegment>0 );

    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
    while( iOffset<iSegment ){
      walEncodeFrame(pWal, aCksum, pLast->pgno, nTruncate, szPage,
                     pLast->pData, aFrame);
      rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }

      iOffset += WAL_FRAME_HDRSIZE;
      rc = sqlite3OsWrite(pWal->pWalFd, pLast->pData, szPage, iOffset); 
Changes to test/wal.test.
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
  [lindex $args 0] eval { PRAGMA page_size = 1024 }
  [lindex $args 0] eval { PRAGMA journal_mode = wal }
  [lindex $args 0] eval { PRAGMA synchronous = normal }
  [lindex $args 0] function blob blob
}

# Return the size in bytes of a log file holding nFrame frames of
# pgsz-byte pages: 12 bytes, plus (pgsz + 16) bytes for every frame.
proc log_file_size {nFrame pgsz} {
  set szWalHdr 12
  set szFrame [expr {$pgsz + 16}]
  return [expr {$szWalHdr + $szFrame * $nFrame}]
}

# Return 1 if no file named $logfile exists, or 0 if one does.
proc log_deleted {logfile} {
  set present [file exists $logfile]
  return [expr {$present == 0}]
}

#







|







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
  [lindex $args 0] eval { PRAGMA page_size = 1024 }
  [lindex $args 0] eval { PRAGMA journal_mode = wal }
  [lindex $args 0] eval { PRAGMA synchronous = normal }
  [lindex $args 0] function blob blob
}

# Return the size in bytes of a log file holding nFrame frames of
# pgsz-byte pages: 24 bytes, plus (pgsz + 24) bytes for every frame.
proc log_file_size {nFrame pgsz} {
  set szWalHdr 24
  set szFrame [expr {$pgsz + 24}]
  return [expr {$szWalHdr + $szFrame * $nFrame}]
}

# Return 1 if no file named $logfile exists, or 0 if one does.
proc log_deleted {logfile} {
  set present [file exists $logfile]
  return [expr {$present == 0}]
}

#
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
# number of "padding" frames are appended to the log file when a transaction
# is committed in synchronous=FULL mode.
# 
# Do this by creating a database that uses 512 byte pages. Then writing
# a transaction that modifies 171 pages. In synchronous=NORMAL mode, this
# produces a log file of:
#
#   12 + (16+512)*171 = 90300 bytes.
#
# Slightly larger than 11*8192 = 90112 bytes.
#
# Run the test using various different sector-sizes. In each case, the
# WAL code should write the 90300 bytes of log file containing the 
# transaction, then append as many frames as are required to extend the
# log file so that no part of the next transaction will be written into
# a disk-sector used by transaction just committed.
#
set old_pending_byte [sqlite3_test_control_pending_byte 0x10000000]
catch { db close }
foreach {tn sectorsize logsize} {
  1   128  90828
  2   256  90828
  3   512  90828 
  4  1024  91356
  5  2048  92412
  6  4096  94524
  7  8192  98748
} {
  file delete -force test.db test.db-wal test.db-journal
  sqlite3_simulate_device -sectorsize $sectorsize
  sqlite3 db test.db -vfs devsym

  do_test wal-17.$tn.1 {
    execsql {







|












|
|
|
|
|
|
|







1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
# number of "padding" frames are appended to the log file when a transaction
# is committed in synchronous=FULL mode.
# 
# Do this by creating a database that uses 512 byte pages. Then writing
# a transaction that modifies 171 pages. In synchronous=NORMAL mode, this
# produces a log file of:
#
#   24 + (24+512)*171 = 90312 bytes.
#
# Slightly larger than 11*8192 = 90112 bytes.
#
# Run the test using various different sector-sizes. In each case, the
# WAL code should write the 90312 bytes of log file containing the 
# transaction, then append as many frames as are required to extend the
# log file so that no part of the next transaction will be written into
# a disk-sector used by transaction just committed.
#
set old_pending_byte [sqlite3_test_control_pending_byte 0x10000000]
catch { db close }
foreach {tn sectorsize logsize} {
  1   128  92216
  2   256  92216
  3   512  92216 
  4  1024  92216
  5  2048  92216
  6  4096  94360
  7  8192  98648
} {
  file delete -force test.db test.db-wal test.db-journal
  sqlite3_simulate_device -sectorsize $sectorsize
  sqlite3 db test.db -vfs devsym

  do_test wal-17.$tn.1 {
    execsql {
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
         5      {0 0 1 2 3 4}
         6      {0 0 1 2 3 4 5 6}
} {
  do_test wal-18.1.$nFrame {
    file copy -force testX.db test.db
    file copy -force testX.db-wal test.db-wal

    hexio_write test.db-wal [expr 12 + $nFrame*(16+1024) + 12] 00000000

    sqlite3 db test.db
    execsql { 
      SELECT * FROM t1;
      PRAGMA integrity_check; 
    }
  } [concat $result ok]







|







1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
         5      {0 0 1 2 3 4}
         6      {0 0 1 2 3 4 5 6}
} {
  do_test wal-18.1.$nFrame {
    file copy -force testX.db test.db
    file copy -force testX.db-wal test.db-wal

    hexio_write test.db-wal [expr 24 + $nFrame*(24+1024) + 20] 00000000

    sqlite3 db test.db
    execsql { 
      SELECT * FROM t1;
      PRAGMA integrity_check; 
    }
  } [concat $result ok]
1334
1335
1336
1337
1338
1339
1340
1341

1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
      # the database will be corrupt (because the garbage frame contents
      # will be treated as valid content). If $pgsz is invalid (too small
      # or too large), the db will not be corrupt as the log file will
      # be ignored.
      #
      set c1 22
      set c2 23
      set walhdr [binary format III $pgsz $c1 $c2]

      set framebody [randomblob $pgsz]
      set framehdr  [binary format II $pg 5]
      logcksum c1 c2 $framehdr
      logcksum c1 c2 $framebody
      set framehdr [binary format IIII $pg 5 $c1 $c2]
      set fd [open test.db-wal w]
      fconfigure $fd -encoding binary -translation binary
      puts -nonewline $fd $walhdr
      puts -nonewline $fd $framehdr
      puts -nonewline $fd $framebody
      close $fd
  







|
>




|







1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
      # the database will be corrupt (because the garbage frame contents
      # will be treated as valid content). If $pgsz is invalid (too small
      # or too large), the db will not be corrupt as the log file will
      # be ignored.
      #
      set c1 22
      set c2 23
      set walhdr [binary format IIIIII 931071618 3007000 $pgsz 1234 $c1 $c2]
      set salt1 $c1
      set framebody [randomblob $pgsz]
      set framehdr  [binary format II $pg 5]
      logcksum c1 c2 $framehdr
      logcksum c1 c2 $framebody
      set framehdr [binary format IIIIII $pg 5 1234 $salt1 $c1 $c2]
      set fd [open test.db-wal w]
      fconfigure $fd -encoding binary -translation binary
      puts -nonewline $fd $walhdr
      puts -nonewline $fd $framehdr
      puts -nonewline $fd $framebody
      close $fd
  
Changes to test/walbak.test.
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !wal {finish_test ; return }

# Return the size in bytes of a log file holding nFrame frames of
# pgsz-byte pages: 12 bytes, plus (pgsz + 16) bytes for every frame.
proc log_file_size {nFrame pgsz} {
  set szWalHdr 12
  set szFrame [expr {$pgsz + 16}]
  return [expr {$szWalHdr + $szFrame * $nFrame}]
}

# Test organization:
# 
#   walback-1.*: Simple tests.
#   walback-2.*: Test backups when the source db is modified mid-backup.
#







|







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !wal {finish_test ; return }

# Return the size in bytes of a log file holding nFrame frames of
# pgsz-byte pages: 24 bytes, plus (pgsz + 24) bytes for every frame.
proc log_file_size {nFrame pgsz} {
  set szWalHdr 24
  set szFrame [expr {$pgsz + 24}]
  return [expr {$szWalHdr + $szFrame * $nFrame}]
}

# Test organization:
# 
#   walback-1.*: Simple tests.
#   walback-2.*: Test backups when the source db is modified mid-backup.
#
179
180
181
182
183
184
185
186
} {SQLITE_DONE SQLITE_OK}
do_test walbak-2.12 {
  string compare [sig db] [sig db2]
} {0}
db2 close

finish_test








<
179
180
181
182
183
184
185

} {SQLITE_DONE SQLITE_OK}
do_test walbak-2.12 {
  string compare [sig db] [sig db2]
} {0}
db2 close

finish_test

Changes to test/walhook.test.
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !wal {finish_test ; return }

# Return the size in bytes of a log file holding nFrame frames of
# pgsz-byte pages: 12 bytes, plus (pgsz + 16) bytes for every frame.
proc log_file_size {nFrame pgsz} {
  set szWalHdr 12
  set szFrame [expr {$pgsz + 16}]
  return [expr {$szWalHdr + $szFrame * $nFrame}]
}

set ::wal_hook [list]
# Append zDb and nEntry to the global list ::wal_hook, then return 0.
# NOTE(review): presumably installed as the connection's WAL hook
# callback elsewhere in this test file -- confirm against the caller.
proc wal_hook {zDb nEntry} {
  upvar #0 wal_hook hookLog
  lappend hookLog $zDb $nEntry
  return 0
}







|







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !wal {finish_test ; return }

# Return the size in bytes of a log file holding nFrame frames of
# pgsz-byte pages: 24 bytes, plus (pgsz + 24) bytes for every frame.
proc log_file_size {nFrame pgsz} {
  set szWalHdr 24
  set szFrame [expr {$pgsz + 24}]
  return [expr {$szWalHdr + $szFrame * $nFrame}]
}

set ::wal_hook [list]
# Append zDb and nEntry to the global list ::wal_hook, then return 0.
# NOTE(review): presumably installed as the connection's WAL hook
# callback elsewhere in this test file -- confirm against the caller.
proc wal_hook {zDb nEntry} {
  upvar #0 wal_hook hookLog
  lappend hookLog $zDb $nEntry
  return 0
}