/ Check-in [5d6d4423]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Create a version of the log checksummer that works on big-endian platforms. Remove the 512KB size limit on the log-summary.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | wal
Files: files | file ages | folders
SHA1: 5d6d4423d1def39bd2424703120aa985085c3f8e
User & Date: dan 2010-04-22 19:14:14
Context
2010-04-23
11:44
Add very simple test cases for backup and VACUUM of WAL databases. More to come. check-in: 1077d813 user: dan tags: wal
2010-04-22
19:14
Create a version of the log checksummer that works on big-endian platforms. Remove the 512KB size limit on the log-summary. check-in: 5d6d4423 user: dan tags: wal
06:27
Further tests and changes related to switching between WAL and rollback modes. check-in: 12363184 user: dan tags: wal
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/log.c.

79
80
81
82
83
84
85









86
87
88
89
90
91
92
..
95
96
97
98
99
100
101

102
103
104
105
106
107
108
...
284
285
286
287
288
289
290





291
292
293
294







295
296
297
298
299
300
301
...
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
...
518
519
520
521
522
523
524






































525
526
527
528
529
530
531
...
538
539
540
541
542
543
544










545
546
547
548
549
550
551
...
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
...
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
** page size of pgsz bytes. The offset returned is to the start of the
** log frame-header.
*/
#define logFrameOffset(iFrame, pgsz) (                               \
  LOG_HDRSIZE + ((iFrame)-1)*((pgsz)+LOG_FRAME_HDRSIZE)              \
)










/*
** There is one instance of this structure for each log-summary object
** that this process has a connection to. They are stored in a linked
** list starting at pLogSummary (global variable).
**
** TODO: LogSummary.fd is a unix file descriptor. Unix APIs are used 
**       directly in this implementation because the VFS does not support
................................................................................
struct LogSummary {
  sqlite3_mutex *mutex;           /* Mutex used to protect this object */
  int nRef;                       /* Number of pointers to this structure */
  int fd;                         /* File descriptor open on log-summary */
  char *zPath;                    /* Path to associated WAL file */
  LogLock *pLock;                 /* Linked list of locks on this object */
  LogSummary *pNext;              /* Next in global list */

  int nData;                      /* Size of aData allocation/mapping, in bytes */
  u32 *aData;                     /* File body (mapped by logSummaryMap()) */
};

/*
** This module uses three different types of file-locks. All are taken
** on the log-summary file. The three types of locks are as follows:
................................................................................
  u64 sum2 = aCksum[1];
  u32 *a32 = (u32 *)aByte;
  u32 *aEnd = (u32 *)&aByte[nByte];

  assert( LOG_CKSM_BYTES==2*sizeof(u32) );
  assert( (nByte&0x00000003)==0 );






  do {
    sum1 += (*a32++);
    sum2 += sum1;
  } while( a32<aEnd );








  aCksum[0] = sum1 + (sum1>>24);
  aCksum[1] = sum2 + (sum2>>24);
}

/*
** Argument zPath must be a nul-terminated string containing a path-name.
................................................................................
      }
    }
    z[j++] = z[i];
  }
  z[j] = 0;
}

/*
** Memory map the first nByte bytes of the summary file opened with
** pSummary->fd at pSummary->aData. If the summary file is smaller than
** nByte bytes in size when this function is called, ftruncate() is
** used to expand it before it is mapped.
**
** It is assumed that an exclusive lock is held on the summary file
** by the caller (to protect the ftruncate()).
**
** pSummary->aData must be NULL on entry, i.e. there must be no existing
** mapping. On success SQLITE_OK is returned, pSummary->aData points to
** the new mapping and pSummary->nData is set to nByte (a size in bytes).
** If fstat(), ftruncate() or mmap() fails, SQLITE_IOERR is returned and
** pSummary->aData and pSummary->nData are left unmodified (although the
** file may already have been extended).
*/
static int logSummaryMap(LogSummary *pSummary, int nByte){
  struct stat sStat;
  int rc;
  int fd = pSummary->fd;
  void *pMap;

  /* The caller must have discarded any previous mapping. */
  assert( pSummary->aData==0 );

  /* If the file is less than nByte bytes in size, cause it to grow. */
  rc = fstat(fd, &sStat);
  if( rc!=0 ) return SQLITE_IOERR;
  if( sStat.st_size<nByte ){
    rc = ftruncate(fd, nByte);
    if( rc!=0 ) return SQLITE_IOERR;
  }

  /* Map the file as a shared read/write mapping starting at offset 0.
  ** mmap() reports failure with MAP_FAILED, not with a NULL pointer.
  */
  pMap = mmap(0, nByte, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
  if( pMap==MAP_FAILED ){
    return SQLITE_IOERR;
  }
  pSummary->aData = (u32 *)pMap;
  pSummary->nData = nByte;

  return SQLITE_OK;
}

/*
** Unmap the log-summary mapping and close the file-descriptor. If
** the isTruncate argument is non-zero, truncate the log-summary file
** region to zero bytes.
**
** Regardless of the value of isTruncate, close the file-descriptor
** opened on the log-summary file.
................................................................................
    for(i=1; i<*pnList; i++){
      assert( aContent[aList[i]] > aContent[aList[i-1]] );
    }
  }
#endif
}








































/*
** Return the index in the LogSummary.aData array that corresponds to 
** frame iFrame. The log-summary file consists of a header, followed by
** alternating "map" and "index" blocks.
*/
static int logSummaryEntry(u32 iFrame){
................................................................................
** page iPage. Values are always appended to the log-summary (i.e. the
** value of iFrame is always exactly one more than the value passed to
** the previous call), but that restriction is not enforced or asserted
** here.
*/
static void logSummaryAppend(LogSummary *pSummary, u32 iFrame, u32 iPage){
  u32 iSlot = logSummaryEntry(iFrame);











  /* Set the log-summary entry itself */
  pSummary->aData[iSlot] = iPage;

  /* If the frame number is a multiple of 256 (frames are numbered starting
  ** at 1), build an index of the most recently added 256 frames.
  */
................................................................................
      return rc;
    }

    /* If the database page size is not a power of two, or is greater than
    ** SQLITE_MAX_PAGE_SIZE, conclude that the log file contains no valid data.
    */
    nPgsz = sqlite3Get4byte(&aBuf[0]);
    if( nPgsz&(nPgsz-1) || nPgsz>SQLITE_MAX_PAGE_SIZE ){
      goto finished;
    }
    aCksum[0] = sqlite3Get4byte(&aBuf[4]);
    aCksum[1] = sqlite3Get4byte(&aBuf[8]);

    /* Malloc a buffer to read frames into. */
    nFrame = nPgsz + LOG_FRAME_HDRSIZE;
................................................................................
  ** pages should not be allocated until they are first accessed anyhow,
  ** so using a large mapping consumes no more resources than a smaller
  ** one would.
  */
  assert( sqlite3_mutex_held(pSummary->mutex) );
  rc = logLockMutex(pSummary, LOG_WRLOCKW);
  if( rc!=SQLITE_OK ) return rc;
  rc = logSummaryMap(pSummary, 512*1024);
  if( rc!=SQLITE_OK ) goto out;

  /* Try to obtain an EXCLUSIVE lock on the dead-mans-hand region. If this
  ** is possible, the contents of the log-summary file (if any) may not
  ** be trusted. Zero the log-summary header before continuing.
  */
  rc = logLockDMH(pSummary, LOG_WRLOCK);







>
>
>
>
>
>
>
>
>







 







>







 







>
>
>
>
>
|
|
|
|
>
>
>
>
>
>
>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>







 







|







 







|







79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
...
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
...
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
...
351
352
353
354
355
356
357




































358
359
360
361
362
363
364
...
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
...
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
...
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
...
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
** page size of pgsz bytes. The offset returned is to the start of the
** log frame-header.
*/
#define logFrameOffset(iFrame, pgsz) (                               \
  LOG_HDRSIZE + ((iFrame)-1)*((pgsz)+LOG_FRAME_HDRSIZE)              \
)

/*
** If using mmap() to access a shared (or otherwise) log-summary file, then
** the mapping size is incremented in units of the following size.
**
** A 64 KB log-summary mapping corresponds to a log file containing over
** 13000 frames, so the mapping size does not need to be increased often.
*/
#define LOGSUMMARY_MMAP_INCREMENT (64*1024)

/*
** There is one instance of this structure for each log-summary object
** that this process has a connection to. They are stored in a linked
** list starting at pLogSummary (global variable).
**
** TODO: LogSummary.fd is a unix file descriptor. Unix APIs are used 
**       directly in this implementation because the VFS does not support
................................................................................
struct LogSummary {
  sqlite3_mutex *mutex;           /* Mutex used to protect this object */
  int nRef;                       /* Number of pointers to this structure */
  int fd;                         /* File descriptor open on log-summary */
  char *zPath;                    /* Path to associated WAL file */
  LogLock *pLock;                 /* Linked list of locks on this object */
  LogSummary *pNext;              /* Next in global list */

  int nData;                      /* Size of aData mapping, in u32 (4-byte) units */
  u32 *aData;                     /* File body (mapped by logSummaryMap()) */
};

/*
** This module uses three different types of file-locks. All are taken
** on the log-summary file. The three types of locks are as follows:
................................................................................
  u64 sum2 = aCksum[1];
  u32 *a32 = (u32 *)aByte;
  u32 *aEnd = (u32 *)&aByte[nByte];

  assert( LOG_CKSM_BYTES==2*sizeof(u32) );
  assert( (nByte&0x00000003)==0 );

  if( SQLITE_LITTLEENDIAN ){
#ifdef SQLITE_DEBUG
    u8 *a = (u8 *)a32;
    assert( *a32==(a[0] + (a[1]<<8) + (a[2]<<16) + (a[3]<<24)) );
#endif
    do {
      sum1 += *a32;
      sum2 += sum1;
    } while( ++a32<aEnd );
  }else{
    do {
      u8 *a = (u8*)a32;
      sum1 += a[0] + (a[1]<<8) + (a[2]<<16) + (a[3]<<24);
      sum2 += sum1;
    } while( ++a32<aEnd );
  }

  aCksum[0] = sum1 + (sum1>>24);
  aCksum[1] = sum2 + (sum2>>24);
}

/*
** Argument zPath must be a nul-terminated string containing a path-name.
................................................................................
      }
    }
    z[j++] = z[i];
  }
  z[j] = 0;
}





































/*
** Unmap the log-summary mapping and close the file-descriptor. If
** the isTruncate argument is non-zero, truncate the log-summary file
** region to zero bytes.
**
** Regardless of the value of isTruncate, close the file-descriptor
** opened on the log-summary file.
................................................................................
    for(i=1; i<*pnList; i++){
      assert( aContent[aList[i]] > aContent[aList[i-1]] );
    }
  }
#endif
}


/*
** Memory map the summary file opened with pSummary->fd at
** pSummary->aData. At least nByte bytes are mapped:
**
**   * If the summary file is smaller than nByte bytes in size when this
**     function is called, ftruncate() is used to expand it to nByte
**     bytes before it is mapped.
**
**   * Otherwise nByte is replaced by the current file size, so that the
**     entire file is mapped.
**
** It is assumed that an exclusive lock is held on the summary file
** by the caller (to protect the ftruncate()).
**
** pSummary->aData must be NULL on entry, i.e. there must be no existing
** mapping. On success SQLITE_OK is returned, pSummary->aData points to
** the new mapping and pSummary->nData is set to the mapping size divided
** by 4, i.e. the number of u32 entries. If fstat(), ftruncate() or mmap()
** fails, SQLITE_IOERR is returned and pSummary->aData and
** pSummary->nData are left unmodified (although the file may already
** have been extended).
*/
static int logSummaryMap(LogSummary *pSummary, int nByte){
  struct stat sStat;
  int rc;
  int fd = pSummary->fd;
  void *pMap;

  /* The caller must have discarded any previous mapping. */
  assert( pSummary->aData==0 );

  /* If the file is less than nByte bytes in size, cause it to grow.
  ** Otherwise, map all of the existing file.
  **
  ** NOTE(review): st_size is assigned to an int here, which would narrow
  ** the value for a summary file larger than INT_MAX bytes - confirm that
  ** this cannot happen in practice.
  */
  rc = fstat(fd, &sStat);
  if( rc!=0 ) return SQLITE_IOERR;
  if( sStat.st_size<nByte ){
    rc = ftruncate(fd, nByte);
    if( rc!=0 ) return SQLITE_IOERR;
  }else{
    nByte = sStat.st_size;
  }

  /* Map the file as a shared read/write mapping starting at offset 0.
  ** mmap() reports failure with MAP_FAILED, not with a NULL pointer.
  */
  pMap = mmap(0, nByte, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
  if( pMap==MAP_FAILED ){
    return SQLITE_IOERR;
  }
  pSummary->aData = (u32 *)pMap;
  pSummary->nData = nByte/4;      /* nData counts u32 slots, not bytes */

  return SQLITE_OK;
}

/*
** Return the index in the LogSummary.aData array that corresponds to 
** frame iFrame. The log-summary file consists of a header, followed by
** alternating "map" and "index" blocks.
*/
static int logSummaryEntry(u32 iFrame){
................................................................................
** page iPage. Values are always appended to the log-summary (i.e. the
** value of iFrame is always exactly one more than the value passed to
** the previous call), but that restriction is not enforced or asserted
** here.
*/
static void logSummaryAppend(LogSummary *pSummary, u32 iFrame, u32 iPage){
  u32 iSlot = logSummaryEntry(iFrame);

  if( (iSlot+128)>=pSummary->nData ){
    int nByte = pSummary->nData*4 + LOGSUMMARY_MMAP_INCREMENT;

    sqlite3_mutex_enter(pSummary->mutex);
    munmap(pSummary->aData, pSummary->nData*4);
    pSummary->aData = 0;
    logSummaryMap(pSummary, nByte);
    sqlite3_mutex_leave(pSummary->mutex);
  }

  /* Set the log-summary entry itself */
  pSummary->aData[iSlot] = iPage;

  /* If the frame number is a multiple of 256 (frames are numbered starting
  ** at 1), build an index of the most recently added 256 frames.
  */
................................................................................
      return rc;
    }

    /* If the database page size is not a power of two, or is greater than
    ** SQLITE_MAX_PAGE_SIZE, conclude that the log file contains no valid data.
    */
    nPgsz = sqlite3Get4byte(&aBuf[0]);
    if( nPgsz&(nPgsz-1) || nPgsz>SQLITE_MAX_PAGE_SIZE || nPgsz<512 ){
      goto finished;
    }
    aCksum[0] = sqlite3Get4byte(&aBuf[4]);
    aCksum[1] = sqlite3Get4byte(&aBuf[8]);

    /* Malloc a buffer to read frames into. */
    nFrame = nPgsz + LOG_FRAME_HDRSIZE;
................................................................................
  ** pages should not be allocated until they are first accessed anyhow,
  ** so using a large mapping consumes no more resources than a smaller
  ** one would.
  */
  assert( sqlite3_mutex_held(pSummary->mutex) );
  rc = logLockMutex(pSummary, LOG_WRLOCKW);
  if( rc!=SQLITE_OK ) return rc;
  rc = logSummaryMap(pSummary, LOGSUMMARY_MMAP_INCREMENT);
  if( rc!=SQLITE_OK ) goto out;

  /* Try to obtain an EXCLUSIVE lock on the dead-mans-hand region. If this
  ** is possible, the contents of the log-summary file (if any) may not
  ** be trusted. Zero the log-summary header before continuing.
  */
  rc = logLockDMH(pSummary, LOG_WRLOCK);

Changes to src/pager.c.

3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
    rc = SQLITE_OK;
    *pExists = 0;
  }
  return rc;
}

/*
** Call sqlite3LogOpenSnapshot() on the log object of pager pPager, which
** must be using a log (see the assert). If that call succeeds and sets
** the "changed" flag, pager_reset() is invoked. Following any successful
** sqlite3LogOpenSnapshot() call, sqlite3PagerPagecount() is invoked and
** its page-count output discarded.
**
** The pager state is set to PAGER_SHARED whether or not an error occurs.
** The value returned is the result code of the last call made.
*/
static int pagerOpenSnapshot(Pager *pPager){
  int rc;
  /* NOTE(review): not initialized here; this relies on
  ** sqlite3LogOpenSnapshot() always setting it when it returns
  ** SQLITE_OK - confirm. */
  int changed;

  assert( pagerUseLog(pPager) );

  rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed);
  if( rc==SQLITE_OK ){
    int dummy;                    /* Page count output, unused */
    if( changed ){
        pager_reset(pPager);
        assert( pPager->errCode || pPager->dbSizeValid==0 );
    }
    rc = sqlite3PagerPagecount(pPager, &dummy);
  }
  pPager->state = PAGER_SHARED;

  return rc;
}







|
|







|
|







3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
    rc = SQLITE_OK;
    *pExists = 0;
  }
  return rc;
}

/*
** Call sqlite3LogOpenSnapshot() on the log object of pager pPager, which
** must be using a log (see the assert). If that call succeeds and sets
** the "changed" flag, pager_reset() is invoked. Following any successful
** sqlite3LogOpenSnapshot() call, sqlite3PagerPagecount() is invoked and
** its page-count output discarded.
**
** The pager state is set to PAGER_SHARED whether or not an error occurs.
** The value returned is the result code of the last call made.
*/
static int pagerOpenSnapshot(Pager *pPager){
  int rc;                         /* Return code */
  int changed = 0;                /* True if cache must be reset */

  assert( pagerUseLog(pPager) );

  rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed);
  if( rc==SQLITE_OK ){
    int dummy;                    /* Page count output, unused */
    if( changed ){
      pager_reset(pPager);
      assert( pPager->errCode || pPager->dbSizeValid==0 );
    }
    rc = sqlite3PagerPagecount(pPager, &dummy);
  }
  pPager->state = PAGER_SHARED;

  return rc;
}

Changes to test/wal.test.

672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706

707

































708
709
  }
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 1}
# After one more INSERT the database is still 3 units of 1024 bytes, and
# the log has grown from 1 to 2 units of 1044 bytes. Presumably 1044 is
# one 1024-byte page plus its frame header - confirm against src/log.c.
do_test wal-12.3 {
  execsql { INSERT INTO t2 VALUES('B', 1) }
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 2}

# Copy the database and its log file, then read the copy through a second
# connection. The row inserted above must be visible in the copy.
do_test wal-12.4 {
  file copy -force test.db test2.db
  file copy -force test.db-wal test2.db-wal
  sqlite3_wal db2 test2.db
  execsql { SELECT * FROM t2 } db2
} {B 1}
db2 close

# Interleave checkpoints with updates, then check that the original
# connection sees the updated row in t2.
do_test wal-12.5 {
  execsql {
    PRAGMA checkpoint;
    UPDATE t2 SET y = 2 WHERE x = 'B'; 
    PRAGMA checkpoint;
    UPDATE t1 SET y = 1 WHERE x = 'A';
    PRAGMA checkpoint;
    UPDATE t1 SET y = 0 WHERE x = 'A';
    SELECT * FROM t2;
  }
} {B 2}

# Copy the files again and check that the copy also shows the updated row.
# NOTE(review): this test reuses the name wal-12.4 from the test above; it
# looks like it should have a distinct name.
do_test wal-12.4 {
  file copy -force test.db test2.db
  file copy -force test.db-wal test2.db-wal
  sqlite3_wal db2 test2.db
  execsql { SELECT * FROM t2 } db2
} {B 2}
db2 close



































finish_test








<







<











<
|






>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


672
673
674
675
676
677
678

679
680
681
682
683
684
685

686
687
688
689
690
691
692
693
694
695
696

697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
  }
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 1}
# After one more INSERT the database is still 3 units of 1024 bytes, and
# the log has grown from 1 to 2 units of 1044 bytes. Presumably 1044 is
# one 1024-byte page plus its frame header - confirm against src/log.c.
do_test wal-12.3 {
  execsql { INSERT INTO t2 VALUES('B', 1) }
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 2}

# Copy the database and its log file, then read the copy through a second
# connection. The row inserted above must be visible in the copy.
do_test wal-12.4 {
  file copy -force test.db test2.db
  file copy -force test.db-wal test2.db-wal
  sqlite3_wal db2 test2.db
  execsql { SELECT * FROM t2 } db2
} {B 1}
db2 close

# Interleave checkpoints with updates, then check that the original
# connection sees the updated row in t2.
do_test wal-12.5 {
  execsql {
    PRAGMA checkpoint;
    UPDATE t2 SET y = 2 WHERE x = 'B'; 
    PRAGMA checkpoint;
    UPDATE t1 SET y = 1 WHERE x = 'A';
    PRAGMA checkpoint;
    UPDATE t1 SET y = 0 WHERE x = 'A';
    SELECT * FROM t2;
  }
} {B 2}

# Copy the files again and check that the copy also shows the updated row.
do_test wal-12.6 {
  file copy -force test.db test2.db
  file copy -force test.db-wal test2.db-wal
  sqlite3_wal db2 test2.db
  execsql { SELECT * FROM t2 } db2
} {B 2}
db2 close
# Close the main connection as well; the wal-13.* tests reopen test.db.
db close

#-------------------------------------------------------------------------
# Test large log summaries.
#
# Precondition: the database file exists and no log file is present.
do_test wal-13.1 {
  list [file exists test.db] [file exists test.db-wal]
} {1 0}

# Create a log file by seeking 200MB into it and writing a single newline,
# then check that the database can still be opened and queried.
do_test wal-13.2 {
  set fd [open test.db-wal w]
  seek $fd [expr 200*1024*1024]
  puts $fd ""
  close $fd
  sqlite3 db test.db
  execsql { SELECT * FROM t2 }
} {B 2}

# After the connection is closed the log file is expected to be gone.
do_test wal-13.3 {
  db close
  file exists test.db-wal
} {0}

# Reopen the database; t2 still holds a single row.
do_test wal-13.4 {
  sqlite3 db test.db
  execsql { SELECT count(*) FROM t2 }
} {1}

# Double the number of rows in t2 fifteen times, from 1 row to 2^15 rows.
do_test wal-13.5 {
  for {set i 0} {$i < 15} {incr i} {
    execsql { INSERT INTO t2 SELECT randomblob(400), randomblob(400) FROM t2 }
  }
  execsql { SELECT count(*) FROM t2 }
} [expr int(pow(2, 15))]

# The log-summary file is expected to be 192KB, i.e. three multiples of
# 64*1024, which is the value of LOGSUMMARY_MMAP_INCREMENT in src/log.c.
do_test wal-13.6 {
  file size test.db-wal-summary
} [expr 192*1024]


finish_test