/ Check-in [ecd828f9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Allow writers to write dirty pages to the log mid-transaction in order to free memory.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | wal
Files: files | file ages | folders
SHA1: ecd828f96909895535d7dc744e5a8530e234e04d
User & Date: dan 2010-04-15 16:45:35
Context
2010-04-16
11:30
Fix bug in log recovery (last frame in log was being ignored). Also remove an incorrect assert statement. check-in: 67d2a89e user: dan tags: wal
2010-04-15
16:45
Allow writers to write dirty pages to the log mid-transaction in order to free memory. check-in: ecd828f9 user: dan tags: wal
13:33
Merge two leaves on the WAL branch. check-in: c9ed66cc user: dan tags: wal
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/pager.c.

  3249   3249   static int pagerStress(void *p, PgHdr *pPg){
  3250   3250     Pager *pPager = (Pager *)p;
  3251   3251     int rc = SQLITE_OK;
  3252   3252   
  3253   3253     assert( pPg->pPager==pPager );
  3254   3254     assert( pPg->flags&PGHDR_DIRTY );
  3255   3255   
  3256         -  if( pagerUseLog(pPager) ) return SQLITE_OK;
  3257         -
  3258         -  /* The doNotSync flag is set by the sqlite3PagerWrite() function while it
  3259         -  ** is journalling a set of two or more database pages that are stored
  3260         -  ** on the same disk sector. Syncing the journal is not allowed while
  3261         -  ** this is happening as it is important that all members of such a
  3262         -  ** set of pages are synced to disk together. So, if the page this function
  3263         -  ** is trying to make clean will require a journal sync and the doNotSync
  3264         -  ** flag is set, return without doing anything. The pcache layer will
  3265         -  ** just have to go ahead and allocate a new page buffer instead of
  3266         -  ** reusing pPg.
  3267         -  **
  3268         -  ** Similarly, if the pager has already entered the error state, do not
  3269         -  ** try to write the contents of pPg to disk.
  3270         -  */
  3271         -  if( NEVER(pPager->errCode)
  3272         -   || (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC)
  3273         -  ){
  3274         -    return SQLITE_OK;
  3275         -  }
  3276         -
  3277         -  /* Sync the journal file if required. */
  3278         -  if( pPg->flags&PGHDR_NEED_SYNC ){
  3279         -    rc = syncJournal(pPager);
  3280         -    if( rc==SQLITE_OK && pPager->fullSync && 
  3281         -      !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) &&
  3282         -      !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
         3256  +  if( pagerUseLog(pPager) ){
         3257  +    /* Write a single frame for this page to the log. */
         3258  +    assert( pPg->pDirty==0 );
         3259  +    rc = sqlite3LogFrames(pPager->pLog, pPager->pageSize, pPg, 0, 0, 0);
         3260  +  }else{
         3261  +    /* The doNotSync flag is set by the sqlite3PagerWrite() function while it
         3262  +    ** is journalling a set of two or more database pages that are stored
         3263  +    ** on the same disk sector. Syncing the journal is not allowed while
         3264  +    ** this is happening as it is important that all members of such a
         3265  +    ** set of pages are synced to disk together. So, if the page this function
         3266  +    ** is trying to make clean will require a journal sync and the doNotSync
         3267  +    ** flag is set, return without doing anything. The pcache layer will
         3268  +    ** just have to go ahead and allocate a new page buffer instead of
         3269  +    ** reusing pPg.
         3270  +    **
         3271  +    ** Similarly, if the pager has already entered the error state, do not
         3272  +    ** try to write the contents of pPg to disk.
         3273  +    */
         3274  +    if( NEVER(pPager->errCode)
         3275  +     || (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC)
  3283   3276       ){
  3284         -      pPager->nRec = 0;
  3285         -      rc = writeJournalHdr(pPager);
  3286         -    }
  3287         -  }
  3288         -
  3289         -  /* If the page number of this page is larger than the current size of
  3290         -  ** the database image, it may need to be written to the sub-journal.
  3291         -  ** This is because the call to pager_write_pagelist() below will not
  3292         -  ** actually write data to the file in this case.
  3293         -  **
  3294         -  ** Consider the following sequence of events:
  3295         -  **
  3296         -  **   BEGIN;
  3297         -  **     <journal page X>
  3298         -  **     <modify page X>
  3299         -  **     SAVEPOINT sp;
  3300         -  **       <shrink database file to Y pages>
  3301         -  **       pagerStress(page X)
  3302         -  **     ROLLBACK TO sp;
  3303         -  **
  3304         -  ** If (X>Y), then when pagerStress is called page X will not be written
  3305         -  ** out to the database file, but will be dropped from the cache. Then,
  3306         -  ** following the "ROLLBACK TO sp" statement, reading page X will read
  3307         -  ** data from the database file. This will be the copy of page X as it
  3308         -  ** was when the transaction started, not as it was when "SAVEPOINT sp"
  3309         -  ** was executed.
  3310         -  **
  3311         -  ** The solution is to write the current data for page X into the 
  3312         -  ** sub-journal file now (if it is not already there), so that it will
  3313         -  ** be restored to its current value when the "ROLLBACK TO sp" is 
  3314         -  ** executed.
  3315         -  */
  3316         -  if( NEVER(
  3317         -      rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
  3318         -  ) ){
  3319         -    rc = subjournalPage(pPg);
  3320         -  }
  3321         -
  3322         -  /* Write the contents of the page out to the database file. */
  3323         -  if( rc==SQLITE_OK ){
  3324         -    pPg->pDirty = 0;
  3325         -    rc = pager_write_pagelist(pPg);
         3277  +      return SQLITE_OK;
         3278  +    }
         3279  +  
         3280  +    /* Sync the journal file if required. */
         3281  +    if( pPg->flags&PGHDR_NEED_SYNC ){
         3282  +      rc = syncJournal(pPager);
         3283  +      if( rc==SQLITE_OK && pPager->fullSync && 
         3284  +        !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) &&
         3285  +        !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
         3286  +      ){
         3287  +        pPager->nRec = 0;
         3288  +        rc = writeJournalHdr(pPager);
         3289  +      }
         3290  +    }
         3291  +  
         3292  +    /* If the page number of this page is larger than the current size of
         3293  +    ** the database image, it may need to be written to the sub-journal.
         3294  +    ** This is because the call to pager_write_pagelist() below will not
         3295  +    ** actually write data to the file in this case.
         3296  +    **
         3297  +    ** Consider the following sequence of events:
         3298  +    **
         3299  +    **   BEGIN;
         3300  +    **     <journal page X>
         3301  +    **     <modify page X>
         3302  +    **     SAVEPOINT sp;
         3303  +    **       <shrink database file to Y pages>
         3304  +    **       pagerStress(page X)
         3305  +    **     ROLLBACK TO sp;
         3306  +    **
         3307  +    ** If (X>Y), then when pagerStress is called page X will not be written
         3308  +    ** out to the database file, but will be dropped from the cache. Then,
         3309  +    ** following the "ROLLBACK TO sp" statement, reading page X will read
         3310  +    ** data from the database file. This will be the copy of page X as it
         3311  +    ** was when the transaction started, not as it was when "SAVEPOINT sp"
         3312  +    ** was executed.
         3313  +    **
         3314  +    ** The solution is to write the current data for page X into the 
         3315  +    ** sub-journal file now (if it is not already there), so that it will
         3316  +    ** be restored to its current value when the "ROLLBACK TO sp" is 
         3317  +    ** executed.
         3318  +    */
         3319  +    if( NEVER(
         3320  +        rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
         3321  +    ) ){
         3322  +      rc = subjournalPage(pPg);
         3323  +    }
         3324  +  
         3325  +    /* Write the contents of the page out to the database file. */
         3326  +    if( rc==SQLITE_OK ){
         3327  +      pPg->pDirty = 0;
         3328  +      rc = pager_write_pagelist(pPg);
         3329  +    }
  3326   3330     }
  3327   3331   
  3328   3332     /* Mark the page as clean. */
  3329   3333     if( rc==SQLITE_OK ){
  3330   3334       PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno));
  3331   3335       sqlite3PcacheMakeClean(pPg);
  3332   3336     }

Changes to test/wal.test.

   526    526   
   527    527     catch { db close }
   528    528     catch { code2 { db2 close } }
   529    529     catch { code3 { db3 close } }
   530    530     catch { close $::code2_chan }
   531    531     catch { close $::code3_chan }
   532    532   }
          533  +
          534  +#-------------------------------------------------------------------------
          535  +# This block of tests, wal-11.*, tests that nothing goes terribly wrong
          536  +# if frames must be written to the log file before a transaction is
          537  +# committed (in order to free up memory).
          538  +#
          539  +do_test wal-11.1 {
          540  +  reopen_db
          541  +  execsql {
          542  +    PRAGMA cache_size = 10;
          543  +    PRAGMA page_size = 1024;
          544  +    CREATE TABLE t1(x PRIMARY KEY);
          545  +  }
          546  +  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
          547  +} {0 3}
          548  +do_test wal-11.2 {
          549  +  execsql { PRAGMA checkpoint }
          550  +  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
          551  +} {3 3}
          552  +do_test wal-11.3 {
          553  +  execsql { INSERT INTO t1 VALUES( randomblob(900) ) }
          554  +  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
          555  +} {3 4}
          556  +
          557  +do_test wal-11.4 {
          558  +  execsql { 
          559  +    BEGIN;
          560  +      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 2
          561  +      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 4
          562  +      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 8
          563  +      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 16
          564  +  }
          565  +  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
          566  +} {3 33}
          567  +do_test wal-11.5 {
          568  +  execsql { 
          569  +    SELECT count(*) FROM t1;
          570  +    PRAGMA integrity_check;
          571  +  }
          572  +} {16 ok}
          573  +do_test wal-11.6 {
          574  +  execsql COMMIT
          575  +  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
          576  +} {3 42}
          577  +do_test wal-11.7 {
          578  +  execsql { 
          579  +    SELECT count(*) FROM t1;
          580  +    PRAGMA integrity_check;
          581  +  }
          582  +} {16 ok}
          583  +do_test wal-11.8 {
          584  +  execsql { PRAGMA checkpoint }
          585  +  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
          586  +} {37 42}
          587  +do_test wal-11.9 {
          588  +  db close
          589  +  sqlite3_wal db test.db
          590  +  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
          591  +} {37 0}
          592  +
          593  +do_test wal-11.10 {
          594  +  execsql {
          595  +    PRAGMA cache_size = 10;
          596  +    BEGIN;
          597  +      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 32
          598  +      SELECT count(*) FROM t1;
          599  +  }
          600  +  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
          601  +} {37 38}
          602  +do_test wal-11.11 {
          603  +  execsql {
          604  +      SELECT count(*) FROM t1;
          605  +    ROLLBACK;
          606  +    SELECT count(*) FROM t1;
          607  +  }
          608  +} {32 16}
          609  +do_test wal-11.12 {
          610  +  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
          611  +} {37 38}
          612  +do_test wal-11.13 {
          613  +  execsql {
          614  +    INSERT INTO t1 VALUES( randomblob(900) );
          615  +    SELECT count(*) FROM t1;
          616  +    PRAGMA integrity_check;
          617  +  }
          618  +} {17 ok}
          619  +do_test wal-11.14 {
          620  +  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
          621  +} {37 38}
          622  +
   533    623   
   534    624   finish_test
   535    625