/ Check-in [ecd828f9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allow writers to write dirty pages to the log mid-transaction in order to free memory.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | wal
Files: files | file ages | folders
SHA1: ecd828f96909895535d7dc744e5a8530e234e04d
User & Date: dan 2010-04-15 16:45:35
Context
2010-04-16
11:30
Fix bug in log recovery (last frame in log was being ignored). Also remove an incorrect assert statement. check-in: 67d2a89e user: dan tags: wal
2010-04-15
16:45
Allow writers to write dirty pages to the log mid-transaction in order to free memory. check-in: ecd828f9 user: dan tags: wal
13:33
Merge two leaves on the WAL branch. check-in: c9ed66cc user: dan tags: wal
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/pager.c.

3249
3250
3251
3252
3253
3254
3255
3256
3257




3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325

3326
3327
3328
3329
3330
3331
3332
static int pagerStress(void *p, PgHdr *pPg){
  Pager *pPager = (Pager *)p;
  int rc = SQLITE_OK;

  assert( pPg->pPager==pPager );
  assert( pPg->flags&PGHDR_DIRTY );

  if( pagerUseLog(pPager) ) return SQLITE_OK;





  /* The doNotSync flag is set by the sqlite3PagerWrite() function while it
  ** is journalling a set of two or more database pages that are stored
  ** on the same disk sector. Syncing the journal is not allowed while
  ** this is happening as it is important that all members of such a
  ** set of pages are synced to disk together. So, if the page this function
  ** is trying to make clean will require a journal sync and the doNotSync
  ** flag is set, return without doing anything. The pcache layer will
  ** just have to go ahead and allocate a new page buffer instead of
  ** reusing pPg.
  **
  ** Similarly, if the pager has already entered the error state, do not
  ** try to write the contents of pPg to disk.
  */
  if( NEVER(pPager->errCode)
   || (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC)
  ){
    return SQLITE_OK;
  }

  /* Sync the journal file if required. */
  if( pPg->flags&PGHDR_NEED_SYNC ){
    rc = syncJournal(pPager);
    if( rc==SQLITE_OK && pPager->fullSync && 
      !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) &&
      !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
    ){
      pPager->nRec = 0;
      rc = writeJournalHdr(pPager);
    }
  }

  /* If the page number of this page is larger than the current size of
  ** the database image, it may need to be written to the sub-journal.
  ** This is because the call to pager_write_pagelist() below will not
  ** actually write data to the file in this case.
  **
  ** Consider the following sequence of events:
  **
  **   BEGIN;
  **     <journal page X>
  **     <modify page X>
  **     SAVEPOINT sp;
  **       <shrink database file to Y pages>
  **       pagerStress(page X)
  **     ROLLBACK TO sp;
  **
  ** If (X>Y), then when pagerStress is called page X will not be written
  ** out to the database file, but will be dropped from the cache. Then,
  ** following the "ROLLBACK TO sp" statement, reading page X will read
  ** data from the database file. This will be the copy of page X as it
  ** was when the transaction started, not as it was when "SAVEPOINT sp"
  ** was executed.
  **
  ** The solution is to write the current data for page X into the 
  ** sub-journal file now (if it is not already there), so that it will
  ** be restored to its current value when the "ROLLBACK TO sp" is 
  ** executed.
  */
  if( NEVER(
      rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
  ) ){
    rc = subjournalPage(pPg);
  }

  /* Write the contents of the page out to the database file. */
  if( rc==SQLITE_OK ){
    pPg->pDirty = 0;
    rc = pager_write_pagelist(pPg);

  }

  /* Mark the page as clean. */
  if( rc==SQLITE_OK ){
    PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno));
    sqlite3PcacheMakeClean(pPg);
  }







|
<
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>







3249
3250
3251
3252
3253
3254
3255
3256

3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
static int pagerStress(void *p, PgHdr *pPg){
  Pager *pPager = (Pager *)p;
  int rc = SQLITE_OK;

  assert( pPg->pPager==pPager );
  assert( pPg->flags&PGHDR_DIRTY );

  if( pagerUseLog(pPager) ){

    /* Write a single frame for this page to the log. */
    assert( pPg->pDirty==0 );
    rc = sqlite3LogFrames(pPager->pLog, pPager->pageSize, pPg, 0, 0, 0);
  }else{
    /* The doNotSync flag is set by the sqlite3PagerWrite() function while it
    ** is journalling a set of two or more database pages that are stored
    ** on the same disk sector. Syncing the journal is not allowed while
    ** this is happening as it is important that all members of such a
    ** set of pages are synced to disk together. So, if the page this function
    ** is trying to make clean will require a journal sync and the doNotSync
    ** flag is set, return without doing anything. The pcache layer will
    ** just have to go ahead and allocate a new page buffer instead of
    ** reusing pPg.
    **
    ** Similarly, if the pager has already entered the error state, do not
    ** try to write the contents of pPg to disk.
    */
    if( NEVER(pPager->errCode)
     || (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC)
    ){
      return SQLITE_OK;
    }
  
    /* Sync the journal file if required. */
    if( pPg->flags&PGHDR_NEED_SYNC ){
      rc = syncJournal(pPager);
      if( rc==SQLITE_OK && pPager->fullSync && 
        !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) &&
        !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
      ){
        pPager->nRec = 0;
        rc = writeJournalHdr(pPager);
      }
    }
  
    /* If the page number of this page is larger than the current size of
    ** the database image, it may need to be written to the sub-journal.
    ** This is because the call to pager_write_pagelist() below will not
    ** actually write data to the file in this case.
    **
    ** Consider the following sequence of events:
    **
    **   BEGIN;
    **     <journal page X>
    **     <modify page X>
    **     SAVEPOINT sp;
    **       <shrink database file to Y pages>
    **       pagerStress(page X)
    **     ROLLBACK TO sp;
    **
    ** If (X>Y), then when pagerStress is called page X will not be written
    ** out to the database file, but will be dropped from the cache. Then,
    ** following the "ROLLBACK TO sp" statement, reading page X will read
    ** data from the database file. This will be the copy of page X as it
    ** was when the transaction started, not as it was when "SAVEPOINT sp"
    ** was executed.
    **
    ** The solution is to write the current data for page X into the 
    ** sub-journal file now (if it is not already there), so that it will
    ** be restored to its current value when the "ROLLBACK TO sp" is 
    ** executed.
    */
    if( NEVER(
        rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
    ) ){
      rc = subjournalPage(pPg);
    }
  
    /* Write the contents of the page out to the database file. */
    if( rc==SQLITE_OK ){
      pPg->pDirty = 0;
      rc = pager_write_pagelist(pPg);
    }
  }

  /* Mark the page as clean. */
  if( rc==SQLITE_OK ){
    PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno));
    sqlite3PcacheMakeClean(pPg);
  }

Changes to test/wal.test.

526
527
528
529
530
531
532
533


























































































534
535

  catch { db close }
  catch { code2 { db2 close } }
  catch { code3 { db3 close } }
  catch { close $::code2_chan }
  catch { close $::code3_chan }
}



























































































finish_test









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625

  catch { db close }
  catch { code2 { db2 close } }
  catch { code3 { db3 close } }
  catch { close $::code2_chan }
  catch { close $::code3_chan }
}

#-------------------------------------------------------------------------
# This block of tests, wal-11.*, test that nothing goes terribly wrong
# if frames must be written to the log file before a transaction is
# committed (in order to free up memory).
#
do_test wal-11.1 {
  reopen_db
  execsql {
    PRAGMA cache_size = 10;
    PRAGMA page_size = 1024;
    CREATE TABLE t1(x PRIMARY KEY);
  }
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {0 3}
do_test wal-11.2 {
  execsql { PRAGMA checkpoint }
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 3}
do_test wal-11.3 {
  execsql { INSERT INTO t1 VALUES( randomblob(900) ) }
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 4}

do_test wal-11.4 {
  execsql { 
    BEGIN;
      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 2
      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 4
      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 8
      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 16
  }
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 33}
do_test wal-11.5 {
  execsql { 
    SELECT count(*) FROM t1;
    PRAGMA integrity_check;
  }
} {16 ok}
do_test wal-11.6 {
  execsql COMMIT
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 42}
do_test wal-11.7 {
  execsql { 
    SELECT count(*) FROM t1;
    PRAGMA integrity_check;
  }
} {16 ok}
do_test wal-11.8 {
  execsql { PRAGMA checkpoint }
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {37 42}
do_test wal-11.9 {
  db close
  sqlite3_wal db test.db
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {37 0}

do_test wal-11.10 {
  execsql {
    PRAGMA cache_size = 10;
    BEGIN;
      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 32
      SELECT count(*) FROM t1;
  }
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {37 38}
do_test wal-11.11 {
  execsql {
      SELECT count(*) FROM t1;
    ROLLBACK;
    SELECT count(*) FROM t1;
  }
} {32 16}
do_test wal-11.12 {
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {37 38}
do_test wal-11.13 {
  execsql {
    INSERT INTO t1 VALUES( randomblob(900) );
    SELECT count(*) FROM t1;
    PRAGMA integrity_check;
  }
} {17 ok}
do_test wal-11.14 {
  list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {37 38}


finish_test