SQLite

Check-in [595568492e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add code to reduce rollback journal i/o based on IOCAP flags. As yet untested. (CVS 4278)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 595568492e63822caed5b6970542dcee4615dc4d
User & Date: danielk1977 2007-08-23 14:48:24.000
Context
2007-08-23
16:27
Add some warm-body tests for rollback journal i/o enhancements. (CVS 4279) (check-in: ff3770f855 user: danielk1977 tags: trunk)
14:48
Add code to reduce rollback journal i/o based on IOCAP flags. As yet untested. (CVS 4278) (check-in: 595568492e user: danielk1977 tags: trunk)
11:47
Ensure temporary files are deleted when they are closed. (CVS 4277) (check-in: cf4e3c158a user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.370 2007/08/23 11:47:59 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
#include <assert.h>
#include <string.h>

/*







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.371 2007/08/23 14:48:24 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
#include <assert.h>
#include <string.h>

/*
818
819
820
821
822
823
824

825






826

827
828




829
830


831


832




833
834

835


836
837
838
839
840
841
842
  if( pPager->stmtHdrOff==0 ){
    pPager->stmtHdrOff = pPager->journalOff;
  }

  seekJournalHdr(pPager);
  pPager->journalHdr = pPager->journalOff;


  /* FIX ME: 






  **

  ** Possibly for a pager not in no-sync mode, the journal magic should not
  ** be written until nRec is filled in as part of next syncJournal(). 




  **
  ** Actually maybe the whole journal header should be delayed until that


  ** point. Think about this.


  */




  memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
  /* The nRec Field. 0xFFFFFFFF for no-sync journals. */

  put32bits(&zHeader[sizeof(aJournalMagic)], pPager->noSync ? 0xffffffff : 0);


  /* The random check-hash initialiser */ 
  sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
  put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
  /* The initial database size */
  put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
  /* The assumed sector size for this process */
  put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);







>
|
>
>
>
>
>
>

>
|
<
>
>
>
>

<
>
>
|
>
>

>
>
>
>
|
<
>
|
>
>







818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835

836
837
838
839
840

841
842
843
844
845
846
847
848
849
850
851

852
853
854
855
856
857
858
859
860
861
862
  if( pPager->stmtHdrOff==0 ){
    pPager->stmtHdrOff = pPager->journalOff;
  }

  seekJournalHdr(pPager);
  pPager->journalHdr = pPager->journalOff;

  memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));

  /* 
  ** Write the nRec Field - the number of page records that follow this
  ** journal header. Normally, zero is written to this value at this time.
  ** After the records are added to the journal (and the journal synced, 
  ** if in full-sync mode), the zero is overwritten with the true number
  ** of records (see syncJournal()).
  **
  ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
  ** reading the journal this value tells SQLite to assume that the

  ** rest of the journal file contains valid page records. This assumption
  ** is dangerous, as if a failure occured whilst writing to the journal
  ** file it may contain some garbage data. There are two scenarios
  ** where this risk can be ignored:
  **

  **   * When the pager is in no-sync mode. Corruption can follow a
  **     power failure in this case anyway.
  **
  **   * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
  **     that garbage data is never appended to the journal file.
  */
  assert(pPager->fd->pMethods||pPager->noSync);
  if( (pPager->noSync) 
   || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 
  ){
    put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);

  }else{
    put32bits(&zHeader[sizeof(aJournalMagic)], 0);
  }

  /* The random check-hash initialiser */ 
  sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
  put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
  /* The initial database size */
  put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
  /* The assumed sector size for this process */
  put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
2480
2481
2482
2483
2484
2485
2486
2487





2488
2489
2490
2491
2492
2493
2494

2495
2496
2497
2498
2499
2500

2501

2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521






2522

2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534

2535
2536
2537
2538
2539
2540

2541
2542
2543
2544
2545
2546
2547
** 
** This routine also updates the nRec field in the header of the journal.
** (See comments on the pager_playback() routine for additional information.)
** If the sync mode is FULL, two syncs will occur.  First the whole journal
** is synced, then the nRec field is updated, then a second sync occurs.
**
** For temporary databases, we do not care if we are able to rollback
** after a power failure, so sync occurs.





**
** This routine clears the needSync field of every page current held in
** memory.
*/
static int syncJournal(Pager *pPager){
  PgHdr *pPg;
  int rc = SQLITE_OK;


  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync ){
    if( !pPager->tempFile ){

      assert( pPager->journalOpen );

      /* assert( !pPager->noSync ); // noSync might be set if synchronous
      ** was turned off after the transaction was started.  Ticket #615 */
#ifndef NDEBUG
      {
        /* Make sure the pPager->nRec counter we are keeping agrees
        ** with the nRec computed from the size of the journal file.
        */
        i64 jSz;
        rc = sqlite3OsFileSize(pPager->jfd, &jSz);
        if( rc!=0 ) return rc;
        assert( pPager->journalOff==jSz );
      }
#endif
      {
        i64 jrnlOff;

        /* Write the nRec value into the journal file header. If in
        ** full-synchronous mode, sync the journal first. This ensures that
        ** all data has really hit the disk before nRec is updated to mark
        ** it as a candidate for rollback. 






        */

        if( pPager->fullSync ){
          PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
          IOTRACE(("JSYNC %p\n", pPager))
          rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
          if( rc!=0 ) return rc;
        }

        jrnlOff = pPager->journalHdr + sizeof(aJournalMagic);
        IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4));
        rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec);
        if( rc ) return rc;
      }

      PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
      IOTRACE(("JSYNC %p\n", pPager))
      rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags| 
        (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
      );
      if( rc!=0 ) return rc;

      pPager->journalStarted = 1;
    }
    pPager->needSync = 0;

    /* Erase the needSync flag from every page.
    */
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){







|
>
>
>
>
>







>






>

>













<
<
|



|
>
>
>
>
>
>

>
|











>
|
|
|
|
|
|
>







2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542


2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
** 
** This routine also updates the nRec field in the header of the journal.
** (See comments on the pager_playback() routine for additional information.)
** If the sync mode is FULL, two syncs will occur.  First the whole journal
** is synced, then the nRec field is updated, then a second sync occurs.
**
** For temporary databases, we do not care if we are able to rollback
** after a power failure, so no sync occurs.
**
** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which
** the database is stored, then OsSync() is never called on the journal
** file. In this case all that is required is to update the nRec field in
** the journal header.
**
** This routine clears the needSync field of every page current held in
** memory.
*/
static int syncJournal(Pager *pPager){
  PgHdr *pPg;
  int rc = SQLITE_OK;


  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync ){
    if( !pPager->tempFile ){
      int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
      assert( pPager->journalOpen );

      /* assert( !pPager->noSync ); // noSync might be set if synchronous
      ** was turned off after the transaction was started.  Ticket #615 */
#ifndef NDEBUG
      {
        /* Make sure the pPager->nRec counter we are keeping agrees
        ** with the nRec computed from the size of the journal file.
        */
        i64 jSz;
        rc = sqlite3OsFileSize(pPager->jfd, &jSz);
        if( rc!=0 ) return rc;
        assert( pPager->journalOff==jSz );
      }
#endif


      if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
        /* Write the nRec value into the journal file header. If in
        ** full-synchronous mode, sync the journal first. This ensures that
        ** all data has really hit the disk before nRec is updated to mark
        ** it as a candidate for rollback.
        **
        ** This is not required if the persistent media supports the
        ** SAFE_APPEND property. Because in this case it is not possible 
        ** for garbage data to be appended to the file, the nRec field
        ** is populated with 0xFFFFFFFF when the journal header is written
        ** and never needs to be updated.
        */
        i64 jrnlOff;
        if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
          PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
          IOTRACE(("JSYNC %p\n", pPager))
          rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
          if( rc!=0 ) return rc;
        }

        jrnlOff = pPager->journalHdr + sizeof(aJournalMagic);
        IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4));
        rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec);
        if( rc ) return rc;
      }
      if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
        PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
        IOTRACE(("JSYNC %p\n", pPager))
        rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags| 
          (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
        );
        if( rc!=0 ) return rc;
      }
      pPager->journalStarted = 1;
    }
    pPager->needSync = 0;

    /* Erase the needSync flag from every page.
    */
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
2770
2771
2772
2773
2774
2775
2776

2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788

  /* If we could not find a page that does not require an fsync()
  ** on the journal file then fsync the journal file.  This is a
  ** very slow operation, so we work hard to avoid it.  But sometimes
  ** it can't be helped.
  */
  if( pPg==0 && pPager->pFirst && syncOk && !MEMDB){

    int rc = syncJournal(pPager);
    if( rc!=0 ){
      return rc;
    }
    if( pPager->fullSync ){
      /* If in full-sync mode, write a new journal header into the
      ** journal file. This is done to avoid ever modifying a journal
      ** header that is involved in the rollback of pages that have
      ** already been written to the database (in case the header is
      ** trashed when the nRec field is updated).
      */
      pPager->nRec = 0;







>




|







2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824

  /* If we could not find a page that does not require an fsync()
  ** on the journal file then fsync the journal file.  This is a
  ** very slow operation, so we work hard to avoid it.  But sometimes
  ** it can't be helped.
  */
  if( pPg==0 && pPager->pFirst && syncOk && !MEMDB){
    int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
    int rc = syncJournal(pPager);
    if( rc!=0 ){
      return rc;
    }
    if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
      /* If in full-sync mode, write a new journal header into the
      ** journal file. This is done to avoid ever modifying a journal
      ** header that is involved in the rollback of pages that have
      ** already been written to the database (in case the header is
      ** trashed when the nRec field is updated).
      */
      pPager->nRec = 0;