SQLite

Check-in [a0be6ea464]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add SQLITE_IOCAP_ZERO_DAMAGE and enable it for both unix and windows. Use this device characteristic to reduce the required work in journaling. A side effect is that this changes the default page exists back to 1024 even with the use of statvfs().
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | statvfs
Files: files | file ages | folders
SHA1: a0be6ea464695fdf1eaf2b7cf0652778617814f2
User & Date: drh 2011-12-17 19:49:02.976
Context
2011-12-17
20:02
For improved clarity of presentation, refactor some of the code associated with ZERO_DAMAGE and sector-size. (check-in: 1dde96c9ee user: drh tags: statvfs)
19:49
Add SQLITE_IOCAP_ZERO_DAMAGE and enable it for both unix and windows. Use this device characteristic to reduce the required work in journaling. A side effect is that this changes the default page exists back to 1024 even with the use of statvfs(). (check-in: a0be6ea464 user: drh tags: statvfs)
16:25
Fix a bad #endif with the previous check-in on this branch. (check-in: 915713ffe4 user: drh tags: statvfs)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/os_unix.c.
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
}

/*
** Return the device characteristics for the file. This is always 0 for unix.
*/
static int unixDeviceCharacteristics(sqlite3_file *NotUsed){
  UNUSED_PARAMETER(NotUsed);
  return 0;
}

#ifndef SQLITE_OMIT_WAL


/*
** Object used to represent an shared memory buffer.  







|







3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
}

/*
** Return the device characteristics for the file. This is always 0 for unix.
*/
static int unixDeviceCharacteristics(sqlite3_file *NotUsed){
  UNUSED_PARAMETER(NotUsed);
  return SQLITE_IOCAP_ZERO_DAMAGE;
}

#ifndef SQLITE_OMIT_WAL


/*
** Object used to represent an shared memory buffer.  
Changes to src/os_win.c.
2209
2210
2211
2212
2213
2214
2215
2216

2217
2218
2219
2220
2221
2222
2223
}

/*
** Return a vector of device characteristics.
*/
static int winDeviceCharacteristics(sqlite3_file *id){
  UNUSED_PARAMETER(id);
  return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN;

}

#ifndef SQLITE_OMIT_WAL

/* 
** Windows will only let you create file view mappings
** on allocation size granularity boundaries.







|
>







2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
}

/*
** Return a vector of device characteristics.
*/
static int winDeviceCharacteristics(sqlite3_file *id){
  UNUSED_PARAMETER(id);
  return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN |
         SQLITE_IOCAP_ZERO_DAMAGE;
}

#ifndef SQLITE_OMIT_WAL

/* 
** Windows will only let you create file view mappings
** on allocation size granularity boundaries.
Changes to src/pager.c.
2511
2512
2513
2514
2515
2516
2517









2518
2519
2520
2521
2522


2523
2524
2525
2526
2527
2528
2529
**
** For temporary files the effective sector size is always 512 bytes.
**
** Otherwise, for non-temporary files, the effective sector size is
** the value returned by the xSectorSize() method rounded up to 32 if
** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it
** is greater than MAX_SECTOR_SIZE.









*/
static void setSectorSize(Pager *pPager){
  assert( isOpen(pPager->fd) || pPager->tempFile );

  if( !pPager->tempFile ){


    /* Sector size doesn't matter for temporary files. Also, the file
    ** may not have been opened yet, in which case the OsSectorSize()
    ** call will segfault.
    */
    pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
  }
  if( pPager->sectorSize<32 ){







>
>
>
>
>
>
>
>
>




|
>
>







2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
**
** For temporary files the effective sector size is always 512 bytes.
**
** Otherwise, for non-temporary files, the effective sector size is
** the value returned by the xSectorSize() method rounded up to 32 if
** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it
** is greater than MAX_SECTOR_SIZE.
**
** If the file has the SQLITE_IOCAP_ZERO_DAMAGE property, then set the
** effective sector size to its minimum value (512).  The purpose of
** pPager->sectorSize is to define the "blast radius" of bytes that
** might change if a crash occurs while writing to a single byte in
** that range.  But with ZERO_DAMAGE, the blast radius is zero, so
** we minimize the sector size.  For backwards compatibility of the
** rollback journal file format, we cannot reduce the effective sector
** size below 512.
*/
static void setSectorSize(Pager *pPager){
  assert( isOpen(pPager->fd) || pPager->tempFile );

  if( !pPager->tempFile
   && (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_ZERO_DAMAGE)==0
  ){
    /* Sector size doesn't matter for temporary files. Also, the file
    ** may not have been opened yet, in which case the OsSectorSize()
    ** call will segfault.
    */
    pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
  }
  if( pPager->sectorSize<32 ){
Changes to src/sqlite.h.in.
500
501
502
503
504
505
506
507






508
509
510
511
512
513
514
515
516
517
518
519
520

521
522
523
524
525
526
527
** mean that writes of blocks that are nnn bytes in size and
** are aligned to an address which is an integer multiple of
** nnn are atomic.  The SQLITE_IOCAP_SAFE_APPEND value means
** that when data is appended to a file, the data is appended
** first then the size of the file is extended, never the other
** way around.  The SQLITE_IOCAP_SEQUENTIAL property means that
** information is written to disk in the same order as calls
** to xWrite().






*/
#define SQLITE_IOCAP_ATOMIC                 0x00000001
#define SQLITE_IOCAP_ATOMIC512              0x00000002
#define SQLITE_IOCAP_ATOMIC1K               0x00000004
#define SQLITE_IOCAP_ATOMIC2K               0x00000008
#define SQLITE_IOCAP_ATOMIC4K               0x00000010
#define SQLITE_IOCAP_ATOMIC8K               0x00000020
#define SQLITE_IOCAP_ATOMIC16K              0x00000040
#define SQLITE_IOCAP_ATOMIC32K              0x00000080
#define SQLITE_IOCAP_ATOMIC64K              0x00000100
#define SQLITE_IOCAP_SAFE_APPEND            0x00000200
#define SQLITE_IOCAP_SEQUENTIAL             0x00000400
#define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN  0x00000800


/*
** CAPI3REF: File Locking Levels
**
** SQLite uses one of these integer values as the second
** argument to calls it makes to the xLock() and xUnlock() methods
** of an [sqlite3_io_methods] object.







|
>
>
>
>
>
>













>







500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
** mean that writes of blocks that are nnn bytes in size and
** are aligned to an address which is an integer multiple of
** nnn are atomic.  The SQLITE_IOCAP_SAFE_APPEND value means
** that when data is appended to a file, the data is appended
** first then the size of the file is extended, never the other
** way around.  The SQLITE_IOCAP_SEQUENTIAL property means that
** information is written to disk in the same order as calls
** to xWrite().  The SQLITE_IOCAP_ZERO_DAMAGE property means that
** after reboot following a crash or power loss, the value of
** each byte in a file is a value that was actually written
** into that byte at some point.  In other words, a crash will
** not introduce garbage or randomness into a file, and byte of
** a file that are never written will not change values due to
** writes to nearby bytes.
*/
#define SQLITE_IOCAP_ATOMIC                 0x00000001
#define SQLITE_IOCAP_ATOMIC512              0x00000002
#define SQLITE_IOCAP_ATOMIC1K               0x00000004
#define SQLITE_IOCAP_ATOMIC2K               0x00000008
#define SQLITE_IOCAP_ATOMIC4K               0x00000010
#define SQLITE_IOCAP_ATOMIC8K               0x00000020
#define SQLITE_IOCAP_ATOMIC16K              0x00000040
#define SQLITE_IOCAP_ATOMIC32K              0x00000080
#define SQLITE_IOCAP_ATOMIC64K              0x00000100
#define SQLITE_IOCAP_SAFE_APPEND            0x00000200
#define SQLITE_IOCAP_SEQUENTIAL             0x00000400
#define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN  0x00000800
#define SQLITE_IOCAP_ZERO_DAMAGE            0x00001000

/*
** CAPI3REF: File Locking Levels
**
** SQLite uses one of these integer values as the second
** argument to calls it makes to the xLock() and xUnlock() methods
** of an [sqlite3_io_methods] object.
Changes to src/test6.c.
712
713
714
715
716
717
718

719
720
721
722
723
724
725
    { "atomic4k",    SQLITE_IOCAP_ATOMIC4K    },
    { "atomic8k",    SQLITE_IOCAP_ATOMIC8K    },
    { "atomic16k",   SQLITE_IOCAP_ATOMIC16K   },
    { "atomic32k",   SQLITE_IOCAP_ATOMIC32K   },
    { "atomic64k",   SQLITE_IOCAP_ATOMIC64K   },
    { "sequential",  SQLITE_IOCAP_SEQUENTIAL  },
    { "safe_append", SQLITE_IOCAP_SAFE_APPEND },

    { 0, 0 }
  };

  int i;
  int iDc = 0;
  int iSectorSize = 0;
  int setSectorsize = 0;







>







712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
    { "atomic4k",    SQLITE_IOCAP_ATOMIC4K    },
    { "atomic8k",    SQLITE_IOCAP_ATOMIC8K    },
    { "atomic16k",   SQLITE_IOCAP_ATOMIC16K   },
    { "atomic32k",   SQLITE_IOCAP_ATOMIC32K   },
    { "atomic64k",   SQLITE_IOCAP_ATOMIC64K   },
    { "sequential",  SQLITE_IOCAP_SEQUENTIAL  },
    { "safe_append", SQLITE_IOCAP_SAFE_APPEND },
    { "zero_damage", SQLITE_IOCAP_ZERO_DAMAGE },
    { 0, 0 }
  };

  int i;
  int iDc = 0;
  int iSectorSize = 0;
  int setSectorsize = 0;
Changes to src/test_vfs.c.
1170
1171
1172
1173
1174
1175
1176

1177
1178
1179
1180
1181
1182
1183
        { "atomic8k",              SQLITE_IOCAP_ATOMIC8K    },
        { "atomic16k",             SQLITE_IOCAP_ATOMIC16K   },
        { "atomic32k",             SQLITE_IOCAP_ATOMIC32K   },
        { "atomic64k",             SQLITE_IOCAP_ATOMIC64K   },
        { "sequential",            SQLITE_IOCAP_SEQUENTIAL  },
        { "safe_append",           SQLITE_IOCAP_SAFE_APPEND },
        { "undeletable_when_open", SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN },

        { 0, 0 }
      };
      Tcl_Obj *pRet;
      int iFlag;

      if( objc>3 ){
        Tcl_WrongNumArgs(interp, 2, objv, "?ATTR-LIST?");







>







1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
        { "atomic8k",              SQLITE_IOCAP_ATOMIC8K    },
        { "atomic16k",             SQLITE_IOCAP_ATOMIC16K   },
        { "atomic32k",             SQLITE_IOCAP_ATOMIC32K   },
        { "atomic64k",             SQLITE_IOCAP_ATOMIC64K   },
        { "sequential",            SQLITE_IOCAP_SEQUENTIAL  },
        { "safe_append",           SQLITE_IOCAP_SAFE_APPEND },
        { "undeletable_when_open", SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN },
        { "zero_damage",           SQLITE_IOCAP_ZERO_DAMAGE },
        { 0, 0 }
      };
      Tcl_Obj *pRet;
      int iFlag;

      if( objc>3 ){
        Tcl_WrongNumArgs(interp, 2, objv, "?ATTR-LIST?");
Changes to src/wal.c.
421
422
423
424
425
426
427

428
429
430
431
432
433
434
  u8 syncFlags;              /* Flags to use to sync header writes */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
  u8 truncateOnCommit;       /* True to truncate WAL file on commit */
  u8 noSyncHeader;           /* Avoid WAL header fsyncs if true */

  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  const char *zWalName;      /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
#endif
};







>







421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
  u8 syncFlags;              /* Flags to use to sync header writes */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
  u8 truncateOnCommit;       /* True to truncate WAL file on commit */
  u8 noSyncHeader;           /* Avoid WAL header fsyncs if true */
  u8 noPadding;              /* No need to pad transactions to sector size */
  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  const char *zWalName;      /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
#endif
};
1306
1307
1308
1309
1310
1311
1312

1313
1314
1315
1316
1317
1318
1319
  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);
    sqlite3_free(pRet);
  }else{
    int iDC = sqlite3OsDeviceCharacteristics(pRet->pWalFd);
    if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->noSyncHeader = 1; }

    *ppWal = pRet;
    WALTRACE(("WAL%d: opened\n", pRet));
  }
  return rc;
}

/*







>







1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);
    sqlite3_free(pRet);
  }else{
    int iDC = sqlite3OsDeviceCharacteristics(pRet->pWalFd);
    if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->noSyncHeader = 1; }
    if( iDC & SQLITE_IOCAP_ZERO_DAMAGE ){ pRet->noPadding = 1; }
    *ppWal = pRet;
    WALTRACE(("WAL%d: opened\n", pRet));
  }
  return rc;
}

/*
2776
2777
2778
2779
2780
2781
2782

2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810

2811
2812
2813
2814
2815
2816
2817
      return rc;
    }
    pLast = p;
  }

  /* Sync the log file if the 'isSync' flag was specified. */
  if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){

    i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd);
    i64 iOffset = walFrameOffset(iFrame+1, szPage);

    assert( iSegment>0 );

    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
    while( iOffset<iSegment ){
      void *pData;
#if defined(SQLITE_HAS_CODEC)
      if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM;
#else
      pData = pLast->pData;
#endif
      walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame);
      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
      rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      iOffset += WAL_FRAME_HDRSIZE;
      rc = walWriteToLog(pWal, pData, szPage, iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      nLast++;
      iOffset += szPage;
    }


    rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK);
  }

  if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){
    i64 sz = pWal->mxWalSize;
    if( walFrameOffset(iFrame+nLast+1, szPage)>pWal->mxWalSize ){
      sz = walFrameOffset(iFrame+nLast+1, szPage);







>
|
|
|
|
|
|
|
|

|

|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>







2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
      return rc;
    }
    pLast = p;
  }

  /* Sync the log file if the 'isSync' flag was specified. */
  if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){
    if( !pWal->noPadding ){
      i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd);
      i64 iOffset = walFrameOffset(iFrame+1, szPage);
  
      assert( iSegment>0 );
  
      iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
      while( iOffset<iSegment ){
        void *pData;
#if defined(SQLITE_HAS_CODEC)
        if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM;
#else
        pData = pLast->pData;
#endif
        walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame);
        /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
        rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        iOffset += WAL_FRAME_HDRSIZE;
        rc = walWriteToLog(pWal, pData, szPage, iOffset);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        nLast++;
        iOffset += szPage;
      }
    }
  
    rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK);
  }

  if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){
    i64 sz = pWal->mxWalSize;
    if( walFrameOffset(iFrame+nLast+1, szPage)>pWal->mxWalSize ){
      sz = walFrameOffset(iFrame+nLast+1, szPage);