/ Check-in [416973ed]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add support for F2FS atomic writes. Untested at this point.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | batch-atomic-write
Files: files | file ages | folders
SHA3-256:416973ede3bde8567d1f2699728f72352979e054ef988d1c1e1cfe4290f6f8b8
User & Date: dan 2017-07-20 19:49:14
Context
2017-07-20
21:00
Split SQLITE_ENABLE_ATOMIC_WRITE into two options - the original and SQLITE_ENABLE_BATCH_ATOMIC_WRITE. check-in: 7eb9bf2c user: dan tags: batch-atomic-write
19:49
Add support for F2FS atomic writes. Untested at this point. check-in: 416973ed user: dan tags: batch-atomic-write
15:08
Enhance the built-in date/time functions so that they can be used in CHECK constraints, in the WHERE clause or partial indexes, and index expressions, provided that none of the non-deterministic keywords ("now", "localtime", "utc") are used as arguments. check-in: a90c062d user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Show Whitespace Changes Patch

Changes to src/memjournal.c.

384
385
386
387
388
389
390
391
392
393
394
395

396


397
398
399
400
401
402
403
404
  sqlite3JournalOpen(0, 0, pJfd, 0, -1);
}

#ifdef SQLITE_ENABLE_ATOMIC_WRITE
/*
** If the argument p points to a MemJournal structure that is not an 
** in-memory-only journal file (i.e. is one that was opened with a +ve
** nSpill parameter), and the underlying file has not yet been created, 
** create it now.
*/
int sqlite3JournalCreate(sqlite3_file *p){
  int rc = SQLITE_OK;

  if( p->pMethods==&MemJournalMethods && ((MemJournal*)p)->nSpill>0 ){


    rc = memjrnlCreateFile((MemJournal*)p);
  }
  return rc;
}
#endif

/*
** The file-handle passed as the only argument is open on a journal file.







|
|

|

>
|
>
>
|







384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
  sqlite3JournalOpen(0, 0, pJfd, 0, -1);
}

#ifdef SQLITE_ENABLE_ATOMIC_WRITE
/*
** If the argument p points to a MemJournal structure that is not an 
** in-memory-only journal file (i.e. is one that was opened with a +ve
** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying 
** file has not yet been created, create it now.
*/
int sqlite3JournalCreate(sqlite3_file *pJfd){
  int rc = SQLITE_OK;
  MemJournal *p = (MemJournal*)pJfd;
  if( p->pMethod==&MemJournalMethods 
   && (p->nSpill>0 || (p->flags & SQLITE_OPEN_MAIN_JOURNAL))
  ){
    rc = memjrnlCreateFile(p);
  }
  return rc;
}
#endif

/*
** The file-handle passed as the only argument is open on a journal file.

Changes to src/os_unix.c.

86
87
88
89
90
91
92

93
94
95
96
97
98
99
...
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
...
323
324
325
326
327
328
329







330
331
332
333
334
335
336
...
495
496
497
498
499
500
501



502
503
504
505
506
507
508
....
3773
3774
3775
3776
3777
3778
3779













3780
3781
3782
3783
3784
3785
3786
....
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865

3866
3867
3868
3869
3870
3871
3872

3873
3874


















3875
3876
3877
3878
3879
3880
3881
3882

3883
3884
3885
3886

3887
3888
3889
3890
3891
3892
3893
....
3948
3949
3950
3951
3952
3953
3954
















3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
....
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
....
7594
7595
7596
7597
7598
7599
7600
7601
7602
7603
7604
7605
7606
7607
7608

/*
** standard include files.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <errno.h>
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
# include <sys/mman.h>
#endif
................................................................................
#if SQLITE_MAX_MMAP_SIZE>0
  int nFetchOut;                      /* Number of outstanding xFetch refs */
  sqlite3_int64 mmapSize;             /* Usable size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeActual;       /* Actual size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeMax;          /* Configured FCNTL_MMAP_SIZE value */
  void *pMapRegion;                   /* Memory mapped region */
#endif
#ifdef __QNXNTO__
  int sectorSize;                     /* Device sector size */
  int deviceCharacteristics;          /* Precomputed device characteristics */
#endif
#if SQLITE_ENABLE_LOCKING_STYLE
  int openFlags;                      /* The flags specified at open() */
#endif
#if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__)
  unsigned fsFlags;                   /* cached details from statfs() */
#endif
#if OS_VXWORKS
................................................................................
/*
** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek()
** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined.
*/
#ifdef __ANDROID__
# define lseek lseek64
#endif








/*
** Different Unix systems declare open() in different ways.  Same use
** open(const char*,int,mode_t).  Others use open(const char*,int,...).
** The difference is important when using a pointer to the function.
**
** The safest way to deal with the problem is to always use this wrapper
................................................................................

#if defined(HAVE_LSTAT)
  { "lstat",         (sqlite3_syscall_ptr)lstat,          0 },
#else
  { "lstat",         (sqlite3_syscall_ptr)0,              0 },
#endif
#define osLstat      ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent)




}; /* End of the overrideable system calls */


/*
** On some systems, calls to fchown() will trigger a message in a security
** log if they come from non-root processes.  So avoid calling fchown() if
................................................................................

/*
** Information and control of an open file handle.
*/
static int unixFileControl(sqlite3_file *id, int op, void *pArg){
  unixFile *pFile = (unixFile*)id;
  switch( op ){













    case SQLITE_FCNTL_LOCKSTATE: {
      *(int*)pArg = pFile->eFileLock;
      return SQLITE_OK;
    }
    case SQLITE_FCNTL_LAST_ERRNO: {
      *(int*)pArg = pFile->lastErrno;
      return SQLITE_OK;
................................................................................
    }
#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */
  }
  return SQLITE_NOTFOUND;
}

/*
** Return the sector size in bytes of the underlying block device for
** the specified file. This is almost always 512 bytes, but may be
** larger for some devices.

**
** SQLite code assumes this function cannot fail. It also assumes that
** if two files are created in the same file-system directory (i.e.
** a database and its journal file) that the sector size will be the
** same for both.
*/
#ifndef __QNXNTO__ 

static int unixSectorSize(sqlite3_file *NotUsed){
  UNUSED_PARAMETER(NotUsed);


















  return SQLITE_DEFAULT_SECTOR_SIZE;
}
#endif

/*
** The following version of unixSectorSize() is optimized for QNX.
*/
#ifdef __QNXNTO__

#include <sys/dcmd_blk.h>
#include <sys/statvfs.h>
static int unixSectorSize(sqlite3_file *id){
  unixFile *pFile = (unixFile*)id;

  if( pFile->sectorSize == 0 ){
    struct statvfs fsInfo;
       
    /* Set defaults for non-supported filesystems */
    pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
    pFile->deviceCharacteristics = 0;
    if( fstatvfs(pFile->h, &fsInfo) == -1 ) {
................................................................................
  }
  /* Last chance verification.  If the sector size isn't a multiple of 512
  ** then it isn't valid.*/
  if( pFile->sectorSize % 512 != 0 ){
    pFile->deviceCharacteristics = 0;
    pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
  }
















  return pFile->sectorSize;
}
#endif /* __QNXNTO__ */

/*
** Return the device characteristics for the file.
**
** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default.
** However, that choice is controversial since technically the underlying
** file system does not always provide powersafe overwrites.  (In other
................................................................................
** written might end up being altered.)  However, non-PSOW behavior is very,
** very rare.  And asserting PSOW makes a large reduction in the amount
** of required I/O for journaling, since a lot of padding is eliminated.
**  Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control
** available to turn it off and URI query parameter available to turn it off.
*/
static int unixDeviceCharacteristics(sqlite3_file *id){
  unixFile *p = (unixFile*)id;
  int rc = 0;
#ifdef __QNXNTO__
  if( p->sectorSize==0 ) unixSectorSize(id);
  rc = p->deviceCharacteristics;
#endif
  if( p->ctrlFlags & UNIXFILE_PSOW ){
    rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE;
  }
  return rc;
}

#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0

/*
** Return the system page size.
**
................................................................................
    UNIXVFS("unix-proxy",    proxyIoFinder ),
#endif
  };
  unsigned int i;          /* Loop counter */

  /* Double-check that the aSyscall[] array has been constructed
  ** correctly.  See ticket [bb3a86e890c8e96ab] */
  assert( ArraySize(aSyscall)==28 );

  /* Register all VFSes defined in the aVfs[] array */
  for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
    sqlite3_vfs_register(&aVfs[i], i==0);
  }
  return SQLITE_OK; 
}







>







 







<


<







 







>
>
>
>
>
>
>







 







>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>







 







|
|
|
>

<
|
|
<

|
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
<
|
<
<
<
<
>


<
<
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|

<







 







|
|
<
<
|
<
<
<
<
<







 







|







86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
...
217
218
219
220
221
222
223

224
225

226
227
228
229
230
231
232
...
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
...
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
....
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
....
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889

3890
3891

3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916

3917




3918
3919
3920


3921
3922
3923
3924
3925
3926
3927
3928
....
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007

4008
4009
4010
4011
4012
4013
4014
....
4016
4017
4018
4019
4020
4021
4022
4023
4024


4025





4026
4027
4028
4029
4030
4031
4032
....
7637
7638
7639
7640
7641
7642
7643
7644
7645
7646
7647
7648
7649
7650
7651

/*
** standard include files.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <errno.h>
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
# include <sys/mman.h>
#endif
................................................................................
#if SQLITE_MAX_MMAP_SIZE>0
  int nFetchOut;                      /* Number of outstanding xFetch refs */
  sqlite3_int64 mmapSize;             /* Usable size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeActual;       /* Actual size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeMax;          /* Configured FCNTL_MMAP_SIZE value */
  void *pMapRegion;                   /* Memory mapped region */
#endif

  int sectorSize;                     /* Device sector size */
  int deviceCharacteristics;          /* Precomputed device characteristics */

#if SQLITE_ENABLE_LOCKING_STYLE
  int openFlags;                      /* The flags specified at open() */
#endif
#if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__)
  unsigned fsFlags;                   /* cached details from statfs() */
#endif
#if OS_VXWORKS
................................................................................
/*
** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek()
** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined.
*/
#ifdef __ANDROID__
# define lseek lseek64
#endif

#define F2FS_IOCTL_MAGIC        0xf5
#define F2FS_IOC_START_ATOMIC_WRITE     _IO(F2FS_IOCTL_MAGIC, 1)
#define F2FS_IOC_COMMIT_ATOMIC_WRITE    _IO(F2FS_IOCTL_MAGIC, 2)
#define F2FS_IOC_START_VOLATILE_WRITE   _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_ABORT_VOLATILE_WRITE   _IO(F2FS_IOCTL_MAGIC, 5)


/*
** Different Unix systems declare open() in different ways.  Same use
** open(const char*,int,mode_t).  Others use open(const char*,int,...).
** The difference is important when using a pointer to the function.
**
** The safest way to deal with the problem is to always use this wrapper
................................................................................

#if defined(HAVE_LSTAT)
  { "lstat",         (sqlite3_syscall_ptr)lstat,          0 },
#else
  { "lstat",         (sqlite3_syscall_ptr)0,              0 },
#endif
#define osLstat      ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent)

  { "ioctl",         (sqlite3_syscall_ptr)ioctl,          0 },
#define osIoctl ((int(*)(int,int))aSyscall[28].pCurrent)

}; /* End of the overrideable system calls */


/*
** On some systems, calls to fchown() will trigger a message in a security
** log if they come from non-root processes.  So avoid calling fchown() if
................................................................................

/*
** Information and control of an open file handle.
*/
static int unixFileControl(sqlite3_file *id, int op, void *pArg){
  unixFile *pFile = (unixFile*)id;
  switch( op ){
    case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: {
      int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE);
      return rc ? SQLITE_ERROR : SQLITE_OK;
    }
    case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: {
      int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE);
      return rc ? SQLITE_ERROR : SQLITE_OK;
    }
    case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: {
      int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE);
      return rc ? SQLITE_ERROR : SQLITE_OK;
    }

    case SQLITE_FCNTL_LOCKSTATE: {
      *(int*)pArg = pFile->eFileLock;
      return SQLITE_OK;
    }
    case SQLITE_FCNTL_LAST_ERRNO: {
      *(int*)pArg = pFile->lastErrno;
      return SQLITE_OK;
................................................................................
    }
#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */
  }
  return SQLITE_NOTFOUND;
}

/*
** If pFd->sectorSize is non-zero when this function is called, it is a
** no-op. Otherwise, the values of pFd->sectorSize and 
** pFd->deviceCharacteristics are set according to the file-system 
** characteristics. 
**

** There are two versions of this function. One for QNX and one for all
** other systems.

*/
#ifndef __QNXNTO__
static void setDeviceCharacteristics(unixFile *pFd){
  if( pFd->sectorSize==0 ){
    int res;
    assert( pFd->deviceCharacteristics==0 );

    /* Check for support for F2FS atomic batch writes. */
    res = osIoctl(pFd->h, F2FS_IOC_START_VOLATILE_WRITE);
    if( res==SQLITE_OK ){
      osIoctl(pFd->h, F2FS_IOC_ABORT_VOLATILE_WRITE);
      pFd->deviceCharacteristics = 
        SQLITE_IOCAP_BATCH_ATOMIC |
        SQLITE_IOCAP_ATOMIC |
        SQLITE_IOCAP_SEQUENTIAL |
        SQLITE_IOCAP_SAFE_APPEND;
    }

    /* Set the POWERSAFE_OVERWRITE flag if requested. */
    if( pFd->ctrlFlags & UNIXFILE_PSOW ){
      pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE;
    }

    pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
  }

}




#else
#include <sys/dcmd_blk.h>
#include <sys/statvfs.h>


static void setDeviceCharacteristics(unixFile *pFile){
  if( pFile->sectorSize == 0 ){
    struct statvfs fsInfo;
       
    /* Set defaults for non-supported filesystems */
    pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
    pFile->deviceCharacteristics = 0;
    if( fstatvfs(pFile->h, &fsInfo) == -1 ) {
................................................................................
  }
  /* Last chance verification.  If the sector size isn't a multiple of 512
  ** then it isn't valid.*/
  if( pFile->sectorSize % 512 != 0 ){
    pFile->deviceCharacteristics = 0;
    pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
  }
}
#endif

/*
** Return the sector size in bytes of the underlying block device for
** the specified file. This is almost always 512 bytes, but may be
** larger for some devices.
**
** SQLite code assumes this function cannot fail. It also assumes that
** if two files are created in the same file-system directory (i.e.
** a database and its journal file) that the sector size will be the
** same for both.
*/
static int unixSectorSize(sqlite3_file *id){
  unixFile *pFd = (unixFile*)id;
  setDeviceCharacteristics(pFd);
  return pFd->sectorSize;
}


/*
** Return the device characteristics for the file.
**
** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default.
** However, that choice is controversial since technically the underlying
** file system does not always provide powersafe overwrites.  (In other
................................................................................
** written might end up being altered.)  However, non-PSOW behavior is very,
** very rare.  And asserting PSOW makes a large reduction in the amount
** of required I/O for journaling, since a lot of padding is eliminated.
**  Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control
** available to turn it off and URI query parameter available to turn it off.
*/
static int unixDeviceCharacteristics(sqlite3_file *id){
  unixFile *pFd = (unixFile*)id;
  setDeviceCharacteristics(pFd);


  return pFd->deviceCharacteristics;





}

#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0

/*
** Return the system page size.
**
................................................................................
    UNIXVFS("unix-proxy",    proxyIoFinder ),
#endif
  };
  unsigned int i;          /* Loop counter */

  /* Double-check that the aSyscall[] array has been constructed
  ** correctly.  See ticket [bb3a86e890c8e96ab] */
  assert( ArraySize(aSyscall)==29 );

  /* Register all VFSes defined in the aVfs[] array */
  for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
    sqlite3_vfs_register(&aVfs[i], i==0);
  }
  return SQLITE_OK; 
}

Changes to src/pager.c.

954
955
956
957
958
959
960

961
962
963
964
965
966
967
....
1190
1191
1192
1193
1194
1195
1196





1197
1198
1199
1200
1201
1202
1203
....
2008
2009
2010
2011
2012
2013
2014
2015


2016
2017
2018
2019
2020
2021
2022
....
4563
4564
4565
4566
4567
4568
4569







4570
4571
4572
4573
4574
4575
4576
....
6367
6368
6369
6370
6371
6372
6373







6374
6375
6376
6377
6378
6379
6380
6381
6382
6383
6384
6385
6386
6387
6388
6389
....
6390
6391
6392
6393
6394
6395
6396
6397
6398
6399

6400
6401
6402
6403
6404
6405
6406
....
6416
6417
6418
6419
6420
6421
6422








6423








6424
6425
6426
6427
6428
6429
6430
    case PAGER_WRITER_FINISHED:
      assert( p->eLock==EXCLUSIVE_LOCK );
      assert( pPager->errCode==SQLITE_OK );
      assert( !pagerUseWal(pPager) );
      assert( isOpen(p->jfd) 
           || p->journalMode==PAGER_JOURNALMODE_OFF 
           || p->journalMode==PAGER_JOURNALMODE_WAL 

      );
      break;

    case PAGER_ERROR:
      /* There must be at least one outstanding reference to the pager if
      ** in ERROR state. Otherwise the pager should have already dropped
      ** back to OPEN state.
................................................................................
  if( !pPager->tempFile ){
    int dc;                           /* Device characteristics */
    int nSector;                      /* Sector size */
    int szPage;                       /* Page size */

    assert( isOpen(pPager->fd) );
    dc = sqlite3OsDeviceCharacteristics(pPager->fd);





    nSector = pPager->sectorSize;
    szPage = pPager->pageSize;

    assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
    assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
    if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){
      return 0;
................................................................................
  assert( assert_pager_state(pPager) );
  assert( pPager->eState!=PAGER_ERROR );
  if( pPager->eState<PAGER_WRITER_LOCKED && pPager->eLock<RESERVED_LOCK ){
    return SQLITE_OK;
  }

  releaseAllSavepoints(pPager);
  assert( isOpen(pPager->jfd) || pPager->pInJournal==0 );


  if( isOpen(pPager->jfd) ){
    assert( !pagerUseWal(pPager) );

    /* Finalize the journal file. */
    if( sqlite3JournalIsInMemory(pPager->jfd) ){
      /* assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); */
      sqlite3OsClose(pPager->jfd);
................................................................................
  if( pagerUseWal(pPager) ){
    /* Write a single frame for this page to the log. */
    rc = subjournalPageIfRequired(pPg); 
    if( rc==SQLITE_OK ){
      rc = pagerWalFrames(pPager, pPg, 0, 0);
    }
  }else{







  
    /* Sync the journal file if required. */
    if( pPg->flags&PGHDR_NEED_SYNC 
     || pPager->eState==PAGER_WRITER_CACHEMOD
    ){
      rc = syncJournal(pPager, 1);
    }
................................................................................
      **
      ** Otherwise, if the optimization is both enabled and applicable,
      ** then call pager_incr_changecounter() to update the change-counter
      ** in 'direct' mode. In this case the journal file will never be
      ** created for this transaction.
      */
  #ifdef SQLITE_ENABLE_ATOMIC_WRITE







      PgHdr *pPg;
      assert( isOpen(pPager->jfd) 
           || pPager->journalMode==PAGER_JOURNALMODE_OFF 
           || pPager->journalMode==PAGER_JOURNALMODE_WAL 
      );
      if( !zMaster && isOpen(pPager->jfd) 
       && pPager->journalOff==jrnlBufferSize(pPager) 
       && pPager->dbSize>=pPager->dbOrigSize
       && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
      ){
        /* Update the db file change counter via the direct-write method. The 
        ** following call will modify the in-memory representation of page 1 
        ** to include the updated change counter and then write page 1 
        ** directly to the database file. Because of the atomic-write 
        ** property of the host file-system, this is safe.
        */
................................................................................
        rc = pager_incr_changecounter(pPager, 1);
      }else{
        rc = sqlite3JournalCreate(pPager->jfd);
        if( rc==SQLITE_OK ){
          rc = pager_incr_changecounter(pPager, 0);
        }
      }
  #else
      rc = pager_incr_changecounter(pPager, 0);
  #endif

      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  
      /* Write the master journal name into the journal file. If a master 
      ** journal file name has already been written to the journal file, 
      ** or if zMaster is NULL (no master journal), then this call is a no-op.
      */
      rc = writeMasterJournal(pPager, zMaster);
................................................................................
      ** on a system under memory pressure it is just possible that this is 
      ** not the case. In this case it is likely enough that the redundant
      ** xSync() call will be changed to a no-op by the OS anyhow. 
      */
      rc = syncJournal(pPager, 0);
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  








      rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache));








      if( rc!=SQLITE_OK ){
        assert( rc!=SQLITE_IOERR_BLOCKED );
        goto commit_phase_one_exit;
      }
      sqlite3PcacheCleanAll(pPager->pPCache);

      /* If the file on disk is smaller than the database image, use 







>







 







>
>
>
>
>







 







|
>
>







 







>
>
>
>
>
>
>







 







>
>
>
>
>
>
>








|







 







|
<

>







 







>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>







954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
....
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
....
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
....
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
....
6382
6383
6384
6385
6386
6387
6388
6389
6390
6391
6392
6393
6394
6395
6396
6397
6398
6399
6400
6401
6402
6403
6404
6405
6406
6407
6408
6409
6410
6411
....
6412
6413
6414
6415
6416
6417
6418
6419

6420
6421
6422
6423
6424
6425
6426
6427
6428
....
6438
6439
6440
6441
6442
6443
6444
6445
6446
6447
6448
6449
6450
6451
6452
6453
6454
6455
6456
6457
6458
6459
6460
6461
6462
6463
6464
6465
6466
6467
6468
    case PAGER_WRITER_FINISHED:
      assert( p->eLock==EXCLUSIVE_LOCK );
      assert( pPager->errCode==SQLITE_OK );
      assert( !pagerUseWal(pPager) );
      assert( isOpen(p->jfd) 
           || p->journalMode==PAGER_JOURNALMODE_OFF 
           || p->journalMode==PAGER_JOURNALMODE_WAL 
           || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC)
      );
      break;

    case PAGER_ERROR:
      /* There must be at least one outstanding reference to the pager if
      ** in ERROR state. Otherwise the pager should have already dropped
      ** back to OPEN state.
................................................................................
  if( !pPager->tempFile ){
    int dc;                           /* Device characteristics */
    int nSector;                      /* Sector size */
    int szPage;                       /* Page size */

    assert( isOpen(pPager->fd) );
    dc = sqlite3OsDeviceCharacteristics(pPager->fd);
    /* use in-memory journal */
    if( dc&SQLITE_IOCAP_BATCH_ATOMIC ){
      return -1;
    }

    nSector = pPager->sectorSize;
    szPage = pPager->pageSize;

    assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
    assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
    if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){
      return 0;
................................................................................
  assert( assert_pager_state(pPager) );
  assert( pPager->eState!=PAGER_ERROR );
  if( pPager->eState<PAGER_WRITER_LOCKED && pPager->eLock<RESERVED_LOCK ){
    return SQLITE_OK;
  }

  releaseAllSavepoints(pPager);
  assert( isOpen(pPager->jfd) || pPager->pInJournal==0 
      || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_BATCH_ATOMIC)
  );
  if( isOpen(pPager->jfd) ){
    assert( !pagerUseWal(pPager) );

    /* Finalize the journal file. */
    if( sqlite3JournalIsInMemory(pPager->jfd) ){
      /* assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); */
      sqlite3OsClose(pPager->jfd);
................................................................................
  if( pagerUseWal(pPager) ){
    /* Write a single frame for this page to the log. */
    rc = subjournalPageIfRequired(pPg); 
    if( rc==SQLITE_OK ){
      rc = pagerWalFrames(pPager, pPg, 0, 0);
    }
  }else{
    
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
    if( pPager->tempFile==0 ){
      rc = sqlite3JournalCreate(pPager->jfd);
      if( rc!=SQLITE_OK ) return pager_error(pPager, rc);
    }
#endif
  
    /* Sync the journal file if required. */
    if( pPg->flags&PGHDR_NEED_SYNC 
     || pPager->eState==PAGER_WRITER_CACHEMOD
    ){
      rc = syncJournal(pPager, 1);
    }
................................................................................
      **
      ** Otherwise, if the optimization is both enabled and applicable,
      ** then call pager_incr_changecounter() to update the change-counter
      ** in 'direct' mode. In this case the journal file will never be
      ** created for this transaction.
      */
  #ifdef SQLITE_ENABLE_ATOMIC_WRITE
      sqlite3_file *fd = pPager->fd;
      int bBatch = zMaster==0      /* An SQLITE_IOCAP_BATCH_ATOMIC commit */
        && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC)
        && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY
        && sqlite3JournalIsInMemory(pPager->jfd);

      if( bBatch==0 ){
        PgHdr *pPg;
        assert( isOpen(pPager->jfd) 
            || pPager->journalMode==PAGER_JOURNALMODE_OFF 
            || pPager->journalMode==PAGER_JOURNALMODE_WAL 
            );
        if( !zMaster && isOpen(pPager->jfd) 
         && pPager->journalOff==jrnlBufferSize(pPager) 
         && pPager->dbSize>=pPager->dbOrigSize
         && (!(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
        ){
          /* Update the db file change counter via the direct-write method. The 
          ** following call will modify the in-memory representation of page 1 
          ** to include the updated change counter and then write page 1 
          ** directly to the database file. Because of the atomic-write 
          ** property of the host file-system, this is safe.
          */
................................................................................
          rc = pager_incr_changecounter(pPager, 1);
        }else{
          rc = sqlite3JournalCreate(pPager->jfd);
          if( rc==SQLITE_OK ){
            rc = pager_incr_changecounter(pPager, 0);
          }
        }
      }else

  #endif
      rc = pager_incr_changecounter(pPager, 0);
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  
      /* Write the master journal name into the journal file. If a master 
      ** journal file name has already been written to the journal file, 
      ** or if zMaster is NULL (no master journal), then this call is a no-op.
      */
      rc = writeMasterJournal(pPager, zMaster);
................................................................................
      ** on a system under memory pressure it is just possible that this is 
      ** not the case. In this case it is likely enough that the redundant
      ** xSync() call will be changed to a no-op by the OS anyhow. 
      */
      rc = syncJournal(pPager, 0);
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;

      if( bBatch ){
        /* The pager is now in DBMOD state. But regardless of what happens
        ** next, attempting to play the journal back into the database would
        ** be unsafe. Close it now to make sure that does not happen.  */
        sqlite3OsClose(pPager->jfd);
        rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0);
        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
      }
      rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache));
      if( bBatch ){
        if( rc==SQLITE_OK ){
          rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0);
        }else{
          sqlite3OsFileControl(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0);
        }
      }

      if( rc!=SQLITE_OK ){
        assert( rc!=SQLITE_IOERR_BLOCKED );
        goto commit_phase_one_exit;
      }
      sqlite3PcacheCleanAll(pPager->pPCache);

      /* If the file on disk is smaller than the database image, use 

Changes to src/sqlite.h.in.

591
592
593
594
595
596
597

598
599
600
601
602
603
604
...
725
726
727
728
729
730
731

732
733
734
735
736
737
738
....
1039
1040
1041
1042
1043
1044
1045




1046
1047
1048
1049
1050
1051
1052
#define SQLITE_IOCAP_ATOMIC32K              0x00000080
#define SQLITE_IOCAP_ATOMIC64K              0x00000100
#define SQLITE_IOCAP_SAFE_APPEND            0x00000200
#define SQLITE_IOCAP_SEQUENTIAL             0x00000400
#define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN  0x00000800
#define SQLITE_IOCAP_POWERSAFE_OVERWRITE    0x00001000
#define SQLITE_IOCAP_IMMUTABLE              0x00002000


/*
** CAPI3REF: File Locking Levels
**
** SQLite uses one of these integer values as the second
** argument to calls it makes to the xLock() and xUnlock() methods
** of an [sqlite3_io_methods] object.
................................................................................
** <li> [SQLITE_IOCAP_ATOMIC32K]
** <li> [SQLITE_IOCAP_ATOMIC64K]
** <li> [SQLITE_IOCAP_SAFE_APPEND]
** <li> [SQLITE_IOCAP_SEQUENTIAL]
** <li> [SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN]
** <li> [SQLITE_IOCAP_POWERSAFE_OVERWRITE]
** <li> [SQLITE_IOCAP_IMMUTABLE]

** </ul>
**
** The SQLITE_IOCAP_ATOMIC property means that all writes of
** any size are atomic.  The SQLITE_IOCAP_ATOMICnnn values
** mean that writes of blocks that are nnn bytes in size and
** are aligned to an address which is an integer multiple of
** nnn are atomic.  The SQLITE_IOCAP_SAFE_APPEND value means
................................................................................
#define SQLITE_FCNTL_WAL_BLOCK              24
#define SQLITE_FCNTL_ZIPVFS                 25
#define SQLITE_FCNTL_RBU                    26
#define SQLITE_FCNTL_VFS_POINTER            27
#define SQLITE_FCNTL_JOURNAL_POINTER        28
#define SQLITE_FCNTL_WIN32_GET_HANDLE       29
#define SQLITE_FCNTL_PDB                    30





/* deprecated names */
#define SQLITE_GET_LOCKPROXYFILE      SQLITE_FCNTL_GET_LOCKPROXYFILE
#define SQLITE_SET_LOCKPROXYFILE      SQLITE_FCNTL_SET_LOCKPROXYFILE
#define SQLITE_LAST_ERRNO             SQLITE_FCNTL_LAST_ERRNO









>







 







>







 







>
>
>
>







591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
...
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
....
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
#define SQLITE_IOCAP_ATOMIC32K              0x00000080
#define SQLITE_IOCAP_ATOMIC64K              0x00000100
#define SQLITE_IOCAP_SAFE_APPEND            0x00000200
#define SQLITE_IOCAP_SEQUENTIAL             0x00000400
#define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN  0x00000800
#define SQLITE_IOCAP_POWERSAFE_OVERWRITE    0x00001000
#define SQLITE_IOCAP_IMMUTABLE              0x00002000
#define SQLITE_IOCAP_BATCH_ATOMIC           0x00004000

/*
** CAPI3REF: File Locking Levels
**
** SQLite uses one of these integer values as the second
** argument to calls it makes to the xLock() and xUnlock() methods
** of an [sqlite3_io_methods] object.
................................................................................
** <li> [SQLITE_IOCAP_ATOMIC32K]
** <li> [SQLITE_IOCAP_ATOMIC64K]
** <li> [SQLITE_IOCAP_SAFE_APPEND]
** <li> [SQLITE_IOCAP_SEQUENTIAL]
** <li> [SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN]
** <li> [SQLITE_IOCAP_POWERSAFE_OVERWRITE]
** <li> [SQLITE_IOCAP_IMMUTABLE]
** <li> [SQLITE_IOCAP_BATCH_ATOMIC]
** </ul>
**
** The SQLITE_IOCAP_ATOMIC property means that all writes of
** any size are atomic.  The SQLITE_IOCAP_ATOMICnnn values
** mean that writes of blocks that are nnn bytes in size and
** are aligned to an address which is an integer multiple of
** nnn are atomic.  The SQLITE_IOCAP_SAFE_APPEND value means
................................................................................
#define SQLITE_FCNTL_WAL_BLOCK              24
#define SQLITE_FCNTL_ZIPVFS                 25
#define SQLITE_FCNTL_RBU                    26
#define SQLITE_FCNTL_VFS_POINTER            27
#define SQLITE_FCNTL_JOURNAL_POINTER        28
#define SQLITE_FCNTL_WIN32_GET_HANDLE       29
#define SQLITE_FCNTL_PDB                    30

#define SQLITE_FCNTL_BEGIN_ATOMIC_WRITE     31
#define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE    32
#define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE  33

/* deprecated names */
#define SQLITE_GET_LOCKPROXYFILE      SQLITE_FCNTL_GET_LOCKPROXYFILE
#define SQLITE_SET_LOCKPROXYFILE      SQLITE_FCNTL_SET_LOCKPROXYFILE
#define SQLITE_LAST_ERRNO             SQLITE_FCNTL_LAST_ERRNO


Changes to test/syscall.test.

57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Tests for the xNextSystemCall method.
#
foreach s {
    open close access getcwd stat fstat ftruncate
    fcntl read pread write pwrite fchmod fallocate
    pread64 pwrite64 unlink openDirectory mkdir rmdir 
    statvfs fchown geteuid umask mmap munmap mremap
    getpagesize readlink lstat
} {
  if {[test_syscall exists $s]} {lappend syscall_list $s}
}
do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list]

#-------------------------------------------------------------------------
# This test verifies that if a call to open() fails and errno is set to







|







57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Tests for the xNextSystemCall method.
#
foreach s {
    open close access getcwd stat fstat ftruncate
    fcntl read pread write pwrite fchmod fallocate
    pread64 pwrite64 unlink openDirectory mkdir rmdir 
    statvfs fchown geteuid umask mmap munmap mremap
    getpagesize readlink lstat ioctl
} {
  if {[test_syscall exists $s]} {lappend syscall_list $s}
}
do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list]

#-------------------------------------------------------------------------
# This test verifies that if a call to open() fails and errno is set to