/ Check-in [416973ed]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add support for F2FS atomic writes. Untested at this point.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | batch-atomic-write
Files: files | file ages | folders
SHA3-256:416973ede3bde8567d1f2699728f72352979e054ef988d1c1e1cfe4290f6f8b8
User & Date: dan 2017-07-20 19:49:14
Context
2017-07-20
21:00
Split SQLITE_ENABLE_ATOMIC_WRITE into two options - the original and SQLITE_ENABLE_BATCH_ATOMIC_WRITE. check-in: 7eb9bf2c user: dan tags: batch-atomic-write
19:49
Add support for F2FS atomic writes. Untested at this point. check-in: 416973ed user: dan tags: batch-atomic-write
15:08
Enhance the built-in date/time functions so that they can be used in CHECK constraints, in the WHERE clause or partial indexes, and index expressions, provided that none of the non-deterministic keywords ("now", "localtime", "utc") are used as arguments. check-in: a90c062d user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/memjournal.c.

   384    384     sqlite3JournalOpen(0, 0, pJfd, 0, -1);
   385    385   }
   386    386   
   387    387   #ifdef SQLITE_ENABLE_ATOMIC_WRITE
   388    388   /*
   389    389   ** If the argument p points to a MemJournal structure that is not an 
   390    390   ** in-memory-only journal file (i.e. is one that was opened with a +ve
   391         -** nSpill parameter), and the underlying file has not yet been created, 
   392         -** create it now.
          391  +** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying 
          392  +** file has not yet been created, create it now.
   393    393   */
   394         -int sqlite3JournalCreate(sqlite3_file *p){
          394  +int sqlite3JournalCreate(sqlite3_file *pJfd){
   395    395     int rc = SQLITE_OK;
   396         -  if( p->pMethods==&MemJournalMethods && ((MemJournal*)p)->nSpill>0 ){
   397         -    rc = memjrnlCreateFile((MemJournal*)p);
          396  +  MemJournal *p = (MemJournal*)pJfd;
          397  +  if( p->pMethod==&MemJournalMethods 
          398  +   && (p->nSpill>0 || (p->flags & SQLITE_OPEN_MAIN_JOURNAL))
          399  +  ){
          400  +    rc = memjrnlCreateFile(p);
   398    401     }
   399    402     return rc;
   400    403   }
   401    404   #endif
   402    405   
   403    406   /*
   404    407   ** The file-handle passed as the only argument is open on a journal file.

Changes to src/os_unix.c.

    86     86   
    87     87   /*
    88     88   ** standard include files.
    89     89   */
    90     90   #include <sys/types.h>
    91     91   #include <sys/stat.h>
    92     92   #include <fcntl.h>
           93  +#include <sys/ioctl.h>
    93     94   #include <unistd.h>
    94     95   #include <time.h>
    95     96   #include <sys/time.h>
    96     97   #include <errno.h>
    97     98   #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
    98     99   # include <sys/mman.h>
    99    100   #endif
................................................................................
   216    217   #if SQLITE_MAX_MMAP_SIZE>0
   217    218     int nFetchOut;                      /* Number of outstanding xFetch refs */
   218    219     sqlite3_int64 mmapSize;             /* Usable size of mapping at pMapRegion */
   219    220     sqlite3_int64 mmapSizeActual;       /* Actual size of mapping at pMapRegion */
   220    221     sqlite3_int64 mmapSizeMax;          /* Configured FCNTL_MMAP_SIZE value */
   221    222     void *pMapRegion;                   /* Memory mapped region */
   222    223   #endif
   223         -#ifdef __QNXNTO__
   224    224     int sectorSize;                     /* Device sector size */
   225    225     int deviceCharacteristics;          /* Precomputed device characteristics */
   226         -#endif
   227    226   #if SQLITE_ENABLE_LOCKING_STYLE
   228    227     int openFlags;                      /* The flags specified at open() */
   229    228   #endif
   230    229   #if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__)
   231    230     unsigned fsFlags;                   /* cached details from statfs() */
   232    231   #endif
   233    232   #if OS_VXWORKS
................................................................................
   323    322   /*
   324    323   ** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek()
   325    324   ** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined.
   326    325   */
   327    326   #ifdef __ANDROID__
   328    327   # define lseek lseek64
   329    328   #endif
          329  +
          330  +#define F2FS_IOCTL_MAGIC        0xf5
          331  +#define F2FS_IOC_START_ATOMIC_WRITE     _IO(F2FS_IOCTL_MAGIC, 1)
          332  +#define F2FS_IOC_COMMIT_ATOMIC_WRITE    _IO(F2FS_IOCTL_MAGIC, 2)
          333  +#define F2FS_IOC_START_VOLATILE_WRITE   _IO(F2FS_IOCTL_MAGIC, 3)
          334  +#define F2FS_IOC_ABORT_VOLATILE_WRITE   _IO(F2FS_IOCTL_MAGIC, 5)
          335  +
   330    336   
   331    337   /*
   332    338   ** Different Unix systems declare open() in different ways.  Some use
   333    339   ** open(const char*,int,mode_t).  Others use open(const char*,int,...).
   334    340   ** The difference is important when using a pointer to the function.
   335    341   **
   336    342   ** The safest way to deal with the problem is to always use this wrapper
................................................................................
   495    501   
   496    502   #if defined(HAVE_LSTAT)
   497    503     { "lstat",         (sqlite3_syscall_ptr)lstat,          0 },
   498    504   #else
   499    505     { "lstat",         (sqlite3_syscall_ptr)0,              0 },
   500    506   #endif
   501    507   #define osLstat      ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent)
          508  +
          509  +  { "ioctl",         (sqlite3_syscall_ptr)ioctl,          0 },
          510  +#define osIoctl ((int(*)(int,int))aSyscall[28].pCurrent)
   502    511   
   503    512   }; /* End of the overrideable system calls */
   504    513   
   505    514   
   506    515   /*
   507    516   ** On some systems, calls to fchown() will trigger a message in a security
   508    517   ** log if they come from non-root processes.  So avoid calling fchown() if
................................................................................
  3773   3782   
  3774   3783   /*
  3775   3784   ** Information and control of an open file handle.
  3776   3785   */
  3777   3786   static int unixFileControl(sqlite3_file *id, int op, void *pArg){
  3778   3787     unixFile *pFile = (unixFile*)id;
  3779   3788     switch( op ){
         3789  +    case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: {
         3790  +      int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE);
         3791  +      return rc ? SQLITE_ERROR : SQLITE_OK;
         3792  +    }
         3793  +    case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: {
         3794  +      int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE);
         3795  +      return rc ? SQLITE_ERROR : SQLITE_OK;
         3796  +    }
         3797  +    case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: {
         3798  +      int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE);
         3799  +      return rc ? SQLITE_ERROR : SQLITE_OK;
         3800  +    }
         3801  +
  3780   3802       case SQLITE_FCNTL_LOCKSTATE: {
  3781   3803         *(int*)pArg = pFile->eFileLock;
  3782   3804         return SQLITE_OK;
  3783   3805       }
  3784   3806       case SQLITE_FCNTL_LAST_ERRNO: {
  3785   3807         *(int*)pArg = pFile->lastErrno;
  3786   3808         return SQLITE_OK;
................................................................................
  3856   3878       }
  3857   3879   #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */
  3858   3880     }
  3859   3881     return SQLITE_NOTFOUND;
  3860   3882   }
  3861   3883   
  3862   3884   /*
  3863         -** Return the sector size in bytes of the underlying block device for
  3864         -** the specified file. This is almost always 512 bytes, but may be
  3865         -** larger for some devices.
  3866         -**
  3867         -** SQLite code assumes this function cannot fail. It also assumes that
  3868         -** if two files are created in the same file-system directory (i.e.
  3869         -** a database and its journal file) that the sector size will be the
  3870         -** same for both.
  3871         -*/
  3872         -#ifndef __QNXNTO__ 
  3873         -static int unixSectorSize(sqlite3_file *NotUsed){
  3874         -  UNUSED_PARAMETER(NotUsed);
  3875         -  return SQLITE_DEFAULT_SECTOR_SIZE;
  3876         -}
  3877         -#endif
  3878         -
  3879         -/*
  3880         -** The following version of unixSectorSize() is optimized for QNX.
  3881         -*/
  3882         -#ifdef __QNXNTO__
         3885  +** If pFd->sectorSize is non-zero when this function is called, it is a
         3886  +** no-op. Otherwise, the values of pFd->sectorSize and 
         3887  +** pFd->deviceCharacteristics are set according to the file-system 
         3888  +** characteristics. 
         3889  +**
         3890  +** There are two versions of this function. One for QNX and one for all
         3891  +** other systems.
         3892  +*/
         3893  +#ifndef __QNXNTO__
         3894  +static void setDeviceCharacteristics(unixFile *pFd){
         3895  +  if( pFd->sectorSize==0 ){
         3896  +    int res;
         3897  +    assert( pFd->deviceCharacteristics==0 );
         3898  +
         3899  +    /* Check for support for F2FS atomic batch writes. */
         3900  +    res = osIoctl(pFd->h, F2FS_IOC_START_VOLATILE_WRITE);
         3901  +    if( res==SQLITE_OK ){
         3902  +      osIoctl(pFd->h, F2FS_IOC_ABORT_VOLATILE_WRITE);
         3903  +      pFd->deviceCharacteristics = 
         3904  +        SQLITE_IOCAP_BATCH_ATOMIC |
         3905  +        SQLITE_IOCAP_ATOMIC |
         3906  +        SQLITE_IOCAP_SEQUENTIAL |
         3907  +        SQLITE_IOCAP_SAFE_APPEND;
         3908  +    }
         3909  +
         3910  +    /* Set the POWERSAFE_OVERWRITE flag if requested. */
         3911  +    if( pFd->ctrlFlags & UNIXFILE_PSOW ){
         3912  +      pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE;
         3913  +    }
         3914  +
         3915  +    pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
         3916  +  }
         3917  +}
         3918  +#else
  3883   3919   #include <sys/dcmd_blk.h>
  3884   3920   #include <sys/statvfs.h>
  3885         -static int unixSectorSize(sqlite3_file *id){
  3886         -  unixFile *pFile = (unixFile*)id;
         3921  +static void setDeviceCharacteristics(unixFile *pFile){
  3887   3922     if( pFile->sectorSize == 0 ){
  3888   3923       struct statvfs fsInfo;
  3889   3924          
  3890   3925       /* Set defaults for non-supported filesystems */
  3891   3926       pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
  3892   3927       pFile->deviceCharacteristics = 0;
  3893   3928       if( fstatvfs(pFile->h, &fsInfo) == -1 ) {
................................................................................
  3948   3983     }
  3949   3984     /* Last chance verification.  If the sector size isn't a multiple of 512
  3950   3985     ** then it isn't valid.*/
  3951   3986     if( pFile->sectorSize % 512 != 0 ){
  3952   3987       pFile->deviceCharacteristics = 0;
  3953   3988       pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
  3954   3989     }
  3955         -  return pFile->sectorSize;
  3956   3990   }
  3957         -#endif /* __QNXNTO__ */
         3991  +#endif
         3992  +
         3993  +/*
         3994  +** Return the sector size in bytes of the underlying block device for
         3995  +** the specified file. This is almost always 512 bytes, but may be
         3996  +** larger for some devices.
         3997  +**
         3998  +** SQLite code assumes this function cannot fail. It also assumes that
         3999  +** if two files are created in the same file-system directory (i.e.
         4000  +** a database and its journal file) that the sector size will be the
         4001  +** same for both.
         4002  +*/
         4003  +static int unixSectorSize(sqlite3_file *id){
         4004  +  unixFile *pFd = (unixFile*)id;
         4005  +  setDeviceCharacteristics(pFd);
         4006  +  return pFd->sectorSize;
         4007  +}
  3958   4008   
  3959   4009   /*
  3960   4010   ** Return the device characteristics for the file.
  3961   4011   **
  3962   4012   ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default.
  3963   4013   ** However, that choice is controversial since technically the underlying
  3964   4014   ** file system does not always provide powersafe overwrites.  (In other
................................................................................
  3966   4016   ** written might end up being altered.)  However, non-PSOW behavior is very,
  3967   4017   ** very rare.  And asserting PSOW makes a large reduction in the amount
  3968   4018   ** of required I/O for journaling, since a lot of padding is eliminated.
  3969   4019   ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control
  3970   4020   ** and a URI query parameter available to turn it off.
  3971   4021   */
  3972   4022   static int unixDeviceCharacteristics(sqlite3_file *id){
  3973         -  unixFile *p = (unixFile*)id;
  3974         -  int rc = 0;
  3975         -#ifdef __QNXNTO__
  3976         -  if( p->sectorSize==0 ) unixSectorSize(id);
  3977         -  rc = p->deviceCharacteristics;
  3978         -#endif
  3979         -  if( p->ctrlFlags & UNIXFILE_PSOW ){
  3980         -    rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE;
  3981         -  }
  3982         -  return rc;
         4023  +  unixFile *pFd = (unixFile*)id;
         4024  +  setDeviceCharacteristics(pFd);
         4025  +  return pFd->deviceCharacteristics;
  3983   4026   }
  3984   4027   
  3985   4028   #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
  3986   4029   
  3987   4030   /*
  3988   4031   ** Return the system page size.
  3989   4032   **
................................................................................
  7594   7637       UNIXVFS("unix-proxy",    proxyIoFinder ),
  7595   7638   #endif
  7596   7639     };
  7597   7640     unsigned int i;          /* Loop counter */
  7598   7641   
  7599   7642     /* Double-check that the aSyscall[] array has been constructed
  7600   7643     ** correctly.  See ticket [bb3a86e890c8e96ab] */
  7601         -  assert( ArraySize(aSyscall)==28 );
         7644  +  assert( ArraySize(aSyscall)==29 );
  7602   7645   
  7603   7646     /* Register all VFSes defined in the aVfs[] array */
  7604   7647     for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
  7605   7648       sqlite3_vfs_register(&aVfs[i], i==0);
  7606   7649     }
  7607   7650     return SQLITE_OK; 
  7608   7651   }

Changes to src/pager.c.

   954    954       case PAGER_WRITER_FINISHED:
   955    955         assert( p->eLock==EXCLUSIVE_LOCK );
   956    956         assert( pPager->errCode==SQLITE_OK );
   957    957         assert( !pagerUseWal(pPager) );
   958    958         assert( isOpen(p->jfd) 
   959    959              || p->journalMode==PAGER_JOURNALMODE_OFF 
   960    960              || p->journalMode==PAGER_JOURNALMODE_WAL 
          961  +           || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC)
   961    962         );
   962    963         break;
   963    964   
   964    965       case PAGER_ERROR:
   965    966         /* There must be at least one outstanding reference to the pager if
   966    967         ** in ERROR state. Otherwise the pager should have already dropped
   967    968         ** back to OPEN state.
................................................................................
  1190   1191     if( !pPager->tempFile ){
  1191   1192       int dc;                           /* Device characteristics */
  1192   1193       int nSector;                      /* Sector size */
  1193   1194       int szPage;                       /* Page size */
  1194   1195   
  1195   1196       assert( isOpen(pPager->fd) );
  1196   1197       dc = sqlite3OsDeviceCharacteristics(pPager->fd);
         1198  +    /* use in-memory journal */
         1199  +    if( dc&SQLITE_IOCAP_BATCH_ATOMIC ){
         1200  +      return -1;
         1201  +    }
         1202  +
  1197   1203       nSector = pPager->sectorSize;
  1198   1204       szPage = pPager->pageSize;
  1199   1205   
  1200   1206       assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
  1201   1207       assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
  1202   1208       if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){
  1203   1209         return 0;
................................................................................
  2008   2014     assert( assert_pager_state(pPager) );
  2009   2015     assert( pPager->eState!=PAGER_ERROR );
  2010   2016     if( pPager->eState<PAGER_WRITER_LOCKED && pPager->eLock<RESERVED_LOCK ){
  2011   2017       return SQLITE_OK;
  2012   2018     }
  2013   2019   
  2014   2020     releaseAllSavepoints(pPager);
  2015         -  assert( isOpen(pPager->jfd) || pPager->pInJournal==0 );
         2021  +  assert( isOpen(pPager->jfd) || pPager->pInJournal==0 
         2022  +      || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_BATCH_ATOMIC)
         2023  +  );
  2016   2024     if( isOpen(pPager->jfd) ){
  2017   2025       assert( !pagerUseWal(pPager) );
  2018   2026   
  2019   2027       /* Finalize the journal file. */
  2020   2028       if( sqlite3JournalIsInMemory(pPager->jfd) ){
  2021   2029         /* assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); */
  2022   2030         sqlite3OsClose(pPager->jfd);
................................................................................
  4563   4571     if( pagerUseWal(pPager) ){
  4564   4572       /* Write a single frame for this page to the log. */
  4565   4573       rc = subjournalPageIfRequired(pPg); 
  4566   4574       if( rc==SQLITE_OK ){
  4567   4575         rc = pagerWalFrames(pPager, pPg, 0, 0);
  4568   4576       }
  4569   4577     }else{
         4578  +    
         4579  +#ifdef SQLITE_ENABLE_ATOMIC_WRITE
         4580  +    if( pPager->tempFile==0 ){
         4581  +      rc = sqlite3JournalCreate(pPager->jfd);
         4582  +      if( rc!=SQLITE_OK ) return pager_error(pPager, rc);
         4583  +    }
         4584  +#endif
  4570   4585     
  4571   4586       /* Sync the journal file if required. */
  4572   4587       if( pPg->flags&PGHDR_NEED_SYNC 
  4573   4588        || pPager->eState==PAGER_WRITER_CACHEMOD
  4574   4589       ){
  4575   4590         rc = syncJournal(pPager, 1);
  4576   4591       }
................................................................................
  6367   6382         **
  6368   6383         ** Otherwise, if the optimization is both enabled and applicable,
  6369   6384         ** then call pager_incr_changecounter() to update the change-counter
  6370   6385         ** in 'direct' mode. In this case the journal file will never be
  6371   6386         ** created for this transaction.
  6372   6387         */
  6373   6388     #ifdef SQLITE_ENABLE_ATOMIC_WRITE
  6374         -      PgHdr *pPg;
  6375         -      assert( isOpen(pPager->jfd) 
  6376         -           || pPager->journalMode==PAGER_JOURNALMODE_OFF 
  6377         -           || pPager->journalMode==PAGER_JOURNALMODE_WAL 
  6378         -      );
  6379         -      if( !zMaster && isOpen(pPager->jfd) 
  6380         -       && pPager->journalOff==jrnlBufferSize(pPager) 
  6381         -       && pPager->dbSize>=pPager->dbOrigSize
  6382         -       && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
  6383         -      ){
  6384         -        /* Update the db file change counter via the direct-write method. The 
  6385         -        ** following call will modify the in-memory representation of page 1 
  6386         -        ** to include the updated change counter and then write page 1 
  6387         -        ** directly to the database file. Because of the atomic-write 
  6388         -        ** property of the host file-system, this is safe.
  6389         -        */
  6390         -        rc = pager_incr_changecounter(pPager, 1);
  6391         -      }else{
  6392         -        rc = sqlite3JournalCreate(pPager->jfd);
  6393         -        if( rc==SQLITE_OK ){
  6394         -          rc = pager_incr_changecounter(pPager, 0);
         6389  +      sqlite3_file *fd = pPager->fd;
         6390  +      int bBatch = zMaster==0      /* An SQLITE_IOCAP_BATCH_ATOMIC commit */
         6391  +        && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC)
         6392  +        && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY
         6393  +        && sqlite3JournalIsInMemory(pPager->jfd);
         6394  +
         6395  +      if( bBatch==0 ){
         6396  +        PgHdr *pPg;
         6397  +        assert( isOpen(pPager->jfd) 
         6398  +            || pPager->journalMode==PAGER_JOURNALMODE_OFF 
         6399  +            || pPager->journalMode==PAGER_JOURNALMODE_WAL 
         6400  +            );
         6401  +        if( !zMaster && isOpen(pPager->jfd) 
         6402  +         && pPager->journalOff==jrnlBufferSize(pPager) 
         6403  +         && pPager->dbSize>=pPager->dbOrigSize
         6404  +         && (!(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
         6405  +        ){
         6406  +          /* Update the db file change counter via the direct-write method. The 
         6407  +          ** following call will modify the in-memory representation of page 1 
         6408  +          ** to include the updated change counter and then write page 1 
         6409  +          ** directly to the database file. Because of the atomic-write 
         6410  +          ** property of the host file-system, this is safe.
         6411  +          */
         6412  +          rc = pager_incr_changecounter(pPager, 1);
         6413  +        }else{
         6414  +          rc = sqlite3JournalCreate(pPager->jfd);
         6415  +          if( rc==SQLITE_OK ){
         6416  +            rc = pager_incr_changecounter(pPager, 0);
         6417  +          }
  6395   6418           }
  6396         -      }
  6397         -  #else
  6398         -      rc = pager_incr_changecounter(pPager, 0);
         6419  +      }else
  6399   6420     #endif
         6421  +      rc = pager_incr_changecounter(pPager, 0);
  6400   6422         if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  6401   6423     
  6402   6424         /* Write the master journal name into the journal file. If a master 
  6403   6425         ** journal file name has already been written to the journal file, 
  6404   6426         ** or if zMaster is NULL (no master journal), then this call is a no-op.
  6405   6427         */
  6406   6428         rc = writeMasterJournal(pPager, zMaster);
................................................................................
  6415   6437         ** journal requires a sync here. However, in locking_mode=exclusive
  6416   6438         ** on a system under memory pressure it is just possible that this is 
  6417   6439         ** not the case. In this case it is likely enough that the redundant
  6418   6440         ** xSync() call will be changed to a no-op by the OS anyhow. 
  6419   6441         */
  6420   6442         rc = syncJournal(pPager, 0);
  6421   6443         if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  6422         -  
         6444  +
         6445  +      if( bBatch ){
         6446  +        /* The pager is now in DBMOD state. But regardless of what happens
         6447  +        ** next, attempting to play the journal back into the database would
         6448  +        ** be unsafe. Close it now to make sure that does not happen.  */
         6449  +        sqlite3OsClose(pPager->jfd);
         6450  +        rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0);
         6451  +        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
         6452  +      }
  6423   6453         rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache));
         6454  +      if( bBatch ){
         6455  +        if( rc==SQLITE_OK ){
         6456  +          rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0);
         6457  +        }else{
         6458  +          sqlite3OsFileControl(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0);
         6459  +        }
         6460  +      }
         6461  +
  6424   6462         if( rc!=SQLITE_OK ){
  6425   6463           assert( rc!=SQLITE_IOERR_BLOCKED );
  6426   6464           goto commit_phase_one_exit;
  6427   6465         }
  6428   6466         sqlite3PcacheCleanAll(pPager->pPCache);
  6429   6467   
  6430   6468         /* If the file on disk is smaller than the database image, use 

Changes to src/sqlite.h.in.

   591    591   #define SQLITE_IOCAP_ATOMIC32K              0x00000080
   592    592   #define SQLITE_IOCAP_ATOMIC64K              0x00000100
   593    593   #define SQLITE_IOCAP_SAFE_APPEND            0x00000200
   594    594   #define SQLITE_IOCAP_SEQUENTIAL             0x00000400
   595    595   #define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN  0x00000800
   596    596   #define SQLITE_IOCAP_POWERSAFE_OVERWRITE    0x00001000
   597    597   #define SQLITE_IOCAP_IMMUTABLE              0x00002000
          598  +#define SQLITE_IOCAP_BATCH_ATOMIC           0x00004000
   598    599   
   599    600   /*
   600    601   ** CAPI3REF: File Locking Levels
   601    602   **
   602    603   ** SQLite uses one of these integer values as the second
   603    604   ** argument to calls it makes to the xLock() and xUnlock() methods
   604    605   ** of an [sqlite3_io_methods] object.
................................................................................
   725    726   ** <li> [SQLITE_IOCAP_ATOMIC32K]
   726    727   ** <li> [SQLITE_IOCAP_ATOMIC64K]
   727    728   ** <li> [SQLITE_IOCAP_SAFE_APPEND]
   728    729   ** <li> [SQLITE_IOCAP_SEQUENTIAL]
   729    730   ** <li> [SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN]
   730    731   ** <li> [SQLITE_IOCAP_POWERSAFE_OVERWRITE]
   731    732   ** <li> [SQLITE_IOCAP_IMMUTABLE]
          733  +** <li> [SQLITE_IOCAP_BATCH_ATOMIC]
   732    734   ** </ul>
   733    735   **
   734    736   ** The SQLITE_IOCAP_ATOMIC property means that all writes of
   735    737   ** any size are atomic.  The SQLITE_IOCAP_ATOMICnnn values
   736    738   ** mean that writes of blocks that are nnn bytes in size and
   737    739   ** are aligned to an address which is an integer multiple of
   738    740   ** nnn are atomic.  The SQLITE_IOCAP_SAFE_APPEND value means
................................................................................
  1039   1041   #define SQLITE_FCNTL_WAL_BLOCK              24
  1040   1042   #define SQLITE_FCNTL_ZIPVFS                 25
  1041   1043   #define SQLITE_FCNTL_RBU                    26
  1042   1044   #define SQLITE_FCNTL_VFS_POINTER            27
  1043   1045   #define SQLITE_FCNTL_JOURNAL_POINTER        28
  1044   1046   #define SQLITE_FCNTL_WIN32_GET_HANDLE       29
  1045   1047   #define SQLITE_FCNTL_PDB                    30
         1048  +
         1049  +#define SQLITE_FCNTL_BEGIN_ATOMIC_WRITE     31
         1050  +#define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE    32
         1051  +#define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE  33
  1046   1052   
  1047   1053   /* deprecated names */
  1048   1054   #define SQLITE_GET_LOCKPROXYFILE      SQLITE_FCNTL_GET_LOCKPROXYFILE
  1049   1055   #define SQLITE_SET_LOCKPROXYFILE      SQLITE_FCNTL_SET_LOCKPROXYFILE
  1050   1056   #define SQLITE_LAST_ERRNO             SQLITE_FCNTL_LAST_ERRNO
  1051   1057   
  1052   1058   

Changes to test/syscall.test.

    57     57   # Tests for the xNextSystemCall method.
    58     58   #
    59     59   foreach s {
    60     60       open close access getcwd stat fstat ftruncate
    61     61       fcntl read pread write pwrite fchmod fallocate
    62     62       pread64 pwrite64 unlink openDirectory mkdir rmdir 
    63     63       statvfs fchown geteuid umask mmap munmap mremap
    64         -    getpagesize readlink lstat
           64  +    getpagesize readlink lstat ioctl
    65     65   } {
    66     66     if {[test_syscall exists $s]} {lappend syscall_list $s}
    67     67   }
    68     68   do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list]
    69     69   
    70     70   #-------------------------------------------------------------------------
    71     71   # This test verifies that if a call to open() fails and errno is set to