Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -120,10 +120,14 @@ #include #include #ifndef SQLITE_OMIT_WAL #include #endif +#ifndef MISSING_STATVFS +#include <sys/statvfs.h> +#endif + #if SQLITE_ENABLE_LOCKING_STYLE # include # if OS_VXWORKS # include @@ -209,10 +213,11 @@ sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ unixInodeInfo *pInode; /* Info about locks on this inode */ int h; /* The file descriptor */ unsigned char eFileLock; /* The type of lock held on this fd */ unsigned char ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ + unsigned char szSector; /* Sectorsize/512 */ int lastErrno; /* The unix errno from last I/O error */ void *lockingContext; /* Locking style specific state */ UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ const char *zPath; /* Name of the file */ unixShm *pShm; /* Shared memory segment information */ @@ -256,10 +261,11 @@ #ifndef SQLITE_DISABLE_DIRSYNC # define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ #else # define UNIXFILE_DIRSYNC 0x00 #endif +#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ /* ** Include code that is common to all os_*.c files */ #include "os_common.h" @@ -411,10 +417,18 @@ { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, #define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, #define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) + +#if defined(MISSING_STATVFS) + { "statvfs", (sqlite3_syscall_ptr)0, 0 }, +#define osStatvfs ((int(*)(const char*,void*))aSyscall[20].pCurrent) +#else + { "statvfs", (sqlite3_syscall_ptr)statvfs, 0 }, +#define osStatvfs ((int(*)(const char*,struct statvfs*))aSyscall[20].pCurrent) +#endif }; /* End of the overrideable system calls */ /* ** This is the xSetSystemCall() method of sqlite3_vfs for all of the @@ -3495,10 +3509,26 @@ } } return SQLITE_OK; } + +/* +** If *pArg is initially negative then this is a query. 
Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} /* ** Information and control of an open file handle. */ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ @@ -3522,18 +3552,15 @@ rc = fcntlSizeHint(pFile, *(i64 *)pArg); SimulateIOErrorBenign(0); return rc; } case SQLITE_FCNTL_PERSIST_WAL: { - int bPersist = *(int*)pArg; - if( bPersist<0 ){ - *(int*)pArg = (pFile->ctrlFlags & UNIXFILE_PERSIST_WAL)!=0; - }else if( bPersist==0 ){ - pFile->ctrlFlags &= ~UNIXFILE_PERSIST_WAL; - }else{ - pFile->ctrlFlags |= UNIXFILE_PERSIST_WAL; - } + unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); return SQLITE_OK; } case SQLITE_FCNTL_VFSNAME: { *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); return SQLITE_OK; @@ -3570,21 +3597,50 @@ ** SQLite code assumes this function cannot fail. It also assumes that ** if two files are created in the same file-system directory (i.e. ** a database and its journal file) that the sector size will be the ** same for both. 
*/ -static int unixSectorSize(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); - return SQLITE_DEFAULT_SECTOR_SIZE; +static int unixSectorSize(sqlite3_file *pFile){ + unixFile *p = (unixFile*)pFile; + if( p->szSector==0 ){ +#ifdef MISSING_STATVFS + p->szSector = SQLITE_DEFAULT_SECTOR_SIZE/512; +#else + struct statvfs x; + int sz; + memset(&x, 0, sizeof(x)); + osStatvfs(p->zPath, &x); + sz = (int)x.f_frsize; + if( sz<512 || sz>65536 || (sz&(sz-1))!=0 ){ + sz = SQLITE_DEFAULT_SECTOR_SIZE; + } + p->szSector = sz/512; +#endif + } + return p->szSector*512; } /* -** Return the device characteristics for the file. This is always 0 for unix. +** Return the device characteristics for the file. +** +** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. +** However, that choice is controversial since technically the underlying +** file system does not always provide powersafe overwrites. (In other +** words, after a power-loss event, parts of the file that were never +** written might end up being altered.) However, non-PSOW behavior is very, +** very rare. And asserting PSOW makes a large reduction in the amount +** of required I/O for journaling, since a lot of padding is eliminated. +** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control +** available to turn it off and a URI query parameter available to turn it off. 
*/ -static int unixDeviceCharacteristics(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); - return 0; +static int unixDeviceCharacteristics(sqlite3_file *id){ + unixFile *p = (unixFile*)id; + if( p->ctrlFlags & UNIXFILE_PSOW ){ + return SQLITE_IOCAP_POWERSAFE_OVERWRITE; + }else{ + return 0; + } } #ifndef SQLITE_OMIT_WAL @@ -4563,14 +4619,16 @@ OSTRACE(("OPEN %-3d %s\n", h, zFilename)); pNew->h = h; pNew->pVfs = pVfs; pNew->zPath = zFilename; + pNew->ctrlFlags = 0; + if( sqlite3_uri_boolean(zFilename, "psow", SQLITE_POWERSAFE_OVERWRITE) ){ + pNew->ctrlFlags |= UNIXFILE_PSOW; + } if( memcmp(pVfs->zName,"unix-excl",10)==0 ){ - pNew->ctrlFlags = UNIXFILE_EXCL; - }else{ - pNew->ctrlFlags = 0; + pNew->ctrlFlags |= UNIXFILE_EXCL; } if( isReadOnly ){ pNew->ctrlFlags |= UNIXFILE_RDONLY; } if( syncDir ){ @@ -6773,11 +6831,11 @@ }; unsigned int i; /* Loop counter */ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==20 ); + assert( ArraySize(aSyscall)==21 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ sqlite3_vfs_register(&aVfs[i], i==0); } Index: src/os_win.c ================================================================== --- src/os_win.c +++ src/os_win.c @@ -57,11 +57,11 @@ const sqlite3_io_methods *pMethod; /*** Must be first ***/ sqlite3_vfs *pVfs; /* The VFS used to open this file */ HANDLE h; /* Handle for accessing the file */ u8 locktype; /* Type of lock currently held on this file */ short sharedLockByte; /* Randomly chosen byte used as a shared lock */ - u8 bPersistWal; /* True to persist WAL files */ + u8 ctrlFlags; /* Flags. 
See WINFILE_* below */ DWORD lastErrno; /* The Windows errno from the last I/O error */ DWORD sectorSize; /* Sector size of the device file is on */ winShm *pShm; /* Instance of shared memory on this file */ const char *zPath; /* Full pathname of this file */ int szChunk; /* Chunk size configured by FCNTL_CHUNK_SIZE */ @@ -72,10 +72,16 @@ winceLock local; /* Locks obtained by this instance of winFile */ winceLock *shared; /* Global shared lock memory for the file */ #endif }; +/* +** Allowed values for winFile.ctrlFlags +*/ +#define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ +#define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ + /* * If compiled with SQLITE_WIN32_MALLOC on Windows, we will use the * various Win32 API heap functions instead of our own. */ #ifdef SQLITE_WIN32_MALLOC @@ -2122,10 +2128,26 @@ osUnlockFile(pFile->h, PENDING_BYTE, 0, 1, 0); } pFile->locktype = (u8)locktype; return rc; } + +/* +** If *pArg is initially negative then this is a query. Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void winModeBit(winFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} /* ** Control and query of the open file handle. 
*/ static int winFileControl(sqlite3_file *id, int op, void *pArg){ @@ -2158,16 +2180,15 @@ return rc; } return SQLITE_OK; } case SQLITE_FCNTL_PERSIST_WAL: { - int bPersist = *(int*)pArg; - if( bPersist<0 ){ - *(int*)pArg = pFile->bPersistWal; - }else{ - pFile->bPersistWal = bPersist!=0; - } + winModeBit(pFile, WINFILE_PERSIST_WAL, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + winModeBit(pFile, WINFILE_PSOW, (int*)pArg); return SQLITE_OK; } case SQLITE_FCNTL_VFSNAME: { *(char**)pArg = sqlite3_mprintf("win32"); return SQLITE_OK; @@ -2210,12 +2231,13 @@ /* ** Return a vector of device characteristics. */ static int winDeviceCharacteristics(sqlite3_file *id){ - UNUSED_PARAMETER(id); - return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN; + winFile *p = (winFile*)id; + return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | + ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0); } #ifndef SQLITE_OMIT_WAL /* @@ -3176,10 +3198,13 @@ pFile->h = h; pFile->lastErrno = NO_ERROR; pFile->pVfs = pVfs; pFile->pShm = 0; pFile->zPath = zName; + if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){ + pFile->ctrlFlags |= WINFILE_PSOW; + } pFile->sectorSize = getSectorSize(pVfs, zUtf8Name); #if SQLITE_OS_WINCE if( isReadWrite && eType==SQLITE_OPEN_MAIN_DB && !winceCreateLock(zName, pFile) Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -2513,27 +2513,40 @@ ** ** Otherwise, for non-temporary files, the effective sector size is ** the value returned by the xSectorSize() method rounded up to 32 if ** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it ** is greater than MAX_SECTOR_SIZE. +** +** If the file has the SQLITE_IOCAP_POWERSAFE_OVERWRITE property, then set +** the effective sector size to its minimum value (512). 
The purpose of +** pPager->sectorSize is to define the "blast radius" of bytes that +** might change if a crash occurs while writing to a single byte in +** that range. But with POWERSAFE_OVERWRITE, the blast radius is zero +** (that is what POWERSAFE_OVERWRITE means), so we minimize the sector +** size. For backwards compatibility of the rollback journal file format, +** we cannot reduce the effective sector size below 512. */ static void setSectorSize(Pager *pPager){ assert( isOpen(pPager->fd) || pPager->tempFile ); - if( !pPager->tempFile ){ + if( pPager->tempFile + || (sqlite3OsDeviceCharacteristics(pPager->fd) & + SQLITE_IOCAP_POWERSAFE_OVERWRITE)!=0 + ){ /* Sector size doesn't matter for temporary files. Also, the file ** may not have been opened yet, in which case the OsSectorSize() - ** call will segfault. - */ + ** call will segfault. */ + pPager->sectorSize = 512; + }else{ pPager->sectorSize = sqlite3OsSectorSize(pPager->fd); - } - if( pPager->sectorSize<32 ){ - pPager->sectorSize = 512; - } - if( pPager->sectorSize>MAX_SECTOR_SIZE ){ - assert( MAX_SECTOR_SIZE>=512 ); - pPager->sectorSize = MAX_SECTOR_SIZE; + if( pPager->sectorSize<32 ){ + pPager->sectorSize = 512; + } + if( pPager->sectorSize>MAX_SECTOR_SIZE ){ + assert( MAX_SECTOR_SIZE>=512 ); + pPager->sectorSize = MAX_SECTOR_SIZE; + } } } /* ** Playback the journal and thus restore the database file to Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -502,11 +502,18 @@ ** nnn are atomic. The SQLITE_IOCAP_SAFE_APPEND value means ** that when data is appended to a file, the data is appended ** first then the size of the file is extended, never the other ** way around. The SQLITE_IOCAP_SEQUENTIAL property means that ** information is written to disk in the same order as calls -** to xWrite(). +** to xWrite(). 
The SQLITE_IOCAP_POWERSAFE_OVERWRITE property means that +** after reboot following a crash or power loss, the value of +** each byte in a file is a value that was actually written +** into that byte at some point. In other words, a crash will +** not cause unwritten bytes of the file to change nor introduce +** randomness into a file nor zero out parts of the file, and any byte of +** a file that is never written will not change value due to +** writes to nearby bytes. */ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 #define SQLITE_IOCAP_ATOMIC1K 0x00000004 #define SQLITE_IOCAP_ATOMIC2K 0x00000008 @@ -516,10 +523,11 @@ #define SQLITE_IOCAP_ATOMIC32K 0x00000080 #define SQLITE_IOCAP_ATOMIC64K 0x00000100 #define SQLITE_IOCAP_SAFE_APPEND 0x00000200 #define SQLITE_IOCAP_SEQUENTIAL 0x00000400 #define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN 0x00000800 +#define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 /* ** CAPI3REF: File Locking Levels ** ** SQLite uses one of these integer values as the second @@ -764,10 +772,19 @@ ** in order for the database to be readable. The fourth parameter to ** [sqlite3_file_control()] for this opcode should be a pointer to an integer. ** That integer is 0 to disable persistent WAL mode or 1 to enable persistent ** WAL mode. If the integer is -1, then it is overwritten with the current ** WAL persistence setting. +** +** ^The [SQLITE_FCNTL_POWERSAFE_OVERWRITE] opcode is used to set or query the +** persistent "powersafe-overwrite" or "PSOW" setting. The PSOW setting +** determines the [SQLITE_IOCAP_POWERSAFE_OVERWRITE] bit of the +** xDeviceCharacteristics methods. The fourth parameter to +** [sqlite3_file_control()] for this opcode should be a pointer to an integer. +** That integer is 0 to disable PSOW mode or 1 to enable PSOW +** mode. If the integer is -1, then it is overwritten with the current +** PSOW setting. 
** ** ^The [SQLITE_FCNTL_OVERWRITE] opcode is invoked by SQLite after opening ** a write transaction to indicate that, unless it is rolled back for some ** reason, the entire database file will be overwritten by the current ** transaction. This is used by VACUUM operations. @@ -781,22 +798,23 @@ ** all file-control actions, there is no guarantee that this will actually ** do anything. Callers should initialize the char* variable to a NULL ** pointer in case this file-control is not implemented. This file-control ** is intended for diagnostic use only. */ -#define SQLITE_FCNTL_LOCKSTATE 1 -#define SQLITE_GET_LOCKPROXYFILE 2 -#define SQLITE_SET_LOCKPROXYFILE 3 -#define SQLITE_LAST_ERRNO 4 -#define SQLITE_FCNTL_SIZE_HINT 5 -#define SQLITE_FCNTL_CHUNK_SIZE 6 -#define SQLITE_FCNTL_FILE_POINTER 7 -#define SQLITE_FCNTL_SYNC_OMITTED 8 -#define SQLITE_FCNTL_WIN32_AV_RETRY 9 -#define SQLITE_FCNTL_PERSIST_WAL 10 -#define SQLITE_FCNTL_OVERWRITE 11 -#define SQLITE_FCNTL_VFSNAME 12 +#define SQLITE_FCNTL_LOCKSTATE 1 +#define SQLITE_GET_LOCKPROXYFILE 2 +#define SQLITE_SET_LOCKPROXYFILE 3 +#define SQLITE_LAST_ERRNO 4 +#define SQLITE_FCNTL_SIZE_HINT 5 +#define SQLITE_FCNTL_CHUNK_SIZE 6 +#define SQLITE_FCNTL_FILE_POINTER 7 +#define SQLITE_FCNTL_SYNC_OMITTED 8 +#define SQLITE_FCNTL_WIN32_AV_RETRY 9 +#define SQLITE_FCNTL_PERSIST_WAL 10 +#define SQLITE_FCNTL_OVERWRITE 11 +#define SQLITE_FCNTL_VFSNAME 12 +#define SQLITE_FCNTL_POWERSAFE_OVERWRITE 13 /* ** CAPI3REF: Mutex Handle ** ** The mutex module within SQLite defines [sqlite3_mutex] to be an Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -123,10 +123,18 @@ #else # define SQLITE_THREADSAFE 1 /* IMP: R-07272-22309 */ #endif #endif +/* +** Powersafe overwrite is on by default. But can be turned off using +** the -DSQLITE_POWERSAFE_OVERWRITE=0 command-line option. 
+*/ +#ifndef SQLITE_POWERSAFE_OVERWRITE +# define SQLITE_POWERSAFE_OVERWRITE 1 +#endif + /* ** The SQLITE_DEFAULT_MEMSTATUS macro must be defined as either 0 or 1. ** It determines whether or not the features related to ** SQLITE_CONFIG_MEMSTATUS are available by default or not. This value can ** be overridden at runtime using the sqlite3_config() API. Index: src/tclsqlite.c ================================================================== --- src/tclsqlite.c +++ src/tclsqlite.c @@ -2998,10 +2998,18 @@ if( b ){ flags |= SQLITE_OPEN_FULLMUTEX; flags &= ~SQLITE_OPEN_NOMUTEX; }else{ flags &= ~SQLITE_OPEN_FULLMUTEX; + } + }else if( strcmp(zArg, "-uri")==0 ){ + int b; + if( Tcl_GetBooleanFromObj(interp, objv[i+1], &b) ) return TCL_ERROR; + if( b ){ + flags |= SQLITE_OPEN_URI; + }else{ + flags &= ~SQLITE_OPEN_URI; } }else{ Tcl_AppendResult(interp, "unknown option: ", zArg, (char*)0); return TCL_ERROR; } Index: src/test1.c ================================================================== --- src/test1.c +++ src/test1.c @@ -5233,10 +5233,42 @@ sqlite3_snprintf(sizeof(z), z, "%d %d", rc, bPersist); Tcl_AppendResult(interp, z, (char*)0); return TCL_OK; } +/* +** tclcmd: file_control_powersafe_overwrite DB PSOW-FLAG +** +** This TCL command runs the sqlite3_file_control interface with +** the SQLITE_FCNTL_POWERSAFE_OVERWRITE opcode. 
+*/ +static int file_control_powersafe_overwrite( + ClientData clientData, /* Pointer to sqlite3_enable_XXX function */ + Tcl_Interp *interp, /* The TCL interpreter that invoked this command */ + int objc, /* Number of arguments */ + Tcl_Obj *CONST objv[] /* Command arguments */ +){ + sqlite3 *db; + int rc; + int b; + char z[100]; + + if( objc!=3 ){ + Tcl_AppendResult(interp, "wrong # args: should be \"", + Tcl_GetStringFromObj(objv[0], 0), " DB FLAG", 0); + return TCL_ERROR; + } + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){ + return TCL_ERROR; + } + if( Tcl_GetIntFromObj(interp, objv[2], &b) ) return TCL_ERROR; + rc = sqlite3_file_control(db,NULL,SQLITE_FCNTL_POWERSAFE_OVERWRITE,(void*)&b); + sqlite3_snprintf(sizeof(z), z, "%d %d", rc, b); + Tcl_AppendResult(interp, z, (char*)0); + return TCL_OK; +} + /* ** tclcmd: file_control_vfsname DB ?AUXDB? ** ** Return a string that describes the stack of VFSes. @@ -6091,10 +6123,11 @@ { "file_control_lockproxy_test", file_control_lockproxy_test, 0 }, { "file_control_chunksize_test", file_control_chunksize_test, 0 }, { "file_control_sizehint_test", file_control_sizehint_test, 0 }, { "file_control_win32_av_retry", file_control_win32_av_retry, 0 }, { "file_control_persist_wal", file_control_persist_wal, 0 }, + { "file_control_powersafe_overwrite",file_control_powersafe_overwrite,0}, { "file_control_vfsname", file_control_vfsname, 0 }, { "sqlite3_vfs_list", vfs_list, 0 }, { "sqlite3_create_function_v2", test_create_function_v2, 0 }, /* Functions from os.h */ Index: src/test6.c ================================================================== --- src/test6.c +++ src/test6.c @@ -703,21 +703,22 @@ ){ struct DeviceFlag { char *zName; int iValue; } aFlag[] = { - { "atomic", SQLITE_IOCAP_ATOMIC }, - { "atomic512", SQLITE_IOCAP_ATOMIC512 }, - { "atomic1k", SQLITE_IOCAP_ATOMIC1K }, - { "atomic2k", SQLITE_IOCAP_ATOMIC2K }, - { "atomic4k", SQLITE_IOCAP_ATOMIC4K }, - { "atomic8k", SQLITE_IOCAP_ATOMIC8K }, - { "atomic16k", 
SQLITE_IOCAP_ATOMIC16K }, - { "atomic32k", SQLITE_IOCAP_ATOMIC32K }, - { "atomic64k", SQLITE_IOCAP_ATOMIC64K }, - { "sequential", SQLITE_IOCAP_SEQUENTIAL }, - { "safe_append", SQLITE_IOCAP_SAFE_APPEND }, + { "atomic", SQLITE_IOCAP_ATOMIC }, + { "atomic512", SQLITE_IOCAP_ATOMIC512 }, + { "atomic1k", SQLITE_IOCAP_ATOMIC1K }, + { "atomic2k", SQLITE_IOCAP_ATOMIC2K }, + { "atomic4k", SQLITE_IOCAP_ATOMIC4K }, + { "atomic8k", SQLITE_IOCAP_ATOMIC8K }, + { "atomic16k", SQLITE_IOCAP_ATOMIC16K }, + { "atomic32k", SQLITE_IOCAP_ATOMIC32K }, + { "atomic64k", SQLITE_IOCAP_ATOMIC64K }, + { "sequential", SQLITE_IOCAP_SEQUENTIAL }, + { "safe_append", SQLITE_IOCAP_SAFE_APPEND }, + { "powersafe_overwrite", SQLITE_IOCAP_POWERSAFE_OVERWRITE }, { 0, 0 } }; int i; int iDc = 0; Index: src/test_vfs.c ================================================================== --- src/test_vfs.c +++ src/test_vfs.c @@ -1160,22 +1160,23 @@ struct DeviceFlag { char *zName; int iValue; } aFlag[] = { { "default", -1 }, - { "atomic", SQLITE_IOCAP_ATOMIC }, - { "atomic512", SQLITE_IOCAP_ATOMIC512 }, - { "atomic1k", SQLITE_IOCAP_ATOMIC1K }, - { "atomic2k", SQLITE_IOCAP_ATOMIC2K }, - { "atomic4k", SQLITE_IOCAP_ATOMIC4K }, - { "atomic8k", SQLITE_IOCAP_ATOMIC8K }, - { "atomic16k", SQLITE_IOCAP_ATOMIC16K }, - { "atomic32k", SQLITE_IOCAP_ATOMIC32K }, - { "atomic64k", SQLITE_IOCAP_ATOMIC64K }, - { "sequential", SQLITE_IOCAP_SEQUENTIAL }, - { "safe_append", SQLITE_IOCAP_SAFE_APPEND }, + { "atomic", SQLITE_IOCAP_ATOMIC }, + { "atomic512", SQLITE_IOCAP_ATOMIC512 }, + { "atomic1k", SQLITE_IOCAP_ATOMIC1K }, + { "atomic2k", SQLITE_IOCAP_ATOMIC2K }, + { "atomic4k", SQLITE_IOCAP_ATOMIC4K }, + { "atomic8k", SQLITE_IOCAP_ATOMIC8K }, + { "atomic16k", SQLITE_IOCAP_ATOMIC16K }, + { "atomic32k", SQLITE_IOCAP_ATOMIC32K }, + { "atomic64k", SQLITE_IOCAP_ATOMIC64K }, + { "sequential", SQLITE_IOCAP_SEQUENTIAL }, + { "safe_append", SQLITE_IOCAP_SAFE_APPEND }, { "undeletable_when_open", SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN }, + { 
"powersafe_overwrite", SQLITE_IOCAP_POWERSAFE_OVERWRITE }, { 0, 0 } }; Tcl_Obj *pRet; int iFlag; @@ -1205,11 +1206,11 @@ return TCL_ERROR; } iNew |= aFlag[idx].iValue; } - p->iDevchar = iNew; + p->iDevchar = iNew| 0x10000000; } pRet = Tcl_NewObj(); for(iFlag=0; iFlagiDevchar & aFlag[iFlag].iValue ){ Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -422,11 +422,12 @@ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 writeLock; /* True if in a write transaction */ u8 ckptLock; /* True if holding a checkpoint lock */ u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ u8 truncateOnCommit; /* True to truncate WAL file on commit */ - u8 noSyncHeader; /* Avoid WAL header fsyncs if true */ + u8 syncHeader; /* Fsync the WAL header if true */ + u8 padToSectorBoundary; /* Pad transactions out to the next sector */ WalIndexHdr hdr; /* Wal-index header for current transaction */ const char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ #ifdef SQLITE_DEBUG u8 lockError; /* True if a locking error has occurred */ @@ -1151,28 +1152,20 @@ } aData = &aFrame[WAL_FRAME_HDRSIZE]; /* Read all frames from the log file. */ iFrame = 0; - isValid = 1; for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ u32 pgno; /* Database page number for frame */ u32 nTruncate; /* dbsize field from frame header */ /* Read and decode the next log frame. 
*/ iFrame++; rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); if( rc!=SQLITE_OK ) break; - if( sqlite3Get4byte(&aFrame[8]) == - 1+sqlite3Get4byte((u8*)&pWal->hdr.aSalt[0]) ){ - pWal->hdr.mxFrame = 0; - pWal->hdr.nPage = 0; - break; - } - if( !isValid ) continue; isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); - if( !isValid ) continue; + if( !isValid ) break; rc = walIndexAppend(pWal, iFrame, pgno); if( rc!=SQLITE_OK ) break; /* If nTruncate is non-zero, this is a commit record. */ if( nTruncate ){ @@ -1292,10 +1285,12 @@ pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pDbFd = pDbFd; pRet->readLock = -1; pRet->mxWalSize = mxWalSize; pRet->zWalName = zWalName; + pRet->syncHeader = 1; + pRet->padToSectorBoundary = 1; pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); /* Open file handle on the write-ahead log file. */ flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); @@ -1307,11 +1302,14 @@ walIndexClose(pRet, 0); sqlite3OsClose(pRet->pWalFd); sqlite3_free(pRet); }else{ int iDC = sqlite3OsDeviceCharacteristics(pRet->pWalFd); - if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->noSyncHeader = 1; } + if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; } + if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){ + pRet->padToSectorBoundary = 0; + } *ppWal = pRet; WALTRACE(("WAL%d: opened\n", pRet)); } return rc; } @@ -2629,45 +2627,75 @@ testcase( rc==SQLITE_OK ); } return rc; } +/* +** Information about the current state of the WAL file and where +** the next fsync should occur - passed from sqlite3WalFrames() into +** walWriteToLog(). 
+*/ +typedef struct WalWriter { + Wal *pWal; /* The complete WAL information */ + sqlite3_file *pFd; /* The WAL file to which we write */ + sqlite3_int64 iSyncPoint; /* Fsync at this offset */ + int syncFlags; /* Flags for the fsync */ + int szPage; /* Size of one page */ +} WalWriter; + /* ** Write iAmt bytes of content into the WAL file beginning at iOffset. +** Do a sync when crossing the p->iSyncPoint boundary. ** -** When crossing the boundary between the first and second sectors of the -** file, first write all of the first sector content, then fsync(), then -** continue writing content for the second sector. This ensures that -** the WAL header is overwritten before the first commit mark. +** In other words, if iSyncPoint is in between iOffset and iOffset+iAmt, +** first write the part before iSyncPoint, then sync, then write the +** rest. */ static int walWriteToLog( - Wal *pWal, /* WAL to write to */ + WalWriter *p, /* WAL to write to */ void *pContent, /* Content to be written */ int iAmt, /* Number of bytes to write */ sqlite3_int64 iOffset /* Start writing at this offset */ ){ int rc; - if( iOffset>=pWal->szFirstBlock - || iOffset+iAmt<pWal->szFirstBlock - || pWal->syncFlags==0 - ){ - /* The common and fast case. Just write the data. */ - rc = sqlite3OsWrite(pWal->pWalFd, pContent, iAmt, iOffset); - }else{ - /* If this write will cross the first sector boundary, it has to - ** be split it two with a sync in between. 
*/ - int iFirstAmt = pWal->szFirstBlock - iOffset; - assert( iFirstAmt>0 && iFirstAmt<iAmt ); - rc = sqlite3OsWrite(pWal->pWalFd, pContent, iFirstAmt, iOffset); - if( rc ) return rc; - assert( pWal->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) ); - rc = sqlite3OsSync(pWal->pWalFd, pWal->syncFlags); - if( rc ) return rc; - pContent = (void*)(iFirstAmt + (char*)pContent); - rc = sqlite3OsWrite(pWal->pWalFd, pContent, - iAmt-iFirstAmt, iOffset+iFirstAmt); - } + if( iOffset<p->iSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ + int iFirstAmt = (int)(p->iSyncPoint - iOffset); + rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); + if( rc ) return rc; + iOffset += iFirstAmt; + iAmt -= iFirstAmt; + pContent = (void*)(iFirstAmt + (char*)pContent); + assert( p->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) ); + rc = sqlite3OsSync(p->pFd, p->syncFlags); + if( rc ) return rc; + } + rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); + return rc; +} + +/* +** Write out a single frame of the WAL +*/ +static int walWriteOneFrame( + WalWriter *p, /* Where to write the frame */ + PgHdr *pPage, /* The page of the frame to be written */ + int nTruncate, /* The commit flag. Usually 0. >0 for commit */ + sqlite3_int64 iOffset /* Byte offset at which to write */ +){ + int rc; /* Result code from subfunctions */ + void *pData; /* Data actually written */ + u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ +#if defined(SQLITE_HAS_CODEC) + if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM; +#else + pData = pPage->pData; +#endif + walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); + rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); + if( rc ) return rc; + /* Write the page data */ + rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); return rc; } /* ** Write a set of frames to the log. 
The caller must hold the write-lock @@ -2681,14 +2709,16 @@ int isCommit, /* True if this is a commit */ int sync_flags /* Flags to pass to OsSync() (or 0) */ ){ int rc; /* Used to catch return codes */ u32 iFrame; /* Next frame address */ - u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ PgHdr *p; /* Iterator to run through pList with. */ PgHdr *pLast = 0; /* Last frame in list */ - int nLast = 0; /* Number of extra copies of last page */ + int nExtra = 0; /* Number of extra copies of last page */ + int szFrame; /* The size of a single frame */ + i64 iOffset; /* Next byte to write in WAL file */ + WalWriter w; /* The writer */ assert( pList ); assert( pWal->writeLock ); /* If this frame set completes a transaction, then nTruncate>0. If @@ -2737,90 +2767,82 @@ rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); if( rc!=SQLITE_OK ){ return rc; } + + /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless + ** all syncing is turned off by PRAGMA synchronous=OFF). Otherwise + ** an out-of-order write following a WAL restart could result in + ** database corruption. See the ticket: + ** + ** http://localhost:591/sqlite/info/ff5be73dee + */ + if( pWal->syncHeader && sync_flags ){ + rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK); + if( rc ) return rc; + } } assert( (int)pWal->szPage==szPage ); - /* Setup information needed to do the WAL header sync */ - if( pWal->noSyncHeader ){ - assert( pWal->szFirstBlock==0 ); - assert( pWal->syncFlags==0 ); - }else{ - pWal->szFirstBlock = sqlite3OsSectorSize(pWal->pWalFd); - if( szPage>pWal->szFirstBlock ) pWal->szFirstBlock = szPage; - pWal->syncFlags = sync_flags & SQLITE_SYNC_MASK; - } - - /* Write the log file. 
*/ + /* Setup information needed to write frames into the WAL */ + w.pWal = pWal; + w.pFd = pWal->pWalFd; + w.iSyncPoint = 0; + w.syncFlags = sync_flags; + w.szPage = szPage; + iOffset = walFrameOffset(iFrame+1, szPage); + szFrame = szPage + WAL_FRAME_HDRSIZE; + + /* Write all frames into the log file exactly once */ for(p=pList; p; p=p->pDirty){ - u32 nDbsize; /* Db-size field for frame header */ - i64 iOffset; /* Write offset in log file */ - void *pData; - - iOffset = walFrameOffset(++iFrame, szPage); - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ - - /* Populate and write the frame header */ - nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0; -#if defined(SQLITE_HAS_CODEC) - if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM; -#else - pData = p->pData; -#endif - walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame); - rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset); - if( rc!=SQLITE_OK ){ - return rc; - } - - /* Write the page data */ - rc = walWriteToLog(pWal, pData, szPage, iOffset+sizeof(aFrame)); - if( rc!=SQLITE_OK ){ - return rc; - } + int nDbSize; /* 0 normally. Positive == commit flag */ + iFrame++; + assert( iOffset==walFrameOffset(iFrame, szPage) ); + nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; + rc = walWriteOneFrame(&w, p, nDbSize, iOffset); + if( rc ) return rc; pLast = p; + iOffset += szFrame; } - /* Sync the log file if the 'isSync' flag was specified. */ + /* If this is the end of a transaction, then we might need to pad + ** the transaction and/or sync the WAL file. + ** + ** Padding and syncing only occur if this set of frames completes a + ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL + ** or synchronous==OFF, then no padding or syncing is needed. + ** + ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is set, then padding is not + ** needed and only the sync is done. 
If padding is needed, then the + ** final frame is repeated (with its commit mark) until the next sector + ** boundary is crossed. Only the part of the WAL prior to the last + ** sector boundary is synced; the part of the last frame that extends + ** past the sector boundary is written after the sync. + */ if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){ - i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd); - i64 iOffset = walFrameOffset(iFrame+1, szPage); - - assert( iSegment>0 ); - - iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment); - while( iOffsetpData; -#endif - walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame); - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ - rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset); - if( rc!=SQLITE_OK ){ - return rc; - } - iOffset += WAL_FRAME_HDRSIZE; - rc = walWriteToLog(pWal, pData, szPage, iOffset); - if( rc!=SQLITE_OK ){ - return rc; - } - nLast++; - iOffset += szPage; - } - - rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK); - } - + if( pWal->padToSectorBoundary ){ + int sectorSize = sqlite3OsSectorSize(pWal->pWalFd); + w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; + while( iOffsettruncateOnCommit && pWal->mxWalSize>=0 ){ i64 sz = pWal->mxWalSize; - if( walFrameOffset(iFrame+nLast+1, szPage)>pWal->mxWalSize ){ - sz = walFrameOffset(iFrame+nLast+1, szPage); + if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ + sz = walFrameOffset(iFrame+nExtra+1, szPage); } walLimitSize(pWal, sz); pWal->truncateOnCommit = 0; } @@ -2832,13 +2854,13 @@ iFrame = pWal->hdr.mxFrame; for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ iFrame++; rc = walIndexAppend(pWal, iFrame, p->pgno); } - while( nLast>0 && rc==SQLITE_OK ){ + while( nExtra>0 && rc==SQLITE_OK ){ iFrame++; - nLast--; + nExtra--; rc = walIndexAppend(pWal, iFrame, pLast->pgno); } if( rc==SQLITE_OK ){ /* Update the private copy of the header. 
*/ Index: test/incrvacuum2.test ================================================================== --- test/incrvacuum2.test +++ test/incrvacuum2.test @@ -189,11 +189,11 @@ PRAGMA journal_mode = WAL; PRAGMA incremental_vacuum(1); PRAGMA wal_checkpoint; } file size test.db-wal - } {1640} + } [expr {32+2*(512+24)}] do_test 4.3 { db close sqlite3 db test.db set maxsz 0 @@ -203,9 +203,9 @@ execsql { PRAGMA incremental_vacuum(1) } set newsz [file size test.db-wal] if {$newsz>$maxsz} {set maxsz $newsz} } set maxsz - } {2176} + } [expr {32+3*(512+24)}] } finish_test Index: test/journal2.test ================================================================== --- test/journal2.test +++ test/journal2.test @@ -32,11 +32,11 @@ # Create a [testvfs] and install it as the default VFS. Set the device # characteristics flags to "SAFE_DELETE". # testvfs tvfs -default 1 -tvfs devchar undeletable_when_open +tvfs devchar {undeletable_when_open powersafe_overwrite} # Set up a hook so that each time a journal file is opened, closed or # deleted, the method name ("xOpen", "xClose" or "xDelete") and the final # segment of the journal file-name (i.e. "test.db-journal") are appended to # global list variable $::oplog. 
@@ -229,6 +229,5 @@ db close } tvfs delete finish_test - Index: test/pager1.test ================================================================== --- test/pager1.test +++ test/pager1.test @@ -1329,10 +1329,11 @@ foreach sectorsize { 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 } { tv sectorsize $sectorsize + tv devchar {} set eff $sectorsize if {$sectorsize < 512} { set eff 512 } if {$sectorsize > 65536} { set eff 65536 } do_test pager1-10.$sectorsize.1 { Index: test/superlock.test ================================================================== --- test/superlock.test +++ test/superlock.test @@ -74,11 +74,14 @@ do_catchsql_test 3.3 { SELECT * FROM t1 } {1 {database is locked}} do_catchsql_test 3.4 { INSERT INTO t1 VALUES(5, 6)} {1 {database is locked}} do_catchsql_test 3.5 { PRAGMA wal_checkpoint } {0 {1 -1 -1}} do_test 3.6 { unlock } {} -do_execsql_test 4.1 { PRAGMA wal_checkpoint } {0 2 2} +# At this point the WAL file consists of a single frame only - written +# by test case 3.1. If the POWERSAFE_OVERWRITE flag were not set, it would consist +# of two frames - the frame written by 3.1 and a padding frame. +do_execsql_test 4.1 { PRAGMA wal_checkpoint } {0 1 1} do_test 4.2 { sqlite3demo_superlock unlock test.db } {unlock} do_catchsql_test 4.3 { SELECT * FROM t1 } {1 {database is locked}} do_catchsql_test 4.4 { INSERT INTO t1 VALUES(5, 6)} {1 {database is locked}} do_catchsql_test 4.5 { PRAGMA wal_checkpoint } {0 {1 -1 -1}} Index: test/syscall.test ================================================================== --- test/syscall.test +++ test/syscall.test @@ -57,11 +57,12 @@ # Tests for the xNextSystemCall method.
# foreach s { open close access getcwd stat fstat ftruncate fcntl read pread write pwrite fchmod fallocate - pread64 pwrite64 unlink openDirectory mkdir rmdir + pread64 pwrite64 unlink openDirectory mkdir rmdir + statvfs } { if {[test_syscall exists $s]} {lappend syscall_list $s} } do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list] Index: test/wal.test ================================================================== --- test/wal.test +++ test/wal.test @@ -544,34 +544,34 @@ do_test wal-10.$tn.11 { sql2 { BEGIN; SELECT * FROM t1 } } {1 2 3 4 5 6 7 8 9 10} do_test wal-10.$tn.12 { catchsql { PRAGMA wal_checkpoint } - } {0 {0 13 13}} ;# Reader no longer block checkpoints + } {0 {0 7 7}} ;# Reader no longer block checkpoints do_test wal-10.$tn.13 { execsql { INSERT INTO t1 VALUES(11, 12) } sql2 {SELECT * FROM t1} } {1 2 3 4 5 6 7 8 9 10} # Writers do not block checkpoints any more either. # do_test wal-10.$tn.14 { catchsql { PRAGMA wal_checkpoint } - } {0 {0 15 13}} + } {0 {0 8 7}} # The following series of test cases used to verify another blocking # case in WAL - a case which no longer blocks. # do_test wal-10.$tn.15 { sql2 { COMMIT; BEGIN; SELECT * FROM t1; } } {1 2 3 4 5 6 7 8 9 10 11 12} do_test wal-10.$tn.16 { catchsql { PRAGMA wal_checkpoint } - } {0 {0 15 15}} + } {0 {0 8 8}} do_test wal-10.$tn.17 { execsql { PRAGMA wal_checkpoint } - } {0 15 15} + } {0 8 8} do_test wal-10.$tn.18 { sql3 { BEGIN; SELECT * FROM t1 } } {1 2 3 4 5 6 7 8 9 10 11 12} do_test wal-10.$tn.19 { catchsql { INSERT INTO t1 VALUES(13, 14) } @@ -590,17 +590,17 @@ # Another series of tests that used to demonstrate blocking behavior # but which now work. 
# do_test wal-10.$tn.23 { execsql { PRAGMA wal_checkpoint } - } {0 17 17} + } {0 9 9} do_test wal-10.$tn.24 { sql2 { BEGIN; SELECT * FROM t1; } } {1 2 3 4 5 6 7 8 9 10 11 12 13 14} do_test wal-10.$tn.25 { execsql { PRAGMA wal_checkpoint } - } {0 17 17} + } {0 9 9} do_test wal-10.$tn.26 { catchsql { INSERT INTO t1 VALUES(15, 16) } } {0 {}} do_test wal-10.$tn.27 { sql3 { INSERT INTO t1 VALUES(17, 18) } @@ -613,15 +613,15 @@ execsql { SELECT * FROM t1 } } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18} do_test wal-10.$tn.29 { execsql { INSERT INTO t1 VALUES(19, 20) } catchsql { PRAGMA wal_checkpoint } - } {0 {0 6 0}} + } {0 {0 3 0}} do_test wal-10.$tn.30 { code3 { sqlite3_finalize $::STMT } execsql { PRAGMA wal_checkpoint } - } {0 6 0} + } {0 3 0} # At one point, if a reader failed to upgrade to a writer because it # was reading an old snapshot, the write-locks were not being released. # Test that this bug has been fixed. # @@ -656,19 +656,19 @@ SELECT * FROM t1; } } {a b c d} do_test wal-10.$tn.36 { catchsql { PRAGMA wal_checkpoint } - } {0 {0 16 16}} + } {0 {0 8 8}} do_test wal-10.$tn.36 { sql3 { INSERT INTO t1 VALUES('e', 'f') } sql2 { SELECT * FROM t1 } } {a b c d} do_test wal-10.$tn.37 { sql2 COMMIT execsql { PRAGMA wal_checkpoint } - } {0 18 18} + } {0 9 9} } #------------------------------------------------------------------------- # This block of tests, wal-11.*, test that nothing goes terribly wrong # if frames must be written to the log file before a transaction is @@ -1038,11 +1038,11 @@ 4 {sqlite3_wal_checkpoint db main} SQLITE_OK 1 0 5 {sqlite3_wal_checkpoint db aux} SQLITE_OK 0 1 6 {sqlite3_wal_checkpoint db temp} SQLITE_OK 0 0 7 {db eval "PRAGMA main.wal_checkpoint"} {0 10 10} 1 0 - 8 {db eval "PRAGMA aux.wal_checkpoint"} {0 16 16} 0 1 + 8 {db eval "PRAGMA aux.wal_checkpoint"} {0 13 13} 0 1 9 {db eval "PRAGMA temp.wal_checkpoint"} {0 -1 -1} 0 0 } { do_test wal-16.$tn.1 { forcedelete test2.db test2.db-wal test2.db-journal forcedelete test.db test.db-wal 
test.db-journal @@ -1052,11 +1052,12 @@ ATTACH 'test2.db' AS aux; PRAGMA main.auto_vacuum = 0; PRAGMA aux.auto_vacuum = 0; PRAGMA main.journal_mode = WAL; PRAGMA aux.journal_mode = WAL; - PRAGMA synchronous = NORMAL; + PRAGMA main.synchronous = NORMAL; + PRAGMA aux.synchronous = NORMAL; } } {wal wal} do_test wal-16.$tn.2 { execsql { @@ -1070,21 +1071,21 @@ list [file size test.db] [file size test.db-wal] } [list [expr 1*1024] [wal_file_size 10 1024]] do_test wal-16.$tn.3 { list [file size test2.db] [file size test2.db-wal] - } [list [expr 1*1024] [wal_file_size 16 1024]] + } [list [expr 1*1024] [wal_file_size 13 1024]] do_test wal-16.$tn.4 [list eval $ckpt_cmd] $ckpt_res do_test wal-16.$tn.5 { list [file size test.db] [file size test.db-wal] } [list [expr ($ckpt_main ? 7 : 1)*1024] [wal_file_size 10 1024]] do_test wal-16.$tn.6 { list [file size test2.db] [file size test2.db-wal] - } [list [expr ($ckpt_aux ? 7 : 1)*1024] [wal_file_size 16 1024]] + } [list [expr ($ckpt_aux ? 7 : 1)*1024] [wal_file_size 13 1024]] catch { db close } } #------------------------------------------------------------------------- @@ -1550,16 +1551,20 @@ PRAGMA incremental_vacuum; PRAGMA wal_checkpoint; } file size test.db } [expr 3 * 1024] + + # WAL file now contains a single frame - the new root page for table t1. + # It would be two frames (the new root page and a padding frame) if the + # POWERSAFE_OVERWRITE flag were not set. do_test 24.5 { file size test.db-wal - } 2128 + } [wal_file_size 1 1024] } db close sqlite3_shutdown test_sqlite3_log sqlite3_initialize finish_test Index: test/wal2.test ================================================================== --- test/wal2.test +++ test/wal2.test @@ -359,11 +359,13 @@ PRAGMA journal_mode = WAL; CREATE TABLE data(x); INSERT INTO data VALUES('need xShmOpen to see this'); PRAGMA wal_checkpoint; } -} {wal 0 5 5} + # Three pages in the WAL file at this point: One copy of page 1 and two + # of the root page for table "data".
+} {wal 0 3 3} do_test wal2-4.2 { db close testvfs tvfs -noshm 1 sqlite3 db test.db -vfs tvfs catchsql { SELECT * FROM data } @@ -728,11 +730,11 @@ CREATE TABLE t2(a, b); PRAGMA wal_checkpoint; INSERT INTO t2 VALUES('I', 'II'); PRAGMA journal_mode; } -} {wal exclusive 0 3 3 wal} +} {wal exclusive 0 2 2 wal} do_test wal2-6.5.2 { execsql { PRAGMA locking_mode = normal; INSERT INTO t2 VALUES('III', 'IV'); PRAGMA locking_mode = exclusive; @@ -739,11 +741,11 @@ SELECT * FROM t2; } } {normal exclusive I II III IV} do_test wal2-6.5.3 { execsql { PRAGMA wal_checkpoint } -} {0 4 4} +} {0 2 2} db close proc lock_control {method filename handle spec} { foreach {start n op type} $spec break if {$op == "lock"} { return SQLITE_IOERR } @@ -1182,10 +1184,11 @@ } { faultsim_delete_and_reopen execsql {PRAGMA auto_vacuum = 0} execsql $sql + do_execsql_test wal2-14.$tn.0 { PRAGMA page_size = 4096 } {} do_execsql_test wal2-14.$tn.1 { PRAGMA journal_mode = WAL } {wal} set sqlite_sync_count 0 set sqlite_fullsync_count 0 @@ -1197,11 +1200,11 @@ BEGIN; INSERT INTO t1 VALUES(3, 4); INSERT INTO t1 VALUES(5, 6); COMMIT; -- 2 wal sync PRAGMA wal_checkpoint; -- 1 wal sync, 1 db sync - } {10 0 5 5 0 2 2} + } {10 0 3 3 0 1 1} do_test wal2-14.$tn.3 { cond_incr_sync_count 1 list $sqlite_sync_count $sqlite_fullsync_count } [lrange $reslist 0 1] @@ -1259,18 +1262,20 @@ incr ::sync($flags) } sqlite3 db test.db do_execsql_test 15.$tn.1 " + PRAGMA page_size = 4096; CREATE TABLE t1(x); PRAGMA wal_autocheckpoint = OFF; PRAGMA journal_mode = WAL; PRAGMA checkpoint_fullfsync = [lindex $settings 0]; PRAGMA fullfsync = [lindex $settings 1]; PRAGMA synchronous = [lindex $settings 2]; " {0 wal} +if { $tn==2} breakpoint do_test 15.$tn.2 { set sync(normal) 0 set sync(full) 0 execsql { INSERT INTO t1 VALUES('abc') } list $::sync(normal) $::sync(full) Index: test/wal3.test ================================================================== --- test/wal3.test +++ test/wal3.test @@ -196,13 +196,13 @@ # foreach {tn 
syncmode synccount} { 1 off {} 2 normal - {test.db-wal normal test.db normal} + {test.db-wal normal test.db-wal normal test.db normal} 3 full - {test.db-wal normal test.db-wal normal test.db-wal normal test.db normal} + {test.db-wal normal test.db-wal normal test.db-wal normal test.db-wal normal test.db normal} } { proc sync_counter {args} { foreach {method filename id flags} $args break lappend ::syncs [file tail $filename] $flags @@ -427,11 +427,11 @@ sqlite3 db3 test.db execsql { BEGIN ; SELECT * FROM t1 } db3 } {o t t f} do_test wal3-6.1.3 { execsql { PRAGMA wal_checkpoint } db2 -} {0 7 7} +} {0 4 4} # At this point the log file has been fully checkpointed. However, # connection [db3] holds a lock that prevents the log from being wrapped. # Test case 3.6.1.4 has [db] attempt a read-lock on aReadMark[0]. But # as it is obtaining the lock, [db2] appends to the log file. @@ -516,11 +516,11 @@ }] } } do_test wal3-6.2.2 { execsql { PRAGMA wal_checkpoint } -} {0 7 7} +} {0 4 4} do_test wal3-6.2.3 { set ::R } {h h l b} do_test wal3-6.2.4 { set sz1 [file size test.db-wal] @@ -626,11 +626,11 @@ INSERT INTO b VALUES('Tehran'); INSERT INTO b VALUES('Qom'); INSERT INTO b VALUES('Markazi'); PRAGMA wal_checkpoint; } -} {wal 0 9 9} +} {wal 0 5 5} do_test wal3-8.2 { execsql { SELECT * FROM b } } {Tehran Qom Markazi} do_test wal3-8.3 { db eval { SELECT * FROM b } { Index: test/wal5.test ================================================================== --- test/wal5.test +++ test/wal5.test @@ -195,13 +195,13 @@ INSERT INTO t1 VALUES(1, 2); CREATE TABLE aux.t2(a, b); INSERT INTO t2 VALUES(1, 2); } } {} - do_test 2.2.$tn.2 { file_page_counts } {1 5 1 5} - do_test 2.1.$tn.3 { code1 { do_wal_checkpoint db } } {0 5 5} - do_test 2.1.$tn.4 { file_page_counts } {2 5 2 5} + do_test 2.2.$tn.2 { file_page_counts } {1 3 1 3} + do_test 2.1.$tn.3 { code1 { do_wal_checkpoint db } } {0 3 3} + do_test 2.1.$tn.4 { file_page_counts } {2 3 2 3} } do_multiclient_test tn { setup_and_attach_aux 
do_test 2.2.$tn.1 { @@ -211,14 +211,14 @@ CREATE TABLE aux.t2(a, b); INSERT INTO t2 VALUES(1, 2); INSERT INTO t2 VALUES(3, 4); } } {} - do_test 2.2.$tn.2 { file_page_counts } {1 5 1 7} + do_test 2.2.$tn.2 { file_page_counts } {1 3 1 4} do_test 2.2.$tn.3 { sql2 { BEGIN; SELECT * FROM t1 } } {1 2} - do_test 2.2.$tn.4 { code1 { do_wal_checkpoint db -mode restart } } {1 5 5} - do_test 2.2.$tn.5 { file_page_counts } {2 5 2 7} + do_test 2.2.$tn.4 { code1 { do_wal_checkpoint db -mode restart } } {1 3 3} + do_test 2.2.$tn.5 { file_page_counts } {2 3 2 4} } do_multiclient_test tn { setup_and_attach_aux do_test 2.3.$tn.1 { @@ -227,17 +227,17 @@ INSERT INTO t1 VALUES(1, 2); CREATE TABLE aux.t2(a, b); INSERT INTO t2 VALUES(1, 2); } } {} - do_test 2.3.$tn.2 { file_page_counts } {1 5 1 5} + do_test 2.3.$tn.2 { file_page_counts } {1 3 1 3} do_test 2.3.$tn.3 { sql2 { BEGIN; SELECT * FROM t1 } } {1 2} do_test 2.3.$tn.4 { sql1 { INSERT INTO t1 VALUES(3, 4) } } {} do_test 2.3.$tn.5 { sql1 { INSERT INTO t2 VALUES(3, 4) } } {} - do_test 2.3.$tn.6 { file_page_counts } {1 7 1 7} - do_test 2.3.$tn.7 { code1 { do_wal_checkpoint db -mode full } } {1 7 5} - do_test 2.3.$tn.8 { file_page_counts } {1 7 2 7} + do_test 2.3.$tn.6 { file_page_counts } {1 4 1 4} + do_test 2.3.$tn.7 { code1 { do_wal_checkpoint db -mode full } } {1 4 3} + do_test 2.3.$tn.8 { file_page_counts } {1 4 2 4} } # Check that checkpoints block on the correct locks. And respond correctly # if they cannot obtain those locks. There are three locks that a checkpoint # may block on (in the following order): @@ -254,22 +254,22 @@ # # This test case involves running a checkpoint while there exist other # processes holding all three types of locks. 
# foreach {tn1 checkpoint busy_on ckpt_expected expected} { - 1 PASSIVE - {0 5 5} - - 2 TYPO - {0 5 5} - - - 3 FULL - {0 7 7} 2 - 4 FULL 1 {1 5 5} 1 - 5 FULL 2 {1 7 5} 2 - 6 FULL 3 {0 7 7} 2 - - 7 RESTART - {0 7 7} 3 - 8 RESTART 1 {1 5 5} 1 - 9 RESTART 2 {1 7 5} 2 - 10 RESTART 3 {1 7 7} 3 + 1 PASSIVE - {0 3 3} - + 2 TYPO - {0 3 3} - + + 3 FULL - {0 4 4} 2 + 4 FULL 1 {1 3 3} 1 + 5 FULL 2 {1 4 3} 2 + 6 FULL 3 {0 4 4} 2 + + 7 RESTART - {0 4 4} 3 + 8 RESTART 1 {1 3 3} 1 + 9 RESTART 2 {1 4 3} 2 + 10 RESTART 3 {1 4 4} 3 } { do_multiclient_test tn { setup_and_attach_aux ADDED test/zerodamage.test Index: test/zerodamage.test ================================================================== --- /dev/null +++ test/zerodamage.test @@ -0,0 +1,112 @@ +# 2011 December 21 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file implements tests of the SQLITE_IOCAP_POWERSAFE_OVERWRITE property +# and the SQLITE_FCNTL_POWERSAFE_OVERWRITE file-control for manipulating it. +# +# The name of this file comes from the fact that we used to call the +# POWERSAFE_OVERWRITE property ZERO_DAMAGE. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix wal5 + +# POWERSAFE_OVERWRITE defaults to true +# +do_test zerodamage-1.0 { + file_control_powersafe_overwrite db -1 +} {0 1} + +# Check the ability to turn zero-damage on and off. +# +do_test zerodamage-1.1 { + file_control_powersafe_overwrite db 0 + file_control_powersafe_overwrite db -1 +} {0 0} +do_test zerodamage-1.2 { + file_control_powersafe_overwrite db 1 + file_control_powersafe_overwrite db -1 +} {0 1} + +# Run a transaction with zero-damage on, a small page size and a much larger +# sectorsize. 
Verify that the maximum journal size is small - that the +# rollback journal is not being padded. +# +do_test zerodamage-2.0 { + db close + testvfs tv -default 1 + tv sectorsize 8192 + sqlite3 db file:test.db?psow=TRUE -uri 1 + unset -nocomplain ::max_journal_size + set ::max_journal_size 0 + proc xDeleteCallback {method file args} { + set sz [file size $file] + if {$sz>$::max_journal_size} {set ::max_journal_size $sz} + } + tv filter xDelete + tv script xDeleteCallback + register_wholenumber_module db + db eval { + PRAGMA page_size=1024; + PRAGMA journal_mode=DELETE; + PRAGMA cache_size=5; + CREATE VIRTUAL TABLE nums USING wholenumber; + CREATE TABLE t1(x, y); + INSERT INTO t1 SELECT value, randomblob(100) FROM nums + WHERE value BETWEEN 1 AND 400; + } + set ::max_journal_size 0 + db eval { + UPDATE t1 SET y=randomblob(50) WHERE x=123; + } + concat [file_control_powersafe_overwrite db -1] [set ::max_journal_size] +} {0 1 2576} + +# Repeat the previous step with zero-damage turned off. This time the +# maximum rollback journal size should be much larger. +# +do_test zerodamage-2.1 { + set ::max_journal_size 0 + db close + sqlite3 db file:test.db?psow=FALSE -uri 1 + db eval { + UPDATE t1 SET y=randomblob(50) WHERE x=124; + } + concat [file_control_powersafe_overwrite db -1] [set ::max_journal_size] +} {0 0 24704} + +# Run a WAL-mode transaction with POWERSAFE_OVERWRITE on to verify that the +# WAL file does not get too big. +# +do_test zerodamage-3.0 { + db eval { + PRAGMA journal_mode=WAL; + } + db close + sqlite3 db file:test.db?psow=TRUE -uri 1 + db eval { + UPDATE t1 SET y=randomblob(50) WHERE x=124; + } + file size test.db-wal +} {1080} + +# Repeat the previous with POWERSAFE_OVERWRITE off. Verify that the WAL file +# is padded. +# +do_test zerodamage-3.1 { + db close + sqlite3 db file:test.db?psow=FALSE -uri 1 + db eval { + UPDATE t1 SET y=randomblob(50) WHERE x=124; + } + file size test.db-wal +} {8416}