Index: Makefile.in ================================================================== --- Makefile.in +++ Makefile.in @@ -274,10 +274,12 @@ $(TOP)/src/vdbeblob.c \ $(TOP)/src/vdbemem.c \ $(TOP)/src/vdbetrace.c \ $(TOP)/src/vdbeInt.h \ $(TOP)/src/vtab.c \ + $(TOP)/src/wal.c \ + $(TOP)/src/wal.h \ $(TOP)/src/walker.c \ $(TOP)/src/where.c # Generated source code files # @@ -740,10 +742,13 @@ $(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vdbetrace.c vtab.lo: $(TOP)/src/vtab.c $(HDR) $(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/vtab.c +wal.lo: $(TOP)/src/wal.c $(HDR) + $(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/wal.c + walker.lo: $(TOP)/src/walker.c $(HDR) $(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/walker.c where.lo: $(TOP)/src/where.c $(HDR) $(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/where.c Index: VERSION ================================================================== --- VERSION +++ VERSION @@ -1,1 +1,1 @@ -3.6.23 +3.7.0 Index: main.mk ================================================================== --- main.mk +++ main.mk @@ -64,11 +64,11 @@ pager.o parse.o pcache.o pcache1.o pragma.o prepare.o printf.o \ random.o resolve.o rowset.o rtree.o select.o status.o \ table.o tokenize.o trigger.o \ update.o util.o vacuum.o \ vdbe.o vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbetrace.o \ - walker.o where.o utf.o vtab.o + wal.o walker.o where.o utf.o vtab.o # All of the source code files. 
# @@ -156,10 +156,12 @@ $(TOP)/src/vdbeblob.c \ $(TOP)/src/vdbemem.c \ $(TOP)/src/vdbetrace.c \ $(TOP)/src/vdbeInt.h \ $(TOP)/src/vtab.c \ + $(TOP)/src/wal.c \ + $(TOP)/src/wal.h \ $(TOP)/src/walker.c \ $(TOP)/src/where.c # Source code for extensions # @@ -253,12 +255,12 @@ #TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c TESTSRC2 = \ $(TOP)/src/attach.c $(TOP)/src/backup.c $(TOP)/src/btree.c \ $(TOP)/src/build.c $(TOP)/src/date.c \ - $(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/mem5.c \ - $(TOP)/src/os.c \ + $(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/wal.c \ + $(TOP)/src/mem5.c $(TOP)/src/os.c \ $(TOP)/src/os_os2.c $(TOP)/src/os_unix.c $(TOP)/src/os_win.c \ $(TOP)/src/pager.c $(TOP)/src/pragma.c $(TOP)/src/prepare.c \ $(TOP)/src/printf.c $(TOP)/src/random.c $(TOP)/src/pcache.c \ $(TOP)/src/pcache1.c $(TOP)/src/select.c $(TOP)/src/tokenize.c \ $(TOP)/src/utf.c $(TOP)/src/util.c $(TOP)/src/vdbeapi.c $(TOP)/src/vdbeaux.c \ Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -2259,16 +2259,46 @@ u8 *page1 = pPage1->aData; rc = SQLITE_NOTADB; if( memcmp(page1, zMagicHeader, 16)!=0 ){ goto page1_init_failed; } + +#ifdef SQLITE_OMIT_WAL if( page1[18]>1 ){ pBt->readOnly = 1; } if( page1[19]>1 ){ goto page1_init_failed; } +#else + if( page1[18]>2 ){ + pBt->readOnly = 1; + } + if( page1[19]>2 ){ + goto page1_init_failed; + } + + /* If the read version (header byte 19) is set to 2, this database should + ** be accessed in WAL mode. If the log is not already open, open it now. Then + ** return SQLITE_OK and return without populating BtShared.pPage1. + ** The caller detects this and calls this function again. This is + ** required as the version of page 1 currently in the page1 buffer + ** may not be the latest version - there may be a newer one in the log + ** file. 
+ */ + if( page1[19]==2 && pBt->doNotUseWAL==0 ){ + int isOpen = 0; + rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen); + if( rc!=SQLITE_OK ){ + goto page1_init_failed; + }else if( isOpen==0 ){ + releasePage(pPage1); + return SQLITE_OK; + } + rc = SQLITE_NOTADB; + } +#endif /* The maximum embedded fraction must be exactly 25%. And the minimum ** embedded fraction must be 12.5% for both leaf-data and non-leaf-data. ** The original design allowed these amounts to vary, but as of ** version 3.6.0, we require them to be fixed. @@ -7961,5 +7991,41 @@ assert(!pCur->isIncrblobHandle); assert(!pCur->aOverflow); pCur->isIncrblobHandle = 1; } #endif + +/* +** Set both the "write version" (single byte at byte offset 18) and +** "read version" (single byte at byte offset 19) fields in the database +** header to iVersion. +*/ +int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ + BtShared *pBt = pBtree->pBt; + int rc; /* Return code */ + + assert( pBtree->inTrans==TRANS_NONE ); + assert( iVersion==1 || iVersion==2 ); + + /* If setting the version fields to 1, do not automatically open the + ** WAL connection, even if the version fields are currently set to 2. 
+ */ + pBt->doNotUseWAL = (iVersion==1); + + rc = sqlite3BtreeBeginTrans(pBtree, 0); + if( rc==SQLITE_OK ){ + u8 *aData = pBt->pPage1->aData; + if( aData[18]!=(u8)iVersion || aData[19]!=(u8)iVersion ){ + rc = sqlite3BtreeBeginTrans(pBtree, 2); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); + if( rc==SQLITE_OK ){ + aData[18] = (u8)iVersion; + aData[19] = (u8)iVersion; + } + } + } + } + + pBt->doNotUseWAL = 0; + return rc; +} Index: src/btree.h ================================================================== --- src/btree.h +++ src/btree.h @@ -183,10 +183,12 @@ struct Pager *sqlite3BtreePager(Btree*); int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*); void sqlite3BtreeCacheOverflow(BtCursor *); void sqlite3BtreeClearCursor(BtCursor *); + +int sqlite3BtreeSetVersion(Btree *pBt, int iVersion); #ifndef NDEBUG int sqlite3BtreeCursorIsValid(BtCursor*); #endif Index: src/btreeInt.h ================================================================== --- src/btreeInt.h +++ src/btreeInt.h @@ -418,10 +418,11 @@ u16 maxLocal; /* Maximum local payload in non-LEAFDATA tables */ u16 minLocal; /* Minimum local payload in non-LEAFDATA tables */ u16 maxLeaf; /* Maximum local payload in a LEAFDATA table */ u16 minLeaf; /* Minimum local payload in a LEAFDATA table */ u8 inTransaction; /* Transaction state */ + u8 doNotUseWAL; /* If true, do not open write-ahead-log file */ int nTransaction; /* Number of open transactions (read + write) */ u32 nPage; /* Number of pages in the database */ void *pSchema; /* Pointer to space allocated by sqlite3BtreeSchema() */ void (*xFreeSchema)(void*); /* Destructor for BtShared.pSchema */ sqlite3_mutex *mutex; /* Non-recursive mutex required to access this struct */ Index: src/main.c ================================================================== --- src/main.c +++ src/main.c @@ -1184,10 +1184,146 @@ db->pRollbackArg = pArg; sqlite3_mutex_leave(db->mutex); return pRet; } +#ifndef SQLITE_OMIT_WAL +/* +** The 
sqlite3_wal_hook() callback registered by sqlite3_wal_autocheckpoint(). +** Return non-zero, indicating to the caller that a checkpoint should be run, +** if the number of frames in the log file is greater than +** sqlite3.pWalArg cast to an integer (the value configured by +** wal_autocheckpoint()). +*/ +int sqlite3WalDefaultHook( + void *p, /* Argument */ + sqlite3 *db, /* Connection */ + const char *zNotUsed, /* Database */ + int nFrame /* Size of WAL */ +){ + UNUSED_PARAMETER(zNotUsed); + return ( nFrame>=SQLITE_PTR_TO_INT(p)); +} +#endif /* SQLITE_OMIT_WAL */ + +/* +** Configure an sqlite3_wal_hook() callback to automatically checkpoint +** a database after committing a transaction if there are nFrame or +** more frames in the log file. Passing zero or a negative value as the +** nFrame parameter disables automatic checkpoints entirely. +** +** The callback registered by this function replaces any existing callback +** registered using sqlite3_wal_hook(). Likewise, registering a callback +** using sqlite3_wal_hook() disables the automatic checkpoint mechanism +** configured by this function. +*/ +int sqlite3_wal_autocheckpoint(sqlite3 *db, int nFrame){ +#ifndef SQLITE_OMIT_WAL + sqlite3_mutex_enter(db->mutex); + if( nFrame>0 ){ + sqlite3_wal_hook(db, sqlite3WalDefaultHook, SQLITE_INT_TO_PTR(nFrame)); + }else{ + sqlite3_wal_hook(db, 0, 0); + } + sqlite3_mutex_leave(db->mutex); +#endif + return SQLITE_OK; +} + +/* +** Register a callback to be invoked each time a transaction is written +** into the write-ahead-log by this database connection. 
+*/ +void *sqlite3_wal_hook( + sqlite3 *db, /* Attach the hook to this db handle */ + int(*xCallback)(void *, sqlite3*, const char*, int), + void *pArg /* First argument passed to xCallback() */ +){ +#ifndef SQLITE_OMIT_WAL + void *pRet; + sqlite3_mutex_enter(db->mutex); + pRet = db->pWalArg; /* Previous hook argument is handed back to the caller */ + db->xWalCallback = xCallback; + db->pWalArg = pArg; + sqlite3_mutex_leave(db->mutex); + return pRet; +#else + return 0; +#endif +} + + +/* +** Checkpoint database zDb. If zDb is NULL, all attached databases are checkpointed. +*/ +int sqlite3_wal_checkpoint(sqlite3 *db, const char *zDb){ +#ifdef SQLITE_OMIT_WAL + return SQLITE_OK; +#else + int rc; /* Return code */ + int iDb = SQLITE_MAX_ATTACHED; /* sqlite3.aDb[] index of db to checkpoint */ + + sqlite3_mutex_enter(db->mutex); + if( zDb ){ + iDb = sqlite3FindDbName(db, zDb); + } + if( iDb<0 ){ + rc = SQLITE_ERROR; + sqlite3Error(db, SQLITE_ERROR, "unknown database: %s", zDb); + }else{ + rc = sqlite3Checkpoint(db, iDb); + sqlite3Error(db, rc, 0); + } + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); + return rc; +#endif +} + +#ifndef SQLITE_OMIT_WAL +/* +** Run a checkpoint on database iDb. This is a no-op if database iDb is +** not currently open in WAL mode. +** +** If a transaction is open on the database being checkpointed, this +** function returns SQLITE_LOCKED and a checkpoint is not attempted. If +** an error occurs while running the checkpoint, an SQLite error code is +** returned (e.g. SQLITE_IOERR). Otherwise, SQLITE_OK. +** +** The mutex on database handle db should be held by the caller. The mutex +** associated with the specific b-tree being checkpointed is taken by +** this function while the checkpoint is running. +** +** If iDb is passed SQLITE_MAX_ATTACHED, then all attached databases are +** checkpointed. If an error is encountered it is returned immediately - +** no attempt is made to checkpoint any remaining databases. 
+*/ +int sqlite3Checkpoint(sqlite3 *db, int iDb){ + int rc = SQLITE_OK; /* Return code */ + int i; /* Used to iterate through attached dbs */ + + assert( sqlite3_mutex_held(db->mutex) ); + + for(i=0; inDb && rc==SQLITE_OK; i++){ + if( i==iDb || iDb==SQLITE_MAX_ATTACHED ){ + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + if( sqlite3BtreeIsInReadTrans(pBt) ){ + rc = SQLITE_LOCKED; + }else{ + sqlite3BtreeEnter(pBt); + rc = sqlite3PagerCheckpoint(sqlite3BtreePager(pBt)); + sqlite3BtreeLeave(pBt); + } + } + } + } + + return rc; +} +#endif /* SQLITE_OMIT_WAL */ + /* ** This function returns true if main-memory should be used instead of ** a temporary file for transient pager files and statement journals. ** The value returned depends on the value of db->temp_store (runtime ** parameter) and the compile time value of SQLITE_TEMP_STORE. The @@ -1743,10 +1879,12 @@ #endif /* Enable the lookaside-malloc subsystem */ setupLookaside(db, 0, sqlite3GlobalConfig.szLookaside, sqlite3GlobalConfig.nLookaside); + + sqlite3_wal_autocheckpoint(db, SQLITE_DEFAULT_WAL_AUTOCHECKPOINT); opendb_out: if( db ){ assert( db->mutex!=0 || isThreadsafe==0 || sqlite3GlobalConfig.bFullMutex==0 ); sqlite3_mutex_leave(db->mutex); Index: src/mutex.c ================================================================== --- src/mutex.c +++ src/mutex.c @@ -75,10 +75,11 @@ /* ** Retrieve a pointer to a static mutex or allocate a new dynamic one. 
*/ sqlite3_mutex *sqlite3_mutex_alloc(int id){ + if( !sqlite3GlobalConfig.bCoreMutex ) return 0; #ifndef SQLITE_OMIT_AUTOINIT if( sqlite3_initialize() ) return 0; #endif return sqlite3GlobalConfig.mutex.xMutexAlloc(id); } Index: src/os_common.h ================================================================== --- src/os_common.h +++ src/os_common.h @@ -38,18 +38,20 @@ #define OSTRACE5(X,Y,Z,A,B) if( sqlite3OSTrace ) sqlite3DebugPrintf(X,Y,Z,A,B) #define OSTRACE6(X,Y,Z,A,B,C) \ if(sqlite3OSTrace) sqlite3DebugPrintf(X,Y,Z,A,B,C) #define OSTRACE7(X,Y,Z,A,B,C,D) \ if(sqlite3OSTrace) sqlite3DebugPrintf(X,Y,Z,A,B,C,D) +#define OSTRACE(X) if( sqlite3OSTrace ) sqlite3DebugPrintf X #else #define OSTRACE1(X) #define OSTRACE2(X,Y) #define OSTRACE3(X,Y,Z) #define OSTRACE4(X,Y,Z,A) #define OSTRACE5(X,Y,Z,A,B) #define OSTRACE6(X,Y,Z,A,B,C) #define OSTRACE7(X,Y,Z,A,B,C,D) +#define OSTRACE(X) #endif /* ** Macros for performance tracing. Normally turned off. Only works ** on i486 hardware. Index: src/os_os2.c ================================================================== --- src/os_os2.c +++ src/os_os2.c @@ -1109,11 +1109,11 @@ os2DlSym, /* xDlSym */ os2DlClose, /* xDlClose */ os2Randomness, /* xRandomness */ os2Sleep, /* xSleep */ os2CurrentTime, /* xCurrentTime */ - os2GetLastError /* xGetLastError */ + os2GetLastError, /* xGetLastError */ }; sqlite3_vfs_register(&os2Vfs, 1); initUconvObjects(); return SQLITE_OK; } Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -117,10 +117,11 @@ #include #include #include #include #include +#include #if SQLITE_ENABLE_LOCKING_STYLE # include # if OS_VXWORKS # include @@ -1534,13 +1535,15 @@ ** file changed. If the transaction counter is not updated, ** other connections to the same file might not realize that ** the file has changed and hence might not know to flush their ** cache. The use of a stale cache can lead to database corruption. 
*/ +#if 0 assert( pFile->inNormalWrite==0 || pFile->dbUpdate==0 || pFile->transCntrChng==1 ); +#endif pFile->inNormalWrite = 0; #endif /* downgrading to a shared lock on NFS involves clearing the write lock ** before establishing the readlock - to avoid a race condition we downgrade @@ -2954,14 +2957,16 @@ int got; assert( id ); /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. */ +#if 0 assert( pFile->pUnused==0 || offset>=PENDING_BYTE+512 || offset+amt<=PENDING_BYTE ); +#endif got = seekAndRead(pFile, offset, pBuf, amt); if( got==amt ){ return SQLITE_OK; }else if( got<0 ){ @@ -3029,14 +3034,16 @@ assert( id ); assert( amt>0 ); /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. */ +#if 0 assert( pFile->pUnused==0 || offset>=PENDING_BYTE+512 || offset+amt<=PENDING_BYTE ); +#endif #ifndef NDEBUG /* If we are doing a normal write to a database file (as opposed to ** doing a hot-journal rollback or a write to some file other than a ** normal database file) then record the fact that the database @@ -4553,10 +4560,824 @@ UNUSED_PARAMETER(NotUsed2); UNUSED_PARAMETER(NotUsed3); return 0; } +#ifndef SQLITE_OMIT_WAL + +/* Forward reference */ +typedef struct unixShm unixShm; +typedef struct unixShmFile unixShmFile; + +/* +** Object used to represent a single file opened and mmapped to provide +** shared memory. When multiple threads all reference the same +** log-summary, each thread has its own unixFile object, but they all +** point to a single instance of this object. In other words, each +** log-summary is opened only once per process. 
+** +** unixMutexHeld() must be true when creating or destroying +** this object or while reading or writing the following fields: +** +** nRef +** pNext +** +** The following fields are read-only after the object is created: +** +** fid +** zFilename +** +** Either unixShmFile.mutex must be held or unixShmFile.nRef==0 and +** unixMutexHeld() is true when reading or writing any other field +** in this structure. +** +** To avoid deadlocks, mutex and mutexBuf are always released in the +** reverse order that they are acquired. mutexBuf is always acquired +** first and released last. This invariant is checked by asserting +** sqlite3_mutex_notheld() on mutex whenever mutexBuf is acquired or +** released. +*/ +struct unixShmFile { + struct unixFileId fid; /* Unique file identifier */ + sqlite3_mutex *mutex; /* Mutex to access this object */ + sqlite3_mutex *mutexBuf; /* Mutex held while the pMMapBuf mapping is in use */ + char *zFilename; /* Name of the file */ + int h; /* Open file descriptor */ + int szMap; /* Size of the mapping of file into memory */ + char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */ + int nRef; /* Number of unixShm objects pointing to this */ + unixShm *pFirst; /* All unixShm objects pointing to this */ + unixShmFile *pNext; /* Next in list of all unixShmFile objects */ +#ifdef SQLITE_DEBUG + u8 exclMask; /* Mask of exclusive locks held */ + u8 sharedMask; /* Mask of shared locks held */ + u8 nextShmId; /* Next available unixShm.id value */ +#endif +}; + +/* +** A global linked list (chained through pNext) of all unixShmFile objects. +** +** unixMutexHeld() must be true while reading or writing this list. +*/ +static unixShmFile *unixShmFileList = 0; + +/* +** Structure used internally by this VFS to record the state of an +** open shared memory connection. +** +** unixShm.pFile->mutex must be held while reading or writing the +** unixShm.pNext, unixShm.sharedMask and unixShm.exclMask elements. 
+** +** The unixShm.pFile element is initialized when the object is created +** and is read-only thereafter. 
+*/ +struct unixShm { + unixShmFile *pFile; /* The underlying unixShmFile object */ + unixShm *pNext; /* Next unixShm with the same unixShmFile */ + u8 lockState; /* Current lock state */ + u8 readLock; /* Which of the two read-lock states to use */ + u8 hasMutex; /* True if holding the unixShmFile mutex */ + u8 hasMutexBuf; /* True if holding pFile->mutexBuf */ + u8 sharedMask; /* Mask of shared locks held */ + u8 exclMask; /* Mask of exclusive locks held */ +#ifdef SQLITE_DEBUG + u8 id; /* Id of this connection with its unixShmFile */ +#endif +}; + +/* +** Size increment by which shared memory grows +*/ +#define SQLITE_UNIX_SHM_INCR 4096 + +/* +** Constants used for locking +*/ +#define UNIX_SHM_BASE 32 /* Byte offset of the first lock byte */ +#define UNIX_SHM_MUTEX 0x01 /* Mask for MUTEX lock */ +#define UNIX_SHM_DMS 0x04 /* Mask for Dead-Man-Switch lock */ +#define UNIX_SHM_A 0x10 /* Mask for region locks... */ +#define UNIX_SHM_B 0x20 +#define UNIX_SHM_C 0x40 +#define UNIX_SHM_D 0x80 + +#ifdef SQLITE_DEBUG +/* +** Return a pointer to a nul-terminated string in static memory that +** describes a locking mask. The string is of the form "MSABCD" with +** each character representing a lock. "M" for MUTEX, "S" for DMS, +** and "A" through "D" for the region locks. If a lock is held, the +** letter is shown. If the lock is not held, the letter is converted +** to ".". +** +** This routine is for debugging purposes only and does not appear +** in a production build. +*/ +static const char *unixShmLockString(u8 mask){ + static char zBuf[48]; + static int iBuf = 0; + char *z; + + z = &zBuf[iBuf]; + iBuf += 8; + if( iBuf>=sizeof(zBuf) ) iBuf = 0; + + z[0] = (mask & UNIX_SHM_MUTEX) ? 'M' : '.'; + z[1] = (mask & UNIX_SHM_DMS) ? 'S' : '.'; + z[2] = (mask & UNIX_SHM_A) ? 'A' : '.'; + z[3] = (mask & UNIX_SHM_B) ? 'B' : '.'; + z[4] = (mask & UNIX_SHM_C) ? 'C' : '.'; + z[5] = (mask & UNIX_SHM_D) ? 
'D' : '.'; + z[6] = 0; + return z; +} +#endif /* SQLITE_DEBUG */ + +/* +** Apply posix advisory locks for all bytes identified in lockMask. +** +** lockMask might contain multiple bits but all bits are guaranteed +** to be contiguous. +** +** Locks block if the UNIX_SHM_MUTEX bit is set and are non-blocking +** otherwise. +*/ +static int unixShmSystemLock( + unixShmFile *pFile, /* Apply locks to this open shared-memory segment */ + int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ + u8 lockMask /* Which bytes to lock or unlock */ +){ + struct flock f; /* The posix advisory locking structure */ + int lockOp; /* The opcode for fcntl() */ + int i; /* Offset into the locking byte range */ + int rc; /* Result code form fcntl() */ + u8 mask; /* Mask of bits in lockMask */ + + /* Access to the unixShmFile object is serialized by the caller */ + assert( sqlite3_mutex_held(pFile->mutex) || pFile->nRef==0 ); + + /* Initialize the locking parameters */ + memset(&f, 0, sizeof(f)); + f.l_type = lockType; + f.l_whence = SEEK_SET; + if( (lockMask & UNIX_SHM_MUTEX)!=0 && lockType!=F_UNLCK ){ + lockOp = F_SETLKW; + OSTRACE(("SHM-LOCK requesting blocking lock\n")); + }else{ + lockOp = F_SETLK; + } + + /* Find the first bit in lockMask that is set */ + for(i=0, mask=0x01; mask!=0 && (lockMask&mask)==0; mask <<= 1, i++){} + assert( mask!=0 ); + f.l_start = i+UNIX_SHM_BASE; + f.l_len = 1; + + /* Extend the locking range for each additional bit that is set */ + mask <<= 1; + while( mask!=0 && (lockMask & mask)!=0 ){ + f.l_len++; + mask <<= 1; + } + + /* Verify that all bits set in lockMask are contiguous */ + assert( mask==0 || (lockMask & ~(mask | (mask-1)))==0 ); + + /* Acquire the system-level lock */ + rc = fcntl(pFile->h, lockOp, &f); + rc = (rc!=(-1)) ? 
SQLITE_OK : SQLITE_BUSY; + + /* Update the global lock state and do debug tracing */ +#ifdef SQLITE_DEBUG + OSTRACE(("SHM-LOCK ")); + if( rc==SQLITE_OK ){ + if( lockType==F_UNLCK ){ + OSTRACE(("unlock ok")); + pFile->exclMask &= ~lockMask; + pFile->sharedMask &= ~lockMask; + }else if( lockType==F_RDLCK ){ + OSTRACE(("read-lock ok")); + pFile->exclMask &= ~lockMask; + pFile->sharedMask |= lockMask; + }else{ + assert( lockType==F_WRLCK ); + OSTRACE(("write-lock ok")); + pFile->exclMask |= lockMask; + pFile->sharedMask &= ~lockMask; + } + }else{ + if( lockType==F_UNLCK ){ + OSTRACE(("unlock failed")); + }else if( lockType==F_RDLCK ){ + OSTRACE(("read-lock failed")); + }else{ + assert( lockType==F_WRLCK ); + OSTRACE(("write-lock failed")); + } + } + OSTRACE((" - change requested %s - afterwards %s:%s\n", + unixShmLockString(lockMask), + unixShmLockString(pFile->sharedMask), + unixShmLockString(pFile->exclMask))); +#endif + + return rc; +} + +/* +** For connection p, unlock all of the locks identified by the unlockMask +** parameter. 
+*/ +static int unixShmUnlock( + unixShmFile *pFile, /* The underlying shared-memory file */ + unixShm *p, /* The connection to be unlocked */ + u8 unlockMask /* Mask of locks to be unlocked */ +){ + int rc; /* Result code */ + unixShm *pX; /* For looping over all sibling connections */ + u8 allMask; /* Union of locks held by connections other than "p" */ + + /* Access to the unixShmFile object is serialized by the caller */ + assert( sqlite3_mutex_held(pFile->mutex) ); + + /* Compute locks held by sibling connections */ + allMask = 0; + for(pX=pFile->pFirst; pX; pX=pX->pNext){ + if( pX==p ) continue; + assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); + allMask |= pX->sharedMask; + } + + /* Unlock the system-level locks */ + if( (unlockMask & allMask)!=unlockMask ){ + rc = unixShmSystemLock(pFile, F_UNLCK, unlockMask & ~allMask); + }else{ + rc = SQLITE_OK; + } + + /* Undo the local locks */ + if( rc==SQLITE_OK ){ + p->exclMask &= ~unlockMask; + p->sharedMask &= ~unlockMask; + } + return rc; +} + +/* +** Get reader locks for connection p on all locks in the readMask parameter. +*/ +static int unixShmSharedLock( + unixShmFile *pFile, /* The underlying shared-memory file */ + unixShm *p, /* The connection to get the shared locks */ + u8 readMask /* Mask of shared locks to be acquired */ +){ + int rc; /* Result code */ + unixShm *pX; /* For looping over all sibling connections */ + u8 allShared; /* Union of locks held by connections other than "p" */ + + /* Access to the unixShmFile object is serialized by the caller */ + assert( sqlite3_mutex_held(pFile->mutex) ); + + /* Find out which shared locks are already held by sibling connections. + ** If any sibling already holds an exclusive lock, go ahead and return + ** SQLITE_BUSY. 
+ */ + allShared = 0; + for(pX=pFile->pFirst; pX; pX=pX->pNext){ + if( pX==p ) continue; + if( (pX->exclMask & readMask)!=0 ) return SQLITE_BUSY; + allShared |= pX->sharedMask; + } + + /* Get shared locks at the system level, if necessary */ + if( (~allShared) & readMask ){ + rc = unixShmSystemLock(pFile, F_RDLCK, readMask); + }else{ + rc = SQLITE_OK; + } + + /* Get the local shared locks */ + if( rc==SQLITE_OK ){ + p->sharedMask |= readMask; + } + return rc; +} + +/* +** For connection p, get an exclusive lock on all locks identified in +** the writeMask parameter. +*/ +static int unixShmExclusiveLock( + unixShmFile *pFile, /* The underlying shared-memory file */ + unixShm *p, /* The connection to get the exclusive locks */ + u8 writeMask /* Mask of exclusive locks to be acquired */ +){ + int rc; /* Result code */ + unixShm *pX; /* For looping over all sibling connections */ + + /* Access to the unixShmFile object is serialized by the caller */ + assert( sqlite3_mutex_held(pFile->mutex) ); + + /* Make sure no sibling connections hold locks that will block this + ** lock. If any do, return SQLITE_BUSY right away. + */ + for(pX=pFile->pFirst; pX; pX=pX->pNext){ + if( pX==p ) continue; + if( (pX->exclMask & writeMask)!=0 ) return SQLITE_BUSY; + if( (pX->sharedMask & writeMask)!=0 ) return SQLITE_BUSY; + } + + /* Get the exclusive locks at the system level. Then if successful + ** also mark the local connection as being locked. + */ + rc = unixShmSystemLock(pFile, F_WRLCK, writeMask); + if( rc==SQLITE_OK ){ + p->sharedMask &= ~writeMask; + p->exclMask |= writeMask; + } + return rc; +} + +/* +** Purge the unixShmFileList list of all entries with unixShmFile.nRef==0. +** +** This is not a VFS shared-memory method; it is a utility function called +** by VFS shared-memory methods. 
+*/ +static void unixShmPurge(void){ + unixShmFile **pp; + unixShmFile *p; + assert( unixMutexHeld() ); + pp = &unixShmFileList; + while( (p = *pp)!=0 ){ + if( p->nRef==0 ){ + if( p->mutex ) sqlite3_mutex_free(p->mutex); + if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf); + if( p->h>=0 ) close(p->h); + *pp = p->pNext; + sqlite3_free(p); + }else{ + pp = &p->pNext; + } + } +} + +/* +** Open a shared-memory area. This particular implementation uses +** mmapped files. +** +** zName is a filename used to identify the shared-memory area. The +** implementation does not (and perhaps should not) use this name +** directly, but rather use it as a template for finding an appropriate +** name for the shared-memory storage. In this implementation, the +** string "-index" is appended to zName and used as the name of the +** mmapped file. +** +** When opening a new shared-memory file, if no other instances of that +** file are currently open, in this process or in other processes, then +** the file must be truncated to zero length or have its header cleared. +*/ +static int unixShmOpen( + sqlite3_vfs *pVfs, /* The VFS */ + const char *zName, /* Base name of file to mmap */ + sqlite3_shm **pShm /* Write the unixShm object created here */ +){ + struct unixShm *p = 0; /* The connection to be opened */ + struct unixShmFile *pFile = 0; /* The underlying mmapped file */ + int rc; /* Result code */ + struct unixFileId fid; /* Unix file identifier */ + struct unixShmFile *pNew; /* Newly allocated pFile */ + struct stat sStat; /* Result from stat() an fstat() */ + int nName; /* Size of zName in bytes */ + + /* Allocate space for the new sqlite3_shm object. Also speculatively + ** allocate space for a new unixShmFile and filename. 
+ */ + p = sqlite3_malloc( sizeof(*p) ); + if( p==0 ) return SQLITE_NOMEM; + memset(p, 0, sizeof(*p)); + nName = strlen(zName); + pNew = sqlite3_malloc( sizeof(*pFile) + nName + 10 ); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + goto shm_open_err; + } + memset(pNew, 0, sizeof(*pNew)); + pNew->zFilename = (char*)&pNew[1]; + sqlite3_snprintf(nName+10, pNew->zFilename, "%s-index", zName); + + /* Look to see if there is an existing unixShmFile that can be used. + ** If no matching unixShmFile currently exists, create a new one. + */ + unixEnterMutex(); + rc = stat(pNew->zFilename, &sStat); + if( rc==0 ){ + memset(&fid, 0, sizeof(fid)); + fid.dev = sStat.st_dev; + fid.ino = sStat.st_ino; + for(pFile = unixShmFileList; pFile; pFile=pFile->pNext){ + if( memcmp(&pFile->fid, &fid, sizeof(fid))==0 ) break; + } + } + if( pFile ){ + sqlite3_free(pNew); + }else{ + pFile = pNew; + pNew = 0; + pFile->h = -1; + pFile->pNext = unixShmFileList; + unixShmFileList = pFile; + + pFile->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); + if( pFile->mutex==0 ){ + rc = SQLITE_NOMEM; + goto shm_open_err; + } + pFile->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); + if( pFile->mutexBuf==0 ){ + rc = SQLITE_NOMEM; + goto shm_open_err; + } + + pFile->h = open(pFile->zFilename, O_RDWR|O_CREAT, 0664); + if( pFile->h<0 ){ + rc = SQLITE_CANTOPEN_BKPT; + goto shm_open_err; + } + + rc = fstat(pFile->h, &sStat); + if( rc ){ + rc = SQLITE_CANTOPEN_BKPT; + goto shm_open_err; + } + pFile->fid.dev = sStat.st_dev; + pFile->fid.ino = sStat.st_ino; + + /* Check to see if another process is holding the dead-man switch. + ** If not, truncate the file to zero length. 
+ */ + if( unixShmSystemLock(pFile, F_WRLCK, UNIX_SHM_MUTEX) ){ + rc = SQLITE_IOERR_LOCK; + goto shm_open_err; + } + if( unixShmSystemLock(pFile, F_WRLCK, UNIX_SHM_DMS)==SQLITE_OK ){ + if( ftruncate(pFile->h, 0) ){ + rc = SQLITE_IOERR; + } + } + if( rc==SQLITE_OK ){ + rc = unixShmSystemLock(pFile, F_RDLCK, UNIX_SHM_DMS); + } + unixShmSystemLock(pFile, F_UNLCK, UNIX_SHM_MUTEX); + if( rc ) goto shm_open_err; + } + + /* Make the new connection a child of the unixShmFile */ + p->pFile = pFile; + p->pNext = pFile->pFirst; +#ifdef SQLITE_DEBUG + p->id = pFile->nextShmId++; +#endif + pFile->pFirst = p; + pFile->nRef++; + *pShm = (sqlite3_shm*)p; + unixLeaveMutex(); + return SQLITE_OK; + + /* Jump here on any error */ +shm_open_err: + unixShmPurge(); + sqlite3_free(p); + sqlite3_free(pFile); + sqlite3_free(pNew); + *pShm = 0; + unixLeaveMutex(); + return rc; +} + +/* +** Close a connection to shared-memory. Delete the underlying +** storage if deleteFlag is true. +*/ +static int unixShmClose(sqlite3_shm *pSharedMem, int deleteFlag){ + unixShm *p; /* The connection to be closed */ + unixShmFile *pFile; /* The underlying shared-memory file */ + unixShm **pp; /* For looping over sibling connections */ + + if( pSharedMem==0 ) return SQLITE_OK; + p = (struct unixShm*)pSharedMem; + pFile = p->pFile; + + /* Verify that the connection being closed holds no locks */ + assert( p->exclMask==0 ); + assert( p->sharedMask==0 ); + + /* Remove connection p from the set of connections associated with pFile */ + sqlite3_mutex_enter(pFile->mutex); + for(pp=&pFile->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} + *pp = p->pNext; + + /* Free the connection p */ + sqlite3_free(p); + sqlite3_mutex_leave(pFile->mutex); + + /* If pFile->nRef has reached 0, then close the underlying + ** shared-memory file, too */ + unixEnterMutex(); + assert( pFile->nRef>0 ); + pFile->nRef--; + if( pFile->nRef==0 ){ + if( deleteFlag ) unlink(pFile->zFilename); + unixShmPurge(); + } + unixLeaveMutex(); + + return 
SQLITE_OK; +} + +/* +** Query and/or changes the size of the underlying storage for +** a shared-memory segment. The reqSize parameter is the new size +** of the underlying storage, or -1 to do just a query. The size +** of the underlying storage (after resizing if resizing occurs) is +** written into pNewSize. +** +** This routine does not (necessarily) change the size of the mapping +** of the underlying storage into memory. Use xShmGet() to change +** the mapping size. +** +** The reqSize parameter is the minimum size requested. The implementation +** is free to expand the storage to some larger amount if it chooses. +*/ +static int unixShmSize( + sqlite3_shm *pSharedMem, /* Pointer returned by unixShmOpen() */ + int reqSize, /* Requested size. -1 for query only */ + int *pNewSize /* Write new size here */ +){ + unixShm *p = (unixShm*)pSharedMem; + unixShmFile *pFile = p->pFile; + int rc = SQLITE_OK; + struct stat sStat; + + if( reqSize>=0 ){ + reqSize = (reqSize + SQLITE_UNIX_SHM_INCR - 1)/SQLITE_UNIX_SHM_INCR; + reqSize *= SQLITE_UNIX_SHM_INCR; + rc = ftruncate(pFile->h, reqSize); + } + if( fstat(pFile->h, &sStat)==0 ){ + *pNewSize = (int)sStat.st_size; + }else{ + *pNewSize = 0; + rc = SQLITE_IOERR; + } + return rc; +} + + +/* +** Map the shared storage into memory. The minimum size of the +** mapping should be reqMapSize if reqMapSize is positive. If +** reqMapSize is zero or negative, the implementation can choose +** whatever mapping size is convenient. +** +** *ppBuf is made to point to the memory which is a mapping of the +** underlying storage. A mutex is acquired to prevent other threads +** from running while *ppBuf is in use in order to prevent other threads +** remapping *ppBuf out from under this thread. The unixShmRelease() +** call will release the mutex. However, if the lock state is CHECKPOINT, +** the mutex is not acquired because CHECKPOINT will never remap the +** buffer. 
RECOVER might remap, though, so CHECKPOINT will acquire
+** the mutex if and when it promotes to RECOVER.
+**
+** RECOVER needs to be atomic. The same mutex that prevents *ppBuf from
+** being remapped also prevents more than one thread from being in
+** RECOVER at a time. But, RECOVER sometimes wants to remap itself.
+** To prevent RECOVER from losing its lock while remapping, the
+** mutex is not released by unixShmRelease() when in RECOVER.
+**
+** *pNewMapSize is set to the size of the mapping.
+**
+** *ppBuf and *pNewMapSize might be NULL and zero if no space has
+** yet been allocated to the underlying storage.
+*/
+static int unixShmGet(
+  sqlite3_shm *pSharedMem, /* Pointer returned by unixShmOpen() */
+  int reqMapSize,          /* Requested size of mapping. -1 means don't care */
+  int *pNewMapSize,        /* Write new size of mapping here */
+  void **ppBuf             /* Write mapping buffer origin here */
+){
+  unixShm *p = (unixShm*)pSharedMem;
+  unixShmFile *pFile = p->pFile;
+  int rc = SQLITE_OK;
+
+  if( p->lockState!=SQLITE_SHM_CHECKPOINT && p->hasMutexBuf==0 ){
+    assert( sqlite3_mutex_notheld(pFile->mutex) );
+    sqlite3_mutex_enter(pFile->mutexBuf);
+    p->hasMutexBuf = 1;
+  }
+  sqlite3_mutex_enter(pFile->mutex);
+  if( pFile->szMap==0 || reqMapSize>pFile->szMap ){
+    int actualSize;
+    /* Map at least the whole of the existing storage */
+    if( unixShmSize(pSharedMem, -1, &actualSize)==SQLITE_OK
+     && reqMapSize<actualSize
+    ){
+      reqMapSize = actualSize;
+    }
+    if( pFile->pMMapBuf ){
+      munmap(pFile->pMMapBuf, pFile->szMap);
+    }
+    pFile->pMMapBuf = mmap(0, reqMapSize, PROT_READ|PROT_WRITE, MAP_SHARED,
+                           pFile->h, 0);
+    /* mmap() signals failure with MAP_FAILED, not NULL. Treat a failed
+    ** mapping as "nothing mapped" so that callers see NULL and zero
+    ** rather than a bogus pointer and size. */
+    if( pFile->pMMapBuf==MAP_FAILED ){
+      pFile->pMMapBuf = 0;
+    }
+    pFile->szMap = pFile->pMMapBuf ? reqMapSize : 0;
+  }
+  *pNewMapSize = pFile->szMap;
+  *ppBuf = pFile->pMMapBuf;
+  sqlite3_mutex_leave(pFile->mutex);
+  return rc;
+}
+
+/*
+** Release the lock held on the shared memory segment so that other
+** threads are free to resize it if necessary.
+**
+** If the lock is not currently held, this routine is a harmless no-op.
+** +** If the shared-memory object is in lock state RECOVER, then we do not +** really want to release the lock, so in that case too, this routine +** is a no-op. +*/ +static int unixShmRelease(sqlite3_shm *pSharedMem){ + unixShm *p = (unixShm*)pSharedMem; + if( p->hasMutexBuf && p->lockState!=SQLITE_SHM_RECOVER ){ + unixShmFile *pFile = p->pFile; + assert( sqlite3_mutex_notheld(pFile->mutex) ); + sqlite3_mutex_leave(pFile->mutexBuf); + p->hasMutexBuf = 0; + } + return SQLITE_OK; +} + +/* +** Symbolic names for LOCK states used for debugging. +*/ +#ifdef SQLITE_DEBUG +static const char *azLkName[] = { + "UNLOCK", + "READ", + "READ_FULL", + "WRITE", + "PENDING", + "CHECKPOINT", + "RECOVER" +}; +#endif + + +/* +** Change the lock state for a shared-memory segment. +*/ +static int unixShmLock( + sqlite3_shm *pSharedMem, /* Pointer from unixShmOpen() */ + int desiredLock, /* One of SQLITE_SHM_xxxxx locking states */ + int *pGotLock /* The lock you actually got */ +){ + unixShm *p = (unixShm*)pSharedMem; + unixShmFile *pFile = p->pFile; + int rc = SQLITE_PROTOCOL; + + /* Note that SQLITE_SHM_READ_FULL and SQLITE_SHM_PENDING are never + ** directly requested; they are side effects from requesting + ** SQLITE_SHM_READ and SQLITE_SHM_CHECKPOINT, respectively. + */ + assert( desiredLock==SQLITE_SHM_QUERY + || desiredLock==SQLITE_SHM_UNLOCK + || desiredLock==SQLITE_SHM_READ + || desiredLock==SQLITE_SHM_WRITE + || desiredLock==SQLITE_SHM_CHECKPOINT + || desiredLock==SQLITE_SHM_RECOVER ); + + /* Return directly if this is just a lock state query, or if + ** the connection is already in the desired locking state. 
+ */ + if( desiredLock==SQLITE_SHM_QUERY + || desiredLock==p->lockState + || (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL) + ){ + OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s and got %s\n", + p->id, getpid(), azLkName[desiredLock], azLkName[p->lockState])); + if( pGotLock ) *pGotLock = p->lockState; + return SQLITE_OK; + } + + OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s->%s\n", + p->id, getpid(), azLkName[p->lockState], azLkName[desiredLock])); + + if( desiredLock==SQLITE_SHM_RECOVER && !p->hasMutexBuf ){ + assert( sqlite3_mutex_notheld(pFile->mutex) ); + sqlite3_mutex_enter(pFile->mutexBuf); + p->hasMutexBuf = 1; + } + sqlite3_mutex_enter(pFile->mutex); + switch( desiredLock ){ + case SQLITE_SHM_UNLOCK: { + assert( p->lockState!=SQLITE_SHM_RECOVER ); + unixShmUnlock(pFile, p, UNIX_SHM_A|UNIX_SHM_B|UNIX_SHM_C|UNIX_SHM_D); + rc = SQLITE_OK; + p->lockState = SQLITE_SHM_UNLOCK; + break; + } + case SQLITE_SHM_READ: { + if( p->lockState==SQLITE_SHM_UNLOCK ){ + int nAttempt; + rc = SQLITE_BUSY; + assert( p->lockState==SQLITE_SHM_UNLOCK ); + for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){ + rc = unixShmSharedLock(pFile, p, UNIX_SHM_A|UNIX_SHM_B); + if( rc==SQLITE_BUSY ){ + rc = unixShmSharedLock(pFile, p, UNIX_SHM_D); + if( rc==SQLITE_OK ){ + p->lockState = p->readLock = SQLITE_SHM_READ_FULL; + } + }else{ + unixShmUnlock(pFile, p, UNIX_SHM_B); + p->lockState = p->readLock = SQLITE_SHM_READ; + } + } + }else if( p->lockState==SQLITE_SHM_WRITE ){ + rc = unixShmSharedLock(pFile, p, UNIX_SHM_A); + unixShmUnlock(pFile, p, UNIX_SHM_C|UNIX_SHM_D); + p->lockState = p->readLock = SQLITE_SHM_READ; + }else{ + assert( p->lockState==SQLITE_SHM_RECOVER ); + unixShmUnlock(pFile, p, UNIX_SHM_MUTEX); + p->lockState = p->readLock; + rc = SQLITE_OK; + } + break; + } + case SQLITE_SHM_WRITE: { + assert( p->lockState==SQLITE_SHM_READ + || p->lockState==SQLITE_SHM_READ_FULL ); + rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_C|UNIX_SHM_D); + if( 
rc==SQLITE_OK ){ + p->lockState = SQLITE_SHM_WRITE; + } + break; + } + case SQLITE_SHM_CHECKPOINT: { + assert( p->lockState==SQLITE_SHM_UNLOCK + || p->lockState==SQLITE_SHM_PENDING + || p->lockState==SQLITE_SHM_RECOVER ); + if( p->lockState==SQLITE_SHM_RECOVER ){ + unixShmUnlock(pFile, p, UNIX_SHM_MUTEX); + p->lockState = SQLITE_SHM_CHECKPOINT; + rc = SQLITE_OK; + } + if( p->lockState==SQLITE_SHM_UNLOCK ){ + rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_B|UNIX_SHM_C); + if( rc==SQLITE_OK ){ + p->lockState = SQLITE_SHM_PENDING; + } + } + if( p->lockState==SQLITE_SHM_PENDING ){ + rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_A); + if( rc==SQLITE_OK ){ + p->lockState = SQLITE_SHM_CHECKPOINT; + } + } + break; + } + default: { + assert( desiredLock==SQLITE_SHM_RECOVER ); + assert( p->lockState==SQLITE_SHM_READ + || p->lockState==SQLITE_SHM_READ_FULL + || p->lockState==SQLITE_SHM_CHECKPOINT ); + assert( sqlite3_mutex_held(pFile->mutexBuf) ); + rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_MUTEX); + if( rc==SQLITE_OK ){ + p->lockState = SQLITE_SHM_RECOVER; + } + break; + } + } + sqlite3_mutex_leave(pFile->mutex); + OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %s\n", + p->id, getpid(), azLkName[p->lockState])); + if( pGotLock ) *pGotLock = p->lockState; + return rc; +} + +#else +# define unixShmOpen 0 +# define unixShmSize 0 +# define unixShmGet 0 +# define unixShmRelease 0 +# define unixShmLock 0 +# define unixShmClose 0 +#endif /* #ifndef SQLITE_OMIT_WAL */ + /* ************************ End of sqlite3_vfs methods *************************** ******************************************************************************/ /****************************************************************************** @@ -5753,11 +6574,11 @@ ** more than that; it looks at the filesystem type that hosts the ** database file and tries to choose an locking method appropriate for ** that filesystem time. 
*/ #define UNIXVFS(VFSNAME, FINDER) { \ - 1, /* iVersion */ \ + 2, /* iVersion */ \ sizeof(unixFile), /* szOsFile */ \ MAX_PATHNAME, /* mxPathname */ \ 0, /* pNext */ \ VFSNAME, /* zName */ \ (void*)&FINDER, /* pAppData */ \ @@ -5770,11 +6591,19 @@ unixDlSym, /* xDlSym */ \ unixDlClose, /* xDlClose */ \ unixRandomness, /* xRandomness */ \ unixSleep, /* xSleep */ \ unixCurrentTime, /* xCurrentTime */ \ - unixGetLastError /* xGetLastError */ \ + unixGetLastError, /* xGetLastError */ \ + unixShmOpen, /* xShmOpen */ \ + unixShmSize, /* xShmSize */ \ + unixShmGet, /* xShmGet */ \ + unixShmRelease, /* xShmRelease */ \ + unixShmLock, /* xShmLock */ \ + unixShmClose, /* xShmClose */ \ + 0, /* xRename */ \ + 0, /* xCurrentTimeInt64 */ \ } /* ** All default VFSes for unix are contained in the following array. ** Index: src/os_win.c ================================================================== --- src/os_win.c +++ src/os_win.c @@ -1910,11 +1910,11 @@ winDlSym, /* xDlSym */ winDlClose, /* xDlClose */ winRandomness, /* xRandomness */ winSleep, /* xSleep */ winCurrentTime, /* xCurrentTime */ - winGetLastError /* xGetLastError */ + winGetLastError, /* xGetLastError */ }; sqlite3_vfs_register(&winVfs, 1); return SQLITE_OK; } Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -18,10 +18,11 @@ ** file simultaneously, or one process from reading the database while ** another is writing. 
*/ #ifndef SQLITE_OMIT_DISKIO #include "sqliteInt.h" +#include "wal.h" /* ******************** NOTES ON THE DESIGN OF THE PAGER ************************ ** ** Within this comment block, a page is deemed to have been synced @@ -218,10 +219,11 @@ i64 iOffset; /* Starting offset in main journal */ i64 iHdrOffset; /* See above */ Bitvec *pInSavepoint; /* Set of pages in this savepoint */ Pgno nOrig; /* Original number of pages in file */ Pgno iSubRec; /* Index of first record in sub-journal */ + u32 iFrame; /* Last frame in WAL when savepoint opened */ }; /* ** A open page cache is an instance of the following structure. ** @@ -395,10 +397,13 @@ void *pCodec; /* First argument to xCodec... methods */ #endif char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ PCache *pPCache; /* Pointer to page cache object */ sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ +#ifndef SQLITE_OMIT_WAL + Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */ +#endif }; /* ** The following global variables hold counters used for ** testing purposes only. These variables do not exist in @@ -1183,10 +1188,25 @@ } } return rc; } +/* +** Return true if this pager uses a write-ahead log instead of the usual +** rollback journal. Otherwise false. +*/ +#ifndef SQLITE_OMIT_WAL +static int pagerUseWal(Pager *pPager){ + return (pPager->pWal!=0); +} +#else +# define pagerUseWal(x) 0 +# define pagerRollbackWal(x) 0 +# define pagerWalFrames(v,w,x,y,z) 0 +# define pagerOpenWalIfPresent(z) SQLITE_OK +#endif + /* ** Unlock the database file. This function is a no-op if the pager ** is in exclusive mode. ** ** If the pager is currently in error state, discard the contents of @@ -1195,11 +1215,11 @@ ** on the pager file (by this or any other process), it will be ** treated as a hot-journal and rolled back. 
*/ static void pager_unlock(Pager *pPager){ if( !pPager->exclusiveMode ){ - int rc; /* Return code */ + int rc = SQLITE_OK; /* Return code */ /* Always close the journal file when dropping the database lock. ** Otherwise, another connection with journal_mode=delete might ** delete the file out from under us. */ @@ -1214,11 +1234,15 @@ ** until the change-counter check fails in PagerSharedLock(). ** Clearing the page size cache here is being conservative. */ pPager->dbSizeValid = 0; - rc = osUnlock(pPager->fd, NO_LOCK); + if( pagerUseWal(pPager) ){ + sqlite3WalCloseSnapshot(pPager->pWal); + }else{ + rc = osUnlock(pPager->fd, NO_LOCK); + } if( rc ){ pPager->errCode = rc; } IOTRACE(("UNLOCK %p\n", pPager)) @@ -1363,10 +1387,11 @@ } releaseAllSavepoints(pPager); assert( isOpen(pPager->jfd) || pPager->pInJournal==0 ); if( isOpen(pPager->jfd) ){ + assert( !pagerUseWal(pPager) ); /* Finalize the journal file. */ if( sqlite3IsMemJournal(pPager->jfd) ){ assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); sqlite3OsClose(pPager->jfd); @@ -1387,13 +1412,15 @@ pPager->journalStarted = 0; }else{ /* This branch may be executed with Pager.journalMode==MEMORY if ** a hot-journal was just rolled back. In this case the journal ** file should be closed and deleted. If this connection writes to - ** the database file, it will do so using an in-memory journal. */ + ** the database file, it will do so using an in-memory journal. 
+ */ assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE || pPager->journalMode==PAGER_JOURNALMODE_MEMORY + || pPager->journalMode==PAGER_JOURNALMODE_WAL ); sqlite3OsClose(pPager->jfd); if( !pPager->tempFile ){ rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); } @@ -1406,11 +1433,14 @@ sqlite3BitvecDestroy(pPager->pInJournal); pPager->pInJournal = 0; pPager->nRec = 0; sqlite3PcacheCleanAll(pPager->pPCache); - if( !pPager->exclusiveMode ){ + if( pagerUseWal(pPager) ){ + rc2 = sqlite3WalWriteLock(pPager->pWal, 0); + pPager->state = PAGER_SHARED; + }else if( !pPager->exclusiveMode ){ rc2 = osUnlock(pPager->fd, SHARED_LOCK); pPager->state = PAGER_SHARED; pPager->changeCountDone = 0; }else if( pPager->state==PAGER_SYNCED ){ pPager->state = PAGER_EXCLUSIVE; @@ -1517,10 +1547,11 @@ assert( isMainJrnl || pDone ); /* pDone always used on sub-journals */ assert( isSavepnt || pDone==0 ); /* pDone never used on non-savepoint */ aData = pPager->pTmpSpace; assert( aData ); /* Temp storage must have already been allocated */ + assert( pagerUseWal(pPager)==0 || (!isMainJrnl && isSavepnt) ); /* Read the page number and page data from the journal or sub-journal ** file. Return an error code to the caller if an IO error occurs. */ jfd = isMainJrnl ? pPager->jfd : pPager->sjfd; @@ -1586,11 +1617,15 @@ ** ** 2008-04-14: When attempting to vacuum a corrupt database file, it ** is possible to fail a statement on a database that does not yet exist. ** Do not attempt to write if database file has never been opened. 
*/ - pPg = pager_lookup(pPager, pgno); + if( pagerUseWal(pPager) ){ + pPg = 0; + }else{ + pPg = pager_lookup(pPager, pgno); + } assert( pPg || !MEMDB ); PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n", PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8*)aData), (isMainJrnl?"main-journal":"sub-journal") )); @@ -1603,10 +1638,11 @@ && isOpen(pPager->fd) && isSynced ){ i64 ofst = (pgno-1)*(i64)pPager->pageSize; testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); + assert( !pagerUseWal(pPager) ); rc = sqlite3OsWrite(pPager->fd, (u8*)aData, pPager->pageSize, ofst); if( pgno>pPager->dbFileSize ){ pPager->dbFileSize = pgno; } if( pPager->pBackup ){ @@ -1667,10 +1703,11 @@ ** the PGHDR_NEED_SYNC flag will not be set. It could then potentially ** be written out into the database file before its journal file ** segment is synced. If a crash occurs during or following this, ** database corruption may ensue. */ + assert( !pagerUseWal(pPager) ); sqlite3PcacheMakeClean(pPg); } #ifdef SQLITE_CHECK_PAGES pPg->pageHash = pager_pagehash(pPg); #endif @@ -2118,10 +2155,13 @@ testcase( rc!=SQLITE_OK ); } if( rc==SQLITE_OK && pPager->noSync==0 && pPager->state>=PAGER_EXCLUSIVE ){ rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); } + if( rc==SQLITE_OK && pPager->noSync==0 && pPager->state>=PAGER_EXCLUSIVE ){ + rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); + } if( rc==SQLITE_OK ){ rc = pager_end_transaction(pPager, zMaster[0]!='\0'); testcase( rc!=SQLITE_OK ); } if( rc==SQLITE_OK && zMaster[0] && res ){ @@ -2138,10 +2178,260 @@ */ setSectorSize(pPager); return rc; } + +/* +** Read the content for page pPg out of the database file and into +** pPg->pData. A shared lock or greater must be held on the database +** file before this function is called. +** +** If page 1 is read, then the value of Pager.dbFileVers[] is set to +** the value read from the database file. +** +** If an IO error occurs, then the IO error is returned to the caller. 
+** Otherwise, SQLITE_OK is returned. +*/ +static int readDbPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ + Pgno pgno = pPg->pgno; /* Page number to read */ + int rc = SQLITE_OK; /* Return code */ + i64 iOffset; /* Byte offset of file to read from */ + int isInWal = 0; /* True if page is in log file */ + + assert( pPager->state>=PAGER_SHARED && !MEMDB ); + assert( isOpen(pPager->fd) ); + + if( NEVER(!isOpen(pPager->fd)) ){ + assert( pPager->tempFile ); + memset(pPg->pData, 0, pPager->pageSize); + return SQLITE_OK; + } + + if( pagerUseWal(pPager) ){ + /* Try to pull the page from the write-ahead log. */ + rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pPg->pData); + } + if( rc==SQLITE_OK && !isInWal ){ + iOffset = (pgno-1)*(i64)pPager->pageSize; + rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); + if( rc==SQLITE_IOERR_SHORT_READ ){ + rc = SQLITE_OK; + } + } + + if( pgno==1 ){ + if( rc ){ + /* If the read is unsuccessful, set the dbFileVers[] to something + ** that will never be a valid file version. dbFileVers[] is a copy + ** of bytes 24..39 of the database. Bytes 28..31 should always be + ** zero. Bytes 32..35 and 35..39 should be page numbers which are + ** never 0xffffffff. So filling pPager->dbFileVers[] with all 0xff + ** bytes should suffice. + ** + ** For an encrypted database, the situation is more complex: bytes + ** 24..39 of the database are white noise. But the probability of + ** white noising equaling 16 bytes of 0xff is vanishingly small so + ** we should still be ok. 
+ */ + memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers)); + }else{ + u8 *dbFileVers = &((u8*)pPg->pData)[24]; + memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers)); + } + } + CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM); + + PAGER_INCR(sqlite3_pager_readdb_count); + PAGER_INCR(pPager->nRead); + IOTRACE(("PGIN %p %d\n", pPager, pgno)); + PAGERTRACE(("FETCH %d page %d hash(%08x)\n", + PAGERID(pPager), pgno, pager_pagehash(pPg))); + + return rc; +} + +#ifndef SQLITE_OMIT_WAL +/* +** This function is invoked once for each page that has already been +** written into the log file when a WAL transaction is rolled back. +** Parameter iPg is the page number of said page. The pCtx argument +** is actually a pointer to the Pager structure. +** +** If page iPg is present in the cache, and has no outstanding references, +** it is discarded. Otherwise, if there are one or more outstanding +** references, the page content is reloaded from the database. If the +** attempt to reload content from the database is required and fails, +** return an SQLite error code. Otherwise, SQLITE_OK. +*/ +static int pagerUndoCallback(void *pCtx, Pgno iPg){ + int rc = SQLITE_OK; + Pager *pPager = (Pager *)pCtx; + PgHdr *pPg; + + pPg = sqlite3PagerLookup(pPager, iPg); + if( pPg ){ + if( sqlite3PcachePageRefcount(pPg)==1 ){ + sqlite3PcacheDrop(pPg); + }else{ + rc = readDbPage(pPg); + if( rc==SQLITE_OK ){ + pPager->xReiniter(pPg); + } + sqlite3PagerUnref(pPg); + } + } + + /* Normally, if a transaction is rolled back, any backup processes are + ** updated as data is copied out of the rollback journal and into the + ** database. This is not generally possible with a WAL database, as + ** rollback involves simply truncating the log file. Therefore, if one + ** or more frames have already been written to the log (and therefore + ** also copied into the backup databases) as part of this transaction, + ** the backups must be restarted. 
+ */ + sqlite3BackupRestart(pPager->pBackup); + + return rc; +} + +/* +** This function is called to rollback a transaction on a WAL database. +*/ +static int pagerRollbackWal(Pager *pPager){ + int rc; /* Return Code */ + PgHdr *pList; /* List of dirty pages to revert */ + + /* For all pages in the cache that are currently dirty or have already + ** been written (but not committed) to the log file, do one of the + ** following: + ** + ** + Discard the cached page (if refcount==0), or + ** + Reload page content from the database (if refcount>0). + */ + pPager->dbSize = pPager->dbOrigSize; + rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager); + pList = sqlite3PcacheDirtyList(pPager->pPCache); + while( pList && rc==SQLITE_OK ){ + PgHdr *pNext = pList->pDirty; + rc = pagerUndoCallback((void *)pPager, pList->pgno); + pList = pNext; + } + + return rc; +} + +/* +** This function is a wrapper around sqlite3WalFrames(). As well as logging +** the contents of the list of pages headed by pList (connected by pDirty), +** this function notifies any active backup processes that the pages have +** changed. +*/ +static int pagerWalFrames( + Pager *pPager, /* Pager object */ + PgHdr *pList, /* List of frames to log */ + Pgno nTruncate, /* Database size after this commit */ + int isCommit, /* True if this is a commit */ + int sync_flags /* Flags to pass to OsSync() (or 0) */ +){ + int rc; /* Return code */ + + assert( pPager->pWal ); + rc = sqlite3WalFrames(pPager->pWal, + pPager->pageSize, pList, nTruncate, isCommit, sync_flags + ); + if( rc==SQLITE_OK && pPager->pBackup ){ + PgHdr *p; + for(p=pList; p; p=p->pDirty){ + sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); + } + } + return rc; +} + +/* +** Open a WAL snapshot on the log file this pager is connected to. 
+*/ +static int pagerOpenSnapshot(Pager *pPager){ + int rc; /* Return code */ + int changed = 0; /* True if cache must be reset */ + + assert( pagerUseWal(pPager) ); + + rc = sqlite3WalOpenSnapshot(pPager->pWal, &changed); + if( rc==SQLITE_OK ){ + int dummy; + if( changed ){ + pager_reset(pPager); + assert( pPager->errCode || pPager->dbSizeValid==0 ); + } + rc = sqlite3PagerPagecount(pPager, &dummy); + } + pPager->state = PAGER_SHARED; + + return rc; +} + +/* +** Check if the *-wal file that corresponds to the database opened by pPager +** exists. Assuming no error occurs, set *pExists to 1 if the file exists, +** or 0 otherwise and return SQLITE_OK. If an IO or OOM error occurs, return +** an SQLite error code. +*/ +static int pagerHasWAL(Pager *pPager, int *pExists){ + int rc; /* Return code */ + + if( !pPager->tempFile ){ + char *zWal = sqlite3_mprintf("%s-wal", pPager->zFilename); + if( !zWal ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3OsAccess(pPager->pVfs, zWal, SQLITE_ACCESS_EXISTS, pExists); + sqlite3_free(zWal); + } + }else{ + rc = SQLITE_OK; + *pExists = 0; + } + return rc; +} + +/* +** Check if the *-wal file that corresponds to the database opened by pPager +** exists. If it does, open the pager in WAL mode. Otherwise, if no error +** occurs, make sure Pager.journalMode is not set to PAGER_JOURNALMODE_WAL. +** If an IO or OOM error occurs, return an SQLite error code. +** +** If the WAL file is opened, also open a snapshot (read transaction). +** +** The caller must hold a SHARED lock on the database file to call this +** function. Because an EXCLUSIVE lock on the db file is required to delete +** a WAL, this ensures there is no race condition between the xAccess() +** below and an xDelete() being executed by some other connection. 
+*/
+static int pagerOpenWalIfPresent(Pager *pPager){
+  int rc = SQLITE_OK;
+  if( !pPager->tempFile ){
+    int isWal;                    /* True if WAL file exists */
+    rc = pagerHasWAL(pPager, &isWal);
+    if( rc==SQLITE_OK ){
+      if( isWal ){
+        pager_reset(pPager);
+        rc = sqlite3PagerOpenWal(pPager, 0);
+        if( rc==SQLITE_OK ){
+          rc = pagerOpenSnapshot(pPager);
+        }
+      }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
+        pPager->journalMode = PAGER_JOURNALMODE_DELETE;
+      }
+    }
+  }
+  return rc;
+}
+#endif
+
 /*
 ** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback
 ** the entire master journal file. The case pSavepoint==NULL occurs when
 ** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction
 ** savepoint.
@@ -2194,26 +2484,31 @@

   /* Set the database size back to the value it was before the savepoint
   ** being reverted was opened.
   */
   pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize;
+
+  if( !pSavepoint && pagerUseWal(pPager) ){
+    return pagerRollbackWal(pPager);
+  }

   /* Use pPager->journalOff as the effective size of the main rollback
   ** journal. The actual file might be larger than this in
   ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything
   ** past pPager->journalOff is off-limits to us.
   */
   szJ = pPager->journalOff;
+  assert( pagerUseWal(pPager)==0 || szJ==0 );

   /* Begin by rolling back records from the main journal starting at
   ** PagerSavepoint.iOffset and continuing to the next journal header.
   ** There might be records in the main journal that have a page number
   ** greater than the current database size (pPager->dbSize) but those
   ** will be skipped automatically. Pages are added to pDone as they
   ** are played back.
   */
-  if( pSavepoint ){
+  if( pSavepoint && !pagerUseWal(pPager) ){
     iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
     pPager->journalOff = pSavepoint->iOffset;
     while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
       rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
     }
@@ -2256,10 +2551,14 @@
   ** will be skipped.
Out-of-range pages are also skipped.
   */
   if( pSavepoint ){
     u32 ii;            /* Loop counter */
     i64 offset = pSavepoint->iSubRec*(4+pPager->pageSize);
+
+    if( pagerUseWal(pPager) ){
+      rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->iFrame);
+    }
     for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && ii<pPager->nSubRec; ii++){
       assert( offset==ii*(4+pPager->pageSize) );
       rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1);
     }
     assert( rc!=SQLITE_DONE );
@@ -2267,10 +2566,11 @@

   sqlite3BitvecDestroy(pDone);
   if( rc==SQLITE_OK ){
     pPager->journalOff = szJ;
   }
+
   return rc;
 }

 /*
 ** Change the maximum number of in-memory pages that are allowed.
@@ -2556,28 +2856,36 @@
 **
 ** Otherwise, if everything is successful, then SQLITE_OK is returned
 ** and *pnPage is set to the number of pages in the database.
 */
 int sqlite3PagerPagecount(Pager *pPager, int *pnPage){
-  Pgno nPage;               /* Value to return via *pnPage */
+  Pgno nPage = 0;           /* Value to return via *pnPage */

   /* Determine the number of pages in the file. Store this in nPage.
*/
   if( pPager->dbSizeValid ){
     nPage = pPager->dbSize;
   }else{
     int rc;                 /* Error returned by OsFileSize() */
     i64 n = 0;              /* File size in bytes returned by OsFileSize() */

-    assert( isOpen(pPager->fd) || pPager->tempFile );
-    if( isOpen(pPager->fd) && (0 != (rc = sqlite3OsFileSize(pPager->fd, &n))) ){
-      pager_error(pPager, rc);
-      return rc;
-    }
-    if( n>0 && n<pPager->pageSize ){
-      nPage = 1;
-    }else{
-      nPage = (Pgno)(n / pPager->pageSize);
+    if( pagerUseWal(pPager) ){
+      sqlite3WalDbsize(pPager->pWal, &nPage);
+    }
+
+    if( nPage==0 ){
+      assert( isOpen(pPager->fd) || pPager->tempFile );
+      if( isOpen(pPager->fd) ){
+        if( SQLITE_OK!=(rc = sqlite3OsFileSize(pPager->fd, &n)) ){
+          pager_error(pPager, rc);
+          return rc;
+        }
+      }
+      if( n>0 && n<pPager->pageSize ){
+        nPage = 1;
+      }else{
+        nPage = (Pgno)(n / pPager->pageSize);
+      }
     }
     if( pPager->state!=PAGER_UNLOCK ){
       pPager->dbSize = nPage;
       pPager->dbFileSize = nPage;
       pPager->dbSizeValid = 1;
@@ -2695,10 +3003,11 @@
   assert( pPager->dbSize>=nPage );
   assert( pPager->state>=PAGER_RESERVED );
   pPager->dbSize = nPage;
   assertTruncateConstraint(pPager);
 }
+

 /*
 ** This function is called before attempting a hot-journal rollback. It
 ** syncs the journal file to disk, then sets pPager->journalHdr to the
 ** size of the journal file so that the pager_playback() routine knows
@@ -2736,14 +3045,22 @@
 ** is made to roll it back. If an error occurs during the rollback
 ** a hot journal may be left in the filesystem but no error is returned
 ** to the caller.
 */
 int sqlite3PagerClose(Pager *pPager){
+  u8 *pTmp = (u8 *)pPager->pTmpSpace;
+
   disable_simulated_io_errors();
   sqlite3BeginBenignMalloc();
   pPager->errCode = 0;
   pPager->exclusiveMode = 0;
+#ifndef SQLITE_OMIT_WAL
+  sqlite3WalClose(pPager->pWal, pPager->fd,
+    (pPager->noSync ?
0 : pPager->sync_flags), pTmp + ); + pPager->pWal = 0; +#endif pager_reset(pPager); if( MEMDB ){ pager_unlock(pPager); }else{ /* Set Pager.journalHdr to -1 for the benefit of the pager_playback() @@ -2760,11 +3077,11 @@ sqlite3EndBenignMalloc(); enable_simulated_io_errors(); PAGERTRACE(("CLOSE %d\n", PAGERID(pPager))); IOTRACE(("CLOSE %p\n", pPager)) sqlite3OsClose(pPager->fd); - sqlite3PageFree(pPager->pTmpSpace); + sqlite3PageFree(pTmp); sqlite3PcacheClose(pPager->pPCache); #ifdef SQLITE_HAS_CODEC if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec); #endif @@ -2861,14 +3178,14 @@ ** as a temporary buffer to inspect the first couple of bytes of ** the potential journal header. */ i64 iNextHdrOffset; u8 aMagic[8]; - u8 zHeader[sizeof(aJournalMagic)+4]; + u8 zHeader[sizeof(aJournalMagic)+4]; - memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); - put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec); + memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); + put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec); iNextHdrOffset = journalHdrOffset(pPager); rc = sqlite3OsRead(pPager->jfd, aMagic, 8, iNextHdrOffset); if( rc==SQLITE_OK && 0==memcmp(aMagic, aJournalMagic, 8) ){ static const u8 zerobyte = 0; @@ -2896,11 +3213,11 @@ if( rc!=SQLITE_OK ) return rc; } IOTRACE(("JHDR %p %lld\n", pPager, pPager->journalHdr)); rc = sqlite3OsWrite( pPager->jfd, zHeader, sizeof(zHeader), pPager->journalHdr - ); + ); if( rc!=SQLITE_OK ) return rc; } if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); IOTRACE(("JSYNC %p\n", pPager)) @@ -2976,10 +3293,11 @@ ** is unchanged and we can rollback without having to playback the ** journal into the original database file. Once we transition to ** EXCLUSIVE, it means the database file has been changed and any rollback ** will require a journal playback. 
*/ + assert( !pagerUseWal(pList->pPager) ); assert( pPager->state>=PAGER_RESERVED ); rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); /* If the file is a temp-file has not yet been opened, open it now. It ** is not possible for rc to be other than SQLITE_OK if this branch @@ -3064,11 +3382,14 @@ char *pData2; CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); - assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize ); + assert( pagerUseWal(pPager) + || pageInJournal(pPg) + || pPg->pgno>pPager->dbOrigSize + ); rc = write32bits(pPager->sjfd, offset, pPg->pgno); if( rc==SQLITE_OK ){ rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4); } } @@ -3077,11 +3398,10 @@ assert( pPager->nSavepoint>0 ); rc = addToSavepointBitvecs(pPager, pPg->pgno); } return rc; } - /* ** This function is called by the pcache layer when it has reached some ** soft memory limit. The first argument is a pointer to a Pager object ** (cast as a void*). The pager is always 'purgeable' (not an in-memory @@ -3105,78 +3425,88 @@ int rc = SQLITE_OK; assert( pPg->pPager==pPager ); assert( pPg->flags&PGHDR_DIRTY ); - /* The doNotSync flag is set by the sqlite3PagerWrite() function while it - ** is journalling a set of two or more database pages that are stored - ** on the same disk sector. Syncing the journal is not allowed while - ** this is happening as it is important that all members of such a - ** set of pages are synced to disk together. So, if the page this function - ** is trying to make clean will require a journal sync and the doNotSync - ** flag is set, return without doing anything. The pcache layer will - ** just have to go ahead and allocate a new page buffer instead of - ** reusing pPg. - ** - ** Similarly, if the pager has already entered the error state, do not - ** try to write the contents of pPg to disk. 
- */ - if( NEVER(pPager->errCode) - || (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC) - ){ - return SQLITE_OK; - } - - /* Sync the journal file if required. */ - if( pPg->flags&PGHDR_NEED_SYNC ){ - rc = syncJournal(pPager); - if( rc==SQLITE_OK && pPager->fullSync && - !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) && - !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) - ){ - pPager->nRec = 0; - rc = writeJournalHdr(pPager); - } - } - - /* If the page number of this page is larger than the current size of - ** the database image, it may need to be written to the sub-journal. - ** This is because the call to pager_write_pagelist() below will not - ** actually write data to the file in this case. - ** - ** Consider the following sequence of events: - ** - ** BEGIN; - ** - ** - ** SAVEPOINT sp; - ** - ** pagerStress(page X) - ** ROLLBACK TO sp; - ** - ** If (X>Y), then when pagerStress is called page X will not be written - ** out to the database file, but will be dropped from the cache. Then, - ** following the "ROLLBACK TO sp" statement, reading page X will read - ** data from the database file. This will be the copy of page X as it - ** was when the transaction started, not as it was when "SAVEPOINT sp" - ** was executed. - ** - ** The solution is to write the current data for page X into the - ** sub-journal file now (if it is not already there), so that it will - ** be restored to its current value when the "ROLLBACK TO sp" is - ** executed. - */ - if( NEVER( - rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg) - ) ){ - rc = subjournalPage(pPg); - } - - /* Write the contents of the page out to the database file. */ - if( rc==SQLITE_OK ){ - pPg->pDirty = 0; - rc = pager_write_pagelist(pPg); + pPg->pDirty = 0; + if( pagerUseWal(pPager) ){ + /* Write a single frame for this page to the log. 
*/ + if( subjRequiresPage(pPg) ){ + rc = subjournalPage(pPg); + } + if( rc==SQLITE_OK ){ + rc = pagerWalFrames(pPager, pPg, 0, 0, 0); + } + }else{ + /* The doNotSync flag is set by the sqlite3PagerWrite() function while it + ** is journalling a set of two or more database pages that are stored + ** on the same disk sector. Syncing the journal is not allowed while + ** this is happening as it is important that all members of such a + ** set of pages are synced to disk together. So, if the page this function + ** is trying to make clean will require a journal sync and the doNotSync + ** flag is set, return without doing anything. The pcache layer will + ** just have to go ahead and allocate a new page buffer instead of + ** reusing pPg. + ** + ** Similarly, if the pager has already entered the error state, do not + ** try to write the contents of pPg to disk. + */ + if( NEVER(pPager->errCode) + || (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC) + ){ + return SQLITE_OK; + } + + /* Sync the journal file if required. */ + if( pPg->flags&PGHDR_NEED_SYNC ){ + rc = syncJournal(pPager); + if( rc==SQLITE_OK && pPager->fullSync && + !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) && + !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) + ){ + pPager->nRec = 0; + rc = writeJournalHdr(pPager); + } + } + + /* If the page number of this page is larger than the current size of + ** the database image, it may need to be written to the sub-journal. + ** This is because the call to pager_write_pagelist() below will not + ** actually write data to the file in this case. + ** + ** Consider the following sequence of events: + ** + ** BEGIN; + ** <journal page X> + ** <modify page X> + ** SAVEPOINT sp; + ** <shrink database file to Y pages> + ** pagerStress(page X) + ** ROLLBACK TO sp; + ** + ** If (X>Y), then when pagerStress is called page X will not be written + ** out to the database file, but will be dropped from the cache.
Then, + ** following the "ROLLBACK TO sp" statement, reading page X will read + ** data from the database file. This will be the copy of page X as it + ** was when the transaction started, not as it was when "SAVEPOINT sp" + ** was executed. + ** + ** The solution is to write the current data for page X into the + ** sub-journal file now (if it is not already there), so that it will + ** be restored to its current value when the "ROLLBACK TO sp" is + ** executed. + */ + if( NEVER( + rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg) + ) ){ + rc = subjournalPage(pPg); + } + + /* Write the contents of the page out to the database file. */ + if( rc==SQLITE_OK ){ + rc = pager_write_pagelist(pPg); + } } /* Mark the page as clean. */ if( rc==SQLITE_OK ){ PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno)); @@ -3580,71 +3910,10 @@ } return rc; } -/* -** Read the content for page pPg out of the database file and into -** pPg->pData. A shared lock or greater must be held on the database -** file before this function is called. -** -** If page 1 is read, then the value of Pager.dbFileVers[] is set to -** the value read from the database file. -** -** If an IO error occurs, then the IO error is returned to the caller. -** Otherwise, SQLITE_OK is returned. 
-*/ -static int readDbPage(PgHdr *pPg){ - Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ - Pgno pgno = pPg->pgno; /* Page number to read */ - int rc; /* Return code */ - i64 iOffset; /* Byte offset of file to read from */ - - assert( pPager->state>=PAGER_SHARED && !MEMDB ); - assert( isOpen(pPager->fd) ); - - if( NEVER(!isOpen(pPager->fd)) ){ - assert( pPager->tempFile ); - memset(pPg->pData, 0, pPager->pageSize); - return SQLITE_OK; - } - iOffset = (pgno-1)*(i64)pPager->pageSize; - rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); - if( rc==SQLITE_IOERR_SHORT_READ ){ - rc = SQLITE_OK; - } - if( pgno==1 ){ - if( rc ){ - /* If the read is unsuccessful, set the dbFileVers[] to something - ** that will never be a valid file version. dbFileVers[] is a copy - ** of bytes 24..39 of the database. Bytes 28..31 should always be - ** zero. Bytes 32..35 and 35..39 should be page numbers which are - ** never 0xffffffff. So filling pPager->dbFileVers[] with all 0xff - ** bytes should suffice. - ** - ** For an encrypted database, the situation is more complex: bytes - ** 24..39 of the database are white noise. But the probability of - ** white noising equaling 16 bytes of 0xff is vanishingly small so - ** we should still be ok. - */ - memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers)); - }else{ - u8 *dbFileVers = &((u8*)pPg->pData)[24]; - memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers)); - } - } - CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM); - - PAGER_INCR(sqlite3_pager_readdb_count); - PAGER_INCR(pPager->nRead); - IOTRACE(("PGIN %p %d\n", pPager, pgno)); - PAGERTRACE(("FETCH %d page %d hash(%08x)\n", - PAGERID(pPager), pgno, pager_pagehash(pPg))); - - return rc; -} - /* ** This function is called to obtain a shared lock on the database file. ** It is illegal to call sqlite3PagerAcquire() until after this function ** has been successfully called. 
If a shared-lock is already held when ** this function is called, it is a no-op. @@ -3694,11 +3963,13 @@ } pPager->errCode = SQLITE_OK; pager_reset(pPager); } - if( pPager->state==PAGER_UNLOCK || isErrorReset ){ + if( pagerUseWal(pPager) ){ + rc = pagerOpenSnapshot(pPager); + }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){ sqlite3_vfs * const pVfs = pPager->pVfs; int isHotJournal = 0; assert( !MEMDB ); assert( sqlite3PcacheRefCount(pPager->pPCache)==0 ); if( pPager->noReadlock ){ @@ -3783,11 +4054,11 @@ ** rollback journal at this time. */ pPager->journalStarted = 0; pPager->journalOff = 0; pPager->setMaster = 0; pPager->journalHdr = 0; - + /* Make sure the journal file has been synced to disk. */ /* Playback and delete the journal. Drop the database write ** lock and reacquire the read lock. Purge the cache before ** playing back the hot-journal so that we don't end up with @@ -3850,10 +4121,15 @@ if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ pager_reset(pPager); } } assert( pPager->exclusiveMode || pPager->state==PAGER_SHARED ); + + /* If there is a WAL file in the file-system, open this database in WAL + ** mode. Otherwise, the following function call is a no-op. + */ + rc = pagerOpenWalIfPresent(pPager); } failed: if( rc!=SQLITE_OK ){ /* pager_unlock() is a no-op for exclusive mode and in-memory databases. */ @@ -3990,12 +4266,12 @@ goto pager_acquire_err; } if( MEMDB || nMax<(int)pgno || noContent || !isOpen(pPager->fd) ){ if( pgno>pPager->mxPgno ){ - rc = SQLITE_FULL; - goto pager_acquire_err; + rc = SQLITE_FULL; + goto pager_acquire_err; } if( noContent ){ /* Failure to set the bits in the InJournal bit-vectors is benign. ** It merely means that we might do some extra work to journal a ** page that does not need to be journaled. Nevertheless, be sure @@ -4086,11 +4362,11 @@ ** An SQLITE_IOERR_XXX error code is returned if a call to ** sqlite3OsOpen() fails. 
*/ static int openSubJournal(Pager *pPager){ int rc = SQLITE_OK; - if( isOpen(pPager->jfd) && !isOpen(pPager->sjfd) ){ + if( (pagerUseWal(pPager) || isOpen(pPager->jfd)) && !isOpen(pPager->sjfd) ){ if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){ sqlite3MemJournalOpen(pPager->sjfd); }else{ rc = pagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL); } @@ -4222,20 +4498,34 @@ pPager->subjInMemory = (u8)subjInMemory; if( pPager->state==PAGER_SHARED ){ assert( pPager->pInJournal==0 ); assert( !MEMDB && !pPager->tempFile ); - /* Obtain a RESERVED lock on the database file. If the exFlag parameter - ** is true, then immediately upgrade this to an EXCLUSIVE lock. The - ** busy-handler callback can be used when upgrading to the EXCLUSIVE - ** lock, but not when obtaining the RESERVED lock. - */ - rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK); - if( rc==SQLITE_OK ){ - pPager->state = PAGER_RESERVED; - if( exFlag ){ - rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + if( pagerUseWal(pPager) ){ + /* Grab the write lock on the log file. If successful, upgrade to + ** PAGER_EXCLUSIVE state. Otherwise, return an error code to the caller. + ** The busy-handler is not invoked if another connection already + ** holds the write-lock. If possible, the upper layer will call it. + */ + rc = sqlite3WalWriteLock(pPager->pWal, 1); + if( rc==SQLITE_OK ){ + pPager->dbOrigSize = pPager->dbSize; + pPager->state = PAGER_RESERVED; + pPager->journalOff = 0; + } + }else{ + /* Obtain a RESERVED lock on the database file. If the exFlag parameter + ** is true, then immediately upgrade this to an EXCLUSIVE lock. The + ** busy-handler callback can be used when upgrading to the EXCLUSIVE + ** lock, but not when obtaining the RESERVED lock. 
+ */ + rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK); + if( rc==SQLITE_OK ){ + pPager->state = PAGER_RESERVED; + if( exFlag ){ + rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + } } } /* No need to open the journal file at this time. It will be ** opened before it is written to. If we defer opening the journal, @@ -4247,10 +4537,11 @@ ** time a (read or write) transaction was successfully concluded ** by this connection. Instead of deleting the journal file it was ** kept open and either was truncated to 0 bytes or its header was ** overwritten with zeros. */ + assert( pagerUseWal(pPager)==0 ); assert( pPager->nRec==0 ); assert( pPager->dbOrigSize==0 ); assert( pPager->pInJournal==0 ); rc = pager_open_journal(pPager); } @@ -4301,10 +4592,11 @@ /* Mark the page as dirty. If the page has already been written ** to the journal then we can return right away. */ sqlite3PcacheMakeDirty(pPg); if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){ + assert( !pagerUseWal(pPager) ); pPager->dbModified = 1; }else{ /* If we get this far, it means that the page needs to be ** written to the transaction journal or the ckeckpoint journal @@ -4316,11 +4608,14 @@ */ rc = sqlite3PagerBegin(pPager, 0, pPager->subjInMemory); if( rc!=SQLITE_OK ){ return rc; } - if( !isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ + if( !isOpen(pPager->jfd) + && pPager->journalMode!=PAGER_JOURNALMODE_OFF + && !pagerUseWal(pPager) + ){ assert( pPager->useJournal ); rc = pager_open_journal(pPager); if( rc!=SQLITE_OK ) return rc; } pPager->dbModified = 1; @@ -4328,10 +4623,11 @@ /* The transaction journal now exists and we have a RESERVED or an ** EXCLUSIVE lock on the main database file. Write the current page to ** the transaction journal if it is not there already. 
*/ if( !pageInJournal(pPg) && isOpen(pPager->jfd) ){ + assert( !pagerUseWal(pPager) ); if( pPg->pgno<=pPager->dbOrigSize ){ u32 cksum; char *pData2; /* We should never write to the journal file the page that @@ -4708,132 +5004,141 @@ ** function has already been called, it is mostly a no-op. However, any ** backup in progress needs to be restarted. */ sqlite3BackupRestart(pPager->pBackup); }else if( pPager->state!=PAGER_SYNCED && pPager->dbModified ){ - - /* The following block updates the change-counter. Exactly how it - ** does this depends on whether or not the atomic-update optimization - ** was enabled at compile time, and if this transaction meets the - ** runtime criteria to use the operation: - ** - ** * The file-system supports the atomic-write property for - ** blocks of size page-size, and - ** * This commit is not part of a multi-file transaction, and - ** * Exactly one page has been modified and store in the journal file. - ** - ** If the optimization was not enabled at compile time, then the - ** pager_incr_changecounter() function is called to update the change - ** counter in 'indirect-mode'. If the optimization is compiled in but - ** is not applicable to this transaction, call sqlite3JournalCreate() - ** to make sure the journal file has actually been created, then call - ** pager_incr_changecounter() to update the change-counter in indirect - ** mode. - ** - ** Otherwise, if the optimization is both enabled and applicable, - ** then call pager_incr_changecounter() to update the change-counter - ** in 'direct' mode. In this case the journal file will never be - ** created for this transaction. 
- */ -#ifdef SQLITE_ENABLE_ATOMIC_WRITE - PgHdr *pPg; - assert( isOpen(pPager->jfd) || pPager->journalMode==PAGER_JOURNALMODE_OFF ); - if( !zMaster && isOpen(pPager->jfd) - && pPager->journalOff==jrnlBufferSize(pPager) - && pPager->dbSize>=pPager->dbFileSize - && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) - ){ - /* Update the db file change counter via the direct-write method. The - ** following call will modify the in-memory representation of page 1 - ** to include the updated change counter and then write page 1 - ** directly to the database file. Because of the atomic-write - ** property of the host file-system, this is safe. - */ - rc = pager_incr_changecounter(pPager, 1); - }else{ - rc = sqlite3JournalCreate(pPager->jfd); - if( rc==SQLITE_OK ){ - rc = pager_incr_changecounter(pPager, 0); - } - } -#else - rc = pager_incr_changecounter(pPager, 0); -#endif - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - - /* If this transaction has made the database smaller, then all pages - ** being discarded by the truncation must be written to the journal - ** file. This can only happen in auto-vacuum mode. - ** - ** Before reading the pages with page numbers larger than the - ** current value of Pager.dbSize, set dbSize back to the value - ** that it took at the start of the transaction. Otherwise, the - ** calls to sqlite3PagerGet() return zeroed pages instead of - ** reading data from the database file. - ** - ** When journal_mode==OFF the dbOrigSize is always zero, so this - ** block never runs if journal_mode=OFF. 
- */ -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pPager->dbSize<pPager->dbOrigSize - && ALWAYS(pPager->journalMode!=PAGER_JOURNALMODE_OFF) - ){ - Pgno i; /* Iterator variable */ - const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */ - const Pgno dbSize = pPager->dbSize; /* Database image size */ - pPager->dbSize = pPager->dbOrigSize; - for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){ - if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){ - PgHdr *pPage; /* Page to journal */ - rc = sqlite3PagerGet(pPager, i, &pPage); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - rc = sqlite3PagerWrite(pPage); - sqlite3PagerUnref(pPage); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - } - } - pPager->dbSize = dbSize; - } -#endif - - /* Write the master journal name into the journal file. If a master - ** journal file name has already been written to the journal file, - ** or if zMaster is NULL (no master journal), then this call is a no-op. - */ - rc = writeMasterJournal(pPager, zMaster); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - - /* Sync the journal file. If the atomic-update optimization is being - ** used, this call will not create the journal file or perform any - ** real IO. - */ - rc = syncJournal(pPager); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - - /* Write all dirty pages to the database file. */ - rc = pager_write_pagelist(sqlite3PcacheDirtyList(pPager->pPCache)); - if( rc!=SQLITE_OK ){ - assert( rc!=SQLITE_IOERR_BLOCKED ); - goto commit_phase_one_exit; - } - sqlite3PcacheCleanAll(pPager->pPCache); - - /* If the file on disk is not the same size as the database image, - ** then use pager_truncate to grow or shrink the file here. - */ - if( pPager->dbSize!=pPager->dbFileSize ){ - Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); - assert( pPager->state>=PAGER_EXCLUSIVE ); - rc = pager_truncate(pPager, nNew); - if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - } - - /* Finally, sync the database file.
*/ - if( !pPager->noSync && !noSync ){ - rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); - } - IOTRACE(("DBSYNC %p\n", pPager)) + if( pagerUseWal(pPager) ){ + PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); + if( pList ){ + rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1, + (pPager->fullSync ? pPager->sync_flags : 0) + ); + } + sqlite3PcacheCleanAll(pPager->pPCache); + }else{ + /* The following block updates the change-counter. Exactly how it + ** does this depends on whether or not the atomic-update optimization + ** was enabled at compile time, and if this transaction meets the + ** runtime criteria to use the operation: + ** + ** * The file-system supports the atomic-write property for + ** blocks of size page-size, and + ** * This commit is not part of a multi-file transaction, and + ** * Exactly one page has been modified and store in the journal file. + ** + ** If the optimization was not enabled at compile time, then the + ** pager_incr_changecounter() function is called to update the change + ** counter in 'indirect-mode'. If the optimization is compiled in but + ** is not applicable to this transaction, call sqlite3JournalCreate() + ** to make sure the journal file has actually been created, then call + ** pager_incr_changecounter() to update the change-counter in indirect + ** mode. + ** + ** Otherwise, if the optimization is both enabled and applicable, + ** then call pager_incr_changecounter() to update the change-counter + ** in 'direct' mode. In this case the journal file will never be + ** created for this transaction. + */ + #ifdef SQLITE_ENABLE_ATOMIC_WRITE + PgHdr *pPg; + assert( isOpen(pPager->jfd) || pPager->journalMode==PAGER_JOURNALMODE_OFF ); + if( !zMaster && isOpen(pPager->jfd) + && pPager->journalOff==jrnlBufferSize(pPager) + && pPager->dbSize>=pPager->dbFileSize + && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) + ){ + /* Update the db file change counter via the direct-write method. 
The + ** following call will modify the in-memory representation of page 1 + ** to include the updated change counter and then write page 1 + ** directly to the database file. Because of the atomic-write + ** property of the host file-system, this is safe. + */ + rc = pager_incr_changecounter(pPager, 1); + }else{ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc==SQLITE_OK ){ + rc = pager_incr_changecounter(pPager, 0); + } + } + #else + rc = pager_incr_changecounter(pPager, 0); + #endif + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* If this transaction has made the database smaller, then all pages + ** being discarded by the truncation must be written to the journal + ** file. This can only happen in auto-vacuum mode. + ** + ** Before reading the pages with page numbers larger than the + ** current value of Pager.dbSize, set dbSize back to the value + ** that it took at the start of the transaction. Otherwise, the + ** calls to sqlite3PagerGet() return zeroed pages instead of + ** reading data from the database file. + ** + ** When journal_mode==OFF the dbOrigSize is always zero, so this + ** block never runs if journal_mode=OFF. + */ + #ifndef SQLITE_OMIT_AUTOVACUUM + if( pPager->dbSize<pPager->dbOrigSize + && ALWAYS(pPager->journalMode!=PAGER_JOURNALMODE_OFF) + ){ + Pgno i; /* Iterator variable */ + const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */ + const Pgno dbSize = pPager->dbSize; /* Database image size */ + pPager->dbSize = pPager->dbOrigSize; + for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){ + if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){ + PgHdr *pPage; /* Page to journal */ + rc = sqlite3PagerGet(pPager, i, &pPage); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + rc = sqlite3PagerWrite(pPage); + sqlite3PagerUnref(pPage); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } + } + pPager->dbSize = dbSize; + } + #endif + + /* Write the master journal name into the journal file.
If a master + ** journal file name has already been written to the journal file, + ** or if zMaster is NULL (no master journal), then this call is a no-op. + */ + rc = writeMasterJournal(pPager, zMaster); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* Sync the journal file. If the atomic-update optimization is being + ** used, this call will not create the journal file or perform any + ** real IO. + */ + rc = syncJournal(pPager); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + + /* Write all dirty pages to the database file. */ + rc = pager_write_pagelist(sqlite3PcacheDirtyList(pPager->pPCache)); + if( rc!=SQLITE_OK ){ + assert( rc!=SQLITE_IOERR_BLOCKED ); + goto commit_phase_one_exit; + } + sqlite3PcacheCleanAll(pPager->pPCache); + + /* If the file on disk is not the same size as the database image, + ** then use pager_truncate to grow or shrink the file here. + */ + if( pPager->dbSize!=pPager->dbFileSize ){ + Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); + assert( pPager->state>=PAGER_EXCLUSIVE ); + rc = pager_truncate(pPager, nNew); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } + + /* Finally, sync the database file. */ + if( !pPager->noSync && !noSync ){ + rc = sqlite3OsSync(pPager->fd, pPager->sync_flags); + } + IOTRACE(("DBSYNC %p\n", pPager)) + } pPager->state = PAGER_SYNCED; } commit_phase_one_exit: @@ -4938,11 +5243,17 @@ ** hot-journal rollback). 
*/ int sqlite3PagerRollback(Pager *pPager){ int rc = SQLITE_OK; /* Return code */ PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager))); - if( !pPager->dbModified || !isOpen(pPager->jfd) ){ + if( pagerUseWal(pPager) ){ + int rc2; + + rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1); + rc2 = pager_end_transaction(pPager, pPager->setMaster); + if( rc==SQLITE_OK ) rc = rc2; + }else if( !pPager->dbModified || !isOpen(pPager->jfd) ){ rc = pager_end_transaction(pPager, pPager->setMaster); }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){ if( pPager->state>=PAGER_EXCLUSIVE ){ pager_playback(pPager, 0); } @@ -5079,10 +5390,13 @@ aNew[ii].iSubRec = pPager->nSubRec; aNew[ii].pInSavepoint = sqlite3BitvecCreate(nPage); if( !aNew[ii].pInSavepoint ){ return SQLITE_NOMEM; } + if( pagerUseWal(pPager) ){ + aNew[ii].iFrame = sqlite3WalSavepoint(pPager->pWal); + } } /* Open the sub-journal, if it is not already opened. */ rc = openSubJournal(pPager); assertTruncateConstraint(pPager); @@ -5156,11 +5470,11 @@ /* Else this is a rollback operation, playback the specified savepoint. ** If this is a temp-file, it is possible that the journal file has ** not yet been opened. In this case there have been no changes to ** the database file, so the playback operation can be skipped. */ - else if( isOpen(pPager->jfd) ){ + else if( pagerUseWal(pPager) || isOpen(pPager->jfd) ){ PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1]; rc = pagerPlaybackSavepoint(pPager, pSavepoint); assert(rc!=SQLITE_DONE); } @@ -5433,13 +5747,14 @@ ** PAGER_JOURNALMODE_DELETE ** PAGER_JOURNALMODE_TRUNCATE ** PAGER_JOURNALMODE_PERSIST ** PAGER_JOURNALMODE_OFF ** PAGER_JOURNALMODE_MEMORY +** PAGER_JOURNALMODE_WAL ** ** If the parameter is not _QUERY, then the journal_mode is set to the -** value specified if the change is allowed. The change is disallowed +** value specified if the change is allowed. 
The change may be disallowed ** for the following reasons: ** ** * An in-memory database can only have its journal_mode set to _OFF ** or _MEMORY. ** @@ -5451,15 +5766,21 @@ assert( eMode==PAGER_JOURNALMODE_QUERY || eMode==PAGER_JOURNALMODE_DELETE || eMode==PAGER_JOURNALMODE_TRUNCATE || eMode==PAGER_JOURNALMODE_PERSIST || eMode==PAGER_JOURNALMODE_OFF + || eMode==PAGER_JOURNALMODE_WAL || eMode==PAGER_JOURNALMODE_MEMORY ); assert( PAGER_JOURNALMODE_QUERY<0 ); - if( eMode>=0 - && (!MEMDB || eMode==PAGER_JOURNALMODE_MEMORY - || eMode==PAGER_JOURNALMODE_OFF) + + if( eMode==PAGER_JOURNALMODE_WAL + && pPager->journalMode==PAGER_JOURNALMODE_DELETE + ){ + pPager->journalMode = PAGER_JOURNALMODE_WAL; + }else if( eMode>=0 + && (pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL) + && (!MEMDB || eMode==PAGER_JOURNALMODE_MEMORY||eMode==PAGER_JOURNALMODE_OFF) && !pPager->dbModified && (!isOpen(pPager->jfd) || 0==pPager->journalOff) ){ if( isOpen(pPager->jfd) ){ sqlite3OsClose(pPager->jfd); @@ -5471,10 +5792,11 @@ assert( (PAGER_JOURNALMODE_OFF & 1)==0 ); if( (pPager->journalMode & 1)==1 && (eMode & 1)==0 && !pPager->exclusiveMode ){ sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); } + pPager->journalMode = (u8)eMode; } return (int)pPager->journalMode; } @@ -5498,7 +5820,101 @@ ** sqlite3BackupUpdate() only. */ sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){ return &pPager->pBackup; } + +#ifndef SQLITE_OMIT_WAL +/* +** This function is called when the user invokes "PRAGMA wal_checkpoint". +*/ +int sqlite3PagerCheckpoint(Pager *pPager){ + int rc = SQLITE_OK; + if( pPager->pWal ){ + u8 *zBuf = (u8 *)pPager->pTmpSpace; + rc = sqlite3WalCheckpoint(pPager->pWal, pPager->fd, + (pPager->noSync ? 0 : pPager->sync_flags), + zBuf, pPager->xBusyHandler, pPager->pBusyHandlerArg + ); + } + return rc; +} + +int sqlite3PagerWalCallback(Pager *pPager){ + return sqlite3WalCallback(pPager->pWal); +} + +/* +** Open a connection to the write-ahead log file for pager pPager.
If +** the log connection is already open, this function just sets *pisOpen to 1. +** +** The caller must be holding a SHARED lock on the database file to call +** this function. +*/ +int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen){ + int rc = SQLITE_OK; /* Return code */ + + assert( pPager->state>=PAGER_SHARED ); + if( !pPager->pWal ){ + + /* Open the connection to the log file. If this operation fails + ** (e.g. due to malloc() failure), the journal mode is left unchanged + ** and an error code is returned. + */ + rc = sqlite3WalOpen(pPager->pVfs, pPager->zFilename, &pPager->pWal); + if( rc==SQLITE_OK ){ + pPager->journalMode = PAGER_JOURNALMODE_WAL; + } + }else{ + *pisOpen = 1; + } + + return rc; +} + +/* +** This function is called to close the connection to the log file prior +** to switching from WAL to rollback mode. +** +** Before closing the log file, this function attempts to take an +** EXCLUSIVE lock on the database file. If this cannot be obtained, an +** error (SQLITE_BUSY) is returned and the log connection is not closed. +** If successful, the EXCLUSIVE lock is not released before returning. +*/ +int sqlite3PagerCloseWal(Pager *pPager){ + int rc = SQLITE_OK; + + assert( pPager->journalMode==PAGER_JOURNALMODE_WAL ); + + /* If the log file is not already open, but does exist in the file-system, + ** it may need to be checkpointed before the connection can switch to + ** rollback mode. Open it now so this can happen. + */ + if( !pPager->pWal ){ + int logexists = 0; + rc = sqlite3OsLock(pPager->fd, SQLITE_LOCK_SHARED); + if( rc==SQLITE_OK ){ + rc = pagerHasWAL(pPager, &logexists); + } + if( rc==SQLITE_OK && logexists ){ + rc = sqlite3WalOpen(pPager->pVfs, pPager->zFilename, &pPager->pWal); + } + } + + /* Checkpoint and close the log. Because an EXCLUSIVE lock is held on + ** the database file, the log and log-summary files will be deleted.
+ */ + if( rc==SQLITE_OK && pPager->pWal ){ + rc = sqlite3OsLock(pPager->fd, SQLITE_LOCK_EXCLUSIVE); + if( rc==SQLITE_OK ){ + rc = sqlite3WalClose(pPager->pWal, pPager->fd, + (pPager->noSync ? 0 : pPager->sync_flags), + (u8*)pPager->pTmpSpace + ); + pPager->pWal = 0; + } + } + return rc; +} +#endif #endif /* SQLITE_OMIT_DISKIO */ Index: src/pager.h ================================================================== --- src/pager.h +++ src/pager.h @@ -74,10 +74,11 @@ #define PAGER_JOURNALMODE_DELETE 0 /* Commit by deleting journal file */ #define PAGER_JOURNALMODE_PERSIST 1 /* Commit by zeroing journal header */ #define PAGER_JOURNALMODE_OFF 2 /* Journal omitted. */ #define PAGER_JOURNALMODE_TRUNCATE 3 /* Commit by truncating journal */ #define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ +#define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ /* ** The remainder of this file contains the declarations of the functions ** that make up the Pager sub-system API. See source code comments for ** a detailed description of each routine. @@ -130,10 +131,15 @@ int sqlite3PagerCommitPhaseTwo(Pager*); int sqlite3PagerRollback(Pager*); int sqlite3PagerOpenSavepoint(Pager *pPager, int n); int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint); int sqlite3PagerSharedLock(Pager *pPager); + +int sqlite3PagerCheckpoint(Pager *pPager); +int sqlite3PagerWalCallback(Pager *pPager); +int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen); +int sqlite3PagerCloseWal(Pager *pPager); /* Functions used to query pager state and configuration. */ u8 sqlite3PagerIsreadonly(Pager*); int sqlite3PagerRefcount(Pager*); int sqlite3PagerMemUsed(Pager*); Index: src/pragma.c ================================================================== --- src/pragma.c +++ src/pragma.c @@ -255,10 +255,35 @@ } return zName; } #endif + +/* +** Parameter eMode must be one of the PAGER_JOURNALMODE_XXX constants +** defined in pager.h. 
This function returns the associated lowercase +** journal-mode name. +*/ +const char *sqlite3JournalModename(int eMode){ + static char * const azModeName[] = { + "delete", "persist", "off", "truncate", "memory" +#ifndef SQLITE_OMIT_WAL + , "wal" +#endif + }; + assert( PAGER_JOURNALMODE_DELETE==0 ); + assert( PAGER_JOURNALMODE_PERSIST==1 ); + assert( PAGER_JOURNALMODE_OFF==2 ); + assert( PAGER_JOURNALMODE_TRUNCATE==3 ); + assert( PAGER_JOURNALMODE_MEMORY==4 ); + assert( PAGER_JOURNALMODE_WAL==5 ); + assert( eMode>=0 && eMode<=ArraySize(azModeName) ); + + if( eMode==ArraySize(azModeName) ) return 0; + return azModeName[eMode]; +} + /* ** Process a pragma statement. ** ** Pragmas are of this form: ** @@ -507,63 +532,59 @@ /* ** PRAGMA [database.]journal_mode ** PRAGMA [database.]journal_mode = (delete|persist|off|truncate|memory) */ if( sqlite3StrICmp(zLeft,"journal_mode")==0 ){ - int eMode; - static char * const azModeName[] = { - "delete", "persist", "off", "truncate", "memory" - }; + int eMode; /* One of the PAGER_JOURNALMODE_XXX symbols */ + + sqlite3VdbeSetNumCols(v, 1); + sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "journal_mode", SQLITE_STATIC); if( zRight==0 ){ eMode = PAGER_JOURNALMODE_QUERY; }else{ + const char *zMode; int n = sqlite3Strlen30(zRight); - eMode = sizeof(azModeName)/sizeof(azModeName[0]) - 1; - while( eMode>=0 && sqlite3StrNICmp(zRight, azModeName[eMode], n)!=0 ){ - eMode--; + for(eMode=0; (zMode = sqlite3JournalModename(eMode)); eMode++){ + if( sqlite3StrNICmp(zRight, zMode, n)==0 ) break; + } + if( !zMode ){ + eMode = PAGER_JOURNALMODE_QUERY; } } if( pId2->n==0 && eMode==PAGER_JOURNALMODE_QUERY ){ /* Simple "PRAGMA journal_mode;" statement. This is a query for ** the current default journal mode (which may be different to ** the journal-mode of the main database). 
*/ eMode = db->dfltJournalMode; + sqlite3VdbeAddOp2(v, OP_String8, 0, 1); + sqlite3VdbeChangeP4(v, -1, sqlite3JournalModename(eMode), P4_STATIC); }else{ - Pager *pPager; - if( pId2->n==0 ){ + int ii; + + if( pId2->n==0 && eMode!=PAGER_JOURNALMODE_WAL ){ /* This indicates that no database name was specified as part ** of the PRAGMA command. In this case the journal-mode must be ** set on all attached databases, as well as the main db file. ** ** Also, the sqlite3.dfltJournalMode variable is set so that ** any subsequently attached databases also use the specified - ** journal mode. + ** journal mode. Except, the default journal mode is never set + ** to WAL. */ - int ii; - assert(pDb==&db->aDb[0]); - for(ii=1; ii<db->nDb; ii++){ - if( db->aDb[ii].pBt ){ - pPager = sqlite3BtreePager(db->aDb[ii].pBt); - sqlite3PagerJournalMode(pPager, eMode); - } - } db->dfltJournalMode = (u8)eMode; } - pPager = sqlite3BtreePager(pDb->pBt); - eMode = sqlite3PagerJournalMode(pPager, eMode); - } - assert( eMode==PAGER_JOURNALMODE_DELETE - || eMode==PAGER_JOURNALMODE_TRUNCATE - || eMode==PAGER_JOURNALMODE_PERSIST - || eMode==PAGER_JOURNALMODE_OFF - || eMode==PAGER_JOURNALMODE_MEMORY ); - sqlite3VdbeSetNumCols(v, 1); - sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "journal_mode", SQLITE_STATIC); - sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, - azModeName[eMode], P4_STATIC); + + for(ii=db->nDb-1; ii>=0; ii--){ + if( db->aDb[ii].pBt && (ii==iDb || pId2->n==0) ){ + sqlite3VdbeUsesBtree(v, ii); + sqlite3VdbeAddOp3(v, OP_JournalMode, ii, 1, eMode); + } + } + } + sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1); }else /* ** PRAGMA [database.]journal_size_limit @@ -1376,10 +1397,40 @@ sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, zOpt, 0); sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1); } }else #endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */ + +#ifndef SQLITE_OMIT_WAL + /* + ** PRAGMA [database.]wal_checkpoint + ** + ** Checkpoint the database.
+ */ + if( sqlite3StrICmp(zLeft, "wal_checkpoint")==0 ){ + if( sqlite3ReadSchema(pParse) ) goto pragma_out; + sqlite3VdbeAddOp3(v, OP_Checkpoint, iDb, 0, 0); + }else + + /* + ** PRAGMA wal_autocheckpoint + ** PRAGMA wal_autocheckpoint = N + ** + ** Configure a database connection to automatically checkpoint a database + ** after accumulating N frames in the log. Or query for the current value + ** of N. + */ + if( sqlite3StrICmp(zLeft, "wal_autocheckpoint")==0 ){ + if( zRight ){ + int nAuto = atoi(zRight); + sqlite3_wal_autocheckpoint(db, nAuto); + } + returnSingleInt(pParse, "wal_autocheckpoint", + db->xWalCallback==sqlite3WalDefaultHook ? + SQLITE_PTR_TO_INT(db->pWalArg) : 0); + }else +#endif #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) /* ** Report the current state of file logs for all databases */ Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -816,12 +816,13 @@ ** least the number of microseconds given. The xCurrentTime() ** method returns a Julian Day Number for the current date and time. ** */ typedef struct sqlite3_vfs sqlite3_vfs; +typedef struct sqlite3_shm sqlite3_shm; struct sqlite3_vfs { - int iVersion; /* Structure version number */ + int iVersion; /* Structure version number (currently 2) */ int szOsFile; /* Size of subclassed sqlite3_file */ int mxPathname; /* Maximum file pathname length */ sqlite3_vfs *pNext; /* Next registered VFS */ const char *zName; /* Name of this virtual file system */ void *pAppData; /* Pointer to application-specific data */ @@ -836,12 +837,27 @@ void (*xDlClose)(sqlite3_vfs*, void*); int (*xRandomness)(sqlite3_vfs*, int nByte, char *zOut); int (*xSleep)(sqlite3_vfs*, int microseconds); int (*xCurrentTime)(sqlite3_vfs*, double*); int (*xGetLastError)(sqlite3_vfs*, int, char *); - /* New fields may be appended in figure versions. The iVersion - ** value will increment whenever this happens. 
*/ + /* + ** The methods above are in version 1 of the sqlite3_vfs object + ** definition. Those that follow are added in version 2 or later + */ + int (*xShmOpen)(sqlite3_vfs*, const char *zName, sqlite3_shm**); + int (*xShmSize)(sqlite3_shm*, int reqSize, int *pNewSize); + int (*xShmGet)(sqlite3_shm*, int reqMapSize, int *pMapSize, void**); + int (*xShmRelease)(sqlite3_shm*); + int (*xShmLock)(sqlite3_shm*, int desiredLock, int *gotLock); + int (*xShmClose)(sqlite3_shm*, int deleteFlag); + int (*xRename)(sqlite3_vfs*, const char *zOld, const char *zNew, int dirSync); + int (*xCurrentTimeInt64)(sqlite3_vfs*, sqlite3_int64*); + /* + ** The methods above are in versions 1 and 2 of the sqlite3_vfs object. + ** New fields may be appended in future versions. The iVersion + ** value will increment whenever this happens. + */ }; /* ** CAPI3REF: Flags for the xAccess VFS method ** @@ -857,10 +873,28 @@ */ #define SQLITE_ACCESS_EXISTS 0 #define SQLITE_ACCESS_READWRITE 1 #define SQLITE_ACCESS_READ 2 +/* +** CAPI3REF: Flags for the xShmLock VFS method +** +** These integer constants define the various locking states that +** an sqlite3_shm object can be in. The SQLITE_SHM_QUERY integer +** is not a valid lock state - it is a constant passed to the +** sqlite3_vfs.xShmLock() method for querying the current lock +** state. +*/ +#define SQLITE_SHM_UNLOCK 0 +#define SQLITE_SHM_READ 1 +#define SQLITE_SHM_READ_FULL 2 +#define SQLITE_SHM_WRITE 3 +#define SQLITE_SHM_PENDING 4 +#define SQLITE_SHM_CHECKPOINT 5 +#define SQLITE_SHM_RECOVER 6 +#define SQLITE_SHM_QUERY (-1) + /* ** CAPI3REF: Initialize The SQLite Library ** ** ^The sqlite3_initialize() routine initializes the ** SQLite library. ^The sqlite3_shutdown() routine @@ -5724,10 +5758,54 @@ ** a few hundred characters, it will be truncated to the length of the ** buffer.
*/ void sqlite3_log(int iErrCode, const char *zFormat, ...); +/* +** CAPI3REF: Write-Ahead Log Commit Hook +** +** The [sqlite3_wal_hook()] function is used to register a callback that +** will be invoked each time a database connection commits data to a +** write-ahead-log (i.e. whenever a transaction is committed in +** journal_mode=WAL mode). +** +** The callback is invoked by SQLite after the commit has taken place and +** the associated write-lock on the database released, so the implementation +** may read, write or checkpoint the database as required. +** +** The first parameter passed to the callback function when it is invoked +** is a copy of the third parameter passed to sqlite3_wal_hook() when +** registering the callback. The second is a copy of the database handle. +** The third parameter is the name of the database that was written to - +** either "main" or the name of an ATTACHed database. The fourth parameter +** is the number of pages currently in the log file, including those that +** were just committed. +** +** If an invocation of the callback function returns non-zero, then a +** checkpoint is automatically run on the database. If zero is returned, +** no special action is taken. +** +** A single database handle may have at most a single log callback +** registered at one time. Calling [sqlite3_wal_hook()] replaces any +** previously registered log callback. +*/ +void *sqlite3_wal_hook( + sqlite3*, + int(*)(void *,sqlite3*,const char*,int), + void* +); + +/* +** CAPI3REF: Configure an auto-checkpoint +*/ +int sqlite3_wal_autocheckpoint(sqlite3 *db, int N); + +/* +** CAPI3REF: Checkpoint a database +*/ +int sqlite3_wal_checkpoint(sqlite3 *db, const char *zDb); + /* ** Undo the hack that converts floating point types to integer for ** builds on processors without floating point support. 
*/ #ifdef SQLITE_OMIT_FLOATING_POINT Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -821,10 +821,14 @@ int (*xCommitCallback)(void*); /* Invoked at every commit. */ void *pRollbackArg; /* Argument to xRollbackCallback() */ void (*xRollbackCallback)(void*); /* Invoked at every commit. */ void *pUpdateArg; void (*xUpdateCallback)(void*,int, const char*,const char*,sqlite_int64); +#ifndef SQLITE_OMIT_WAL + int (*xWalCallback)(void *, sqlite3 *, const char *, int); + void *pWalArg; +#endif void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*); void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*); void *pCollNeededArg; sqlite3_value *pErr; /* Most recent error message */ char *zErrMsg; /* Most recent error message (UTF-8 encoded) */ @@ -2991,10 +2995,13 @@ int sqlite3Reprepare(Vdbe*); void sqlite3ExprListCheckLength(Parse*, ExprList*, const char*); CollSeq *sqlite3BinaryCompareCollSeq(Parse *, Expr *, Expr *); int sqlite3TempInMemory(const sqlite3*); VTable *sqlite3GetVTable(sqlite3*, Table*); +const char *sqlite3JournalModename(int); +int sqlite3Checkpoint(sqlite3*, int); +int sqlite3WalDefaultHook(void*,sqlite3*,const char*,int); /* Declarations for functions in fkey.c. All of these are replaced by ** no-op macros if OMIT_FOREIGN_KEY is defined. In this case no foreign ** key functionality is available. If OMIT_TRIGGER is defined but ** OMIT_FOREIGN_KEY is not, only some of the functions are no-oped. In Index: src/sqliteLimit.h ================================================================== --- src/sqliteLimit.h +++ src/sqliteLimit.h @@ -106,10 +106,18 @@ #endif #ifndef SQLITE_DEFAULT_TEMP_CACHE_SIZE # define SQLITE_DEFAULT_TEMP_CACHE_SIZE 500 #endif +/* +** The default number of frames to accumulate in the log file before +** checkpointing the database in WAL mode. 
+*/ +#ifndef SQLITE_DEFAULT_WAL_AUTOCHECKPOINT +# define SQLITE_DEFAULT_WAL_AUTOCHECKPOINT 1000 +#endif + /* ** The maximum number of attached databases. This must be between 0 ** and 30. The upper bound on 30 is because a 32-bit integer bitmap ** is used internally to track attached databases. */ Index: src/tclsqlite.c ================================================================== --- src/tclsqlite.c +++ src/tclsqlite.c @@ -121,10 +121,11 @@ int disableAuth; /* Disable the authorizer if it exists */ char *zNull; /* Text to substitute for an SQL NULL value */ SqlFunc *pFunc; /* List of SQL functions */ Tcl_Obj *pUpdateHook; /* Update hook script (if any) */ Tcl_Obj *pRollbackHook; /* Rollback hook script (if any) */ + Tcl_Obj *pLogHook; /* WAL hook script (if any) */ Tcl_Obj *pUnlockNotify; /* Unlock notify script (if any) */ SqlCollate *pCollate; /* List of SQL collation functions */ int rc; /* Return code of most recent sqlite3_exec() */ Tcl_Obj *pCollateNeeded; /* Collation needed script */ SqlPreparedStmt *stmtList; /* List of prepared statements*/ @@ -483,10 +484,13 @@ Tcl_DecrRefCount(pDb->pUpdateHook); } if( pDb->pRollbackHook ){ Tcl_DecrRefCount(pDb->pRollbackHook); } + if( pDb->pLogHook ){ + Tcl_DecrRefCount(pDb->pLogHook); + } if( pDb->pCollateNeeded ){ Tcl_DecrRefCount(pDb->pCollateNeeded); } Tcl_Free((char*)pDb); } @@ -586,10 +590,36 @@ assert(pDb->pRollbackHook); if( TCL_OK!=Tcl_EvalObjEx(pDb->interp, pDb->pRollbackHook, 0) ){ Tcl_BackgroundError(pDb->interp); } } + +static int DbLogHandler( + void *clientData, + sqlite3 *db, + const char *zDb, + int nEntry +){ + int ret = 0; + Tcl_Obj *p; + SqliteDb *pDb = (SqliteDb*)clientData; + Tcl_Interp *interp = pDb->interp; + assert(pDb->pLogHook); + + p = Tcl_DuplicateObj(pDb->pLogHook); + Tcl_IncrRefCount(p); + Tcl_ListObjAppendElement(interp, p, Tcl_NewStringObj(zDb, -1)); + Tcl_ListObjAppendElement(interp, p, Tcl_NewIntObj(nEntry)); + if( TCL_OK!=Tcl_EvalObjEx(interp, p, 0) + || 
TCL_OK!=Tcl_GetIntFromObj(interp, Tcl_GetObjResult(interp), &ret) + ){ + Tcl_BackgroundError(interp); + } + Tcl_DecrRefCount(p); + + return ret; +} #if defined(SQLITE_TEST) && defined(SQLITE_ENABLE_UNLOCK_NOTIFY) static void setTestUnlockNotifyVars(Tcl_Interp *interp, int iArg, int nArg){ char zBuf[64]; sprintf(zBuf, "%d", iArg); @@ -1543,11 +1573,11 @@ "last_insert_rowid", "nullvalue", "onecolumn", "profile", "progress", "rekey", "restore", "rollback_hook", "status", "timeout", "total_changes", "trace", "transaction", "unlock_notify", "update_hook", - "version", 0 + "version", "wal_hook", 0 }; enum DB_enum { DB_AUTHORIZER, DB_BACKUP, DB_BUSY, DB_CACHE, DB_CHANGES, DB_CLOSE, DB_COLLATE, DB_COLLATION_NEEDED, DB_COMMIT_HOOK, @@ -1557,11 +1587,11 @@ DB_LAST_INSERT_ROWID, DB_NULLVALUE, DB_ONECOLUMN, DB_PROFILE, DB_PROGRESS, DB_REKEY, DB_RESTORE, DB_ROLLBACK_HOOK, DB_STATUS, DB_TIMEOUT, DB_TOTAL_CHANGES, DB_TRACE, DB_TRANSACTION, DB_UNLOCK_NOTIFY, DB_UPDATE_HOOK, - DB_VERSION, + DB_VERSION, DB_WAL_HOOK }; /* don't leave trailing commas on DB_enum, it confuses the AIX xlc compiler */ if( objc<2 ){ Tcl_WrongNumArgs(interp, 1, objv, "SUBCOMMAND ..."); @@ -2728,22 +2758,26 @@ #endif break; } /* + ** $db wal_hook ?script? ** $db update_hook ?script? ** $db rollback_hook ?script? */ + case DB_WAL_HOOK: case DB_UPDATE_HOOK: case DB_ROLLBACK_HOOK: { /* set ppHook to point at pUpdateHook or pRollbackHook, depending on ** whether [$db update_hook] or [$db rollback_hook] was invoked. 
*/ Tcl_Obj **ppHook; if( choice==DB_UPDATE_HOOK ){ ppHook = &pDb->pUpdateHook; + }else if( choice==DB_WAL_HOOK ){ + ppHook = &pDb->pLogHook; }else{ ppHook = &pDb->pRollbackHook; } if( objc!=2 && objc!=3 ){ @@ -2765,10 +2799,11 @@ } } sqlite3_update_hook(pDb->db, (pDb->pUpdateHook?DbUpdateHandler:0), pDb); sqlite3_rollback_hook(pDb->db,(pDb->pRollbackHook?DbRollbackHandler:0),pDb); + sqlite3_wal_hook(pDb->db,(pDb->pLogHook?DbLogHandler:0),pDb); break; } /* $db version Index: src/test1.c ================================================================== --- src/test1.c +++ src/test1.c @@ -4866,10 +4866,39 @@ Tcl_SetResult(interp, (char *)t1ErrorName(rc), TCL_STATIC); return TCL_OK; } #endif +/* +** tclcmd: sqlite3_wal_checkpoint db ?NAME? +*/ +static int test_wal_checkpoint( + ClientData clientData, /* Unused */ + Tcl_Interp *interp, /* The TCL interpreter that invoked this command */ + int objc, /* Number of arguments */ + Tcl_Obj *CONST objv[] /* Command arguments */ +){ + char *zDb = 0; + sqlite3 *db; + int rc; + + if( objc!=3 && objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DB ?NAME?"); + return TCL_ERROR; + } + + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){ + return TCL_ERROR; + } + if( objc==3 ){ + zDb = Tcl_GetString(objv[2]); + } + rc = sqlite3_wal_checkpoint(db, zDb); + Tcl_SetResult(interp, (char *)t1ErrorName(rc), TCL_STATIC); + return TCL_OK; +} + /* ** tcl_objproc COMMANDNAME ARGS... ** ** Run a TCL command using its objProc interface. 
Throw an error if @@ -5087,10 +5116,11 @@ #endif { "pcache_stats", test_pcache_stats, 0 }, #ifdef SQLITE_ENABLE_UNLOCK_NOTIFY { "sqlite3_unlock_notify", test_unlock_notify, 0 }, #endif + { "sqlite3_wal_checkpoint", test_wal_checkpoint, 0 }, }; static int bitmask_size = sizeof(Bitmask)*8; int i; extern int sqlite3_sync_count, sqlite3_fullsync_count; extern int sqlite3_opentemp_count; Index: src/test6.c ================================================================== --- src/test6.c +++ src/test6.c @@ -779,11 +779,11 @@ cfDlError, /* xDlError */ cfDlSym, /* xDlSym */ cfDlClose, /* xDlClose */ cfRandomness, /* xRandomness */ cfSleep, /* xSleep */ - cfCurrentTime /* xCurrentTime */ + cfCurrentTime, /* xCurrentTime */ }; if( objc!=2 ){ Tcl_WrongNumArgs(interp, 1, objv, "ENABLE"); return TCL_ERROR; Index: src/test_config.c ================================================================== --- src/test_config.c +++ src/test_config.c @@ -496,10 +496,16 @@ #ifdef SQLITE_OMIT_VIRTUALTABLE Tcl_SetVar2(interp, "sqlite_options", "vtab", "0", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "vtab", "1", TCL_GLOBAL_ONLY); #endif + +#ifdef SQLITE_OMIT_WAL + Tcl_SetVar2(interp, "sqlite_options", "wal", "0", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "wal", "1", TCL_GLOBAL_ONLY); +#endif #ifdef SQLITE_OMIT_WSD Tcl_SetVar2(interp, "sqlite_options", "wsd", "0", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "wsd", "1", TCL_GLOBAL_ONLY); Index: src/test_demovfs.c ================================================================== --- src/test_demovfs.c +++ src/test_demovfs.c @@ -621,11 +621,11 @@ demoDlError, /* xDlError */ demoDlSym, /* xDlSym */ demoDlClose, /* xDlClose */ demoRandomness, /* xRandomness */ demoSleep, /* xSleep */ - demoCurrentTime /* xCurrentTime */ + demoCurrentTime, /* xCurrentTime */ }; return &demovfs; } #endif /* !defined(SQLITE_TEST) || defined(SQLITE_OS_UNIX) */ Index: src/test_devsym.c 
================================================================== --- src/test_devsym.c +++ src/test_devsym.c @@ -66,12 +66,19 @@ #endif /* SQLITE_OMIT_LOAD_EXTENSION */ static int devsymRandomness(sqlite3_vfs*, int nByte, char *zOut); static int devsymSleep(sqlite3_vfs*, int microseconds); static int devsymCurrentTime(sqlite3_vfs*, double*); +static int devsymShmOpen(sqlite3_vfs *, const char *, sqlite3_shm **); +static int devsymShmSize(sqlite3_shm *, int , int *); +static int devsymShmGet(sqlite3_shm *, int , int *, void **); +static int devsymShmRelease(sqlite3_shm *); +static int devsymShmLock(sqlite3_shm *, int , int *); +static int devsymShmClose(sqlite3_shm *, int); + static sqlite3_vfs devsym_vfs = { - 1, /* iVersion */ + 2, /* iVersion */ sizeof(devsym_file), /* szOsFile */ DEVSYM_MAX_PATHNAME, /* mxPathname */ 0, /* pNext */ DEVSYM_VFS_NAME, /* zName */ 0, /* pAppData */ @@ -90,11 +97,20 @@ 0, /* xDlSym */ 0, /* xDlClose */ #endif /* SQLITE_OMIT_LOAD_EXTENSION */ devsymRandomness, /* xRandomness */ devsymSleep, /* xSleep */ - devsymCurrentTime /* xCurrentTime */ + devsymCurrentTime, /* xCurrentTime */ + 0, /* xGetLastError */ + devsymShmOpen, + devsymShmSize, + devsymShmGet, + devsymShmRelease, + devsymShmLock, + devsymShmClose, + 0, + 0, }; static sqlite3_io_methods devsym_io_methods = { 1, /* iVersion */ devsymClose, /* xClose */ @@ -331,19 +347,54 @@ */ static int devsymCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ return sqlite3OsCurrentTime(g.pVfs, pTimeOut); } + +static int devsymShmOpen( + sqlite3_vfs *pVfs, + const char *zName, + sqlite3_shm **pp +){ + return g.pVfs->xShmOpen(g.pVfs, zName, pp); +} +static int devsymShmSize(sqlite3_shm *p, int reqSize, int *pNewSize){ + return g.pVfs->xShmSize(p, reqSize, pNewSize); +} +static int devsymShmGet( + sqlite3_shm *p, + int reqMapSize, + int *pMapSize, + void **pp +){ + return g.pVfs->xShmGet(p, reqMapSize, pMapSize, pp); +} +static int devsymShmRelease(sqlite3_shm *p){ + return 
g.pVfs->xShmRelease(p); +} +static int devsymShmLock(sqlite3_shm *p, int desiredLock, int *gotLock){ + return g.pVfs->xShmLock(p, desiredLock, gotLock); +} +static int devsymShmClose(sqlite3_shm *p, int deleteFlag){ + return g.pVfs->xShmClose(p, deleteFlag); +} + /* ** This procedure registers the devsym vfs with SQLite. If the argument is ** true, the devsym vfs becomes the new default vfs. It is the only publicly ** available function in this file. */ void devsym_register(int iDeviceChar, int iSectorSize){ if( g.pVfs==0 ){ g.pVfs = sqlite3_vfs_find(0); devsym_vfs.szOsFile += g.pVfs->szOsFile; + devsym_vfs.xShmOpen = (g.pVfs->xShmOpen ? devsymShmOpen : 0); + devsym_vfs.xShmSize = (g.pVfs->xShmSize ? devsymShmSize : 0); + devsym_vfs.xShmGet = (g.pVfs->xShmGet ? devsymShmGet : 0); + devsym_vfs.xShmRelease = (g.pVfs->xShmRelease ? devsymShmRelease : 0); + devsym_vfs.xShmLock = (g.pVfs->xShmLock ? devsymShmLock : 0); + devsym_vfs.xShmClose = (g.pVfs->xShmClose ? devsymShmClose : 0); sqlite3_vfs_register(&devsym_vfs, 0); } if( iDeviceChar>=0 ){ g.iDeviceChar = iDeviceChar; }else{ Index: src/test_journal.c ================================================================== --- src/test_journal.c +++ src/test_journal.c @@ -177,11 +177,11 @@ jtDlError, /* xDlError */ jtDlSym, /* xDlSym */ jtDlClose, /* xDlClose */ jtRandomness, /* xRandomness */ jtSleep, /* xSleep */ - jtCurrentTime /* xCurrentTime */ + jtCurrentTime, /* xCurrentTime */ }; static sqlite3_io_methods jt_io_methods = { 1, /* iVersion */ jtClose, /* xClose */ Index: src/test_onefile.c ================================================================== --- src/test_onefile.c +++ src/test_onefile.c @@ -196,11 +196,18 @@ fsDlError, /* xDlError */ fsDlSym, /* xDlSym */ fsDlClose, /* xDlClose */ fsRandomness, /* xRandomness */ fsSleep, /* xSleep */ - fsCurrentTime /* xCurrentTime */ + fsCurrentTime, /* xCurrentTime */ + 0, /* xShmOpen */ + 0, /* xShmSize */ + 0, /* xShmLock */ + 0, /* xShmClose */ + 0, /* xShmDelete 
*/ + 0, /* xRename */ + 0 /* xCurrentTimeInt64 */ }, 0, /* pFileList */ 0 /* pParent */ }; Index: src/test_osinst.c ================================================================== --- src/test_osinst.c +++ src/test_osinst.c @@ -201,11 +201,11 @@ instDlError, /* xDlError */ instDlSym, /* xDlSym */ instDlClose, /* xDlClose */ instRandomness, /* xRandomness */ instSleep, /* xSleep */ - instCurrentTime /* xCurrentTime */ + instCurrentTime, /* xCurrentTime */ }; static sqlite3_io_methods inst_io_methods = { 1, /* iVersion */ instClose, /* xClose */ Index: src/test_thread.c ================================================================== --- src/test_thread.c +++ src/test_thread.c @@ -56,10 +56,11 @@ #if defined(SQLITE_OS_UNIX) && defined(SQLITE_ENABLE_UNLOCK_NOTIFY) static Tcl_ObjCmdProc blocking_step_proc; static Tcl_ObjCmdProc blocking_prepare_v2_proc; #endif int Sqlitetest1_Init(Tcl_Interp *); +int Sqlite3_Init(Tcl_Interp *); /* Functions from test1.c */ void *sqlite3TestTextToPtr(const char *); const char *sqlite3TestErrorName(int); int getDbPointer(Tcl_Interp *, const char *, sqlite3 **); @@ -122,10 +123,11 @@ Tcl_CreateObjCommand(interp, "sqlite3_nonblocking_prepare_v2", blocking_prepare_v2_proc, 0, 0); #endif Sqlitetest1_Init(interp); Sqlitetest_mutex_Init(interp); + Sqlite3_Init(interp); rc = Tcl_Eval(interp, p->zScript); pRes = Tcl_GetObjResult(interp); pList = Tcl_NewObj(); Tcl_IncrRefCount(pList); Index: src/vdbe.c ================================================================== --- src/vdbe.c +++ src/vdbe.c @@ -5184,10 +5184,116 @@ goto too_big; } break; } +#ifndef SQLITE_OMIT_WAL +/* Opcode: Checkpoint P1 * * * * +** +** Checkpoint database P1. This is a no-op if P1 is not currently in +** WAL mode. +*/ +case OP_Checkpoint: { + rc = sqlite3Checkpoint(db, pOp->p1); + break; +}; +#endif + +/* Opcode: JournalMode P1 P2 P3 * * +** +** Change the journal mode of database P1 to P3. P3 must be one of the +** PAGER_JOURNALMODE_XXX values. 
If changing between the various rollback +** modes (delete, truncate, persist, off and memory), this is a simple +** operation. No IO is required. +** +** If changing into or out of WAL mode the procedure is more complicated. +** +** Write a string containing the final journal-mode to register P2. +*/ +case OP_JournalMode: { /* out2-prerelease */ + Btree *pBt; /* Btree to change journal mode of */ + Pager *pPager; /* Pager associated with pBt */ + int eNew; /* New journal mode */ + int eOld; /* The old journal mode */ + const sqlite3_vfs *pVfs; /* The VFS of pPager */ + const char *zFilename; /* Name of database file for pPager */ + + eNew = pOp->p3; + assert( eNew==PAGER_JOURNALMODE_DELETE + || eNew==PAGER_JOURNALMODE_TRUNCATE + || eNew==PAGER_JOURNALMODE_PERSIST + || eNew==PAGER_JOURNALMODE_OFF + || eNew==PAGER_JOURNALMODE_MEMORY + || eNew==PAGER_JOURNALMODE_WAL + || eNew==PAGER_JOURNALMODE_QUERY + ); + assert( pOp->p1>=0 && pOp->p1<db->nDb ); + assert( (p->btreeMask & (1<<pOp->p1))!=0 ); + + pBt = db->aDb[pOp->p1].pBt; + pPager = sqlite3BtreePager(pBt); + +#ifndef SQLITE_OMIT_WAL + zFilename = sqlite3PagerFilename(pPager); + pVfs = sqlite3PagerVfs(pPager); + + /* Do not allow a transition to journal_mode=WAL for a database + ** in temporary storage or if the VFS does not support xShmOpen. + */ + if( eNew==PAGER_JOURNALMODE_WAL + && (zFilename[0]==0 /* Temp file */ + || pVfs->iVersion<2 || pVfs->xShmOpen==0) /* No xShmOpen support */ + ){ + eNew = PAGER_JOURNALMODE_QUERY; + } + + if( eNew!=PAGER_JOURNALMODE_QUERY ){ + eOld = sqlite3PagerJournalMode(pPager, PAGER_JOURNALMODE_QUERY); + if( (eNew!=eOld) + && (eOld==PAGER_JOURNALMODE_WAL || eNew==PAGER_JOURNALMODE_WAL) + ){ + if( !db->autoCommit || db->activeVdbeCnt>1 ){ + rc = SQLITE_ERROR; + sqlite3SetString(&p->zErrMsg, db, + "cannot change %s wal mode from within a transaction", + (eNew==PAGER_JOURNALMODE_WAL ?
"into" : "out of") + ); + }else{ + + if( eOld==PAGER_JOURNALMODE_WAL ){ + /* If leaving WAL mode, close the log file. If successful, the call + ** to PagerCloseWal() checkpoints and deletes the write-ahead-log + ** file. An EXCLUSIVE lock may still be held on the database file + ** after a successful return. + */ + rc = sqlite3PagerCloseWal(pPager); + if( rc!=SQLITE_OK ) goto abort_due_to_error; + sqlite3PagerJournalMode(pPager, eNew); + }else{ + sqlite3PagerJournalMode(pPager, PAGER_JOURNALMODE_DELETE); + } + + /* Open a transaction on the database file. Regardless of the journal + ** mode, this transaction always uses a rollback journal. + */ + assert( sqlite3BtreeIsInTrans(pBt)==0 ); + rc = sqlite3BtreeSetVersion(pBt, (eNew==PAGER_JOURNALMODE_WAL ? 2 : 1)); + if( rc!=SQLITE_OK ) goto abort_due_to_error; + } + } + } +#endif /* ifndef SQLITE_OMIT_WAL */ + + eNew = sqlite3PagerJournalMode(pPager, eNew); + pOut = &aMem[pOp->p2]; + pOut->flags = MEM_Str|MEM_Static|MEM_Term; + pOut->z = (char *)sqlite3JournalModename(eNew); + pOut->n = sqlite3Strlen30(pOut->z); + pOut->enc = SQLITE_UTF8; + sqlite3VdbeChangeEncoding(pOut, encoding); + break; +}; #if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) /* Opcode: Vacuum * * * * * ** ** Vacuum the entire database. This opcode will cause other virtual Index: src/vdbeapi.c ================================================================== --- src/vdbeapi.c +++ src/vdbeapi.c @@ -303,10 +303,33 @@ assert( sqlite3_mutex_held(pCtx->s.db->mutex) ); sqlite3VdbeMemSetNull(&pCtx->s); pCtx->isError = SQLITE_NOMEM; pCtx->s.db->mallocFailed = 1; } + +/* +** This function is called after a transaction has been committed. It +** invokes callbacks registered with sqlite3_wal_hook() as required. 
+*/ +static int doWalCallbacks(sqlite3 *db){ + int rc = SQLITE_OK; +#ifndef SQLITE_OMIT_WAL + int i; + for(i=0; i<db->nDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( pBt ){ + int nEntry = sqlite3PagerWalCallback(sqlite3BtreePager(pBt)); + if( db->xWalCallback && nEntry>0 && rc==SQLITE_OK + && db->xWalCallback(db->pWalArg, db, db->aDb[i].zName, nEntry) + ){ + rc = sqlite3Checkpoint(db, i); + } + } + } +#endif + return rc; +} /* ** Execute the statement pStmt, either until a row of data is ready, the ** statement is completely executed or an error occurs. ** @@ -384,10 +407,18 @@ elapseTime = (u64)((rNow - (int)rNow)*3600.0*24.0*1000000000.0); elapseTime -= p->startTime; db->xProfile(db->pProfileArg, p->zSql, elapseTime); } #endif + + if( rc==SQLITE_DONE ){ + assert( p->rc==SQLITE_OK ); + p->rc = doWalCallbacks(db); + if( p->rc!=SQLITE_OK ){ + rc = SQLITE_ERROR; + } + } db->errCode = rc; if( SQLITE_NOMEM==sqlite3ApiExit(p->db, p->rc) ){ p->rc = SQLITE_NOMEM; } ADDED src/wal.c Index: src/wal.c ================================================================== --- /dev/null +++ src/wal.c @@ -0,0 +1,1316 @@ +/* +** 2010 February 1 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains the implementation of a write-ahead log file used in +** "journal_mode=wal" mode. +*/ +#ifndef SQLITE_OMIT_WAL + +#include "wal.h" + + +/* +** WRITE-AHEAD LOG (WAL) FILE FORMAT +** +** A wal file consists of a header followed by zero or more "frames". +** The header is 12 bytes in size and consists of the following three +** big-endian 32-bit unsigned integer values: +** +** 0: Database page size, +** 4: Randomly selected salt value 1, +** 8: Randomly selected salt value 2.
+** +** Immediately following the header are zero or more frames. Each +** frame itself consists of a 16-byte header followed by <page-size> bytes +** of page data. The header is broken into 4 big-endian 32-bit unsigned +** integer values, as follows: +** +** 0: Page number. +** 4: For commit records, the size of the database image in pages +** after the commit. For all other records, zero. +** 8: Checksum value 1. +** 12: Checksum value 2. +*/ + +/* +** WAL-INDEX FILE FORMAT +** +** The wal-index file consists of a 32-byte header region, followed by an +** 8-byte region that contains no useful data (used to apply byte-range locks +** to), followed by the data region. +** +** The contents of both the header and data region are specified in terms +** of 1, 2 and 4 byte unsigned integers. All integers are stored in +** machine-endian order. The wal-index is not a persistent file and +** so it does not need to be portable across architectures. +** +** A wal-index file is essentially a shadow-pager map. It contains a +** mapping from database page number to the set of locations in the wal +** file that contain versions of the database page. When a database +** client needs to read a page of data, it first queries the wal-index +** file to determine if the required version of the page is stored in +** the wal. If so, the page is read from the wal. If not, the page is +** read from the database file. +** +** Whenever a transaction is appended to the wal or a checkpoint transfers +** data from the wal into the database file, the wal-index is +** updated accordingly. +** +** The fields in the wal-index file header are described in the comment +** directly above the definition of struct WalIndexHdr (see below). +** Immediately following the fields in the WalIndexHdr structure is +** an 8 byte checksum based on the contents of the header. This field is +** not the same as the iCheck1 and iCheck2 fields of the WalIndexHdr.
+*/ + +/* Object declarations */ +typedef struct WalIndexHdr WalIndexHdr; +typedef struct WalIterator WalIterator; + + +/* +** The following object stores a copy of the wal-index header. +** +** Member variables iCheck1 and iCheck2 contain the checksum for the +** last frame written to the wal, or 2 and 3 respectively if the log +** is currently empty. +*/ +struct WalIndexHdr { + u32 iChange; /* Counter incremented each transaction */ + u32 pgsz; /* Database page size in bytes */ + u32 iLastPg; /* Address of last valid frame in log */ + u32 nPage; /* Size of database in pages */ + u32 iCheck1; /* Checksum value 1 */ + u32 iCheck2; /* Checksum value 2 */ +}; + +/* Number of 32-bit fields in a serialized WalIndexHdr object. */ +#define WALINDEX_HDR_NFIELD (sizeof(WalIndexHdr) / sizeof(u32)) + +/* A block of 8 bytes beginning at WALINDEX_LOCK_OFFSET is reserved +** for locks. Since some systems only feature mandatory file-locks, we +** do not read or write data from the region of the file on which locks +** are applied. +*/ +#define WALINDEX_LOCK_OFFSET ((sizeof(WalIndexHdr))+2*sizeof(u32)) +#define WALINDEX_LOCK_RESERVED 8 + +/* Size of header before each frame in wal */ +#define WAL_FRAME_HDRSIZE 16 + +/* Size of write ahead log header */ +#define WAL_HDRSIZE 12 + +/* +** Return the offset of frame iFrame in the write-ahead log file, +** assuming a database page size of pgsz bytes. The offset returned +** is to the start of the write-ahead log frame-header. +*/ +#define walFrameOffset(iFrame, pgsz) ( \ + WAL_HDRSIZE + ((iFrame)-1)*((pgsz)+WAL_FRAME_HDRSIZE) \ +) + +/* +** An open write-ahead log file is represented by an instance of the +** following object.
+*/ +struct Wal { + sqlite3_vfs *pVfs; /* The VFS used to create pFd */ + sqlite3_file *pFd; /* File handle for WAL file */ + u32 iCallback; /* Value to pass to log callback (or 0) */ + sqlite3_shm *pWIndex; /* The open wal-index file */ + int szWIndex; /* Size of the wal-index that is mapped in mem */ + u32 *pWiData; /* Pointer to wal-index content in memory */ + u8 lockState; /* SQLITE_SHM_xxxx constant showing lock state */ + u8 readerType; /* SQLITE_SHM_READ or SQLITE_SHM_READ_FULL */ + WalIndexHdr hdr; /* Wal-index for current snapshot */ + char *zName; /* Name of underlying storage */ +}; + + +/* +** This structure is used to implement an iterator that iterates through +** all frames in the log in database page order. Where two or more frames +** correspond to the same database page, the iterator visits only the +** frame most recently written to the log. +** +** The internals of this structure are only accessed by: +** +** walIteratorInit() - Create a new iterator, +** walIteratorNext() - Step an iterator, +** walIteratorFree() - Free an iterator. +** +** This functionality is used by the checkpoint code (see walCheckpoint()). +*/ +struct WalIterator { + int nSegment; /* Size of WalIterator.aSegment[] array */ + int nFinal; /* Elements in segment nSegment-1 */ + struct WalSegment { + int iNext; /* Next aIndex index */ + u8 *aIndex; /* Pointer to index array */ + u32 *aDbPage; /* Pointer to db page array */ + } aSegment[1]; +}; + + +/* +** Generate an 8 byte checksum based on the data in array aByte[] and the +** initial values of aCksum[0] and aCksum[1]. The checksum is written into +** aCksum[] before returning. +** +** The range of bytes to checksum is treated as an array of 32-bit +** little-endian unsigned integers. For each integer X in the array, from +** start to finish, do the following: +** +** aCksum[0] += X; +** aCksum[1] += aCksum[0]; +** +** For the calculation above, use 64-bit unsigned accumulators. 
Before +** returning, truncate the values to 32-bits as follows: +** +** aCksum[0] = (u32)(aCksum[0] + (aCksum[0]>>24)); +** aCksum[1] = (u32)(aCksum[1] + (aCksum[1]>>24)); +*/ +static void walChecksumBytes(u8 *aByte, int nByte, u32 *aCksum){ + u64 sum1 = aCksum[0]; + u64 sum2 = aCksum[1]; + u32 *a32 = (u32 *)aByte; + u32 *aEnd = (u32 *)&aByte[nByte]; + + assert( (nByte&0x00000003)==0 ); + + if( SQLITE_LITTLEENDIAN ){ +#ifdef SQLITE_DEBUG + u8 *a = (u8 *)a32; + assert( *a32==(a[0] + (a[1]<<8) + (a[2]<<16) + (a[3]<<24)) ); +#endif + do { + sum1 += *a32; + sum2 += sum1; + } while( ++a32>24); + aCksum[1] = sum2 + (sum2>>24); +} + +/* +** Attempt to change the lock status. +** +** When changing the lock status to SQLITE_SHM_READ, store the +** type of reader lock (either SQLITE_SHM_READ or SQLITE_SHM_READ_FULL) +** in pWal->readerType. +*/ +static int walSetLock(Wal *pWal, int desiredStatus){ + int rc, got; + if( pWal->lockState==desiredStatus ) return SQLITE_OK; + rc = pWal->pVfs->xShmLock(pWal->pWIndex, desiredStatus, &got); + pWal->lockState = got; + if( got==SQLITE_SHM_READ_FULL || got==SQLITE_SHM_READ ){ + pWal->readerType = got; + pWal->lockState = SQLITE_SHM_READ; + } + return rc; +} + +/* +** Update the header of the wal-index file. +*/ +static void walIndexWriteHdr(Wal *pWal, WalIndexHdr *pHdr){ + u32 *aHdr = pWal->pWiData; /* Write header here */ + u32 *aCksum = &aHdr[WALINDEX_HDR_NFIELD]; /* Write header cksum here */ + + assert( WALINDEX_HDR_NFIELD==sizeof(WalIndexHdr)/4 ); + assert( aHdr!=0 ); + memcpy(aHdr, pHdr, sizeof(WalIndexHdr)); + aCksum[0] = aCksum[1] = 1; + walChecksumBytes((u8 *)aHdr, sizeof(WalIndexHdr), aCksum); +} + +/* +** This function encodes a single frame header and writes it to a buffer +** supplied by the caller. A frame-header is made up of a series of +** 4-byte big-endian integers, as follows: +** +** 0: Database page size in bytes. +** 4: Page number. +** 8: New database size (for commit frames, otherwise zero). 
+** 12: Frame checksum 1. +** 16: Frame checksum 2. +*/ +static void walEncodeFrame( + u32 *aCksum, /* IN/OUT: Checksum values */ + u32 iPage, /* Database page number for frame */ + u32 nTruncate, /* New db size (or 0 for non-commit frames) */ + int nData, /* Database page size (size of aData[]) */ + u8 *aData, /* Pointer to page data (for checksum) */ + u8 *aFrame /* OUT: Write encoded frame here */ +){ + assert( WAL_FRAME_HDRSIZE==16 ); + + sqlite3Put4byte(&aFrame[0], iPage); + sqlite3Put4byte(&aFrame[4], nTruncate); + + walChecksumBytes(aFrame, 8, aCksum); + walChecksumBytes(aData, nData, aCksum); + + sqlite3Put4byte(&aFrame[8], aCksum[0]); + sqlite3Put4byte(&aFrame[12], aCksum[1]); +} + +/* +** Return 1 and populate *piPage, *pnTruncate and aCksum if the +** frame checksum looks Ok. Otherwise return 0. +*/ +static int walDecodeFrame( + u32 *aCksum, /* IN/OUT: Checksum values */ + u32 *piPage, /* OUT: Database page number for frame */ + u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */ + int nData, /* Database page size (size of aData[]) */ + u8 *aData, /* Pointer to page data (for checksum) */ + u8 *aFrame /* Frame data */ +){ + assert( WAL_FRAME_HDRSIZE==16 ); + + walChecksumBytes(aFrame, 8, aCksum); + walChecksumBytes(aData, nData, aCksum); + + if( aCksum[0]!=sqlite3Get4byte(&aFrame[8]) + || aCksum[1]!=sqlite3Get4byte(&aFrame[12]) + ){ + /* Checksum failed. 
*/ + return 0; + } + + *piPage = sqlite3Get4byte(&aFrame[0]); + *pnTruncate = sqlite3Get4byte(&aFrame[4]); + return 1; +} + +static void walMergesort8( + Pgno *aContent, /* Pages in wal */ + u8 *aBuffer, /* Buffer of at least *pnList items to use */ + u8 *aList, /* IN/OUT: List to sort */ + int *pnList /* IN/OUT: Number of elements in aList[] */ +){ + int nList = *pnList; + if( nList>1 ){ + int nLeft = nList / 2; /* Elements in left list */ + int nRight = nList - nLeft; /* Elements in right list */ + u8 *aLeft = aList; /* Left list */ + u8 *aRight = &aList[nLeft]; /* Right list */ + int iLeft = 0; /* Current index in aLeft */ + int iRight = 0; /* Current index in aright */ + int iOut = 0; /* Current index in output buffer */ + + /* TODO: Change to non-recursive version. */ + walMergesort8(aContent, aBuffer, aLeft, &nLeft); + walMergesort8(aContent, aBuffer, aRight, &nRight); + + while( iRight=nRight || aContent[aLeft[iLeft]]=nLeft || aContent[aLeft[iLeft]]>dbpage ); + assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage ); + } + memcpy(aList, aBuffer, sizeof(aList[0])*iOut); + *pnList = iOut; + } + +#ifdef SQLITE_DEBUG + { + int i; + for(i=1; i<*pnList; i++){ + assert( aContent[aList[i]] > aContent[aList[i-1]] ); + } + } +#endif +} + + +/* +** Return the index in the WalIndex.aData array that corresponds to +** frame iFrame. The wal-index file consists of a header, followed by +** alternating "map" and "index" blocks. +*/ +static int walIndexEntry(u32 iFrame){ + return ( + (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32) + + (((iFrame-1)>>8)<<6) /* Indexes that occur before iFrame */ + + iFrame-1 /* Db page numbers that occur before iFrame */ + ); +} + +/* +** Release our reference to the wal-index memory map, if we are holding +** it. +*/ +static void walIndexUnmap(Wal *pWal){ + if( pWal->pWiData ){ + pWal->pVfs->xShmRelease(pWal->pWIndex); + pWal->pWiData = 0; + } +} + +/* +** Map the wal-index file into memory if it isn't already. 
+** +** The reqSize parameter is the minimum required size of the mapping. +** A value of -1 means "don't care". The reqSize parameter is ignored +** if the mapping is already held. +*/ +static int walIndexMap(Wal *pWal, int reqSize){ + int rc = SQLITE_OK; + if( pWal->pWiData==0 ){ + rc = pWal->pVfs->xShmGet(pWal->pWIndex, reqSize, &pWal->szWIndex, + (void**)(char*)&pWal->pWiData); + if( rc==SQLITE_OK && pWal->pWiData==0 ){ + /* Make sure pWal->pWiData is not NULL while we are holding the + ** lock on the mapping. */ + assert( pWal->szWIndex==0 ); + pWal->pWiData = &pWal->iCallback; + } + } + return rc; +} + +/* +** Remap the wal-index so that the mapping covers the full size +** of the underlying file. +** +** If enlargeTo is non-negative, then increase the size of the underlying +** storage to be at least as big as enlargeTo before remapping. +*/ +static int walIndexRemap(Wal *pWal, int enlargeTo){ + int rc; + int sz; + rc = pWal->pVfs->xShmSize(pWal->pWIndex, enlargeTo, &sz); + if( rc==SQLITE_OK && sz>pWal->szWIndex ){ + walIndexUnmap(pWal); + rc = walIndexMap(pWal, sz); + } + return rc; +} + +/* +** Increment by which to increase the wal-index file size. +*/ +#define WALINDEX_MMAP_INCREMENT (64*1024) + +/* +** Set an entry in the wal-index map to map log frame iFrame to db +** page iPage. Values are always appended to the wal-index (i.e. the +** value of iFrame is always exactly one more than the value passed to +** the previous call), but that restriction is not enforced or asserted +** here. +*/ +static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ + u32 iSlot = walIndexEntry(iFrame); + + walIndexMap(pWal, -1); + while( ((iSlot+128)*sizeof(u32))>=pWal->szWIndex ){ + int rc; + int nByte = pWal->szWIndex + WALINDEX_MMAP_INCREMENT; + + /* Enlarge the storage, then remap it. 
*/ + rc = walIndexRemap(pWal, nByte); + if( rc!=SQLITE_OK ){ + return rc; + } + } + + /* Set the wal-index entry itself */ + pWal->pWiData[iSlot] = iPage; + + /* If the frame number is a multiple of 256 (frames are numbered starting + ** at 1), build an index of the most recently added 256 frames. + */ + if( (iFrame&0x000000FF)==0 ){ + int i; /* Iterator used while initializing aIndex */ + u32 *aFrame; /* Pointer to array of 256 frames */ + int nIndex; /* Number of entries in index */ + u8 *aIndex; /* 256 bytes to build index in */ + u8 *aTmp; /* Scratch space to use while sorting */ + + aFrame = &pWal->pWiData[iSlot-255]; + aIndex = (u8 *)&pWal->pWiData[iSlot+1]; + aTmp = &aIndex[256]; + + nIndex = 256; + for(i=0; i<256; i++) aIndex[i] = (u8)i; + walMergesort8(aFrame, aTmp, aIndex, &nIndex); + memset(&aIndex[nIndex], aIndex[nIndex-1], 256-nIndex); + } + + return SQLITE_OK; +} + + +/* +** Recover the wal-index by reading the write-ahead log file. +** The caller must hold RECOVER lock on the wal-index file. +*/ +static int walIndexRecover(Wal *pWal){ + int rc; /* Return Code */ + i64 nSize; /* Size of log file */ + WalIndexHdr hdr; /* Recovered wal-index header */ + + assert( pWal->lockState==SQLITE_SHM_RECOVER ); + memset(&hdr, 0, sizeof(hdr)); + + rc = sqlite3OsFileSize(pWal->pFd, &nSize); + if( rc!=SQLITE_OK ){ + return rc; + } + + if( nSize>WAL_FRAME_HDRSIZE ){ + u8 aBuf[WAL_FRAME_HDRSIZE]; /* Buffer to load first frame header into */ + u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ + int nFrame; /* Number of bytes at aFrame */ + u8 *aData; /* Pointer to data part of aFrame buffer */ + int iFrame; /* Index of last frame read */ + i64 iOffset; /* Next offset to read from log file */ + int nPgsz; /* Page size according to the log */ + u32 aCksum[2]; /* Running checksum */ + + /* Read in the first frame header in the file (to determine the + ** database page size). 
+ */ + rc = sqlite3OsRead(pWal->pFd, aBuf, WAL_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* If the database page size is not a power of two, or is greater than + ** SQLITE_MAX_PAGE_SIZE, conclude that the log file contains no valid data. + */ + nPgsz = sqlite3Get4byte(&aBuf[0]); + if( nPgsz&(nPgsz-1) || nPgsz>SQLITE_MAX_PAGE_SIZE || nPgsz<512 ){ + goto finished; + } + aCksum[0] = sqlite3Get4byte(&aBuf[4]); + aCksum[1] = sqlite3Get4byte(&aBuf[8]); + + /* Malloc a buffer to read frames into. */ + nFrame = nPgsz + WAL_FRAME_HDRSIZE; + aFrame = (u8 *)sqlite3_malloc(nFrame); + if( !aFrame ){ + return SQLITE_NOMEM; + } + aData = &aFrame[WAL_FRAME_HDRSIZE]; + + /* Read all frames from the log file. */ + iFrame = 0; + for(iOffset=WAL_HDRSIZE; (iOffset+nFrame)<=nSize; iOffset+=nFrame){ + u32 pgno; /* Database page number for frame */ + u32 nTruncate; /* dbsize field from frame header */ + int isValid; /* True if this frame is valid */ + + /* Read and decode the next log frame. */ + rc = sqlite3OsRead(pWal->pFd, aFrame, nFrame, iOffset); + if( rc!=SQLITE_OK ) break; + isValid = walDecodeFrame(aCksum, &pgno, &nTruncate, nPgsz, aData, aFrame); + if( !isValid ) break; + walIndexAppend(pWal, ++iFrame, pgno); + + /* If nTruncate is non-zero, this is a commit record. */ + if( nTruncate ){ + hdr.iCheck1 = aCksum[0]; + hdr.iCheck2 = aCksum[1]; + hdr.iLastPg = iFrame; + hdr.nPage = nTruncate; + hdr.pgsz = nPgsz; + } + } + + sqlite3_free(aFrame); + }else{ + hdr.iCheck1 = 2; + hdr.iCheck2 = 3; + } + +finished: + walIndexWriteHdr(pWal, &hdr); + return rc; +} + +/* +** Open a connection to the log file associated with database zDb. The +** database file does not actually have to exist. zDb is used only to +** figure out the name of the log file to open. If the log file does not +** exist it is created by this call. +** +** A SHARED lock should be held on the database file when this function +** is called. 
The purpose of this SHARED lock is to prevent any other +** client from unlinking the log or wal-index file. If another process +** were to do this just after this client opened one of these files, the +** system would be badly broken. +*/ +int sqlite3WalOpen( + sqlite3_vfs *pVfs, /* vfs module to open wal and wal-index */ + const char *zDb, /* Name of database file */ + Wal **ppWal /* OUT: Allocated Wal handle */ +){ + int rc = SQLITE_OK; /* Return Code */ + Wal *pRet; /* Object to allocate and return */ + int flags; /* Flags passed to OsOpen() */ + char *zWal = 0; /* Path to WAL file */ + int nWal; /* Length of zWal in bytes */ + + assert( zDb ); + if( pVfs->xShmOpen==0 ) return SQLITE_CANTOPEN_BKPT; + + /* Allocate an instance of struct Wal to return. */ + *ppWal = 0; + nWal = strlen(zDb); + pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile + nWal+5); + if( !pRet ) goto wal_open_out; + pRet->pVfs = pVfs; + pRet->pFd = (sqlite3_file *)&pRet[1]; + pRet->zName = zWal = pVfs->szOsFile + (char*)pRet->pFd; + sqlite3_snprintf(nWal+5, zWal, "%s-wal", zDb); + rc = pVfs->xShmOpen(pVfs, zWal, &pRet->pWIndex); + if( rc ) goto wal_open_out; + + /* Open file handle on the write-ahead log file. 
*/ + flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_JOURNAL); + rc = sqlite3OsOpen(pVfs, zWal, pRet->pFd, flags, &flags); + +wal_open_out: + if( rc!=SQLITE_OK ){ + if( pRet ){ + pVfs->xShmClose(pRet->pWIndex, 0); + sqlite3OsClose(pRet->pFd); + sqlite3_free(pRet); + } + } + *ppWal = pRet; + return rc; +} + +static int walIteratorNext( + WalIterator *p, /* Iterator */ + u32 *piPage, /* OUT: Next db page to write */ + u32 *piFrame /* OUT: Wal frame to read from */ +){ + u32 iMin = *piPage; + u32 iRet = 0xFFFFFFFF; + int i; + int nBlock = p->nFinal; + + for(i=p->nSegment-1; i>=0; i--){ + struct WalSegment *pSegment = &p->aSegment[i]; + while( pSegment->iNextaDbPage[pSegment->aIndex[pSegment->iNext]]; + if( iPg>iMin ){ + if( iPgaIndex[pSegment->iNext]; + } + break; + } + pSegment->iNext++; + } + + nBlock = 256; + } + + *piPage = iRet; + return (iRet==0xFFFFFFFF); +} + +static WalIterator *walIteratorInit(Wal *pWal){ + u32 *aData; /* Content of the wal-index file */ + WalIterator *p; /* Return value */ + int nSegment; /* Number of segments to merge */ + u32 iLast; /* Last frame in log */ + int nByte; /* Number of bytes to allocate */ + int i; /* Iterator variable */ + int nFinal; /* Number of unindexed entries */ + struct WalSegment *pFinal; /* Final (unindexed) segment */ + u8 *aTmp; /* Temp space used by merge-sort */ + + walIndexMap(pWal, -1); + aData = pWal->pWiData; + iLast = pWal->hdr.iLastPg; + nSegment = (iLast >> 8) + 1; + nFinal = (iLast & 0x000000FF); + + nByte = sizeof(WalIterator) + (nSegment-1)*sizeof(struct WalSegment) + 512; + p = (WalIterator *)sqlite3_malloc(nByte); + if( p ){ + memset(p, 0, nByte); + p->nSegment = nSegment; + p->nFinal = nFinal; + } + + for(i=0; iaSegment[i].aDbPage = &aData[walIndexEntry(i*256+1)]; + p->aSegment[i].aIndex = (u8 *)&aData[walIndexEntry(i*256+1)+256]; + } + pFinal = &p->aSegment[nSegment-1]; + + pFinal->aDbPage = &aData[walIndexEntry((nSegment-1)*256+1)]; + pFinal->aIndex = (u8 *)&pFinal[1]; + aTmp = 
&pFinal->aIndex[256]; + for(i=0; iaIndex[i] = i; + } + walMergesort8(pFinal->aDbPage, aTmp, pFinal->aIndex, &nFinal); + p->nFinal = nFinal; + + return p; +} + +/* +** Free a log iterator allocated by walIteratorInit(). +*/ +static void walIteratorFree(WalIterator *p){ + sqlite3_free(p); +} + +/* +** Checkpoint the contents of the log file. +*/ +static int walCheckpoint( + Wal *pWal, /* Wal connection */ + sqlite3_file *pFd, /* File descriptor open on db file */ + int sync_flags, /* Flags for OsSync() (or 0) */ + u8 *zBuf /* Temporary buffer to use */ +){ + int rc; /* Return code */ + int pgsz = pWal->hdr.pgsz; /* Database page-size */ + WalIterator *pIter = 0; /* Wal iterator context */ + u32 iDbpage = 0; /* Next database page to write */ + u32 iFrame = 0; /* Wal frame containing data for iDbpage */ + + if( pWal->hdr.iLastPg==0 ){ + return SQLITE_OK; + } + + /* Allocate the iterator */ + pIter = walIteratorInit(pWal); + if( !pIter ) return SQLITE_NOMEM; + + /* Sync the log file to disk */ + if( sync_flags ){ + rc = sqlite3OsSync(pWal->pFd, sync_flags); + if( rc!=SQLITE_OK ) goto out; + } + + /* Iterate through the contents of the log, copying data to the db file. */ + while( 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ + rc = sqlite3OsRead(pWal->pFd, zBuf, pgsz, + walFrameOffset(iFrame, pgsz) + WAL_FRAME_HDRSIZE + ); + if( rc!=SQLITE_OK ) goto out; + rc = sqlite3OsWrite(pFd, zBuf, pgsz, (iDbpage-1)*pgsz); + if( rc!=SQLITE_OK ) goto out; + } + + /* Truncate the database file */ + rc = sqlite3OsTruncate(pFd, ((i64)pWal->hdr.nPage*(i64)pgsz)); + if( rc!=SQLITE_OK ) goto out; + + /* Sync the database file. If successful, update the wal-index. */ + if( sync_flags ){ + rc = sqlite3OsSync(pFd, sync_flags); + if( rc!=SQLITE_OK ) goto out; + } + pWal->hdr.iLastPg = 0; + pWal->hdr.iCheck1 = 2; + pWal->hdr.iCheck2 = 3; + walIndexWriteHdr(pWal, &pWal->hdr); + + /* TODO: If a crash occurs and the current log is copied into the + ** database there is no problem. 
However, if a crash occurs while + ** writing the next transaction into the start of the log, such that: + ** + ** * The first transaction currently in the log is left intact, but + ** * The second (or subsequent) transaction is damaged, + ** + ** then the database could become corrupt. + ** + ** The easiest thing to do would be to write and sync a dummy header + ** into the log at this point. Unfortunately, that turns out to be + ** an unwelcome performance hit. Alternatives are... + */ +#if 0 + memset(zBuf, 0, WAL_FRAME_HDRSIZE); + rc = sqlite3OsWrite(pWal->pFd, zBuf, WAL_FRAME_HDRSIZE, 0); + if( rc!=SQLITE_OK ) goto out; + rc = sqlite3OsSync(pWal->pFd, pWal->sync_flags); +#endif + + out: + walIteratorFree(pIter); + return rc; +} + +/* +** Close a connection to a log file. +*/ +int sqlite3WalClose( + Wal *pWal, /* Wal to close */ + sqlite3_file *pFd, /* Database file */ + int sync_flags, /* Flags to pass to OsSync() (or 0) */ + u8 *zBuf /* Buffer of at least page-size bytes */ +){ + int rc = SQLITE_OK; + if( pWal ){ + int isDelete = 0; /* True to unlink wal and wal-index files */ + + /* If an EXCLUSIVE lock can be obtained on the database file (using the + ** ordinary, rollback-mode locking methods, this guarantees that the + ** connection associated with this log file is the only connection to + ** the database. In this case checkpoint the database and unlink both + ** the wal and wal-index files. + ** + ** The EXCLUSIVE lock is not released before returning. + */ + rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE); + if( rc==SQLITE_OK ){ + rc = walCheckpoint(pWal, pFd, sync_flags, zBuf); + if( rc==SQLITE_OK ){ + isDelete = 1; + } + walIndexUnmap(pWal); + } + + pWal->pVfs->xShmClose(pWal->pWIndex, isDelete); + sqlite3OsClose(pWal->pFd); + if( isDelete ){ + sqlite3OsDelete(pWal->pVfs, pWal->zName, 0); + } + sqlite3_free(pWal); + } + return rc; +} + +/* +** Try to read the wal-index header. Attempt to verify the header +** checksum. 
If the checksum can be verified, copy the wal-index +** header into structure pWal->hdr. If the contents of pWal->hdr are +** modified by this and pChanged is not NULL, set *pChanged to 1. +** Otherwise leave *pChanged unmodified. +** +** If the checksum cannot be verified return SQLITE_ERROR. +*/ +int walIndexTryHdr(Wal *pWal, int *pChanged){ + u32 aCksum[2] = {1, 1}; + u32 aHdr[WALINDEX_HDR_NFIELD+2]; + + if( pWal->szWIndex==0 ){ + int rc; + rc = walIndexRemap(pWal, WALINDEX_MMAP_INCREMENT); + if( rc ) return rc; + } + + /* Read the header. The caller may or may not have locked the wal-index + ** file, meaning it is possible that an inconsistent snapshot is read + ** from the file. If this happens, return SQLITE_ERROR. The caller will + ** retry. Or, if the caller has already locked the file and the header + ** still looks inconsistent, it will run recovery. + ** + ** FIX-ME: It is no longer possible to have not locked the wal-index. + */ + memcpy(aHdr, pWal->pWiData, sizeof(aHdr)); + walChecksumBytes((u8*)aHdr, sizeof(u32)*WALINDEX_HDR_NFIELD, aCksum); + if( aCksum[0]!=aHdr[WALINDEX_HDR_NFIELD] + || aCksum[1]!=aHdr[WALINDEX_HDR_NFIELD+1] + ){ + return SQLITE_ERROR; + } + + if( memcmp(&pWal->hdr, aHdr, sizeof(WalIndexHdr)) ){ + if( pChanged ){ + *pChanged = 1; + } + memcpy(&pWal->hdr, aHdr, sizeof(WalIndexHdr)); + } + return SQLITE_OK; +} + +/* +** Read the wal-index header from the wal-index file into structure +** pWal->hdr. If attempting to verify the header checksum fails, try +** to recover the log before returning. +** +** If the wal-index header is successfully read, return SQLITE_OK. +** Otherwise an SQLite error code. +*/ +static int walIndexReadHdr(Wal *pWal, int *pChanged){ + int rc; + + assert( pWal->lockState>=SQLITE_SHM_READ ); + walIndexMap(pWal, -1); + + /* First try to read the header without a lock. Verify the checksum + ** before returning. This will almost always work. 
+ */ + if( SQLITE_OK==walIndexTryHdr(pWal, pChanged) ){ + return SQLITE_OK; + } + + /* If the first attempt to read the header failed, lock the wal-index + ** file and try again. If the header checksum verification fails this + ** time as well, run log recovery. + */ + if( SQLITE_OK==(rc = walSetLock(pWal, SQLITE_SHM_RECOVER)) ){ + if( SQLITE_OK!=walIndexTryHdr(pWal, pChanged) ){ + if( pChanged ){ + *pChanged = 1; + } + rc = walIndexRecover(pWal); + if( rc==SQLITE_OK ){ + rc = walIndexTryHdr(pWal, 0); + } + } + walSetLock(pWal, SQLITE_SHM_READ); + } + + return rc; +} + +/* +** Lock a snapshot. +** +** If this call obtains a new read-lock and the database contents have been +** modified since the most recent call to WalCloseSnapshot() on this Wal +** connection, then *pChanged is set to 1 before returning. Otherwise, it +** is left unmodified. This is used by the pager layer to determine whether +** or not any cached pages may be safely reused. +*/ +int sqlite3WalOpenSnapshot(Wal *pWal, int *pChanged){ + int rc; + + rc = walSetLock(pWal, SQLITE_SHM_READ); + if( rc==SQLITE_OK ){ + pWal->lockState = SQLITE_SHM_READ; + + rc = walIndexReadHdr(pWal, pChanged); + if( rc!=SQLITE_OK ){ + /* An error occured while attempting log recovery. */ + sqlite3WalCloseSnapshot(pWal); + }else{ + /* Check if the mapping needs to grow. */ + if( pWal->hdr.iLastPg + && walIndexEntry(pWal->hdr.iLastPg)*sizeof(u32)>=pWal->szWIndex + ){ + walIndexRemap(pWal, -1); + } + } + } + + walIndexUnmap(pWal); + return rc; +} + +/* +** Unlock the current snapshot. +*/ +void sqlite3WalCloseSnapshot(Wal *pWal){ + if( pWal->lockState!=SQLITE_SHM_UNLOCK ){ + assert( pWal->lockState==SQLITE_SHM_READ ); + walSetLock(pWal, SQLITE_SHM_UNLOCK); + } +} + +/* +** Read a page from the log, if it is present. 
+*/
+int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, u8 *pOut){
+  int rc;                         /* Result of mapping the wal-index */
+  u32 iRead = 0;                  /* Frame holding page pgno, or 0 if none */
+  u32 *aData; 
+  int iFrame = (pWal->hdr.iLastPg & 0xFFFFFF00);
+
+  assert( pWal->lockState==SQLITE_SHM_READ||pWal->lockState==SQLITE_SHM_WRITE );
+  rc = walIndexMap(pWal, -1);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  /* Do a linear search of the unindexed block of page-numbers (if any) 
+  ** at the end of the wal-index. An alternative to this would be to
+  ** build an index in private memory each time a read transaction is
+  ** opened on a new snapshot.
+  */
+  aData = pWal->pWiData;
+  if( pWal->hdr.iLastPg ){
+    u32 *pi = &aData[walIndexEntry(pWal->hdr.iLastPg)];
+    u32 *piStop = pi - (pWal->hdr.iLastPg & 0xFF);
+    /* Scan backwards so that the most recent frame for pgno wins. */
+    while( *pi!=pgno && pi!=piStop ) pi--;
+    if( pi!=piStop ){
+      iRead = (pi-piStop) + iFrame;
+    }
+  }
+  assert( iRead==0 || aData[walIndexEntry(iRead)]==pgno );
+
+  /* Not in the unindexed tail: binary-search the sorted index of each
+  ** complete block of 256 frames, newest block first. */
+  while( iRead==0 && iFrame>0 ){
+    int iLow = 0;
+    int iHigh = 255;
+    u32 *aFrame;
+    u8 *aIndex;
+
+    iFrame -= 256;
+    aFrame = &aData[walIndexEntry(iFrame+1)];
+    aIndex = (u8 *)&aFrame[256];
+
+    while( iLow<=iHigh ){
+      int iTest = (iLow+iHigh)>>1;
+      u32 iPg = aFrame[aIndex[iTest]];
+
+      if( iPg==pgno ){
+        iRead = iFrame + 1 + aIndex[iTest];
+        break;
+      }
+      else if( iPg<pgno ){
+        iLow = iTest+1;
+      }
+      else{
+        iHigh = iTest-1;
+      }
+    }
+  }
+  assert( iRead==0 || aData[walIndexEntry(iRead)]==pgno );
+  /* Callers such as sqlite3WalFrames() assert that no mapping is held. */
+  walIndexUnmap(pWal);
+
+  /* If iRead is non-zero, then it is the log frame number that contains the
+  ** required page. Read and return data from the log file.
+  */
+  if( iRead ){
+    i64 iOffset = walFrameOffset(iRead, pWal->hdr.pgsz) + WAL_FRAME_HDRSIZE;
+    *pInWal = 1;
+    return sqlite3OsRead(pWal->pFd, pOut, pWal->hdr.pgsz, iOffset);
+  }
+
+  *pInWal = 0;
+  return SQLITE_OK;
+}
+
+
+/* 
+** Set *pPgno to the size of the database file (or zero, if unknown).
+** The value comes from the private copy of the wal-index header.
+*/
+void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){
+  assert( pWal->lockState==SQLITE_SHM_READ
+       || pWal->lockState==SQLITE_SHM_WRITE );
+  *pPgno = pWal->hdr.nPage;
+}
+
+/* 
+** This function returns SQLITE_OK if the caller may write to the database.
+** Otherwise, if the caller is operating on a snapshot that has already
+** been overwritten by another writer, SQLITE_BUSY is returned.
+*/ +int sqlite3WalWriteLock(Wal *pWal, int op){ + int rc; + if( op ){ + assert( pWal->lockState == SQLITE_SHM_READ ); + rc = walSetLock(pWal, SQLITE_SHM_WRITE); + + /* If this connection is not reading the most recent database snapshot, + ** it is not possible to write to the database. In this case release + ** the write locks and return SQLITE_BUSY. + */ + if( rc==SQLITE_OK ){ + rc = walIndexMap(pWal, -1); + if( rc==SQLITE_OK + && memcmp(&pWal->hdr, pWal->pWiData, sizeof(WalIndexHdr)) + ){ + rc = SQLITE_BUSY; + } + walIndexUnmap(pWal); + if( rc!=SQLITE_OK ){ + walSetLock(pWal, SQLITE_SHM_READ); + } + } + }else if( pWal->lockState==SQLITE_SHM_WRITE ){ + rc = walSetLock(pWal, SQLITE_SHM_READ); + } + return rc; +} + +/* +** The Wal object passed to this function must be holding the write-lock. +** +** If any data has been written (but not committed) to the log file, this +** function moves the write-pointer back to the start of the transaction. +** +** Additionally, the callback function is invoked for each frame written +** to the log since the start of the transaction. If the callback returns +** other than SQLITE_OK, it is not invoked again and the error code is +** returned to the caller. +** +** Otherwise, if the callback function does not return an error, this +** function returns SQLITE_OK. +*/ +int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ + int rc = SQLITE_OK; + Pgno iMax = pWal->hdr.iLastPg; + Pgno iFrame; + + assert( pWal->lockState==SQLITE_SHM_WRITE ); + walIndexReadHdr(pWal, 0); + for(iFrame=pWal->hdr.iLastPg+1; iFrame<=iMax && rc==SQLITE_OK; iFrame++){ + rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]); + } + walIndexUnmap(pWal); + return rc; +} + +/* Return an integer that records the current (uncommitted) write +** position in the WAL +*/ +u32 sqlite3WalSavepoint(Wal *pWal){ + assert( pWal->lockState==SQLITE_SHM_WRITE ); + return pWal->hdr.iLastPg; +} + +/* Move the write position of the WAL back to iFrame. 
Called in +** response to a ROLLBACK TO command. +*/ +int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame){ + int rc = SQLITE_OK; + u8 aCksum[8]; + assert( pWal->lockState==SQLITE_SHM_WRITE ); + + pWal->hdr.iLastPg = iFrame; + if( iFrame>0 ){ + i64 iOffset = walFrameOffset(iFrame, pWal->hdr.pgsz) + sizeof(u32)*2; + rc = sqlite3OsRead(pWal->pFd, aCksum, sizeof(aCksum), iOffset); + pWal->hdr.iCheck1 = sqlite3Get4byte(&aCksum[0]); + pWal->hdr.iCheck2 = sqlite3Get4byte(&aCksum[4]); + } + + return rc; +} + +/* +** Write a set of frames to the log. The caller must hold the write-lock +** on the log file (obtained using sqlite3WalWriteLock()). +*/ +int sqlite3WalFrames( + Wal *pWal, /* Wal handle to write to */ + int nPgsz, /* Database page-size in bytes */ + PgHdr *pList, /* List of dirty pages to write */ + Pgno nTruncate, /* Database size after this commit */ + int isCommit, /* True if this is a commit */ + int sync_flags /* Flags to pass to OsSync() (or 0) */ +){ + int rc; /* Used to catch return codes */ + u32 iFrame; /* Next frame address */ + u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ + PgHdr *p; /* Iterator to run through pList with. */ + u32 aCksum[2]; /* Checksums */ + PgHdr *pLast; /* Last frame in list */ + int nLast = 0; /* Number of extra copies of last page */ + + assert( WAL_FRAME_HDRSIZE==(4 * 2 + 2*sizeof(u32)) ); + assert( pList ); + assert( pWal->lockState==SQLITE_SHM_WRITE ); + assert( pWal->pWiData==0 ); + + /* If this is the first frame written into the log, write the log + ** header to the start of the log file. See comments at the top of + ** this file for a description of the log-header format. 
+ */ + assert( WAL_FRAME_HDRSIZE>=WAL_HDRSIZE ); + iFrame = pWal->hdr.iLastPg; + if( iFrame==0 ){ + sqlite3Put4byte(aFrame, nPgsz); + sqlite3_randomness(8, &aFrame[4]); + pWal->hdr.iCheck1 = sqlite3Get4byte(&aFrame[4]); + pWal->hdr.iCheck2 = sqlite3Get4byte(&aFrame[8]); + rc = sqlite3OsWrite(pWal->pFd, aFrame, WAL_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + } + + aCksum[0] = pWal->hdr.iCheck1; + aCksum[1] = pWal->hdr.iCheck2; + + /* Write the log file. */ + for(p=pList; p; p=p->pDirty){ + u32 nDbsize; /* Db-size field for frame header */ + i64 iOffset; /* Write offset in log file */ + + iOffset = walFrameOffset(++iFrame, nPgsz); + + /* Populate and write the frame header */ + nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0; + walEncodeFrame(aCksum, p->pgno, nDbsize, nPgsz, p->pData, aFrame); + rc = sqlite3OsWrite(pWal->pFd, aFrame, sizeof(aFrame), iOffset); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Write the page data */ + rc = sqlite3OsWrite(pWal->pFd, p->pData, nPgsz, iOffset + sizeof(aFrame)); + if( rc!=SQLITE_OK ){ + return rc; + } + pLast = p; + } + + /* Sync the log file if the 'isSync' flag was specified. */ + if( sync_flags ){ + i64 iSegment = sqlite3OsSectorSize(pWal->pFd); + i64 iOffset = walFrameOffset(iFrame+1, nPgsz); + + assert( isCommit ); + + if( iSegmentpgno,nTruncate,nPgsz,pLast->pData,aFrame); + rc = sqlite3OsWrite(pWal->pFd, aFrame, sizeof(aFrame), iOffset); + if( rc!=SQLITE_OK ){ + return rc; + } + + iOffset += WAL_FRAME_HDRSIZE; + rc = sqlite3OsWrite(pWal->pFd, pLast->pData, nPgsz, iOffset); + if( rc!=SQLITE_OK ){ + return rc; + } + nLast++; + iOffset += nPgsz; + } + + rc = sqlite3OsSync(pWal->pFd, sync_flags); + if( rc!=SQLITE_OK ){ + return rc; + } + } + assert( pWal->pWiData==0 ); + + /* Append data to the log summary. 
It is not necessary to lock the + ** wal-index to do this as the RESERVED lock held on the db file + ** guarantees that there are no other writers, and no data that may + ** be in use by existing readers is being overwritten. + */ + iFrame = pWal->hdr.iLastPg; + for(p=pList; p; p=p->pDirty){ + iFrame++; + walIndexAppend(pWal, iFrame, p->pgno); + } + while( nLast>0 ){ + iFrame++; + nLast--; + walIndexAppend(pWal, iFrame, pLast->pgno); + } + + /* Update the private copy of the header. */ + pWal->hdr.pgsz = nPgsz; + pWal->hdr.iLastPg = iFrame; + if( isCommit ){ + pWal->hdr.iChange++; + pWal->hdr.nPage = nTruncate; + } + pWal->hdr.iCheck1 = aCksum[0]; + pWal->hdr.iCheck2 = aCksum[1]; + + /* If this is a commit, update the wal-index header too. */ + if( isCommit ){ + walIndexWriteHdr(pWal, &pWal->hdr); + pWal->iCallback = iFrame; + } + walIndexUnmap(pWal); + + return rc; +} + +/* +** Checkpoint the database: +** +** 1. Acquire a CHECKPOINT lock +** 2. Copy the contents of the log into the database file. +** 3. Zero the wal-index header (so new readers will ignore the log). +** 4. Drop the CHECKPOINT lock. +*/ +int sqlite3WalCheckpoint( + Wal *pWal, /* Wal connection */ + sqlite3_file *pFd, /* File descriptor open on db file */ + int sync_flags, /* Flags to sync db file with (or 0) */ + u8 *zBuf, /* Temporary buffer to use */ + int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ + void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ +){ + int rc; /* Return code */ + int isChanged = 0; /* True if a new wal-index header is loaded */ + + assert( pWal->lockState==SQLITE_SHM_UNLOCK ); + assert( pWal->pWiData==0 ); + + /* Get the CHECKPOINT lock */ + do { + rc = walSetLock(pWal, SQLITE_SHM_CHECKPOINT); + }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) ); + if( rc!=SQLITE_OK ){ + walSetLock(pWal, SQLITE_SHM_UNLOCK); + return rc; + } + + /* Copy data from the log to the database file. 
*/ + rc = walIndexReadHdr(pWal, &isChanged); + if( rc==SQLITE_OK ){ + rc = walCheckpoint(pWal, pFd, sync_flags, zBuf); + } + if( isChanged ){ + /* If a new wal-index header was loaded before the checkpoint was + ** performed, then the pager-cache associated with log pWal is now + ** out of date. So zero the cached wal-index header to ensure that + ** next time the pager opens a snapshot on this database it knows that + ** the cache needs to be reset. + */ + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); + } + + /* Release the locks. */ + walIndexUnmap(pWal); + walSetLock(pWal, SQLITE_SHM_UNLOCK); + return rc; +} + +/* Return the value to pass to a sqlite3_wal_hook callback, the +** number of frames in the WAL at the point of the last commit since +** sqlite3WalCallback() was called. If no commits have occurred since +** the last call, then return 0. +*/ +int sqlite3WalCallback(Wal *pWal){ + u32 ret = 0; + if( pWal ){ + ret = pWal->iCallback; + pWal->iCallback = 0; + } + return (int)ret; +} +#endif /* #ifndef SQLITE_OMIT_WAL */ ADDED src/wal.h Index: src/wal.h ================================================================== --- /dev/null +++ src/wal.h @@ -0,0 +1,99 @@ +/* +** 2010 February 1 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This header file defines the interface to the write-ahead logging +** system. Refer to the comments below and the header comment attached to +** the implementation of each function in log.c for further details. 
+*/
+
+#ifndef _WAL_H_
+#define _WAL_H_
+
+#include "sqliteInt.h"
+
+#ifdef SQLITE_OMIT_WAL  /* WAL compiled out: calls expand to 0 or nothing */
+# define sqlite3WalOpen(x,y,z)             0
+# define sqlite3WalClose(w,x,y,z)          0
+# define sqlite3WalOpenSnapshot(y,z)       0
+# define sqlite3WalCloseSnapshot(z) 
+# define sqlite3WalRead(w,x,y,z)           0
+# define sqlite3WalDbsize(y,z)
+# define sqlite3WalWriteLock(y,z)          0
+# define sqlite3WalUndo(x,y,z)             0
+# define sqlite3WalSavepoint(z)            0
+# define sqlite3WalSavepointUndo(y,z)      0
+# define sqlite3WalFrames(u,v,w,x,y,z)     0
+# define sqlite3WalCheckpoint(u,v,w,x,y,z) 0
+# define sqlite3WalCallback(z)             0
+#else
+
+/* Connection to a write-ahead log (WAL) file. 
+** There is one object of this type for each pager. 
+*/
+typedef struct Wal Wal;
+
+/* Open and close a connection to a write-ahead log. */
+int sqlite3WalOpen(sqlite3_vfs*, const char *zDb, Wal **ppWal);
+int sqlite3WalClose(Wal *pWal, sqlite3_file *pFd, int sync_flags, u8 *zBuf);
+
+/* Used by readers to open (lock) and close (unlock) a snapshot.  A 
+** snapshot is like a read-transaction.  It is the state of the database
+** at an instant in time.  sqlite3WalOpenSnapshot() gets a read lock and
+** preserves the current state even if other threads or processes
+** write to or checkpoint the WAL.  sqlite3WalCloseSnapshot() closes the
+** transaction and releases the lock.
+*/
+int sqlite3WalOpenSnapshot(Wal *pWal, int *);
+void sqlite3WalCloseSnapshot(Wal *pWal);
+
+/* Read a page from the write-ahead log, if it is present. */
+int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, u8 *pOut);
+
+/* Return the size of the database as it existed at the beginning
+** of the snapshot */
+void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno);
+
+/* Obtain or release the WRITER lock.
*/ +int sqlite3WalWriteLock(Wal *pWal, int op); + +/* Undo any frames written (but not committed) to the log */ +int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx); + +/* Return an integer that records the current (uncommitted) write +** position in the WAL */ +u32 sqlite3WalSavepoint(Wal *pWal); + +/* Move the write position of the WAL back to iFrame. Called in +** response to a ROLLBACK TO command. */ +int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame); + +/* Write a frame or frames to the log. */ +int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int); + +/* Copy pages from the log to the database file */ +int sqlite3WalCheckpoint( + Wal *pWal, /* Write-ahead log connection */ + sqlite3_file *pFd, /* File descriptor open on db file */ + int sync_flags, /* Flags to sync db file with (or 0) */ + u8 *zBuf, /* Temporary buffer to use */ + int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ + void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ +); + +/* Return the value to pass to a sqlite3_wal_hook callback, the +** number of frames in the WAL at the point of the last commit since +** sqlite3WalCallback() was called. If no commits have occurred since +** the last call, then return 0. +*/ +int sqlite3WalCallback(Wal *pWal); + +#endif /* ifndef SQLITE_OMIT_WAL */ +#endif /* _WAL_H_ */ Index: test/avtrans.test ================================================================== --- test/avtrans.test +++ test/avtrans.test @@ -20,12 +20,13 @@ # Create several tables to work with. 
# do_test avtrans-1.0 { - execsql { - PRAGMA auto_vacuum=ON; + execsql { PRAGMA auto_vacuum=ON } + wal_set_journal_mode + execsql { CREATE TABLE one(a int PRIMARY KEY, b text); INSERT INTO one VALUES(1,'one'); INSERT INTO one VALUES(2,'two'); INSERT INTO one VALUES(3,'three'); SELECT b FROM one ORDER BY a; @@ -46,10 +47,11 @@ } {one two three} do_test avtrans-1.10 { execsql {SELECT b FROM two ORDER BY a} altdb } {I V X} integrity_check avtrans-1.11 +wal_check_journal_mode avtrans-1.12 # Basic transactions # do_test avtrans-2.1 { set v [catch {execsql {BEGIN}} msg] @@ -82,10 +84,11 @@ SELECT a FROM two ORDER BY a; END; } } {1 2 3 1 5 10} integrity_check avtrans-2.11 +wal_check_journal_mode avtrans-2.12 # Check the locking behavior # sqlite3_soft_heap_limit 0 do_test avtrans-3.1 { @@ -911,11 +914,13 @@ do_test avtrans-9.$i.5-$cnt { expr {$sqlite_fullsync_count==0} } {1} } } + wal_check_journal_mode avtrans-9.$i-6.$cnt } set ::pager_old_format 0 } integrity_check avtrans-10.1 +wal_check_journal_mode avtrans-10.2 finish_test Index: test/backup.test ================================================================== --- test/backup.test +++ test/backup.test @@ -36,10 +36,12 @@ # # backup-8.*: Test multiple simultaneous backup operations. # # backup-9.*: Test that passing a negative argument to backup_step() is # interpreted as "copy the whole file". +# +# backup-10.*: Test writing the source database mid backup. # proc data_checksum {db file} { $db one "SELECT md5sum(a, b) FROM ${file}.t1" } proc test_contents {name db1 file1 db2 file2} { $db2 eval {select * from sqlite_master} @@ -485,10 +487,11 @@ # 2) Backing up file-to-file. The writer writes via the same pager as # is used by the backup operation. # 3) Backing up memory-to-file. 
# set iTest 0 +file delete -force bak.db-wal foreach {writer file} {db test.db db3 test.db db :memory:} { incr iTest catch { file delete bak.db } sqlite3 db2 bak.db catch { file delete $file } @@ -903,10 +906,11 @@ db2 close db3 close } +#----------------------------------------------------------------------- # Test that if the database is written to via the same database handle being # used as the source by a backup operation: # # 10.1.*: If the db is in-memory, the backup is restarted. # 10.2.*: If the db is a file, the backup is not restarted. Index: test/corruptA.test ================================================================== --- test/corruptA.test +++ test/corruptA.test @@ -35,13 +35,15 @@ # is detected when opening the database file. # db close file copy -force test.db test.db-template +set unreadable_version 02 +ifcapable wal { set unreadable_version 03 } do_test corruptA-2.1 { file copy -force test.db-template test.db - hexio_write test.db 19 02 ;# the read format number + hexio_write test.db 19 $unreadable_version ;# the read format number sqlite3 db test.db catchsql {SELECT * FROM t1} } {1 {file is encrypted or is not a database}} do_test corruptA-2.2 { Index: test/lock2.test ================================================================== --- test/lock2.test +++ test/lock2.test @@ -14,73 +14,12 @@ # $Id: lock2.test,v 1.11 2009/05/01 10:55:34 danielk1977 Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl - -# Launch another testfixture process to be controlled by this one. A -# channel name is returned that may be passed as the first argument to proc -# 'testfixture' to execute a command. The child testfixture process is shut -# down by closing the channel. -proc launch_testfixture {} { - set prg [info nameofexec] - if {$prg eq ""} { - set prg [file join . 
testfixture] - } - set chan [open "|$prg tf_main.tcl" r+] - fconfigure $chan -buffering line - return $chan -} - -# Execute a command in a child testfixture process, connected by two-way -# channel $chan. Return the result of the command, or an error message. -proc testfixture {chan cmd} { - puts $chan $cmd - puts $chan OVER - set r "" - while { 1 } { - set line [gets $chan] - if { $line == "OVER" } { - return $r - } - if {[eof $chan]} { - return "ERROR: Child process hung up" - } - append r $line - } -} - -# Write the main loop for the child testfixture processes into file -# tf_main.tcl. The parent (this script) interacts with the child processes -# via a two way pipe. The parent writes a script to the stdin of the child -# process, followed by the word "OVER" on a line of its own. The child -# process evaluates the script and writes the results to stdout, followed -# by an "OVER" of its own. -set f [open tf_main.tcl w] -puts $f { - set l [open log w] - set script "" - while {![eof stdin]} { - flush stdout - set line [gets stdin] - puts $l "READ $line" - if { $line == "OVER" } { - catch {eval $script} result - puts $result - puts $l "WRITE $result" - puts OVER - puts $l "WRITE OVER" - flush stdout - set script "" - } else { - append script $line - append script " ; " - } - } - close $l -} -close $f +source $testdir/lock_common.tcl + # Simple locking test case: # # lock2-1.1: Connect a second process to the database. # lock2-1.2: Establish a RESERVED lock with this process. @@ -93,11 +32,10 @@ # this fails due to the PENDING lock. # lock2-1.8: Ensure the first process can now upgrade to EXCLUSIVE. 
# do_test lock2-1.1 { set ::tf1 [launch_testfixture] - testfixture $::tf1 "sqlite3_test_control_pending_byte $::sqlite_pending_byte" testfixture $::tf1 { sqlite3 db test.db -key xyzzy db eval {select * from sqlite_master} } } {} ADDED test/lock_common.tcl Index: test/lock_common.tcl ================================================================== --- /dev/null +++ test/lock_common.tcl @@ -0,0 +1,99 @@ +# 2010 April 14 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file contains code used by several different test scripts. The +# code in this file allows testfixture to control another process (or +# processes) to test locking. +# + +# Launch another testfixture process to be controlled by this one. A +# channel name is returned that may be passed as the first argument to proc +# 'testfixture' to execute a command. The child testfixture process is shut +# down by closing the channel. +proc launch_testfixture {} { + set prg [info nameofexec] + if {$prg eq ""} { + set prg [file join . testfixture] + } + set chan [open "|$prg tf_main.tcl" r+] + fconfigure $chan -buffering line + testfixture $chan "sqlite3_test_control_pending_byte $::sqlite_pending_byte" + return $chan +} + +# Execute a command in a child testfixture process, connected by two-way +# channel $chan. Return the result of the command, or an error message. 
+proc testfixture {chan cmd} {
+  puts $chan $cmd
+  puts $chan OVER                 ;# terminator: tells the child the script is complete
+  set r ""
+  while { 1 } {
+    set line [gets $chan]
+    if { $line == "OVER" } {
+      return $r
+    }
+    if {[eof $chan]} {
+      return "ERROR: Child process hung up"
+    }
+    append r $line                ;# lines are concatenated with no separator
+  }
+}
+# Readable-event callback for testfixture_nb: gather output until OVER or EOF.
+proc testfixture_nb_cb {varname chan} {
+  set line [gets $chan]           ;# empty string when no complete line is ready yet
+  if { $line == "OVER" || [eof $chan] } {
+    if { $line != "OVER" } { append ::tfnb($chan) $line }  ;# EOF (child hung up): keep trailing output
+    set $varname $::tfnb($chan)   ;# plain [set] inside a proc: pass a qualified name, e.g. ::result
+    unset ::tfnb($chan) ; close $chan
+  } else {
+    append ::tfnb($chan) $line
+  }
+}
+# Run $cmd in a freshly launched child without blocking; output goes to $varname.
+proc testfixture_nb {varname cmd} {
+  set chan [launch_testfixture]
+  set ::tfnb($chan) ""            ;# per-channel accumulator read by testfixture_nb_cb
+  fconfigure $chan -blocking 0 -buffering none
+  puts $chan $cmd
+  puts $chan OVER
+  fileevent $chan readable [list testfixture_nb_cb $varname $chan]
+  return ""
+}
+
+# Write the main loop for the child testfixture processes into file
+# tf_main.tcl. The parent (this script) interacts with the child processes
+# via a two way pipe. The parent writes a script to the stdin of the child
+# process, followed by the word "OVER" on a line of its own. The child
+# process evaluates the script and writes the results to stdout, followed
+# by an "OVER" of its own.
+set f [open tf_main.tcl w] +puts $f { + set l [open log w] + set script "" + while {![eof stdin]} { + flush stdout + set line [gets stdin] + puts $l "READ $line" + if { $line == "OVER" } { + set rc [catch {eval $script} result] + puts $result + puts $l "WRITE $result" + puts OVER + puts $l "WRITE OVER" + flush stdout + set script "" + } else { + append script $line + append script "\n" + } + } + close $l +} +close $f Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -753,10 +753,21 @@ } -include { insert.test insert2.test insert3.test rollback.test select1.test select2.test select3.test } } + +run_tests "wal" -description { + Run tests with journal_mode=WAL +} -initialize { + set ::savepoint6_iterations 100 +} -shutdown { + unset -nocomplain ::savepoint6_iterations +} -include { + savepoint.test savepoint2.test savepoint6.test + trans.test avtrans.test +} # End of tests ############################################################################# if {$::perm::testmode eq "targets"} { puts "" ; exit } Index: test/quick.test ================================================================== --- test/quick.test +++ test/quick.test @@ -99,10 +99,14 @@ btree8.test shared_err.test vtab_err.test veryquick.test mallocAll.test + + walslow.test + walcrash.test + walthread.test } if {[sqlite3 -has-codec]} { # lappend EXCLUDE \ # conflict.test Index: test/rdonly.test ================================================================== --- test/rdonly.test +++ test/rdonly.test @@ -27,19 +27,19 @@ INSERT INTO t1 VALUES(1); SELECT * FROM t1; } } {1} -# Changes the write version from 1 to 2. Verify that the database +# Changes the write version from 1 to 3. Verify that the database # can be read but not written. 
# do_test rdonly-1.2 { db close hexio_get_int [hexio_read test.db 18 1] } 1 do_test rdonly-1.3 { - hexio_write test.db 18 02 + hexio_write test.db 18 03 sqlite3 db test.db execsql { SELECT * FROM t1; } } {1} @@ -65,14 +65,16 @@ # Now, after connection [db] has loaded the database schema, modify the # write-version of the file (and the change-counter, so that the # write-version is reloaded). This way, SQLite does not discover that # the database is read-only until after it is locked. # +set ro_version 02 +ifcapable wal { set ro_version 03 } do_test rdonly-1.6 { - hexio_write test.db 18 02 ; # write-version + hexio_write test.db 18 $ro_version ; # write-version hexio_write test.db 24 11223344 ; # change-counter catchsql { INSERT INTO t1 VALUES(2); } } {1 {attempt to write a readonly database}} finish_test Index: test/savepoint.test ================================================================== --- test/savepoint.test +++ test/savepoint.test @@ -12,17 +12,17 @@ # $Id: savepoint.test,v 1.13 2009/07/18 08:30:45 danielk1977 Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl - #---------------------------------------------------------------------- # The following tests - savepoint-1.* - test that the SAVEPOINT, RELEASE # and ROLLBACK TO comands are correctly parsed, and that the auto-commit # flag is correctly set and unset as a result. # do_test savepoint-1.1 { + wal_set_journal_mode execsql { SAVEPOINT sp1; RELEASE sp1; } } {} @@ -91,10 +91,11 @@ } } {} do_test savepoint-1.6 { execsql COMMIT } {} +wal_check_journal_mode savepoint-1.7 #------------------------------------------------------------------------ # These tests - savepoint-2.* - test rollbacks and releases of savepoints # with a very simple data set. 
# @@ -173,40 +174,46 @@ execsql { ROLLBACK; } execsql { SELECT * FROM t1 } } {} +wal_check_journal_mode savepoint-2.12 #------------------------------------------------------------------------ # This block of tests - savepoint-3.* - test that when a transaction # savepoint is rolled back, locks are not released from database files. # And that when a transaction savepoint is released, they are released. -# -do_test savepoint-3.1 { - execsql { SAVEPOINT "transaction" } - execsql { PRAGMA lock_status } -} {main unlocked temp closed} - -do_test savepoint-3.2 { - execsql { INSERT INTO t1 VALUES(1, 2, 3) } - execsql { PRAGMA lock_status } -} {main reserved temp closed} - -do_test savepoint-3.3 { - execsql { ROLLBACK TO "transaction" } - execsql { PRAGMA lock_status } -} {main reserved temp closed} - -do_test savepoint-3.4 { - execsql { INSERT INTO t1 VALUES(1, 2, 3) } - execsql { PRAGMA lock_status } -} {main reserved temp closed} - -do_test savepoint-3.5 { - execsql { RELEASE "transaction" } - execsql { PRAGMA lock_status } -} {main unlocked temp closed} +# +# These tests do not work in WAL mode. WAL mode does not take RESERVED +# locks on the database file. 
+# +if {[wal_is_wal_mode]==0} { + do_test savepoint-3.1 { + execsql { SAVEPOINT "transaction" } + execsql { PRAGMA lock_status } + } {main unlocked temp closed} + + do_test savepoint-3.2 { + execsql { INSERT INTO t1 VALUES(1, 2, 3) } + execsql { PRAGMA lock_status } + } {main reserved temp closed} + + do_test savepoint-3.3 { + execsql { ROLLBACK TO "transaction" } + execsql { PRAGMA lock_status } + } {main reserved temp closed} + + do_test savepoint-3.4 { + execsql { INSERT INTO t1 VALUES(1, 2, 3) } + execsql { PRAGMA lock_status } + } {main reserved temp closed} + + do_test savepoint-3.5 { + execsql { RELEASE "transaction" } + execsql { PRAGMA lock_status } + } {main unlocked temp closed} +} #------------------------------------------------------------------------ # Test that savepoints that include schema modifications are handled # correctly. Test cases savepoint-4.*. # @@ -262,10 +269,11 @@ execsql {SELECT * FROM t3} } {value} do_test savepoint-4.8 { execsql COMMIT } {} +wal_check_journal_mode savepoint-4.9 #------------------------------------------------------------------------ # Test some logic errors to do with the savepoint feature. # @@ -310,29 +318,52 @@ do_test savepoint-5.3.5 { close $fd execsql {release abc} } {} + # Rollback mode: + # + # Open a savepoint transaction and insert a row into the database. Then, + # using a second database handle, open a read-only transaction on the + # database file. Check that the savepoint transaction cannot be committed + # until after the read-only transaction has been closed. + # + # WAL mode: + # + # As above, except that the savepoint transaction can be successfully + # committed before the read-only transaction has been closed. 
+ # do_test savepoint-5.4.1 { execsql { SAVEPOINT main; INSERT INTO blobs VALUES('another blob'); } } {} do_test savepoint-5.4.2 { sqlite3 db2 test.db - execsql { BEGIN ; SELECT * FROM blobs } db2 - catchsql { RELEASE main } - } {1 {database is locked}} - do_test savepoint-5.4.3 { - db2 close - catchsql { RELEASE main } - } {0 {}} - do_test savepoint-5.4.4 { + execsql { BEGIN ; SELECT count(*) FROM blobs } db2 + } {1} + if {[wal_is_wal_mode]} { + do_test savepoint-5.4.3 { catchsql "RELEASE main" } {0 {}} + do_test savepoint-5.4.4 { db2 close } {} + } else { + do_test savepoint-5.4.3 { + catchsql { RELEASE main } + } {1 {database is locked}} + do_test savepoint-5.4.4 { + db2 close + catchsql { RELEASE main } + } {0 {}} + } + do_test savepoint-5.4.5 { execsql { SELECT x FROM blobs WHERE rowid = 2 } } {{another blob}} + do_test savepoint-5.4.6 { + execsql { SELECT count(*) FROM blobs } + } {2} } +wal_check_journal_mode savepoint-5.5 #------------------------------------------------------------------------- # The following tests, savepoint-6.*, test an incr-vacuum inside of a # couple of nested savepoints. # @@ -340,12 +371,13 @@ db close file delete -force test.db sqlite3 db test.db do_test savepoint-6.1 { - execsql { - PRAGMA auto_vacuum = incremental; + execsql { PRAGMA auto_vacuum = incremental } + wal_set_journal_mode + execsql { CREATE TABLE t1(a, b, c); CREATE INDEX i1 ON t1(a, b); BEGIN; INSERT INTO t1 VALUES(randstr(10,400),randstr(10,400),randstr(10,400)); } @@ -374,10 +406,12 @@ COMMIT; } } {} integrity_check savepoint-6.4 + + wal_check_journal_mode savepoint-6.5 } #------------------------------------------------------------------------- # The following tests, savepoint-7.*, attempt to break the logic # surrounding savepoints by growing and shrinking the database file. 
@@ -385,12 +419,13 @@ db close file delete -force test.db sqlite3 db test.db do_test savepoint-7.1 { + execsql { PRAGMA auto_vacuum = incremental } + wal_set_journal_mode execsql { - PRAGMA auto_vacuum = incremental; PRAGMA cache_size = 10; BEGIN; CREATE TABLE t1(a PRIMARY KEY, b); INSERT INTO t1(a) VALUES('alligator'); INSERT INTO t1(a) VALUES('angelfish'); @@ -447,17 +482,19 @@ ROLLBACK TO two; COMMIT; } execsql { PRAGMA integrity_check } } {ok} +wal_check_journal_mode savepoint-7.3.3 do_test savepoint-7.4.1 { db close file delete -force test.db sqlite3 db test.db + execsql { PRAGMA auto_vacuum = incremental } + wal_set_journal_mode execsql { - PRAGMA auto_vacuum = incremental; CREATE TABLE t1(a, b, PRIMARY KEY(a, b)); INSERT INTO t1 VALUES(randstr(1000,1000), randstr(1000,1000)); BEGIN; DELETE FROM t1; SAVEPOINT one; @@ -495,10 +532,11 @@ do_test savepoint-7.5.2 { execsql { DROP TABLE t5; } } {} +wal_check_journal_mode savepoint-7.5.3 # Test oddly named and quoted savepoints. # do_test savepoint-8-1 { execsql { SAVEPOINT "save1" } @@ -596,124 +634,126 @@ # of the aux1 and aux2 locks. So record the current lock status of # TEMP for use in the answers. 
set templockstate [lindex [db eval {PRAGMA lock_status}] 3] -do_test savepoint-10.2.1 { - file delete -force test3.db - file delete -force test2.db - execsql { - ATTACH 'test2.db' AS aux1; - ATTACH 'test3.db' AS aux2; - DROP TABLE t1; - CREATE TABLE main.t1(x, y); - CREATE TABLE aux1.t2(x, y); - CREATE TABLE aux2.t3(x, y); - SELECT name FROM sqlite_master - UNION ALL - SELECT name FROM aux1.sqlite_master - UNION ALL - SELECT name FROM aux2.sqlite_master; - } -} {t1 t2 t3} -do_test savepoint-10.2.2 { - execsql { PRAGMA lock_status } -} [list main unlocked temp $templockstate aux1 unlocked aux2 unlocked] - -do_test savepoint-10.2.3 { - execsql { - SAVEPOINT one; - INSERT INTO t1 VALUES(1, 2); - PRAGMA lock_status; - } -} [list main reserved temp $templockstate aux1 unlocked aux2 unlocked] -do_test savepoint-10.2.4 { - execsql { - INSERT INTO t3 VALUES(3, 4); - PRAGMA lock_status; - } -} [list main reserved temp $templockstate aux1 unlocked aux2 reserved] -do_test savepoint-10.2.5 { - execsql { - SAVEPOINT two; - INSERT INTO t2 VALUES(5, 6); - PRAGMA lock_status; - } -} [list main reserved temp $templockstate aux1 reserved aux2 reserved] -do_test savepoint-10.2.6 { - execsql { SELECT * FROM t2 } -} {5 6} -do_test savepoint-10.2.7 { - execsql { ROLLBACK TO two } - execsql { SELECT * FROM t2 } -} {} -do_test savepoint-10.2.8 { - execsql { PRAGMA lock_status } -} [list main reserved temp $templockstate aux1 reserved aux2 reserved] -do_test savepoint-10.2.9 { - execsql { SELECT 'a', * FROM t1 UNION ALL SELECT 'b', * FROM t3 } -} {a 1 2 b 3 4} -do_test savepoint-10.2.9 { - execsql { - INSERT INTO t2 VALUES(5, 6); - RELEASE one; - } - execsql { - SELECT * FROM t1; - SELECT * FROM t2; - SELECT * FROM t3; - } -} {1 2 5 6 3 4} -do_test savepoint-10.2.9 { - execsql { PRAGMA lock_status } -} [list main unlocked temp $templockstate aux1 unlocked aux2 unlocked] - -do_test savepoint-10.2.10 { - execsql { - SAVEPOINT one; - INSERT INTO t1 VALUES('a', 'b'); - SAVEPOINT two; - INSERT 
INTO t2 VALUES('c', 'd'); - SAVEPOINT three; - INSERT INTO t3 VALUES('e', 'f'); - } - execsql { - SELECT * FROM t1; - SELECT * FROM t2; - SELECT * FROM t3; - } -} {1 2 a b 5 6 c d 3 4 e f} -do_test savepoint-10.2.11 { - execsql { ROLLBACK TO two } - execsql { - SELECT * FROM t1; - SELECT * FROM t2; - SELECT * FROM t3; - } -} {1 2 a b 5 6 3 4} -do_test savepoint-10.2.12 { - execsql { - INSERT INTO t3 VALUES('g', 'h'); - ROLLBACK TO two; - } - execsql { - SELECT * FROM t1; - SELECT * FROM t2; - SELECT * FROM t3; - } -} {1 2 a b 5 6 3 4} -do_test savepoint-10.2.13 { - execsql { ROLLBACK } - execsql { - SELECT * FROM t1; - SELECT * FROM t2; - SELECT * FROM t3; - } -} {1 2 5 6 3 4} -do_test savepoint-10.2.14 { - execsql { PRAGMA lock_status } -} [list main unlocked temp $templockstate aux1 unlocked aux2 unlocked] +if {[wal_is_wal_mode]==0} { + do_test savepoint-10.2.1 { + file delete -force test3.db + file delete -force test2.db + execsql { + ATTACH 'test2.db' AS aux1; + ATTACH 'test3.db' AS aux2; + DROP TABLE t1; + CREATE TABLE main.t1(x, y); + CREATE TABLE aux1.t2(x, y); + CREATE TABLE aux2.t3(x, y); + SELECT name FROM sqlite_master + UNION ALL + SELECT name FROM aux1.sqlite_master + UNION ALL + SELECT name FROM aux2.sqlite_master; + } + } {t1 t2 t3} + do_test savepoint-10.2.2 { + execsql { PRAGMA lock_status } + } [list main unlocked temp $templockstate aux1 unlocked aux2 unlocked] + + do_test savepoint-10.2.3 { + execsql { + SAVEPOINT one; + INSERT INTO t1 VALUES(1, 2); + PRAGMA lock_status; + } + } [list main reserved temp $templockstate aux1 unlocked aux2 unlocked] + do_test savepoint-10.2.4 { + execsql { + INSERT INTO t3 VALUES(3, 4); + PRAGMA lock_status; + } + } [list main reserved temp $templockstate aux1 unlocked aux2 reserved] + do_test savepoint-10.2.5 { + execsql { + SAVEPOINT two; + INSERT INTO t2 VALUES(5, 6); + PRAGMA lock_status; + } + } [list main reserved temp $templockstate aux1 reserved aux2 reserved] + do_test savepoint-10.2.6 { + execsql { SELECT 
* FROM t2 } + } {5 6} + do_test savepoint-10.2.7 { + execsql { ROLLBACK TO two } + execsql { SELECT * FROM t2 } + } {} + do_test savepoint-10.2.8 { + execsql { PRAGMA lock_status } + } [list main reserved temp $templockstate aux1 reserved aux2 reserved] + do_test savepoint-10.2.9 { + execsql { SELECT 'a', * FROM t1 UNION ALL SELECT 'b', * FROM t3 } + } {a 1 2 b 3 4} + do_test savepoint-10.2.9 { + execsql { + INSERT INTO t2 VALUES(5, 6); + RELEASE one; + } + execsql { + SELECT * FROM t1; + SELECT * FROM t2; + SELECT * FROM t3; + } + } {1 2 5 6 3 4} + do_test savepoint-10.2.9 { + execsql { PRAGMA lock_status } + } [list main unlocked temp $templockstate aux1 unlocked aux2 unlocked] + + do_test savepoint-10.2.10 { + execsql { + SAVEPOINT one; + INSERT INTO t1 VALUES('a', 'b'); + SAVEPOINT two; + INSERT INTO t2 VALUES('c', 'd'); + SAVEPOINT three; + INSERT INTO t3 VALUES('e', 'f'); + } + execsql { + SELECT * FROM t1; + SELECT * FROM t2; + SELECT * FROM t3; + } + } {1 2 a b 5 6 c d 3 4 e f} + do_test savepoint-10.2.11 { + execsql { ROLLBACK TO two } + execsql { + SELECT * FROM t1; + SELECT * FROM t2; + SELECT * FROM t3; + } + } {1 2 a b 5 6 3 4} + do_test savepoint-10.2.12 { + execsql { + INSERT INTO t3 VALUES('g', 'h'); + ROLLBACK TO two; + } + execsql { + SELECT * FROM t1; + SELECT * FROM t2; + SELECT * FROM t3; + } + } {1 2 a b 5 6 3 4} + do_test savepoint-10.2.13 { + execsql { ROLLBACK } + execsql { + SELECT * FROM t1; + SELECT * FROM t2; + SELECT * FROM t3; + } + } {1 2 5 6 3 4} + do_test savepoint-10.2.14 { + execsql { PRAGMA lock_status } + } [list main unlocked temp $templockstate aux1 unlocked aux2 unlocked] +} #------------------------------------------------------------------------- # The following tests - savepoint-11.* - test the interaction of # savepoints and creating or dropping tables and indexes in # auto-vacuum mode. 
@@ -720,12 +760,13 @@ # do_test savepoint-11.1 { db close file delete -force test.db sqlite3 db test.db + execsql { PRAGMA auto_vacuum = full; } + wal_set_journal_mode execsql { - PRAGMA auto_vacuum = full; CREATE TABLE t1(a, b, UNIQUE(a, b)); INSERT INTO t1 VALUES(1, randstr(1000,1000)); INSERT INTO t1 VALUES(2, randstr(1000,1000)); } } {} @@ -749,13 +790,13 @@ } } {} integrity_check savepoint-11.7 do_test savepoint-11.8 { execsql { ROLLBACK } + execsql { PRAGMA wal_checkpoint } file size test.db } {8192} - do_test savepoint-11.9 { execsql { DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; @@ -780,10 +821,11 @@ execsql COMMIT } {} do_test savepoint-11.12 { execsql {SELECT * FROM t2} } {1 2 3 4} +wal_check_journal_mode savepoint-11.13 #------------------------------------------------------------------------- # The following tests - savepoint-12.* - test the interaction of # savepoints and "ON CONFLICT ROLLBACK" clauses. # @@ -813,52 +855,55 @@ sqlite3_get_autocommit db } {1} do_test savepoint-12.4 { execsql { SAVEPOINT one } } {} +wal_check_journal_mode savepoint-12.5 #------------------------------------------------------------------------- # The following tests - savepoint-13.* - test the interaction of # savepoints and "journal_mode = off". 
# -do_test savepoint-13.1 { - db close - catch {file delete -force test.db} - sqlite3 db test.db - execsql { - BEGIN; - CREATE TABLE t1(a PRIMARY KEY, b); - INSERT INTO t1 VALUES(1, 2); - COMMIT; - PRAGMA journal_mode = off; - } -} {off} -do_test savepoint-13.2 { - execsql { - BEGIN; - INSERT INTO t1 VALUES(3, 4); - INSERT INTO t1 SELECT a+4,b+4 FROM t1; - COMMIT; - } -} {} -do_test savepoint-13.3 { - execsql { - BEGIN; - INSERT INTO t1 VALUES(9, 10); - SAVEPOINT s1; - INSERT INTO t1 VALUES(11, 12); - COMMIT; - } -} {} -do_test savepoint-13.4 { - execsql { - BEGIN; - INSERT INTO t1 VALUES(13, 14); - SAVEPOINT s1; - INSERT INTO t1 VALUES(15, 16); - ROLLBACK TO s1; - ROLLBACK; - SELECT * FROM t1; - } -} {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16} +if {[wal_is_wal_mode]==0} { + do_test savepoint-13.1 { + db close + catch {file delete -force test.db} + sqlite3 db test.db + execsql { + BEGIN; + CREATE TABLE t1(a PRIMARY KEY, b); + INSERT INTO t1 VALUES(1, 2); + COMMIT; + PRAGMA journal_mode = off; + } + } {off} + do_test savepoint-13.2 { + execsql { + BEGIN; + INSERT INTO t1 VALUES(3, 4); + INSERT INTO t1 SELECT a+4,b+4 FROM t1; + COMMIT; + } + } {} + do_test savepoint-13.3 { + execsql { + BEGIN; + INSERT INTO t1 VALUES(9, 10); + SAVEPOINT s1; + INSERT INTO t1 VALUES(11, 12); + COMMIT; + } + } {} + do_test savepoint-13.4 { + execsql { + BEGIN; + INSERT INTO t1 VALUES(13, 14); + SAVEPOINT s1; + INSERT INTO t1 VALUES(15, 16); + ROLLBACK TO s1; + ROLLBACK; + SELECT * FROM t1; + } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16} +} finish_test Index: test/savepoint2.test ================================================================== --- test/savepoint2.test +++ test/savepoint2.test @@ -11,10 +11,11 @@ # # $Id: savepoint2.test,v 1.5 2009/06/05 17:09:12 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl + # Tests in this file are quite similar to those run by trans.test and # avtrans.test. 
# @@ -21,10 +22,11 @@ proc signature {} { return [db eval {SELECT count(*), md5sum(x) FROM t3}] } do_test savepoint2-1 { + wal_set_journal_mode execsql { PRAGMA cache_size=10; BEGIN; CREATE TABLE t3(x TEXT); INSERT INTO t3 VALUES(randstr(10,400)); @@ -40,10 +42,11 @@ INSERT INTO t3 SELECT randstr(10,400) FROM t3; COMMIT; SELECT count(*) FROM t3; } } {1024} +wal_check_journal_mode savepoint2-1.1 unset -nocomplain ::sig unset -nocomplain SQL set iterations 20 @@ -138,11 +141,14 @@ execsql $SQL(4) execsql COMMIT sqlite3_get_autocommit db } {1} integrity_check savepoint2-$ii.6.1 + + # Check that the connection is still running in WAL mode. + wal_check_journal_mode savepoint2-$ii.7 } unset -nocomplain ::sig unset -nocomplain SQL finish_test Index: test/savepoint6.test ================================================================== --- test/savepoint6.test +++ test/savepoint6.test @@ -23,10 +23,14 @@ PRAGMA auto_vacuum = incremental; CREATE TABLE t1(x, y); CREATE UNIQUE INDEX i1 ON t1(x); CREATE INDEX i2 ON t1(y); } + +if {0==[info exists ::savepoint6_iterations]} { + set ::savepoint6_iterations 1000 +} #-------------------------------------------------------------------------- # In memory database state. # # ::lSavepoint is a list containing one entry for each active savepoint. 
The @@ -220,10 +224,11 @@ foreach zSetup [list { set testname normal sqlite3 db test.db } { + if {[wal_is_wal_mode]} continue set testname tempdb sqlite3 db "" } { if {[catch {set ::permutations_test_prefix} z] == 0 && $z eq "journaltest"} { continue @@ -239,22 +244,24 @@ unset -nocomplain ::lSavepoint unset -nocomplain ::aEntry catch { db close } - file delete -force test.db + file delete -force test.db test.db-wal test.db-journal eval $zSetup sql $DATABASE_SCHEMA + + wal_set_journal_mode do_test savepoint6-$testname.setup { savepoint one insert_rows [random_integers 100 1000] release one checkdb } {ok} - for {set i 0} {$i < 1000} {incr i} { + for {set i 0} {$i < $::savepoint6_iterations} {incr i} { do_test savepoint6-$testname.$i.1 { savepoint_op checkdb } {ok} @@ -262,11 +269,13 @@ database_op database_op checkdb } {ok} } + + wal_check_journal_mode savepoint6-$testname.walok } unset -nocomplain ::lSavepoint unset -nocomplain ::aEntry finish_test Index: test/tclsqlite.test ================================================================== --- test/tclsqlite.test +++ test/tclsqlite.test @@ -33,11 +33,11 @@ lappend v $msg } [list 1 "wrong # args: should be \"$r\""] do_test tcl-1.2 { set v [catch {db bogus} msg] lappend v $msg -} {1 {bad option "bogus": must be authorizer, backup, busy, cache, changes, close, collate, collation_needed, commit_hook, complete, copy, enable_load_extension, errorcode, eval, exists, function, incrblob, interrupt, last_insert_rowid, nullvalue, onecolumn, profile, progress, rekey, restore, rollback_hook, status, timeout, total_changes, trace, transaction, unlock_notify, update_hook, or version}} +} {1 {bad option "bogus": must be authorizer, backup, busy, cache, changes, close, collate, collation_needed, commit_hook, complete, copy, enable_load_extension, errorcode, eval, exists, function, incrblob, interrupt, last_insert_rowid, nullvalue, onecolumn, profile, progress, rekey, restore, rollback_hook, status, timeout, total_changes, trace, 
transaction, unlock_notify, update_hook, version, or wal_hook}} do_test tcl-1.2.1 { set v [catch {db cache bogus} msg] lappend v $msg } {1 {bad option "bogus": must be flush or size}} do_test tcl-1.2.2 { Index: test/tester.tcl ================================================================== --- test/tester.tcl +++ test/tester.tcl @@ -141,10 +141,11 @@ proc reset_db {} { catch {db close} file delete -force test.db file delete -force test.db-journal + file delete -force test.db-wal sqlite3 db ./test.db set ::DB [sqlite3_connection_pointer db] if {[info exists ::SETUP_SQL]} { db eval $::SETUP_SQL } @@ -979,12 +980,48 @@ } ifcapable trigger&&foreignkey { $db eval "PRAGMA foreign_keys = $pk" } } + +#------------------------------------------------------------------------- +# If a test script is executed with global variable +# $::permutations_test_prefix set to "wal", then the tests are run +# in WAL mode. Otherwise, they should be run in rollback mode. The +# following Tcl procs are used to make this less intrusive: +# +# wal_set_journal_mode ?DB? +# +# If running a WAL test, execute "PRAGMA journal_mode = wal" using +# connection handle DB. Otherwise, this command is a no-op. +# +# wal_check_journal_mode TESTNAME ?DB? +# +# If running a WAL test, execute a test case that fails if the main +# database for connection handle DB is not currently a WAL database. +# Otherwise (if not running a WAL permutation) this is a no-op. +# +# wal_is_wal_mode +# +# Returns true if this test should be run in WAL mode. False otherwise.
+# +proc wal_is_wal_mode {} { + expr { [catch {set ::permutations_test_prefix} v]==0 && $v == "wal" } +} +proc wal_set_journal_mode {{db db}} { + if { [wal_is_wal_mode] } { + $db eval "PRAGMA journal_mode = WAL" + } +} +proc wal_check_journal_mode {testname {db db}} { + if { [wal_is_wal_mode] } { + $db eval { SELECT * FROM sqlite_master } + do_test $testname [list $db eval "PRAGMA main.journal_mode"] {wal} + } +} # If the library is compiled with the SQLITE_DEFAULT_AUTOVACUUM macro set # to non-zero, then set the global variable $AUTOVACUUM to 1. set AUTOVACUUM $sqlite_options(default_autovacuum) source $testdir/thread_common.tcl Index: test/thread_common.tcl ================================================================== --- test/thread_common.tcl +++ test/thread_common.tcl @@ -78,11 +78,11 @@ } } } proc thread_spawn {varname args} { - sqlthread spawn $varname [join $args ;] + sqlthread spawn $varname [join $args {;}] } # Return true if this build can run the multi-threaded tests. # proc run_thread_tests {{print_warning 0}} { Index: test/trans.test ================================================================== --- test/trans.test +++ test/trans.test @@ -17,10 +17,11 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl # Create several tables to work with. 
# +wal_set_journal_mode do_test trans-1.0 { execsql { CREATE TABLE one(a int PRIMARY KEY, b text); INSERT INTO one VALUES(1,'one'); INSERT INTO one VALUES(2,'two'); @@ -44,10 +45,11 @@ } {one two three} do_test trans-1.10 { execsql {SELECT b FROM two ORDER BY a} altdb } {I V X} integrity_check trans-1.11 +wal_check_journal_mode trans-1.12 # Basic transactions # do_test trans-2.1 { set v [catch {execsql {BEGIN}} msg] @@ -80,10 +82,11 @@ SELECT a FROM two ORDER BY a; END; } } {1 2 3 1 5 10} integrity_check trans-2.11 +wal_check_journal_mode trans-2.12 # Check the locking behavior # do_test trans-3.1 { execsql { @@ -160,10 +163,11 @@ SELECT a FROM one ORDER BY a; } db} msg] lappend v $msg } {0 {1 2 3 4}} integrity_check trans-3.15 +wal_check_journal_mode trans-3.16 do_test trans-4.1 { set v [catch {execsql { COMMIT; } db} msg] @@ -226,10 +230,12 @@ SELECT a FROM one ORDER BY a; } altdb} msg] lappend v $msg } {0 {1 2 3 4}} integrity_check trans-4.12 +wal_check_journal_mode trans-4.13 +wal_check_journal_mode trans-4.14 altdb do_test trans-4.98 { altdb close execsql { DROP TABLE one; DROP TABLE two; @@ -773,10 +779,11 @@ } do_test trans-7.14 { execsql {SELECT md5sum(type,name,tbl_name,rootpage,sql) FROM sqlite_master} } $checksum2 integrity_check trans-7.15 +wal_check_journal_mode trans-7.16 # Arrange for another process to begin modifying the database but abort # and die in the middle of the modification. Then have this process read # the database. This process should detect the journal file and roll it # back. Verify that this happens correctly. @@ -822,11 +829,11 @@ } $checksum do_test trans-8.5 { execsql {SELECT md5sum(type,name,tbl_name,rootpage,sql) FROM sqlite_master} } $checksum2 integrity_check trans-8.6 - +wal_check_journal_mode trans-8.7 # In the following sequence of tests, compute the MD5 sum of the content # of a table, make lots of modifications to that table, then do a rollback. # Verify that after the rollback, the MD5 checksum is unchanged. 
# @@ -852,10 +859,11 @@ INSERT INTO t3 SELECT randstr(10,400) FROM t3; COMMIT; SELECT count(*) FROM t3; } } {1024} +wal_check_journal_mode trans-9.1.1 # The following procedure computes a "signature" for table "t3". If # T3 changes in any way, the signature should change. # # This is used to test ROLLBACK. We gather a signature for t3, then @@ -938,9 +946,11 @@ expr {$sqlite_fullsync_count==0} } {1} } } } + + wal_check_journal_mode trans-9.$i.6-$cnt set ::pager_old_format 0 } finish_test ADDED test/wal.test Index: test/wal.test ================================================================== --- /dev/null +++ test/wal.test @@ -0,0 +1,1053 @@ +# 2010 April 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. 
+# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl + +ifcapable !wal {finish_test ; return } + +proc reopen_db {} { + catch { db close } + file delete -force test.db test.db-wal test.db-wal-summary + sqlite3_wal db test.db +} + +set ::blobcnt 0 +proc blob {nByte} { + incr ::blobcnt + return [string range [string repeat "${::blobcnt}x" $nByte] 1 $nByte] +} + +proc sqlite3_wal {args} { + eval sqlite3 $args + [lindex $args 0] eval { PRAGMA page_size = 1024 } + [lindex $args 0] eval { PRAGMA journal_mode = wal } + [lindex $args 0] eval { PRAGMA synchronous = normal } + [lindex $args 0] function blob blob +} + +proc log_file_size {nFrame pgsz} { + expr {12 + ($pgsz+16)*$nFrame} +} + +proc log_deleted {logfile} { + return [expr [file exists $logfile]==0] +} + +# +# These are 'warm-body' tests used while developing the WAL code. They +# serve to prove that a few really simple cases work: +# +# wal-1.*: Read and write the database. +# wal-2.*: Test MVCC with one reader, one writer. +# wal-3.*: Test transaction rollback. +# wal-4.*: Test savepoint/statement rollback. +# wal-5.*: Test the temp database. +# wal-6.*: Test creating databases with different page sizes. +# + +do_test wal-0.1 { + execsql { PRAGMA synchronous = normal } + execsql { PRAGMA journal_mode = wal } +} {wal} +do_test wal-0.2 { + file size test.db +} {1024} + +do_test wal-1.0 { + execsql { + BEGIN; + CREATE TABLE t1(a, b); + } + list [file exists test.db-journal] \ + [file exists test.db-wal] \ + [file size test.db] +} {0 1 1024} +do_test wal-1.1 { + execsql COMMIT + list [file exists test.db-journal] [file exists test.db-wal] +} {0 1} +do_test wal-1.2 { + # There are now two pages in the log. 
+ file size test.db-wal +} [log_file_size 2 1024] + +do_test wal-1.3 { + execsql { SELECT * FROM sqlite_master } +} {table t1 t1 2 {CREATE TABLE t1(a, b)}} + +do_test wal-1.4 { + execsql { INSERT INTO t1 VALUES(1, 2) } + execsql { INSERT INTO t1 VALUES(3, 4) } + execsql { INSERT INTO t1 VALUES(5, 6) } + execsql { INSERT INTO t1 VALUES(7, 8) } + execsql { INSERT INTO t1 VALUES(9, 10) } +} {} + +do_test wal-1.5 { + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.1 { + sqlite3_wal db2 ./test.db + execsql { BEGIN; SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.2 { + execsql { INSERT INTO t1 VALUES(11, 12) } + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10 11 12} + +do_test wal-2.3 { + execsql { SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.4 { + execsql { INSERT INTO t1 VALUES(13, 14) } + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} + +do_test wal-2.5 { + execsql { SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10} + +do_test wal-2.6 { + execsql { COMMIT; SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} + +do_test wal-3.1 { + execsql { BEGIN; DELETE FROM t1 } + execsql { SELECT * FROM t1 } +} {} +do_test wal-3.2 { + execsql { SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} +do_test wal-3.3 { + execsql { ROLLBACK } + execsql { SELECT * FROM t1 } +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14} +db2 close + +#------------------------------------------------------------------------- +# The following tests, wal-4.*, test that savepoints work with WAL +# databases. 
+# +do_test wal-4.1 { + execsql { + DELETE FROM t1; + BEGIN; + INSERT INTO t1 VALUES('a', 'b'); + SAVEPOINT sp; + INSERT INTO t1 VALUES('c', 'd'); + SELECT * FROM t1; + } +} {a b c d} +do_test wal-4.2 { + execsql { + ROLLBACK TO sp; + SELECT * FROM t1; + } +} {a b} +do_test wal-4.3 { + execsql { + COMMIT; + SELECT * FROM t1; + } +} {a b} + +do_test wal-4.4.1 { + db close + sqlite3 db test.db + db func blob blob + list [execsql { SELECT * FROM t1 }] [file size test.db-wal] +} {{a b} 0} +do_test wal-4.4.2 { + execsql { PRAGMA cache_size = 10 } + execsql { + CREATE TABLE t2(a, b); + INSERT INTO t2 VALUES(blob(400), blob(400)); + SAVEPOINT tr; + INSERT INTO t2 SELECT blob(400), blob(400) FROM t2; /* 2 */ + INSERT INTO t2 SELECT blob(400), blob(400) FROM t2; /* 4 */ + INSERT INTO t2 SELECT blob(400), blob(400) FROM t2; /* 8 */ + INSERT INTO t2 SELECT blob(400), blob(400) FROM t2; /* 16 */ + INSERT INTO t2 SELECT blob(400), blob(400) FROM t2; /* 32 */ + INSERT INTO t1 SELECT blob(400), blob(400) FROM t1; /* 2 */ + INSERT INTO t1 SELECT blob(400), blob(400) FROM t1; /* 4 */ + INSERT INTO t1 SELECT blob(400), blob(400) FROM t1; /* 8 */ + INSERT INTO t1 SELECT blob(400), blob(400) FROM t1; /* 16 */ + INSERT INTO t1 SELECT blob(400), blob(400) FROM t1; /* 32 */ + SELECT count(*) FROM t2; + } +} {32} +do_test wal-4.4.3 { + execsql { ROLLBACK TO tr } +} {} +do_test wal-4.4.4 { + set logsize [file size test.db-wal] + execsql { + INSERT INTO t1 VALUES('x', 'y'); + RELEASE tr; + } + expr { $logsize == [file size test.db-wal] } +} {1} +do_test wal-4.4.5 { + execsql { SELECT count(*) FROM t2 } +} {1} +do_test wal-4.4.6 { + file copy -force test.db test2.db + file copy -force test.db-wal test2.db-wal + sqlite3 db2 test2.db + execsql { SELECT count(*) FROM t2 ; SELECT count(*) FROM t1 } db2 +} {1 2} +do_test wal-4.4.7 { + execsql { PRAGMA integrity_check } db2 +} {ok} +db2 close + +do_test wal-4.5.1 { + reopen_db + db func blob blob + execsql { + PRAGMA journal_mode = WAL; + CREATE 
TABLE t1(a, b); + INSERT INTO t1 VALUES('a', 'b'); + } + sqlite3 db test.db + db func blob blob + list [execsql { SELECT * FROM t1 }] [file size test.db-wal] +} {{a b} 0} +do_test wal-4.5.2 { + execsql { PRAGMA cache_size = 10 } + execsql { + CREATE TABLE t2(a, b); + BEGIN; + INSERT INTO t2 VALUES(blob(400), blob(400)); + SAVEPOINT tr; + INSERT INTO t2 SELECT blob(400), blob(400) FROM t2; /* 2 */ + INSERT INTO t2 SELECT blob(400), blob(400) FROM t2; /* 4 */ + INSERT INTO t2 SELECT blob(400), blob(400) FROM t2; /* 8 */ + INSERT INTO t2 SELECT blob(400), blob(400) FROM t2; /* 16 */ + INSERT INTO t2 SELECT blob(400), blob(400) FROM t2; /* 32 */ + INSERT INTO t1 SELECT blob(400), blob(400) FROM t1; /* 2 */ + INSERT INTO t1 SELECT blob(400), blob(400) FROM t1; /* 4 */ + INSERT INTO t1 SELECT blob(400), blob(400) FROM t1; /* 8 */ + INSERT INTO t1 SELECT blob(400), blob(400) FROM t1; /* 16 */ + INSERT INTO t1 SELECT blob(400), blob(400) FROM t1; /* 32 */ + SELECT count(*) FROM t2; + } +} {32} +do_test wal-4.5.3 { + execsql { ROLLBACK TO tr } +} {} +do_test wal-4.5.4 { + set logsize [file size test.db-wal] + execsql { + INSERT INTO t1 VALUES('x', 'y'); + RELEASE tr; + COMMIT; + } + expr { $logsize == [file size test.db-wal] } +} {1} +do_test wal-4.5.5 { + execsql { SELECT count(*) FROM t2 ; SELECT count(*) FROM t1 } +} {1 2} +do_test wal-4.5.6 { + file copy -force test.db test2.db + file copy -force test.db-wal test2.db-wal + sqlite3 db2 test2.db + execsql { SELECT count(*) FROM t2 ; SELECT count(*) FROM t1 } db2 +} {1 2} +do_test wal-4.5.7 { + execsql { PRAGMA integrity_check } db2 +} {ok} +db2 close + + +reopen_db +do_test wal-5.1 { + execsql { + CREATE TEMP TABLE t2(a, b); + INSERT INTO t2 VALUES(1, 2); + } +} {} +do_test wal-5.2 { + execsql { + BEGIN; + INSERT INTO t2 VALUES(3, 4); + SELECT * FROM t2; + } +} {1 2 3 4} +do_test wal-5.3 { + execsql { + ROLLBACK; + SELECT * FROM t2; + } +} {1 2} +do_test wal-5.4 { + execsql { + CREATE TEMP TABLE t3(x UNIQUE); + BEGIN; + 
INSERT INTO t2 VALUES(3, 4); + INSERT INTO t3 VALUES('abc'); + } + catchsql { INSERT INTO t3 VALUES('abc') } +} {1 {column x is not unique}} +do_test wal-5.5 { + execsql { + COMMIT; + SELECT * FROM t2; + } +} {1 2 3 4} +db close + +foreach sector {512 4096} { + sqlite3_simulate_device -sectorsize $sector + foreach pgsz {512 1024 2048 4096} { + file delete -force test.db test.db-wal + do_test wal-6.$sector.$pgsz.1 { + sqlite3 db test.db -vfs devsym + execsql " + PRAGMA page_size = $pgsz; + PRAGMA journal_mode = wal; + " + execsql " + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + " + db close + file size test.db + } [expr $pgsz*2] + + do_test wal-6.$sector.$pgsz.2 { + log_deleted test.db-wal + } {1} + } +} + +do_test wal-7.1 { + file delete -force test.db test.db-wal + sqlite3_wal db test.db + execsql { + PRAGMA page_size = 1024; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + } + list [file size test.db] [file size test.db-wal] +} [list 1024 [log_file_size 3 1024]] +do_test wal-7.2 { + execsql { PRAGMA wal_checkpoint } + list [file size test.db] [file size test.db-wal] +} [list 2048 [log_file_size 3 1024]] + +# Execute some transactions in auto-vacuum mode to test database file +# truncation. 
+# +do_test wal-8.1 { + reopen_db + catch { db close } + file delete -force test.db test.db-wal + + sqlite3 db test.db + db function blob blob + execsql { + PRAGMA auto_vacuum = 1; + PRAGMA journal_mode = wal; + PRAGMA auto_vacuum; + } +} {wal 1} +do_test wal-8.2 { + execsql { + PRAGMA page_size = 1024; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES(blob(900)); + INSERT INTO t1 VALUES(blob(900)); + INSERT INTO t1 SELECT blob(900) FROM t1; /* 4 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 8 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 16 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 32 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 64 */ + PRAGMA wal_checkpoint; + } + file size test.db +} [expr 68*1024] +do_test wal-8.3 { + execsql { + DELETE FROM t1 WHERE rowid<54; + PRAGMA wal_checkpoint; + } + file size test.db +} [expr 14*1024] + +# Run some "warm-body" tests to ensure that log-summary files with more +# than 256 entries (log summaries that contain index blocks) work Ok. +# +do_test wal-9.1 { + reopen_db + execsql { + CREATE TABLE t1(x PRIMARY KEY); + INSERT INTO t1 VALUES(blob(900)); + INSERT INTO t1 VALUES(blob(900)); + INSERT INTO t1 SELECT blob(900) FROM t1; /* 4 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 8 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 16 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 32 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 64 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 128 */ + INSERT INTO t1 SELECT blob(900) FROM t1; /* 256 */ + } + file size test.db +} 1024 +do_test wal-9.2 { + sqlite3_wal db2 test.db + execsql {PRAGMA integrity_check } db2 +} {ok} + +do_test wal-9.3 { + file delete -force test2.db test2.db-wal + file copy test.db test2.db + file copy test.db-wal test2.db-wal + sqlite3_wal db3 test2.db + execsql {PRAGMA integrity_check } db3 +} {ok} +db3 close + +do_test wal-9.4 { + execsql { PRAGMA wal_checkpoint } + db2 close + sqlite3_wal db2 test.db + execsql {PRAGMA integrity_check } db2 +} 
{ok} + +foreach handle {db db2 db3} { catch { $handle close } } +unset handle + +#------------------------------------------------------------------------- +# The following block of tests - wal-10.* - test that the WAL locking +# scheme works in simple cases. This block of tests is run twice. Once +# using multiple connections in the address space of the current process, +# and once with all connections except one running in external processes. +# +foreach code [list { + set ::code2_chan [launch_testfixture] + set ::code3_chan [launch_testfixture] + proc code2 {tcl} { testfixture $::code2_chan $tcl } + proc code3 {tcl} { testfixture $::code3_chan $tcl } + set tn 1 +} { + proc code2 {tcl} { uplevel #0 $tcl } + proc code3 {tcl} { uplevel #0 $tcl } + set tn 2 +}] { + + eval $code + reopen_db + + # Open connections [db2] and [db3]. Depending on which iteration this + # is, the connections may be created in this interpreter, or in + # interpreters running in other OS processes. As such, the [db2] and [db3] + # commands should only be accessed within [code2] and [code3] blocks, + # respectively. + # + code2 { sqlite3 db2 test.db ; db2 eval { PRAGMA journal_mode = WAL } } + code3 { sqlite3 db3 test.db ; db3 eval { PRAGMA journal_mode = WAL } } + + # Shorthand commands. Execute SQL using database connection [db2] or + # [db3]. Return the results. + # + proc sql2 {sql} { code2 [list db2 eval $sql] } + proc sql3 {sql} { code3 [list db3 eval $sql] } + + # Initialize the database schema and contents. + # + do_test wal-10.$tn.1 { + execsql { + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + SELECT * FROM t1; + } + } {1 2} + + # Open a transaction and write to the database using [db]. Check that [db2] + # is still able to read the snapshot before the transaction was opened. + # + do_test wal-10.$tn.2 { + execsql { BEGIN; INSERT INTO t1 VALUES(3, 4); } + sql2 {SELECT * FROM t1} + } {1 2} + + # Have [db] commit the transaction. 
Check that [db2] is now seeing the + # new, updated snapshot. + # + do_test wal-10.$tn.3 { + execsql { COMMIT } + sql2 {SELECT * FROM t1} + } {1 2 3 4} + + # Have [db2] open a read transaction. Then write to the db via [db]. Check + # that [db2] is still seeing the original snapshot. Then read with [db3]. + # [db3] should see the newly committed data. + # + do_test wal-10.$tn.4 { + sql2 { BEGIN ; SELECT * FROM t1} + } {1 2 3 4} + do_test wal-10.$tn.5 { + execsql { INSERT INTO t1 VALUES(5, 6); } + sql2 {SELECT * FROM t1} + } {1 2 3 4} + do_test wal-10.$tn.6 { + sql3 {SELECT * FROM t1} + } {1 2 3 4 5 6} + do_test wal-10.$tn.7 { + sql2 COMMIT + } {} + + # Have [db2] open a write transaction. Then attempt to write to the + # database via [db]. This should fail (writer lock cannot be obtained). + # + # Then open a read-transaction with [db]. Commit the [db2] transaction + # to disk. Verify that [db] still cannot write to the database (because + # it is reading an old snapshot). + # + # Close the current [db] transaction. Open a new one. [db] can now write + # to the database (as it is not locked and [db] is reading the latest + # snapshot). + # + do_test wal-10.$tn.7 { + sql2 { BEGIN; INSERT INTO t1 VALUES(7, 8) ; } + catchsql { INSERT INTO t1 VALUES(9, 10) } + } {1 {database is locked}} + do_test wal-10.$tn.8 { + execsql { BEGIN ; SELECT * FROM t1 } + } {1 2 3 4 5 6} + do_test wal-10.$tn.9 { + sql2 COMMIT + catchsql { INSERT INTO t1 VALUES(9, 10) } + } {1 {database is locked}} + do_test wal-10.$tn.10 { + execsql { COMMIT; BEGIN; INSERT INTO t1 VALUES(9, 10); COMMIT; } + execsql { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10} + + # Open a read transaction with [db2]. Check that this prevents [db] from + # checkpointing the database. But not from writing to it. 
+ # + do_test wal-10.$tn.11 { + sql2 { BEGIN; SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10} + do_test wal-10.$tn.12 { + catchsql { PRAGMA wal_checkpoint } + } {1 {database is locked}} + do_test wal-10.$tn.13 { + execsql { INSERT INTO t1 VALUES(11, 12) } + sql2 {SELECT * FROM t1} + } {1 2 3 4 5 6 7 8 9 10} + + # Connection [db2] is holding a lock on a snapshot, preventing [db] from + # checkpointing the database. Add a busy-handler to [db]. If [db2] completes + # its transaction from within the busy-handler, [db] is able to complete + # the checkpoint operation. + # + proc busyhandler x { + if {$x==4} { sql2 COMMIT } + if {$x<5} { return 0 } + return 1 + } + db busy busyhandler + do_test wal-10.$tn.14 { + execsql { PRAGMA wal_checkpoint } + } {} + + # Similar to the test above. Except this time, a new read transaction is + # started (db3) while the checkpointer is waiting for an old one (db2) to + # finish. The checkpointer can finish, but any subsequent write operations + # must wait until after db3 has closed the read transaction, as db3 is a + # "region D" writer. 
+ # + db busy {} + do_test wal-10.$tn.15 { + sql2 { BEGIN; SELECT * FROM t1; } + } {1 2 3 4 5 6 7 8 9 10 11 12} + do_test wal-10.$tn.16 { + catchsql { PRAGMA wal_checkpoint } + } {1 {database is locked}} + proc busyhandler x { + if {$x==3} { sql3 { BEGIN; SELECT * FROM t1 } } + if {$x==4} { sql2 COMMIT } + if {$x<5} { return 0 } + return 1 + } + db busy busyhandler + do_test wal-10.$tn.17 { + execsql { PRAGMA wal_checkpoint } + } {} + do_test wal-10.$tn.18 { + sql3 { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10 11 12} + do_test wal-10.$tn.19 { + catchsql { INSERT INTO t1 VALUES(13, 14) } + } {1 {database is locked}} + do_test wal-10.$tn.20 { + execsql { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10 11 12} + do_test wal-10.$tn.21 { + sql3 COMMIT + } {} + do_test wal-10.$tn.22 { + execsql { INSERT INTO t1 VALUES(13, 14) } + execsql { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14} + + # Set [db3] up as a "region D" reader again. Then upgrade it to a writer + # and back down to a reader. Then, check that a checkpoint is not possible + # (as [db3] still has a snapshot locked). 
+ # + do_test wal-10.$tn.23 { + execsql { PRAGMA wal_checkpoint } + } {} + do_test wal-10.$tn.24 { + sql2 { BEGIN; SELECT * FROM t1; } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14} + do_test wal-10.$tn.25 { + execsql { PRAGMA wal_checkpoint } + } {} + do_test wal-10.$tn.26 { + catchsql { INSERT INTO t1 VALUES(15, 16) } + } {1 {database is locked}} + do_test wal-10.$tn.27 { + sql3 { INSERT INTO t1 VALUES(15, 16) } + } {} + do_test wal-10.$tn.28 { + code3 { + set ::STMT [sqlite3_prepare db3 "SELECT * FROM t1" -1 TAIL] + sqlite3_step $::STMT + } + sql3 COMMIT + execsql { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16} + db busy {} + do_test wal-10.$tn.29 { + execsql { INSERT INTO t1 VALUES(17, 18) } + catchsql { PRAGMA wal_checkpoint } + } {1 {database is locked}} + do_test wal-10.$tn.30 { + code3 { sqlite3_finalize $::STMT } + execsql { PRAGMA wal_checkpoint } + } {} + + # At one point, if a reader failed to upgrade to a writer because it + # was reading an old snapshot, the write-locks were not being released. + # Test that this bug has been fixed. + # + do_test wal-10.$tn.31 { + execsql { BEGIN ; SELECT * FROM t1 } + sql2 { INSERT INTO t1 VALUES(19, 20) } + catchsql { INSERT INTO t1 VALUES(21, 22) } + } {1 {database is locked}} + do_test wal-10.$tn.32 { + # This statement would fail when the bug was present. + sql2 { INSERT INTO t1 VALUES(21, 22) } + } {} + do_test wal-10.$tn.33 { + execsql { SELECT * FROM t1 ; COMMIT } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18} + do_test wal-10.$tn.34 { + execsql { SELECT * FROM t1 } + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22} + + # Test that if a checkpointer cannot obtain the required locks, it + # releases all locks before returning a busy error. 
+ # + do_test wal-10.$tn.35 { + execsql { + DELETE FROM t1; + INSERT INTO t1 VALUES('a', 'b'); + INSERT INTO t1 VALUES('c', 'd'); + } + sql2 { + BEGIN; + SELECT * FROM t1; + } + } {a b c d} + proc busyhandler x { return 1 } + db busy busyhandler + do_test wal-10.$tn.36 { + catchsql { PRAGMA wal_checkpoint } + } {1 {database is locked}} + do_test wal-10.$tn.36 { + sql3 { INSERT INTO t1 VALUES('e', 'f') } + sql2 { SELECT * FROM t1 } + } {a b c d} + do_test wal-10.$tn.37 { + sql2 COMMIT + execsql { PRAGMA wal_checkpoint } + } {} + + catch { db close } + catch { code2 { db2 close } } + catch { code3 { db3 close } } + catch { close $::code2_chan } + catch { close $::code3_chan } +} + +#------------------------------------------------------------------------- +# This block of tests, wal-11.*, test that nothing goes terribly wrong +# if frames must be written to the log file before a transaction is +# committed (in order to free up memory). +# +do_test wal-11.1 { + reopen_db + execsql { + PRAGMA cache_size = 10; + PRAGMA page_size = 1024; + CREATE TABLE t1(x PRIMARY KEY); + } + list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] +} {1 3} +do_test wal-11.2 { + execsql { PRAGMA wal_checkpoint } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 3 1024]] +do_test wal-11.3 { + execsql { INSERT INTO t1 VALUES( blob(900) ) } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 4 1024]] + +do_test wal-11.4 { + execsql { + BEGIN; + INSERT INTO t1 SELECT blob(900) FROM t1; -- 2 + INSERT INTO t1 SELECT blob(900) FROM t1; -- 4 + INSERT INTO t1 SELECT blob(900) FROM t1; -- 8 + INSERT INTO t1 SELECT blob(900) FROM t1; -- 16 + } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 32 1024]] +do_test wal-11.5 { + execsql { + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } +} {16 ok} +do_test wal-11.6 { + execsql COMMIT + list [expr [file size 
test.db]/1024] [file size test.db-wal] +} [list 3 [log_file_size 41 1024]] +do_test wal-11.7 { + execsql { + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } +} {16 ok} +do_test wal-11.8 { + execsql { PRAGMA wal_checkpoint } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 41 1024]] +do_test wal-11.9 { + db close + list [expr [file size test.db]/1024] [log_deleted test.db-wal] +} {37 1} +sqlite3_wal db test.db +do_test wal-11.10 { + execsql { + PRAGMA cache_size = 10; + BEGIN; + INSERT INTO t1 SELECT blob(900) FROM t1; -- 32 + SELECT count(*) FROM t1; + } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 37 1024]] +do_test wal-11.11 { + execsql { + SELECT count(*) FROM t1; + ROLLBACK; + SELECT count(*) FROM t1; + } +} {32 16} +do_test wal-11.12 { + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 37 1024]] +do_test wal-11.13 { + execsql { + INSERT INTO t1 VALUES( blob(900) ); + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } +} {17 ok} +do_test wal-11.14 { + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 37 [log_file_size 37 1024]] + + +#------------------------------------------------------------------------- +# This block of tests, wal-12.*, tests the fix for a problem that +# could occur if a log that is a prefix of an older log is written +# into a reused log file. 
+# +reopen_db +do_test wal-12.1 { + execsql { + PRAGMA page_size = 1024; + CREATE TABLE t1(x, y); + CREATE TABLE t2(x, y); + INSERT INTO t1 VALUES('A', 1); + } + list [expr [file size test.db]/1024] [file size test.db-wal] +} [list 1 [log_file_size 5 1024]] +do_test wal-12.2 { + db close + sqlite3 db test.db + execsql { + PRAGMA synchronous = normal; + UPDATE t1 SET y = 0 WHERE x = 'A'; + } + list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] +} {3 1} +do_test wal-12.3 { + execsql { INSERT INTO t2 VALUES('B', 1) } + list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] +} {3 2} +do_test wal-12.4 { + file copy -force test.db test2.db + file copy -force test.db-wal test2.db-wal + sqlite3_wal db2 test2.db + execsql { SELECT * FROM t2 } db2 +} {B 1} +db2 close +do_test wal-12.5 { + execsql { + PRAGMA wal_checkpoint; + UPDATE t2 SET y = 2 WHERE x = 'B'; + PRAGMA wal_checkpoint; + UPDATE t1 SET y = 1 WHERE x = 'A'; + PRAGMA wal_checkpoint; + UPDATE t1 SET y = 0 WHERE x = 'A'; + SELECT * FROM t2; + } +} {B 2} +do_test wal-12.6 { + file copy -force test.db test2.db + file copy -force test.db-wal test2.db-wal + sqlite3_wal db2 test2.db + execsql { SELECT * FROM t2 } db2 +} {B 2} +db2 close +db close + +#------------------------------------------------------------------------- +# Test large log summaries. 
+# +do_test wal-13.1.1 { + list [file exists test.db] [file exists test.db-wal] +} {1 0} +do_test wal-13.1.2 { + set fd [open test.db-wal w] + seek $fd [expr 200*1024*1024] + puts $fd "" + close $fd + sqlite3 db test.db + execsql { SELECT * FROM t2 } +} {B 2} +do_test wal-13.1.3 { + db close + file exists test.db-wal +} {0} +do_test wal-13.1.4 { + sqlite3 db test.db + execsql { SELECT count(*) FROM t2 } +} {1} +do_test wal-13.1.5 { + for {set i 0} {$i < 6} {incr i} { + execsql { INSERT INTO t2 SELECT randomblob(400), randomblob(400) FROM t2 } + } + execsql { SELECT count(*) FROM t2 } +} [expr int(pow(2, 6))] +do_test wal-13.1.6 { + file size test.db-wal +} [log_file_size 80 1024] + +foreach code [list { + set tn 2 + proc buddy {tcl} { uplevel #0 $tcl } +} { + set tn 3 + set ::buddy [launch_testfixture] + proc buddy {tcl} { testfixture $::buddy $tcl } +}] { + + eval $code + reopen_db + + do_test wal-13.$tn.0 { + buddy { sqlite3 db2 test.db } + execsql { + PRAGMA journal_mode = WAL; + CREATE TABLE t1(x); + INSERT INTO t1 SELECT randomblob(400); + } + execsql { SELECT count(*) FROM t1 } + } {1} + + for {set ii 1} {$ii<16} {incr ii} { + do_test wal-13.$tn.$ii.a { + buddy { db2 eval { INSERT INTO t1 SELECT randomblob(400) FROM t1 } } + buddy { db2 eval { SELECT count(*) FROM t1 } } + } [expr (1<<$ii)] + do_test wal-13.$tn.$ii.b { + db eval { SELECT count(*) FROM t1 } + } [expr (1<<$ii)] + do_test wal-13.$tn.$ii.c { + db eval { SELECT count(*) FROM t1 } + } [expr (1<<$ii)] + do_test wal-13.$tn.$ii.d { + db eval { PRAGMA integrity_check } + } {ok} + } + + catch { db2 close } + catch { close $::buddy } + db close +} + +#------------------------------------------------------------------------- +# Check a fun corruption case has been fixed. 
+# +# The problem was that after performing a checkpoint using a connection +# that had an out-of-date pager-cache, the next time the connection was +# used it did not realize the cache was out-of-date and proceeded to +# operate with an inconsistent cache. Leading to corruption. +# + +catch { db close } +catch { db2 close } +catch { db3 close } +file delete -force test.db test.db-wal +sqlite3 db test.db +sqlite3 db2 test.db + +do_test wal-14 { + execsql { + PRAGMA journal_mode = WAL; + CREATE TABLE t1(a PRIMARY KEY, b); + INSERT INTO t1 VALUES(randomblob(10), randomblob(100)); + INSERT INTO t1 SELECT randomblob(10), randomblob(100) FROM t1; + INSERT INTO t1 SELECT randomblob(10), randomblob(100) FROM t1; + INSERT INTO t1 SELECT randomblob(10), randomblob(100) FROM t1; + } + + db2 eval { + INSERT INTO t1 SELECT randomblob(10), randomblob(100); + INSERT INTO t1 SELECT randomblob(10), randomblob(100); + INSERT INTO t1 SELECT randomblob(10), randomblob(100); + INSERT INTO t1 SELECT randomblob(10), randomblob(100); + } + + # After executing the "PRAGMA wal_checkpoint", connection [db] was being + # left with an inconsistent cache. Running the CREATE INDEX statement + # in this state led to database corruption. + catchsql { + PRAGMA wal_checkpoint; + CREATE INDEX i1 on t1(b); + } + + db2 eval { PRAGMA integrity_check } +} {ok} + +catch { db close } +catch { db2 close } + +#------------------------------------------------------------------------- +# The following block of tests - wal-15.* - focus on testing the +# implementation of the sqlite3_wal_checkpoint() interface. 
+# +file delete -force test.db test.db-wal +sqlite3 db test.db +do_test wal-15.1 { + execsql { + PRAGMA page_size = 1024; + PRAGMA journal_mode = WAL; + } + execsql { + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + } +} {} + +# Test that an error is returned if the database name is not recognized +# +do_test wal-15.2.1 { + sqlite3_wal_checkpoint db aux +} {SQLITE_ERROR} +do_test wal-15.2.2 { + sqlite3_errcode db +} {SQLITE_ERROR} +do_test wal-15.2.3 { + sqlite3_errmsg db +} {unknown database: aux} + +# Test that an error is returned if an attempt is made to checkpoint +# if a transaction is open on the database. +# +do_test wal-15.3.1 { + execsql { + BEGIN; + INSERT INTO t1 VALUES(3, 4); + } + sqlite3_wal_checkpoint db main +} {SQLITE_LOCKED} +do_test wal-15.3.2 { + sqlite3_errcode db +} {SQLITE_LOCKED} +do_test wal-15.3.3 { + sqlite3_errmsg db +} {database table is locked} + +# Also test that an error is returned if the db cannot be checkpointed +# because of locks held by another connection. +# +sqlite3 db2 test.db +do_test wal-15.4.1 { + execsql { + BEGIN; + SELECT * FROM t1; + } db2 +} {1 2} +do_test wal-15.4.2 { + execsql { COMMIT } + sqlite3_wal_checkpoint db +} {SQLITE_BUSY} +do_test wal-15.4.3 { + sqlite3_errmsg db +} {database is locked} + +# After [db2] drops its lock, [db] may checkpoint the db. +# +do_test wal-15.4.4 { + execsql { COMMIT } db2 + sqlite3_wal_checkpoint db +} {SQLITE_OK} +do_test wal-15.4.5 { + sqlite3_errmsg db +} {not an error} +do_test wal-15.4.6 { + file size test.db +} [expr 1024*2] + +catch { db2 close } +catch { db close } +finish_test + + ADDED test/walbak.test Index: test/walbak.test ================================================================== --- /dev/null +++ test/walbak.test @@ -0,0 +1,186 @@ +# 2010 April 22 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. 
+# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +ifcapable !wal {finish_test ; return } + +proc log_file_size {nFrame pgsz} { + expr {12 + ($pgsz+16)*$nFrame} +} + +# Test organization: +# +# walbak-1.*: Simple tests. +# walbak-2.*: Test backups when the source db is modified mid-backup. +# + +# Make sure a simple backup from a WAL database works. +# +do_test walbak-1.0 { + execsql { + PRAGMA synchronous = NORMAL; + PRAGMA page_size = 1024; + PRAGMA auto_vacuum = 0; + PRAGMA journal_mode = wal; + BEGIN; + CREATE TABLE t1(a PRIMARY KEY, b); + INSERT INTO t1 VALUES('I', 'one'); + COMMIT; + } +} {wal} +do_test walbak-1.1 { + file delete -force bak.db bak.db-journal bak.db-wal + db backup bak.db + file size bak.db +} [expr 3*1024] +do_test walbak-1.2 { + sqlite3 db2 bak.db + execsql { + SELECT * FROM t1; + PRAGMA main.journal_mode; + } db2 +} {I one wal} +do_test walbak-1.3 { + execsql { PRAGMA integrity_check } db2 +} {ok} +db2 close + +# Try a VACUUM on a WAL database. 
+# +do_test walbak-1.4 { + execsql { + VACUUM; + PRAGMA main.journal_mode; + } +} {wal} +do_test walbak-1.5 { + list [file size test.db] [file size test.db-wal] +} [list 1024 [log_file_size 6 1024]] +do_test walbak-1.6 { + execsql { PRAGMA wal_checkpoint } + list [file size test.db] [file size test.db-wal] +} [list [expr 3*1024] [log_file_size 6 1024]] +do_test walbak-1.7 { + execsql { + CREATE TABLE t2(a, b); + INSERT INTO t2 SELECT * FROM t1; + DROP TABLE t1; + } + list [file size test.db] [file size test.db-wal] +} [list [expr 3*1024] [log_file_size 6 1024]] +do_test walbak-1.8 { + execsql { VACUUM } + list [file size test.db] [file size test.db-wal] +} [list [expr 3*1024] [log_file_size 8 1024]] +do_test walbak-1.9 { + execsql { PRAGMA wal_checkpoint } + list [file size test.db] [file size test.db-wal] +} [list [expr 2*1024] [log_file_size 8 1024]] + +#------------------------------------------------------------------------- +# Backups when the source db is modified mid-backup. +# +proc sig {{db db}} { + $db eval { + PRAGMA integrity_check; + SELECT md5sum(a, b) FROM t1; + } +} +db close +file delete test.db +sqlite3 db test.db +do_test walbak-2.1 { + execsql { PRAGMA journal_mode = WAL } + execsql { + CREATE TABLE t1(a PRIMARY KEY, b); + BEGIN; + INSERT INTO t1 VALUES(randomblob(500), randomblob(500)); + INSERT INTO t1 SELECT randomblob(500), randomblob(500) FROM t1; /* 2 */ + INSERT INTO t1 SELECT randomblob(500), randomblob(500) FROM t1; /* 4 */ + INSERT INTO t1 SELECT randomblob(500), randomblob(500) FROM t1; /* 8 */ + INSERT INTO t1 SELECT randomblob(500), randomblob(500) FROM t1; /* 16 */ + INSERT INTO t1 SELECT randomblob(500), randomblob(500) FROM t1; /* 32 */ + INSERT INTO t1 SELECT randomblob(500), randomblob(500) FROM t1; /* 64 */ + COMMIT; + } +} {} +do_test walbak-2.2 { + db backup abc.db + sqlite3 db2 abc.db + string compare [sig db] [sig db2] +} {0} + +do_test walbak-2.3 { + sqlite3_backup B db2 main db main + B step 50 + execsql { UPDATE t1 SET 
b = randomblob(500) } + list [B step 1000] [B finish] +} {SQLITE_DONE SQLITE_OK} +do_test walbak-2.4 { + string compare [sig db] [sig db2] +} {0} + +do_test walbak-2.5 { + db close + sqlite3 db test.db + execsql { PRAGMA cache_size = 10 } + sqlite3_backup B db2 main db main + B step 50 + execsql { + BEGIN; + UPDATE t1 SET b = randomblob(500); + } + expr [file size test.db-wal] > 10*1024 +} {1} +do_test walbak-2.6 { + B step 1000 +} {SQLITE_BUSY} +do_test walbak-2.7 { + execsql COMMIT + list [B step 1000] [B finish] +} {SQLITE_DONE SQLITE_OK} +do_test walbak-2.8 { + string compare [sig db] [sig db2] +} {0} + +do_test walbak-2.9 { + db close + sqlite3 db test.db + execsql { PRAGMA cache_size = 10 } + sqlite3_backup B db2 main db main + B step 50 + execsql { + BEGIN; + UPDATE t1 SET b = randomblob(500); + } + expr [file size test.db-wal] > 10*1024 +} {1} +do_test walbak-2.10 { + B step 1000 +} {SQLITE_BUSY} +do_test walbak-2.11 { + execsql ROLLBACK +set sigB [sig db] + list [B step 1000] [B finish] +} {SQLITE_DONE SQLITE_OK} +do_test walbak-2.12 { + string compare [sig db] [sig db2] +} {0} +db2 close + +finish_test + ADDED test/walcrash.test Index: test/walcrash.test ================================================================== --- /dev/null +++ test/walcrash.test @@ -0,0 +1,280 @@ +# 2010 February 8 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library when +# recovering a database following a simulated system failure in +# "PRAGMA journal_mode=WAL" mode. +# + +# +# These are 'warm-body' tests of database recovery used while developing +# the WAL code. 
They serve to prove that a few really simple cases work: +# +# walcrash-1.*: Recover a database. +# walcrash-2.*: Recover a database where the failed transaction spanned more +# than one page. +# walcrash-3.*: Recover multiple databases where the failed transaction +# was a multi-file transaction. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +ifcapable !wal {finish_test ; return } + +db close + +set seed 0 +set REPEATS 100 + +# walcrash-1.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + do_test walcrash-1.$i.1 { + crashsql -delay 4 -file test.db-wal -seed [incr seed] { + PRAGMA journal_mode = WAL; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 1); + INSERT INTO t1 VALUES(2, 3); + INSERT INTO t1 VALUES(3, 6); + } + } {1 {child process exited abnormally}} + do_test walcrash-1.$i.2 { + sqlite3 db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-1.$i.3 + db close + + do_test walcrash-1.$i.4 { + crashsql -delay 2 -file test.db-wal -seed [incr seed] { + INSERT INTO t1 VALUES(4, (SELECT sum(a) FROM t1) + 4); + INSERT INTO t1 VALUES(5, (SELECT sum(a) FROM t1) + 5); + } + } {1 {child process exited abnormally}} + do_test walcrash-1.$i.5 { + sqlite3 db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-1.$i.6 + do_test walcrash-1.$i.7 { + execsql { PRAGMA main.journal_mode } + } {wal} + db close +} + +# walcrash-2.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + do_test walcrash-2.$i.1 { + crashsql -delay 4 -file test.db-wal -seed [incr seed] { + PRAGMA journal_mode = WAL; + CREATE TABLE t1(a PRIMARY KEY, b); + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(3, 4); + INSERT INTO t1 VALUES(5, 9); + } + } {1 {child process exited abnormally}} + do_test walcrash-2.$i.2 { + sqlite3 db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-2.$i.3 + db close + + 
do_test walcrash-2.$i.4 { + crashsql -delay 2 -file test.db-wal -seed [incr seed] { + INSERT INTO t1 VALUES(6, (SELECT sum(a) FROM t1) + 6); + INSERT INTO t1 VALUES(7, (SELECT sum(a) FROM t1) + 7); + } + } {1 {child process exited abnormally}} + do_test walcrash-2.$i.5 { + sqlite3 db test.db + execsql { SELECT sum(a)==max(b) FROM t1 } + } {1} + integrity_check walcrash-2.$i.6 + do_test walcrash-2.$i.7 { + execsql { PRAGMA main.journal_mode } + } {wal} + db close +} + +# walcrash-3.* +# +# for {set i 1} {$i < $REPEATS} {incr i} { +# file delete -force test.db test.db-wal +# file delete -force test2.db test2.db-wal +# +# do_test walcrash-3.$i.1 { +# crashsql -delay 2 -file test2.db-wal -seed [incr seed] { +# PRAGMA journal_mode = WAL; +# ATTACH 'test2.db' AS aux; +# CREATE TABLE t1(a PRIMARY KEY, b); +# CREATE TABLE aux.t2(a PRIMARY KEY, b); +# BEGIN; +# INSERT INTO t1 VALUES(1, 2); +# INSERT INTO t2 VALUES(1, 2); +# COMMIT; +# } +# } {1 {child process exited abnormally}} +# +# do_test walcrash-3.$i.2 { +# sqlite3_wal db test.db +# execsql { +# ATTACH 'test2.db' AS aux; +# SELECT * FROM t1 EXCEPT SELECT * FROM t2; +# } +# } {} +# do_test walcrash-3.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} +# do_test walcrash-3.$i.4 { execsql { PRAGMA aux.integrity_check } } {ok} +# +# db close +# } + +# walcrash-4.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + file delete -force test2.db test2.db-wal + + do_test walcrash-4.$i.1 { + crashsql -delay 3 -file test.db-wal -seed [incr seed] -blocksize 4096 { + PRAGMA journal_mode = WAL; + PRAGMA page_size = 1024; + CREATE TABLE t1(a PRIMARY KEY, b); + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(3, 4); + } + } {1 {child process exited abnormally}} + + do_test walcrash-4.$i.2 { + sqlite3 db test.db + execsql { + SELECT * FROM t1 WHERE a = 1; + } + } {1 2} + do_test walcrash-4.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} + do_test walcrash-4.$i.4 { execsql { PRAGMA 
main.journal_mode } } {wal} + + db close +} + +# walcrash-5.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + file delete -force test2.db test2.db-wal + + do_test walcrash-5.$i.1 { + crashsql -delay 11 -file test.db-wal -seed [incr seed] -blocksize 4096 { + PRAGMA journal_mode = WAL; + PRAGMA page_size = 1024; + BEGIN; + CREATE TABLE t1(x PRIMARY KEY); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4 */ + COMMIT; + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 8 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 12 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 16 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 20 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 24 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 28 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 32 */ + + PRAGMA wal_checkpoint; + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + } + } {1 {child process exited abnormally}} + + do_test walcrash-5.$i.2 { + sqlite3 db test.db + execsql { SELECT count(*)==33 OR count(*)==34 FROM t1 WHERE x != 1 } + } {1} + do_test walcrash-5.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} + do_test walcrash-5.$i.4 { execsql { PRAGMA main.journal_mode } } {wal} + + db close +} + +# walcrash-6.* +# +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + file delete -force test2.db test2.db-wal + + do_test walcrash-6.$i.1 { + crashsql -delay 12 -file test.db-wal -seed [incr seed] -blocksize 512 { + PRAGMA journal_mode = WAL; + PRAGMA page_size = 1024; + BEGIN; + CREATE TABLE t1(x PRIMARY KEY); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4 */ + 
COMMIT; + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 8 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 12 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 16 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 20 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 24 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 28 */ + INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4; /* 32 */ + + PRAGMA wal_checkpoint; + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + INSERT INTO t1 VALUES(randomblob(900)); + } + } {1 {child process exited abnormally}} + + do_test walcrash-6.$i.2 { + sqlite3 db test.db + execsql { SELECT count(*)==34 OR count(*)==35 FROM t1 WHERE x != 1 } + } {1} + do_test walcrash-6.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} + do_test walcrash-6.$i.4 { execsql { PRAGMA main.journal_mode } } {wal} + + db close +} + +for {set i 1} {$i < $REPEATS} {incr i} { + file delete -force test.db test.db-wal + + do_test walcrash-7.$i.1 { + crashsql -delay 3 -file test.db -seed [incr seed] -blocksize 512 { + PRAGMA journal_mode = wal; + BEGIN; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + COMMIT; + PRAGMA wal_checkpoint; + CREATE INDEX i1 ON t1(a); + PRAGMA wal_checkpoint; + } + } {1 {child process exited abnormally}} + + do_test walcrash-7.$i.2 { + sqlite3 db test.db + execsql { SELECT b FROM t1 WHERE a = 1 } + } {2} + do_test walcrash-7.$i.3 { execsql { PRAGMA main.integrity_check } } {ok} + do_test walcrash-7.$i.4 { execsql { PRAGMA main.journal_mode } } {wal} + + db close +} + +finish_test + ADDED test/walhook.test Index: test/walhook.test ================================================================== --- /dev/null +++ test/walhook.test @@ -0,0 +1,111 @@ +# 2010 April 19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. 
+# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. +# +# More specifically, this file contains regression tests for the +# sqlite3_wal_hook() mechanism, including the sqlite3_wal_autocheckpoint() +# and "PRAGMA wal_autocheckpoint" convenience interfaces. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +ifcapable !wal {finish_test ; return } + +proc log_file_size {nFrame pgsz} { + expr {12 + ($pgsz+16)*$nFrame} +} + +set ::wal_hook [list] +proc wal_hook {zDb nEntry} { + lappend ::wal_hook $zDb $nEntry + return 0 +} +db wal_hook wal_hook + +do_test walhook-1.1 { + execsql { + PRAGMA page_size = 1024; + PRAGMA journal_mode = wal; + PRAGMA synchronous = normal; + CREATE TABLE t1(i PRIMARY KEY, j); + } + set ::wal_hook +} {main 3} + +do_test walhook-1.2 { + set ::wal_hook [list] + execsql { INSERT INTO t1 VALUES(1, 'one') } + set ::wal_hook +} {main 5} +do_test walhook-1.3 { + proc wal_hook {args} { return 1 } + execsql { INSERT INTO t1 VALUES(2, 'two') } + file size test.db +} [expr 3*1024] +do_test walhook-1.4 { + proc wal_hook {zDb nEntry} { + execsql { PRAGMA wal_checkpoint } + return 0 + } + execsql { CREATE TABLE t2(a, b) } + file size test.db +} [expr 4*1024] + +do_test walhook-1.5 { + sqlite3 db2 test.db + proc wal_hook {zDb nEntry} { + execsql { PRAGMA wal_checkpoint } db2 + return 0 + } + execsql { CREATE TABLE t3(a PRIMARY KEY, b) } + file size test.db +} [expr 6*1024] + +db2 close +db close +sqlite3 db test.db +do_test walhook-2.1 { + execsql { PRAGMA synchronous = NORMAL } + execsql { PRAGMA wal_autocheckpoint } +} {1000} +do_test walhook-2.2 { + execsql { PRAGMA wal_autocheckpoint = 10} +} {10} +do_test walhook-2.3 { + execsql { 
PRAGMA wal_autocheckpoint } +} {10} + +# +# The database connection is configured with "PRAGMA wal_autocheckpoint = 10". +# Check that transactions are written to the log file until it contains at +# least 10 frames, then the database is checkpointed. Subsequent transactions +# are written into the start of the log file. +# +foreach {tn sql dbpages logpages} { + 4 "CREATE TABLE t4(x PRIMARY KEY, y)" 6 3 + 5 "INSERT INTO t4 VALUES(1, 'one')" 6 5 + 6 "INSERT INTO t4 VALUES(2, 'two')" 6 7 + 7 "INSERT INTO t4 VALUES(3, 'three')" 6 9 + 8 "INSERT INTO t4 VALUES(4, 'four')" 8 11 + 9 "INSERT INTO t4 VALUES(5, 'five')" 8 11 +} { + do_test walhook-2.$tn { + execsql $sql + list [file size test.db] [file size test.db-wal] + } [list [expr $dbpages*1024] [log_file_size $logpages 1024]] +} + +catch { db2 close } +catch { db close } +finish_test ADDED test/walmode.test Index: test/walmode.test ================================================================== --- /dev/null +++ test/walmode.test @@ -0,0 +1,261 @@ +# 2010 April 19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# If the library was compiled without WAL support, check that the +# "PRAGMA journal_mode=WAL" treats "WAL" as an unrecognized mode. 
+# +ifcapable !wal { + + do_test walmode-0.1 { + execsql { PRAGMA journal_mode = wal } + } {delete} + do_test walmode-0.2 { + execsql { PRAGMA main.journal_mode = wal } + } {delete} + do_test walmode-0.3 { + execsql { PRAGMA main.journal_mode } + } {delete} + + finish_test + return +} + +do_test walmode-1.1 { + set sqlite_sync_count 0 + execsql { PRAGMA page_size = 1024 } + execsql { PRAGMA journal_mode = wal } +} {wal} +do_test walmode-1.2 { + file size test.db +} {1024} +do_test walmode-1.3 { + set sqlite_sync_count +} {4} +do_test walmode-1.4 { + file exists test.db-wal +} {0} +do_test walmode-1.5 { + execsql { CREATE TABLE t1(a, b) } + file size test.db +} {1024} +do_test walmode-1.6 { + file exists test.db-wal +} {1} +do_test walmode-1.7 { + db close + file exists test.db-wal +} {0} + +# There is now a database file with the read and write versions set to 2 +# in the file system. This file should default to WAL mode. +# +do_test walmode-2.1 { + sqlite3 db test.db + file exists test.db-wal +} {0} +do_test walmode-2.2 { + execsql { SELECT * FROM sqlite_master } + file exists test.db-wal +} {1} +do_test walmode-2.3 { + db close + file exists test.db-wal +} {0} + +# If the first statement executed is "PRAGMA journal_mode = wal", and +# the file is already configured for WAL (read and write versions set +# to 2), then there should be no need to write the database. The +# statement should cause the client to connect to the log file. +# +set sqlite_sync_count 0 +do_test walmode-3.1 { + sqlite3 db test.db + execsql { PRAGMA journal_mode = wal } +} {wal} +do_test walmode-3.2 { + list $sqlite_sync_count [file exists test.db-wal] [file size test.db-wal] +} {0 1 0} + +# Test that changing back to journal_mode=persist works. 
+# +do_test walmode-4.1 { + execsql { INSERT INTO t1 VALUES(1, 2) } + execsql { PRAGMA journal_mode = persist } +} {persist} +do_test walmode-4.2 { + list [file exists test.db-journal] [file exists test.db-wal] +} {1 0} +do_test walmode-4.3 { + execsql { SELECT * FROM t1 } +} {1 2} +do_test walmode-4.4 { + db close + sqlite3 db test.db + execsql { SELECT * FROM t1 } +} {1 2} +do_test walmode-4.5 { + list [file exists test.db-journal] [file exists test.db-wal] +} {1 0} + +# Test that nothing goes wrong if a connection is prevented from changing +# from WAL to rollback mode because a second connection has the database +# open. Or from rollback to WAL. +# +do_test walmode-4.6 { + sqlite3 db2 test.db + execsql { PRAGMA main.journal_mode } db2 +} {delete} +do_test walmode-4.7 { + execsql { PRAGMA main.journal_mode = wal } db +} {wal} +do_test walmode-4.8 { + execsql { SELECT * FROM t1 } db2 +} {1 2} +do_test walmode-4.9 { + catchsql { PRAGMA journal_mode = delete } db +} {1 {database is locked}} +do_test walmode-4.10 { + execsql { PRAGMA main.journal_mode } db +} {wal} +do_test walmode-4.11 { + db2 close + execsql { PRAGMA journal_mode = delete } db +} {delete} +do_test walmode-4.12 { + execsql { PRAGMA main.journal_mode } db +} {delete} +do_test walmode-4.13 { + list [file exists test.db-journal] [file exists test.db-wal] +} {0 0} +do_test walmode-4.14 { + sqlite3 db2 test.db + execsql { + BEGIN; + SELECT * FROM t1; + } db2 +} {1 2} +do_test walmode-4.15 { + execsql { PRAGMA main.journal_mode } db +} {delete} +do_test walmode-4.16 { + catchsql { PRAGMA main.journal_mode = wal } db +} {1 {database is locked}} +do_test walmode-4.17 { + execsql { PRAGMA main.journal_mode } db +} {delete} +catch { db close } +catch { db2 close } + +# Test that it is not possible to change a temporary or in-memory database +# to WAL mode. WAL mode is for persistent file-backed databases only. +# +# walmode-5.1.*: Try to set journal_mode=WAL on [sqlite3 db :memory:] database. 
+# walmode-5.2.*: Try to set journal_mode=WAL on [sqlite3 db ""] database. +# walmode-5.3.*: Try to set temp.journal_mode=WAL. +# +do_test walmode-5.1.1 { + sqlite3 db :memory: + execsql { PRAGMA main.journal_mode } +} {memory} +breakpoint +do_test walmode-5.1.2 { + execsql { PRAGMA main.journal_mode = wal } +} {memory} +do_test walmode-5.1.3 { + execsql { + BEGIN; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + COMMIT; + SELECT * FROM t1; + PRAGMA main.journal_mode; + } +} {1 2 memory} +do_test walmode-5.1.4 { + execsql { PRAGMA main.journal_mode = wal } +} {memory} +do_test walmode-5.1.5 { + execsql { + INSERT INTO t1 VALUES(3, 4); + SELECT * FROM t1; + PRAGMA main.journal_mode; + } +} {1 2 3 4 memory} + +do_test walmode-5.2.1 { + sqlite3 db "" + execsql { PRAGMA main.journal_mode } +} {delete} +do_test walmode-5.2.2 { + execsql { PRAGMA main.journal_mode = wal } +} {delete} +do_test walmode-5.2.3 { + execsql { + BEGIN; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + COMMIT; + SELECT * FROM t1; + PRAGMA main.journal_mode; + } +} {1 2 delete} +do_test walmode-5.2.4 { + execsql { PRAGMA main.journal_mode = wal } +} {delete} +do_test walmode-5.2.5 { + execsql { + INSERT INTO t1 VALUES(3, 4); + SELECT * FROM t1; + PRAGMA main.journal_mode; + } +} {1 2 3 4 delete} + +if {$TEMP_STORE>=2} { + set tempJrnlMode memory +} else { + set tempJrnlMode delete +} +do_test walmode-5.3.1 { + sqlite3 db test.db + execsql { PRAGMA temp.journal_mode } +} $tempJrnlMode +do_test walmode-5.3.2 { + execsql { PRAGMA temp.journal_mode = wal } +} $tempJrnlMode +do_test walmode-5.3.3 { + execsql { + BEGIN; + CREATE TEMP TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + COMMIT; + SELECT * FROM t1; + PRAGMA temp.journal_mode; + } +} [list 1 2 $tempJrnlMode] +do_test walmode-5.3.4 { + execsql { PRAGMA temp.journal_mode = wal } +} $tempJrnlMode +do_test walmode-5.3.5 { + execsql { + INSERT INTO t1 VALUES(3, 4); + SELECT * FROM t1; + PRAGMA temp.journal_mode; + } +} [list 1 2 3 
4 $tempJrnlMode] + +finish_test ADDED test/walslow.test Index: test/walslow.test ================================================================== --- /dev/null +++ test/walslow.test @@ -0,0 +1,73 @@ +# 2010 March 17 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. The tests in this file use +# brute force methods, so may take a while to run. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +ifcapable !wal {finish_test ; return } + +proc reopen_db {} { + catch { db close } + file delete -force test.db test.db-wal + sqlite3 db test.db + execsql { PRAGMA journal_mode = wal } +} + +db close +save_prng_state +for {set seed 1} {$seed<10} {incr seed} { + expr srand($seed) + restore_prng_state + reopen_db + do_test walslow-1.seed=$seed.0 { + execsql { CREATE TABLE t1(a, b) } + execsql { CREATE INDEX i1 ON t1(a) } + execsql { CREATE INDEX i2 ON t1(b) } + } {} + + for {set iTest 1} {$iTest < 100} {incr iTest} { + + do_test walslow-1.seed=$seed.$iTest.1 { + set w [expr int(rand()*2000)] + set x [expr int(rand()*2000)] + execsql { INSERT INTO t1 VALUES(randomblob($w), randomblob($x)) } + execsql { PRAGMA integrity_check } + } {ok} + + do_test walslow-1.seed=$seed.$iTest.2 { + execsql "PRAGMA wal_checkpoint;" + execsql { PRAGMA integrity_check } + } {ok} + + do_test walslow-1.seed=$seed.$iTest.3 { + file delete -force testX.db testX.db-wal + file copy test.db testX.db + file copy test.db-wal testX.db-wal + + sqlite3 db2 testX.db + execsql { PRAGMA journal_mode = WAL } db2 + execsql { PRAGMA integrity_check 
} db2 + } {ok} + + do_test walslow-1.seed=$seed.$iTest.4 { + execsql { SELECT count(*) FROM t1 WHERE a!=b } db2 + } [execsql { SELECT count(*) FROM t1 WHERE a!=b }] + db2 close + } +} + + +finish_test ADDED test/walthread.test Index: test/walthread.test ================================================================== --- /dev/null +++ test/walthread.test @@ -0,0 +1,512 @@ +# 2010 April 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode with multiple threads. +# + +set testdir [file dirname $argv0] + +source $testdir/tester.tcl +source $testdir/lock_common.tcl +if {[run_thread_tests]==0} { finish_test ; return } +ifcapable !wal { finish_test ; return } + +set sqlite_walsummary_mmap_incr 64 + +# How long, in seconds, to run each test for. If a test is set to run for +# 0 seconds, it is omitted entirely. +# +set seconds(walthread-1) 20 +set seconds(walthread-2) 20 +set seconds(walthread-3) 20 +set seconds(walthread-4) 20 +set seconds(walthread-5) 1 + +# The parameter is the name of a variable in the callers context. The +# variable may or may not exist when this command is invoked. +# +# If the variable does exist, its value is returned. Otherwise, this +# command uses [vwait] to wait until it is set, then returns the value. +# In other words, this is a version of the [set VARNAME] command that +# blocks until a variable exists. 
+# +proc wait_for_var {varname} { + if {0==[uplevel [list info exists $varname]]} { + uplevel [list vwait $varname] + } + uplevel [list set $varname] +} + +# The argument is the name of a list variable in the callers context. The +# first element of the list is removed and returned. For example: +# +# set L {a b c} +# set x [lshift L] +# assert { $x == "a" && $L == "b c" } +# +proc lshift {lvar} { + upvar $lvar L + set ret [lindex $L 0] + set L [lrange $L 1 end] + return $ret +} + + +#------------------------------------------------------------------------- +# do_thread_test TESTNAME OPTIONS... +# +# where OPTIONS are: +# +# -seconds SECONDS How many seconds to run the test for +# -init SCRIPT Script to run before test. +# -thread NAME COUNT SCRIPT Scripts to run in threads (or processes). +# -processes BOOLEAN True to use processes instead of threads. +# -check SCRIPT Script to run after test. +# +proc do_thread_test {args} { + + set A $args + + set P(testname) [lshift A] + set P(seconds) 5 + set P(init) "" + set P(threads) [list] + set P(processes) 0 + set P(check) { + set ic [db eval "PRAGMA integrity_check"] + if {$ic != "ok"} { error $ic } + } + + unset -nocomplain ::done + + while {[llength $A]>0} { + set a [lshift A] + switch -glob -- $a { + -seconds { + set P(seconds) [lshift A] + } + + -init { + set P(init) [lshift A] + } + + -processes { + set P(processes) [lshift A] + } + + -check { + set P(check) [lshift A] + } + + -thread { + set name [lshift A] + set count [lshift A] + set prg [lshift A] + lappend P(threads) [list $name $count $prg] + } + + default { + error "Unknown option: $a" + } + } + } + + if {$P(seconds) == 0} { + puts "Skipping $P(testname)" + return + } + + puts "Running $P(testname) for $P(seconds) seconds..." 
+ + catch { db close } + file delete -force test.db test.db-journal test.db-wal + + sqlite3 db test.db + eval $P(init) + catch { db close } + + foreach T $P(threads) { + set name [lindex $T 0] + set count [lindex $T 1] + set prg [lindex $T 2] + + for {set i 1} {$i <= $count} {incr i} { + set vars " + set E(pid) $i + set E(nthread) $count + set E(seconds) $P(seconds) + " + set program [string map [list %TEST% $prg %VARS% $vars] { + + %VARS% + + proc usleep {ms} { + set ::usleep 0 + after $ms {set ::usleep 1} + vwait ::usleep + } + + proc integrity_check {{db db}} { + set ic [$db eval {PRAGMA integrity_check}] + if {$ic != "ok"} {error $ic} + } + + proc busyhandler {n} { usleep 10 ; return 0 } + + sqlite3 db test.db + db busy busyhandler + db eval { SELECT randomblob($E(pid)*5) } + + set ::finished 0 + after [expr $E(seconds) * 1000] {set ::finished 1} + proc tt_continue {} { update ; expr ($::finished==0) } + + set rc [catch { %TEST% } msg] + + catch { db close } + list $rc $msg + }] + + if {$P(processes)==0} { + sqlthread spawn ::done($name,$i) $program + } else { + testfixture_nb ::done($name,$i) $program + } + } + } + + set report " Results:" + foreach T $P(threads) { + set name [lindex $T 0] + set count [lindex $T 1] + set prg [lindex $T 2] + + set reslist [list] + for {set i 1} {$i <= $count} {incr i} { + set res [wait_for_var ::done($name,$i)] + lappend reslist [lindex $res 1] + do_test $P(testname).$name.$i [list lindex $res 0] 0 + } + + append report " $name $reslist" + } + puts $report + + sqlite3 db test.db + set res "" + if {[catch $P(check) msg]} { set res $msg } + do_test $P(testname).check [list set {} $res] "" +} + +# A wrapper around [do_thread_test] which runs the specified test twice. +# Once using processes, once using threads. This command takes the same +# arguments as [do_thread_test], except specifying the -processes switch +# is illegal. 
+# +proc do_thread_test2 {args} { + set name [lindex $args 0] + if {[lsearch $args -processes]>=0} { error "bad option: -processes"} + uplevel [lreplace $args 0 0 do_thread_test "$name-threads" -processes 0] + uplevel [lreplace $args 0 0 do_thread_test "$name-processes" -processes 1] +} + + +#-------------------------------------------------------------------------- +# Start 10 threads. Each thread performs both read and write +# transactions. Each read transaction consists of: +# +# 1) Reading the md5sum of all but the last table row, +# 2) Running integrity check. +# 3) Reading the value stored in the last table row, +# 4) Check that the values read in steps 1 and 3 are the same, and that +# the md5sum of all but the last table row has not changed. +# +# Each write transaction consists of: +# +# 1) Modifying the contents of t1 (inserting, updating, deleting rows). +# 2) Appending a new row to the table containing the md5sum() of all +# rows in the table. +# +# Each of the N threads runs N read transactions followed by a single write +# transaction in a loop as fast as possible. +# +# There is also a single checkpointer thread. It runs the following loop: +# +# 1) Execute "PRAGMA wal_checkpoint" +# 2) Sleep for 500 ms. 
+# +do_thread_test2 walthread-1 -seconds $seconds(walthread-1) -init { + execsql { + PRAGMA journal_mode = WAL; + CREATE TABLE t1(x PRIMARY KEY); + PRAGMA lock_status; + INSERT INTO t1 VALUES(randomblob(100)); + INSERT INTO t1 VALUES(randomblob(100)); + INSERT INTO t1 SELECT md5sum(x) FROM t1; + } +} -thread main 10 { + + proc read_transaction {} { + set results [db eval { + BEGIN; + PRAGMA integrity_check; + SELECT md5sum(x) FROM t1 WHERE rowid != (SELECT max(rowid) FROM t1); + SELECT x FROM t1 WHERE rowid = (SELECT max(rowid) FROM t1); + SELECT md5sum(x) FROM t1 WHERE rowid != (SELECT max(rowid) FROM t1); + COMMIT; + }] + + if {[llength $results]!=4 + || [lindex $results 0] != "ok" + || [lindex $results 1] != [lindex $results 2] + || [lindex $results 2] != [lindex $results 3] + } { + error "Failed read transaction: $results" + } + } + + proc write_transaction {} { + db eval { + BEGIN; + INSERT INTO t1 VALUES(randomblob(100)); + INSERT INTO t1 VALUES(randomblob(100)); + INSERT INTO t1 SELECT md5sum(x) FROM t1; + COMMIT; + } + } + + set nRun 0 + while {[tt_continue]} { + read_transaction + write_transaction + incr nRun + } + set nRun + +} -thread ckpt 1 { + set nRun 0 + while {[tt_continue]} { + db eval "PRAGMA wal_checkpoint" + usleep 500 + incr nRun + } + set nRun +} + +#-------------------------------------------------------------------------- +# This test has clients run the following procedure as fast as possible +# in a loop: +# +# 1. Open a database handle. +# 2. Execute a read-only transaction on the db. +# 3. Do "PRAGMA journal_mode = XXX", where XXX is one of WAL or DELETE. +# Ignore any SQLITE_BUSY error. +# 4. Execute a write transaction to insert a row into the db. +# 5. Run "PRAGMA integrity_check" +# +# At present, there are 4 clients in total. 2 do "journal_mode = WAL", and +# two do "journal_mode = DELETE". 
+#
+# Each client returns a string of the form "W w, R r", where W is the
+# number of write-transactions performed using a WAL journal, and R is
+# the number of write-transactions performed using a rollback journal.
+# For example, "192 w, 185 r".
+#
+do_thread_test2 walthread-2 -seconds $seconds(walthread-2) -init {
+  execsql { CREATE TABLE t1(x INTEGER PRIMARY KEY, y UNIQUE) }
+} -thread RB 2 {
+
+  # Every iteration opens its own connection, so close the existing one.
+  db close
+  set nRun 0
+  set nDel 0
+  while {[tt_continue]} {
+    sqlite3 db test.db
+    db busy busyhandler
+    db eval { SELECT * FROM sqlite_master }
+    # Errors from the mode switch are deliberately ignored.
+    catch { db eval { PRAGMA journal_mode = DELETE } }
+    db eval {
+      BEGIN;
+      INSERT INTO t1 VALUES(NULL, randomblob(100+$E(pid)));
+    }
+    incr nRun 1
+    # nDel counts the iterations in which a rollback journal was present.
+    incr nDel [file exists test.db-journal]
+    # While the write transaction is open, exactly one of the rollback
+    # journal and the wal file must exist.
+    if {[file exists test.db-journal] + [file exists test.db-wal] != 1} {
+      error "File-system looks bad..."
+    }
+    db eval COMMIT
+
+    integrity_check
+    db close
+  }
+  # Script result: "<wal writes> w, <rollback writes> r".
+  set {} "[expr {$nRun-$nDel}] w, $nDel r"
+
+} -thread WAL 2 {
+  # Every iteration opens its own connection, so close the existing one.
+  db close
+  set nRun 0
+  set nDel 0
+  while {[tt_continue]} {
+    sqlite3 db test.db
+    db busy busyhandler
+    db eval { SELECT * FROM sqlite_master }
+    # Errors from the mode switch are deliberately ignored.
+    catch { db eval { PRAGMA journal_mode = WAL } }
+    db eval {
+      BEGIN;
+      INSERT INTO t1 VALUES(NULL, randomblob(110+$E(pid)));
+    }
+    incr nRun 1
+    # nDel counts the iterations in which a rollback journal was present.
+    incr nDel [file exists test.db-journal]
+    # While the write transaction is open, exactly one of the rollback
+    # journal and the wal file must exist.
+    if {[file exists test.db-journal] + [file exists test.db-wal] != 1} {
+      error "File-system looks bad..."
+    }
+    db eval COMMIT
+
+    integrity_check
+    db close
+  }
+  # Script result: "<wal writes> w, <rollback writes> r".
+  set {} "[expr {$nRun-$nDel}] w, $nDel r"
+}
+
+# walthread-3: ten threads append rows to t1. Each thread starts with
+# nextwrite set to $E(pid), polls until max(cnt) equals nextwrite-1, then
+# inserts row nextwrite together with sum(cnt) and sum(sum1) computed over
+# the rows already present, and advances nextwrite by $E(nthread). The
+# -check script recomputes the same running sums and raises an error if
+# any row disagrees.
+#
+do_thread_test walthread-3 -seconds $seconds(walthread-3) -init {
+  execsql {
+    PRAGMA journal_mode = WAL;
+    CREATE TABLE t1(cnt PRIMARY KEY, sum1, sum2);
+    CREATE INDEX i1 ON t1(sum1);
+    CREATE INDEX i2 ON t1(sum2);
+    INSERT INTO t1 VALUES(0, 0, 0);
+  }
+} -thread t 10 {
+
+  set nextwrite $E(pid)
+
+  # Returns 1 once more than 10 entries are reported, otherwise 0.
+  # NOTE(review): presumably a non-zero return asks for a checkpoint -
+  # confirm against the "db wal_hook" implementation.
+  proc wal_hook {zDb nEntry} {
+    if {$nEntry>10} { return 1 }
+    return 0
+  }
+  db wal_hook wal_hook
+
+  while {[tt_continue]} {
+    # Poll until the row just before this thread's next row is present,
+    # or until the test is told to stop.
+    set max 0
+    while { $max != ($nextwrite-1) && [tt_continue] } {
+      set max [db eval { SELECT max(cnt) FROM t1 }]
+    }
+
+    if {[tt_continue]} {
+      set sum1 [db eval { SELECT sum(cnt) FROM t1 }]
+      set sum2 [db eval { SELECT sum(sum1) FROM t1 }]
+      db eval { INSERT INTO t1 VALUES($nextwrite, $sum1, $sum2) }
+      incr nextwrite $E(nthread)
+      integrity_check
+    }
+  }
+
+  set {} ok
+} -check {
+  puts " Final db contains [db eval {SELECT count(*) FROM t1}] rows"
+  puts " Final integrity-check says: [db eval {PRAGMA integrity_check}]"
+
+  # Check that the contents of the database are Ok. Walking the rows in
+  # cnt order, c is the expected cnt, s1 the sum of all earlier cnt values
+  # and s2 the sum of all earlier sum1 values.
+  set c 0
+  set s1 0
+  set s2 0
+  db eval { SELECT cnt, sum1, sum2 FROM t1 ORDER BY cnt } {
+    if {$c != $cnt || $s1 != $sum1 || $s2 != $sum2} {
+      error "database content is invalid"
+    }
+    incr s2 $s1
+    incr s1 $c
+    incr c 1
+  }
+}
+
+# walthread-4: one reader thread runs integrity_check in a loop with its
+# busy-handler disabled, while one writer thread REPLACEs rows 1 through 9
+# in rotation.
+#
+do_thread_test2 walthread-4 -seconds $seconds(walthread-4) -init {
+  execsql {
+    PRAGMA journal_mode = WAL;
+    CREATE TABLE t1(a INTEGER PRIMARY KEY, b UNIQUE);
+  }
+} -thread r 1 {
+  # This connection only ever reads the database. Therefore the
+  # busy-handler is not required. Disable it to check that this is true.
+  db busy {}
+  while {[tt_continue]} integrity_check
+  set {} ok
+} -thread w 1 {
+
+  # Returns 1 once more than 15 entries are reported, otherwise 0.
+  # NOTE(review): presumably a non-zero return asks for a checkpoint -
+  # confirm against the "db wal_hook" implementation.
+  proc wal_hook {zDb nEntry} {
+    if {$nEntry>15} { return 1 }
+    return 0
+  }
+  db wal_hook wal_hook
+  set row 1
+  while {[tt_continue]} {
+    db eval { REPLACE INTO t1 VALUES($row, randomblob(300)) }
+    incr row
+    # Wrap around so that only rowids 1..9 are ever written.
+    if {$row == 10} { set row 1 }
+  }
+
+  set {} ok
+}
+
+
+# This test case attempts to provoke a deadlock condition that existed in
+# the unix VFS at one point. The problem occurred only while recovering a
+# very large wal file (one that requires a wal-index larger than the
+# initial default allocation of 64KB).
+#
+do_thread_test walthread-5 -seconds $seconds(walthread-5) -init {
+
+  # Size in bytes expected for a log file holding nFrame frames of pgsz-byte
+  # pages: 12 + (pgsz+16)*nFrame.
+  # NOTE(review): presumably 12 is the log file header size and 16 the
+  # per-frame header size - confirm against wal.c.
+  proc log_file_size {nFrame pgsz} {
+    expr {12 + ($pgsz+16)*$nFrame}
+  }
+
+  # Each INSERT ... SELECT doubles the row count; the trailing SQL comment
+  # gives the number of rows after that statement.
+  execsql {
+    PRAGMA page_size = 1024;
+    PRAGMA journal_mode = WAL;
+    CREATE TABLE t1(x);
+    BEGIN;
+    INSERT INTO t1 VALUES(randomblob(900));
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 2 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 8 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 16 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 32 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 64 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 128 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 256 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 512 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 1024 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 2048 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 4096 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 8192 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 16384 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 32768 */
+    INSERT INTO t1 SELECT randomblob(900) FROM t1; /* 65536 */
+    COMMIT;
+  }
+
+  # Save the database and its log while the connection is still open, then
+  # put the saved copies back after closing it.
+  # NOTE(review): presumably closing the connection checkpoints and removes
+  # the log, which is why it has to be restored - confirm.
+  file copy -force test.db-wal bak.db-wal
+  file copy -force test.db bak.db
+  db close
+
+  file copy -force bak.db-wal test.db-wal
+  file copy -force bak.db test.db
+
+  # The restored log must hold at least 64K frames of 1024-byte pages.
+  if {[file size test.db-wal] < [log_file_size [expr {64*1024}] 1024]} {
+    error "Somehow failed to create a large log file"
+  }
+  puts "Database with large log file recovered. Now running clients..."
+} -thread T 5 {
+  db eval { SELECT count(*) FROM t1 }
+}
+
+finish_test
+
Index: tool/mksqlite3c.tcl ================================================================== --- tool/mksqlite3c.tcl +++ tool/mksqlite3c.tcl @@ -107,10 +107,11 @@ sqliteicu.h sqliteInt.h sqliteLimit.h vdbe.h vdbeInt.h + wal.h } { set available_hdr($hdr) 1 } set available_hdr(sqliteInt.h) 0 @@ -241,10 +242,11 @@ bitvec.c pcache.c pcache1.c rowset.c + wal.c pager.c btmutex.c btree.c backup.c