Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -3140,11 +3140,14 @@ if( rc==SQLITE_OK && wrflag ){ if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){ rc = SQLITE_READONLY; }else{ - rc = sqlite3PagerBegin(pBt->pPager,wrflag>1,sqlite3TempInMemory(p->db)); + int bSubjInMem = sqlite3TempInMemory(p->db); + int exFlag = p->db->bUnlocked ? -1 : (wrflag>1); + assert( p->db->bUnlocked==0 || wrflag==1 ); + rc = sqlite3PagerBegin(pBt->pPager, exFlag, bSubjInMem); if( rc==SQLITE_OK ){ rc = newDatabase(pBt); } } } @@ -3668,12 +3671,19 @@ int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){ int rc = SQLITE_OK; if( p->inTrans==TRANS_WRITE ){ BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); + #ifndef SQLITE_OMIT_AUTOVACUUM - if( pBt->autoVacuum ){ + /* Figure out if this is a commit of an UNLOCKED transaction that + ** requires a snapshot upgrade. If so, skip any auto-vacuum + ** processing. */ + if( pBt->autoVacuum && ( + 0==pBt->db->bUnlocked + || 0==sqlite3PagerCommitRequiresUpgrade(pBt->pPager) + )){ rc = autoVacuumCommit(pBt); if( rc!=SQLITE_OK ){ sqlite3BtreeLeave(p); return rc; } @@ -3680,11 +3690,13 @@ } if( pBt->bDoTruncate ){ sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage); } #endif - rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, 0); + } sqlite3BtreeLeave(p); } return rc; } @@ -9576,5 +9588,17 @@ /* ** Return the size of the header added to each page by this module. 
*/ int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); } + +int sqlite3BtreeExclusiveLock(Btree *p){ + int rc; + BtShared *pBt = p->pBt; + sqlite3BtreeEnter(p); + rc = sqlite3PagerExclusiveLock(pBt->pPager, pBt->pPage1->pDbPage); + sqlite3BtreeLeave(p); + return rc; +} + + + Index: src/btree.h ================================================================== --- src/btree.h +++ src/btree.h @@ -268,7 +268,8 @@ # define sqlite3BtreeHoldsMutex(X) 1 # define sqlite3BtreeHoldsAllMutexes(X) 1 # define sqlite3SchemaMutexHeld(X,Y,Z) 1 #endif +int sqlite3BtreeExclusiveLock(Btree*); #endif /* _BTREE_H_ */ Index: src/build.c ================================================================== --- src/build.c +++ src/build.c @@ -3826,17 +3826,17 @@ if( sqlite3AuthCheck(pParse, SQLITE_TRANSACTION, "BEGIN", 0, 0) ){ return; } v = sqlite3GetVdbe(pParse); if( !v ) return; - if( type!=TK_DEFERRED ){ + if( type==TK_IMMEDIATE || type==TK_EXCLUSIVE ){ for(i=0; inDb; i++){ sqlite3VdbeAddOp2(v, OP_Transaction, i, (type==TK_EXCLUSIVE)+1); sqlite3VdbeUsesBtree(v, i); } } - sqlite3VdbeAddOp2(v, OP_AutoCommit, 0, 0); + sqlite3VdbeAddOp3(v, OP_AutoCommit, 0, 0, (type==TK_UNLOCKED)); } /* ** Commit a transaction */ Index: src/main.c ================================================================== --- src/main.c +++ src/main.c @@ -2735,10 +2735,11 @@ assert( sizeof(db->aLimit)==sizeof(aHardLimit) ); memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit)); db->aLimit[SQLITE_LIMIT_WORKER_THREADS] = SQLITE_DEFAULT_WORKER_THREADS; db->autoCommit = 1; + db->bUnlocked = 0; db->nextAutovac = -1; db->szMmap = sqlite3GlobalConfig.szMmap; db->nextPagesize = 0; db->nMaxSorterMmap = 0x7FFFFFFF; db->flags |= SQLITE_ShortColNames | SQLITE_EnableTrigger | SQLITE_CacheSpill Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -3064,10 +3064,11 @@ ** any pages with page numbers greater than nTruncate into the WAL file. 
** They will never be read by any client. So remove them from the pDirty ** list here. */ PgHdr **ppNext = &pList; nList = 0; + for(p=pList; (*ppNext = p)!=0; p=p->pDirty){ if( p->pgno<=nTruncate ){ ppNext = &p->pDirty; nList++; } @@ -4079,11 +4080,11 @@ || pPager->eState==PAGER_WRITER_DBMOD ); assert( assert_pager_state(pPager) ); assert( !pagerUseWal(pPager) ); - rc = sqlite3PagerExclusiveLock(pPager); + rc = sqlite3PagerExclusiveLock(pPager, 0); if( rc!=SQLITE_OK ) return rc; if( !pPager->noSync ){ assert( !pPager->tempFile ); if( isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){ @@ -4427,10 +4428,14 @@ return SQLITE_OK; } pPg->pDirty = 0; if( pagerUseWal(pPager) ){ + /* If the transaction is a "BEGIN UNLOCKED" transaction, the page + ** cannot be flushed to disk. Return early in this case. */ + if( sqlite3WalIsInTrans(pPager->pWal)==0 ) return SQLITE_OK; + /* Write a single frame for this page to the log. */ rc = subjournalPageIfRequired(pPg); if( rc==SQLITE_OK ){ rc = pagerWalFrames(pPager, pPg, 0, 0); } @@ -5546,14 +5551,17 @@ /* ** Begin a write-transaction on the specified pager object. If a ** write-transaction has already been opened, this function is a no-op. ** -** If the exFlag argument is false, then acquire at least a RESERVED -** lock on the database file. If exFlag is true, then acquire at least +** If the exFlag argument is 0, then acquire at least a RESERVED +** lock on the database file. If exFlag is >0, then acquire at least ** an EXCLUSIVE lock. If such a lock is already held, no locking ** functions need be called. +** +** If (exFlag<0) and the database is in WAL mode, do not take any locks. +** The transaction will run in UNLOCKED mode instead. ** ** If the subjInMemory argument is non-zero, then any sub-journal opened ** within this transaction will be opened as an in-memory file. 
This ** has no effect if the sub-journal is already opened (as it may be when ** running in exclusive mode) or if the transaction does not require a @@ -5586,19 +5594,21 @@ /* Grab the write lock on the log file. If successful, upgrade to ** PAGER_RESERVED state. Otherwise, return an error code to the caller. ** The busy-handler is not invoked if another connection already ** holds the write-lock. If possible, the upper layer will call it. */ - rc = sqlite3WalBeginWriteTransaction(pPager->pWal); + if( exFlag>=0 ){ + rc = sqlite3WalBeginWriteTransaction(pPager->pWal); + } }else{ /* Obtain a RESERVED lock on the database file. If the exFlag parameter ** is true, then immediately upgrade this to an EXCLUSIVE lock. The ** busy-handler callback can be used when upgrading to the EXCLUSIVE ** lock, but not when obtaining the RESERVED lock. */ rc = pagerLockDb(pPager, RESERVED_LOCK); - if( rc==SQLITE_OK && exFlag ){ + if( rc==SQLITE_OK && exFlag>0 ){ rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); } } if( rc==SQLITE_OK ){ @@ -6054,22 +6064,64 @@ ** If the EXCLUSIVE lock is already held or the attempt to obtain it is ** successful, or the connection is in WAL mode, SQLITE_OK is returned. ** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is ** returned. */ -int sqlite3PagerExclusiveLock(Pager *pPager){ +int sqlite3PagerExclusiveLock(Pager *pPager, PgHdr *pPage1){ int rc = SQLITE_OK; assert( pPager->eState==PAGER_WRITER_CACHEMOD || pPager->eState==PAGER_WRITER_DBMOD || pPager->eState==PAGER_WRITER_LOCKED ); assert( assert_pager_state(pPager) ); if( 0==pagerUseWal(pPager) ){ rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); + }else{ + Wal *pWal = pPager->pWal; + if( 0==sqlite3WalIsInTrans(pWal) ){ + /* TODO: There must be an optimization opportunity here, as this call + ** to PcacheDirtyList() sorts the list of dirty pages, even though it + ** is not really required - and will be sorted again in CommitPhaseOne() + ** in any case. 
*/ + PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); + + /* This is an UNLOCKED transaction. Attempt to lock the wal database + ** here. If SQLITE_BUSY (but not SQLITE_BUSY_SNAPSHOT) is returned, + ** invoke the busy-handler and try again for as long as it returns + ** non-zero. */ + do { + /* rc = sqlite3WalBeginWriteTransaction(pWal); */ + rc = sqlite3WalLockForCommit(pWal, pList, pPage1); + }while( rc==SQLITE_BUSY + && pPager->xBusyHandler(pPager->pBusyHandlerArg) + ); + } } return rc; } + +/* +** If this is a WAL mode connection and the WRITER lock is currently held, +** relinquish it. +*/ +void sqlite3PagerDropExclusiveLock(Pager *pPager){ + if( pagerUseWal(pPager) ){ + sqlite3WalEndWriteTransaction(pPager->pWal); + } +} + +/* +** Return true if this is a WAL database and snapshot upgrade is required +** before the current transaction can be committed. +*/ +int sqlite3PagerCommitRequiresUpgrade(Pager *pPager){ + int res = 0; + if( pagerUseWal(pPager) ){ + res = sqlite3WalCommitRequiresUpgrade(pPager->pWal); + } + return res; +} /* ** Sync the database file for the pager pPager. zMaster points to the name ** of a master journal file that should be written into the individual ** journal file. zMaster may be NULL, which is interpreted as no master Index: src/pager.h ================================================================== --- src/pager.h +++ src/pager.h @@ -148,18 +148,21 @@ /* Functions used to manage pager transactions and savepoints. 
*/ void sqlite3PagerPagecount(Pager*, int*); int sqlite3PagerBegin(Pager*, int exFlag, int); int sqlite3PagerCommitPhaseOne(Pager*,const char *zMaster, int); -int sqlite3PagerExclusiveLock(Pager*); +int sqlite3PagerExclusiveLock(Pager*, DbPage *pPage1); int sqlite3PagerSync(Pager *pPager, const char *zMaster); int sqlite3PagerCommitPhaseTwo(Pager*); int sqlite3PagerRollback(Pager*); int sqlite3PagerOpenSavepoint(Pager *pPager, int n); int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint); int sqlite3PagerSharedLock(Pager *pPager); +void sqlite3PagerDropExclusiveLock(Pager*); +int sqlite3PagerCommitRequiresUpgrade(Pager*); + #ifndef SQLITE_OMIT_WAL int sqlite3PagerCheckpoint(Pager *pPager, int, int*, int*); int sqlite3PagerWalSupported(Pager *pPager); int sqlite3PagerWalCallback(Pager *pPager); int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen); Index: src/parse.y ================================================================== --- src/parse.y +++ src/parse.y @@ -119,10 +119,11 @@ %type transtype {int} transtype(A) ::= . {A = TK_DEFERRED;} transtype(A) ::= DEFERRED(X). {A = @X;} transtype(A) ::= IMMEDIATE(X). {A = @X;} transtype(A) ::= EXCLUSIVE(X). {A = @X;} +transtype(A) ::= UNLOCKED(X). {A = @X;} cmd ::= COMMIT trans_opt. {sqlite3CommitTransaction(pParse);} cmd ::= END trans_opt. {sqlite3CommitTransaction(pParse);} cmd ::= ROLLBACK trans_opt. {sqlite3RollbackTransaction(pParse);} savepoint_opt ::= SAVEPOINT. Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -1145,10 +1145,11 @@ int errCode; /* Most recent error code (SQLITE_*) */ int errMask; /* & result codes with this before returning */ u16 dbOptFlags; /* Flags to enable/disable optimizations */ u8 enc; /* Text encoding */ u8 autoCommit; /* The auto-commit flag. 
*/ + u8 bUnlocked; /* Current transaction is "UNLOCKED" */ u8 temp_store; /* 1: file 2: memory 0: default */ u8 mallocFailed; /* True if we have seen a malloc failure */ u8 dfltLockMode; /* Default locking-mode for attached dbs */ signed char nextAutovac; /* Autovac setting after VACUUM if >=0 */ u8 suppressErr; /* Do not issue error messages if true */ Index: src/vacuum.c ================================================================== --- src/vacuum.c +++ src/vacuum.c @@ -354,10 +354,11 @@ ** by manually setting the autoCommit flag to true and detaching the ** vacuum database. The vacuum_db journal file is deleted when the pager ** is closed by the DETACH. */ db->autoCommit = 1; + db->bUnlocked = 0; if( pDb ){ sqlite3BtreeClose(pDb->pBt); pDb->pBt = 0; pDb->pSchema = 0; Index: src/vdbe.c ================================================================== --- src/vdbe.c +++ src/vdbe.c @@ -2897,10 +2897,11 @@ int isTransaction = pSavepoint->pNext==0 && db->isTransactionSavepoint; if( isTransaction && p1==SAVEPOINT_RELEASE ){ if( (rc = sqlite3VdbeCheckFk(p, 1))!=SQLITE_OK ){ goto vdbe_return; } + assert( db->bUnlocked==0 ); db->autoCommit = 1; if( sqlite3VdbeHalt(p)==SQLITE_BUSY ){ p->pc = (int)(pOp - aOp); db->autoCommit = 0; p->rc = rc = SQLITE_BUSY; @@ -2968,33 +2969,43 @@ } break; } -/* Opcode: AutoCommit P1 P2 * * * +/* Opcode: AutoCommit P1 P2 P3 * * ** ** Set the database auto-commit flag to P1 (1 or 0). If P2 is true, roll ** back any currently active btree transactions. If there are any active ** VMs (apart from this one), then a ROLLBACK fails. A COMMIT fails if ** there are active writing VMs or active VMs that use shared cache. +** +** If P3 is non-zero, then this instruction is being executed as part of +** a "BEGIN UNLOCKED" command. ** ** This instruction causes the VM to halt. 
*/ case OP_AutoCommit: { int desiredAutoCommit; int iRollback; int turnOnAC; + int bUnlocked; + int hrc; desiredAutoCommit = pOp->p1; iRollback = pOp->p2; + bUnlocked = pOp->p3; turnOnAC = desiredAutoCommit && !db->autoCommit; assert( desiredAutoCommit==1 || desiredAutoCommit==0 ); assert( desiredAutoCommit==1 || iRollback==0 ); + assert( desiredAutoCommit==0 || bUnlocked==0 ); + assert( db->autoCommit==0 || db->bUnlocked==0 ); assert( db->nVdbeActive>0 ); /* At least this one VM is active */ assert( p->bIsReader ); - if( turnOnAC && !iRollback && db->nVdbeWrite>0 ){ + if( turnOnAC && !iRollback && + (db->nVdbeWrite>0 || (db->bUnlocked && db->nVdbeActive>1)) + ){ /* If this instruction implements a COMMIT and other VMs are writing ** return an error indicating that the other VMs must complete first. */ sqlite3VdbeError(p, "cannot commit transaction - " "SQL statements in progress"); @@ -3002,20 +3013,24 @@ }else if( desiredAutoCommit!=db->autoCommit ){ if( iRollback ){ assert( desiredAutoCommit==1 ); sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); db->autoCommit = 1; + db->bUnlocked = 0; }else if( (rc = sqlite3VdbeCheckFk(p, 1))!=SQLITE_OK ){ goto vdbe_return; }else{ db->autoCommit = (u8)desiredAutoCommit; - if( sqlite3VdbeHalt(p)==SQLITE_BUSY ){ + hrc = sqlite3VdbeHalt(p); + if( (hrc & 0xFF)==SQLITE_BUSY ){ p->pc = (int)(pOp - aOp); db->autoCommit = (u8)(1-desiredAutoCommit); - p->rc = rc = SQLITE_BUSY; + p->rc = hrc; + rc = SQLITE_BUSY; goto vdbe_return; } + db->bUnlocked = (u8)bUnlocked; } assert( db->nStatement==0 ); sqlite3CloseSavepoints(db); if( p->rc==SQLITE_OK ){ rc = SQLITE_DONE; @@ -3206,13 +3221,20 @@ pIn3 = &aMem[pOp->p3]; sqlite3VdbeMemIntegerify(pIn3); /* See note about index shifting on OP_ReadCookie */ rc = sqlite3BtreeUpdateMeta(pDb->pBt, pOp->p2, (int)pIn3->u.i); if( pOp->p2==BTREE_SCHEMA_VERSION ){ - /* When the schema cookie changes, record the new cookie internally */ - pDb->pSchema->schema_cookie = (int)pIn3->u.i; - db->flags |= 
SQLITE_InternChanges; + if( db->bUnlocked ){ + sqlite3VdbeError(p, "cannot modify database schema - " + "UNLOCKED transaction" + ); + rc = SQLITE_ERROR; + }else{ + /* When the schema cookie changes, record the new cookie internally */ + pDb->pSchema->schema_cookie = (int)pIn3->u.i; + db->flags |= SQLITE_InternChanges; + } }else if( pOp->p2==BTREE_FILE_FORMAT ){ /* Record changes in the file format */ pDb->pSchema->file_format = (u8)pIn3->u.i; } if( pOp->p1==1 ){ Index: src/vdbeaux.c ================================================================== --- src/vdbeaux.c +++ src/vdbeaux.c @@ -2018,15 +2018,28 @@ for(i=0; rc==SQLITE_OK && i<db->nDb; i++){ Btree *pBt = db->aDb[i].pBt; if( sqlite3BtreeIsInTrans(pBt) ){ needXcommit = 1; if( i!=1 ) nTrans++; - sqlite3BtreeEnter(pBt); - rc = sqlite3PagerExclusiveLock(sqlite3BtreePager(pBt)); - sqlite3BtreeLeave(pBt); + rc = sqlite3BtreeExclusiveLock(pBt); + } + } + + if( db->bUnlocked && (rc & 0xFF)==SQLITE_BUSY ){ + /* An SQLITE_BUSY or SQLITE_BUSY_SNAPSHOT was encountered while + ** attempting to take the WRITER lock on a wal file. Release the + ** WRITER locks on all wal files and return early. */ + for(i=0; i<db->nDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( sqlite3BtreeIsInTrans(pBt) ){ + sqlite3BtreeEnter(pBt); + sqlite3PagerDropExclusiveLock(sqlite3BtreePager(pBt)); + sqlite3BtreeLeave(pBt); + } } } + if( rc!=SQLITE_OK ){ return rc; } /* If there are any write-transactions at all, invoke the commit hook */ @@ -2425,10 +2438,11 @@ ** so, abort any other statements this handle currently has active. */ sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); sqlite3CloseSavepoints(db); db->autoCommit = 1; + db->bUnlocked = 0; p->nChange = 0; } } } @@ -2460,13 +2474,13 @@ ** or hit an 'OR FAIL' constraint and there are no deferred foreign ** key constraints to hold up the transaction. This means a commit ** is required. 
*/ rc = vdbeCommit(db, p); } - if( rc==SQLITE_BUSY && p->readOnly ){ + if( (rc & 0xFF)==SQLITE_BUSY && p->readOnly ){ sqlite3VdbeLeave(p); - return SQLITE_BUSY; + return rc; }else if( rc!=SQLITE_OK ){ p->rc = rc; sqlite3RollbackAll(db, SQLITE_OK); p->nChange = 0; }else{ @@ -2487,10 +2501,11 @@ eStatementOp = SAVEPOINT_ROLLBACK; }else{ sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); sqlite3CloseSavepoints(db); db->autoCommit = 1; + db->bUnlocked = 0; p->nChange = 0; } } /* If eStatementOp is non-zero, then a statement transaction needs to @@ -2508,10 +2523,11 @@ p->zErrMsg = 0; } sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); sqlite3CloseSavepoints(db); db->autoCommit = 1; + db->bUnlocked = 0; p->nChange = 0; } } /* If this was an INSERT, UPDATE or DELETE and no statement transaction @@ -2552,11 +2568,11 @@ if( db->autoCommit ){ sqlite3ConnectionUnlocked(db); } assert( db->nVdbeActive>0 || db->autoCommit==0 || db->nStatement==0 ); - return (p->rc==SQLITE_BUSY ? SQLITE_BUSY : SQLITE_OK); + return ((p->rc & 0xFF)==SQLITE_BUSY ? SQLITE_BUSY : SQLITE_OK); } /* ** Each VDBE holds the result of the most recent sqlite3_step() call Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -2355,42 +2355,23 @@ pWal->readLock = -1; } } /* -** Search the wal file for page pgno. If found, set *piRead to the frame that -** contains the page. Otherwise, if pgno is not in the wal file, set *piRead -** to zero. -** -** Return SQLITE_OK if successful, or an error code if an error occurs. If an -** error does occur, the final value of *piRead is undefined. +** Search the hash tables for an entry matching page number pgno. Ignore +** any entries that lie after frame iLast within the wal file. 
*/ -int sqlite3WalFindFrame( - Wal *pWal, /* WAL handle */ - Pgno pgno, /* Database page number to read data for */ - u32 *piRead /* OUT: Frame number (or zero) */ +static int walFindFrame( + Wal *pWal, + Pgno pgno, + u32 iLast, + u32 *piRead ){ - u32 iRead = 0; /* If !=0, WAL frame to return data from */ - u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ - - /* This routine is only be called from within a read transaction. */ - assert( pWal->readLock>=0 || pWal->lockError ); - - /* If the "last page" field of the wal-index header snapshot is 0, then - ** no data will be read from the wal under any circumstances. Return early - ** in this case as an optimization. Likewise, if pWal->readLock==0, - ** then the WAL is ignored by the reader so return early, as if the - ** WAL were empty. - */ - if( iLast==0 || pWal->readLock==0 ){ - *piRead = 0; - return SQLITE_OK; - } - - /* Search the hash table or tables for an entry matching page number - ** pgno. Each iteration of the following for() loop searches one + u32 iRead = 0; + + /* Each iteration of the following for() loop searches one ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). ** ** This code might run concurrently to the code in walIndexAppend() ** that adds entries to the wal-index (and possibly to this hash ** table). This means the value just read from the hash @@ -2435,15 +2416,52 @@ return SQLITE_CORRUPT_BKPT; } } } + *piRead = iRead; + return SQLITE_OK; +} + +/* +** Search the wal file for page pgno. If found, set *piRead to the frame that +** contains the page. Otherwise, if pgno is not in the wal file, set *piRead +** to zero. +** +** Return SQLITE_OK if successful, or an error code if an error occurs. If an +** error does occur, the final value of *piRead is undefined. 
+*/ +int sqlite3WalFindFrame( + Wal *pWal, /* WAL handle */ + Pgno pgno, /* Database page number to read data for */ + u32 *piRead /* OUT: Frame number (or zero) */ +){ + u32 iRead = 0; /* If !=0, WAL frame to return data from */ + u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ + int rc; + + /* This routine is only called from within a read transaction. */ + assert( pWal->readLock>=0 || pWal->lockError ); + + /* If the "last page" field of the wal-index header snapshot is 0, then + ** no data will be read from the wal under any circumstances. Return early + ** in this case as an optimization. Likewise, if pWal->readLock==0, + ** then the WAL is ignored by the reader so return early, as if the + ** WAL were empty. + */ + if( iLast==0 || pWal->readLock==0 ){ + *piRead = 0; + return SQLITE_OK; + } + + rc = walFindFrame(pWal, pgno, iLast, &iRead); + #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* If expensive assert() statements are available, do a linear search ** of the wal-index file content. Make sure the results agree with the ** result obtained using the hash indexes above. */ - { + if( rc==SQLITE_OK ){ u32 iRead2 = 0; u32 iTest; for(iTest=iLast; iTest>0; iTest--){ if( walFramePgno(pWal, iTest)==pgno ){ iRead2 = iTest; @@ -2534,10 +2552,100 @@ rc = SQLITE_BUSY_SNAPSHOT; } return rc; } + +/* +** TODO: Combine some code with BeginWriteTransaction() +** +** This function is only ever called when committing a "BEGIN UNLOCKED" +** transaction. It may be assumed that no frames have been written to +** the wal file. +*/ +int sqlite3WalLockForCommit(Wal *pWal, PgHdr *pList, PgHdr *pPage1){ + volatile WalIndexHdr *pHead; /* Head of the wal file */ + int rc; + + /* Cannot start a write transaction without first holding a read + ** transaction. */ + assert( pWal->readLock>=0 ); + + if( pWal->readOnly ){ + return SQLITE_READONLY; + } + + /* Only one writer allowed at a time. Get the write lock. Return + ** SQLITE_BUSY if unable. 
+ */ + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 0); + if( rc ){ + return rc; + } + pWal->writeLock = 1; + + /* If the database has been modified since this transaction was started, + ** check if it is still possible to commit. The transaction can be + ** committed if: + ** + ** a) None of the pages in pList have been modified since the + ** transaction opened, and + ** + ** b) The database schema cookie has not been modified since the + ** transaction was started. + */ + pHead = walIndexHdr(pWal); + if( memcmp(&pWal->hdr, (void*)pHead, sizeof(WalIndexHdr))!=0 ){ + /* TODO: Is this safe? Because it holds the WRITER lock this thread + ** has exclusive access to the live header, but might it be corrupt? */ + PgHdr *pPg; + u32 iLast = pHead->mxFrame; + for(pPg=pList; rc==SQLITE_OK && pPg; pPg=pPg->pDirty){ + u32 iSlot = 0; + rc = walFindFrame(pWal, pPg->pgno, iLast, &iSlot); + if( iSlot>pWal->hdr.mxFrame ){ + sqlite3_log(SQLITE_OK, + "cannot commit UNLOCKED transaction (conflict at page %d)", + (int)pPg->pgno + ); + rc = SQLITE_BUSY_SNAPSHOT; + } + } + + if( rc==SQLITE_OK ){ + /* Read the newest schema cookie from the wal file. */ + u32 iSlot = 0; + rc = walFindFrame(pWal, 1, iLast, &iSlot); + if( rc==SQLITE_OK && iSlot>pWal->hdr.mxFrame ){ + u8 aNew[4]; + u8 *aOld = &((u8*)pPage1->pData)[40]; + int sz; + i64 iOffset; + sz = pWal->hdr.szPage; + sz = (sz&0xfe00) + ((sz&0x0001)<<16); + iOffset = walFrameOffset(iSlot, sz) + WAL_FRAME_HDRSIZE + 40; + rc = sqlite3OsRead(pWal->pWalFd, aNew, sizeof(aNew), iOffset); + if( rc==SQLITE_OK && memcmp(aOld, aNew, sizeof(aNew)) ){ + /* TODO: New error code? SQLITE_BUSY_SCHEMA. */ + rc = SQLITE_BUSY_SNAPSHOT; + } + } + } + } + + return rc; +} + +/* +** The caller holds the WRITER lock. This function returns true if a snapshot +** upgrade is required before the transaction can be committed, or false +** otherwise. 
+*/ +int sqlite3WalCommitRequiresUpgrade(Wal *pWal){ + assert( pWal->writeLock ); + return memcmp(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0; +} /* ** End a write transaction. The commit has already been done. This ** routine merely releases the lock. */ @@ -2562,11 +2670,11 @@ ** Otherwise, if the callback function does not return an error, this ** function returns SQLITE_OK. */ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ int rc = SQLITE_OK; - if( ALWAYS(pWal->writeLock) ){ + if( pWal->writeLock ){ Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; /* Restore the clients cache of the wal-index header to the state it ** was in before the client began writing to the database. @@ -2601,11 +2709,11 @@ ** values. This function populates the array with values required to ** "rollback" the write position of the WAL handle back to the current ** point in the event of a savepoint rollback (via WalSavepointUndo()). */ void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ - assert( pWal->writeLock ); + /* assert( pWal->writeLock ); */ aWalData[0] = pWal->hdr.mxFrame; aWalData[1] = pWal->hdr.aFrameCksum[0]; aWalData[2] = pWal->hdr.aFrameCksum[1]; aWalData[3] = pWal->nCkpt; } @@ -2617,11 +2725,11 @@ ** by a call to WalSavepoint(). */ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ int rc = SQLITE_OK; - assert( pWal->writeLock ); + assert( pWal->writeLock || aWalData[0]==pWal->hdr.mxFrame ); assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame ); if( aWalData[3]!=pWal->nCkpt ){ /* This savepoint was opened immediately after the write-transaction ** was started. 
Right after that, the writer decided to wrap around @@ -2781,10 +2889,11 @@ PgHdr *pLast = 0; /* Last frame in list */ int nExtra = 0; /* Number of extra copies of last page */ int szFrame; /* The size of a single frame */ i64 iOffset; /* Next byte to write in WAL file */ WalWriter w; /* The writer */ + int bUpgrade = 0; /* True if commit requires snapshot upgrade */ assert( pList ); assert( pWal->writeLock ); /* If this frame set completes a transaction, then nTruncate>0. If @@ -2795,10 +2904,26 @@ { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); } #endif + + if( isCommit ){ + volatile WalIndexHdr *pHead = walIndexHdr(pWal); + if( pHead->mxFrame>pWal->hdr.mxFrame ){ + if( memcmp((void*)&pHead[0], (void*)&pHead[1], sizeof(WalIndexHdr))!=0 ){ + /* TODO: Deal with this case. It's quite possible, but fiddly. */ + return SQLITE_CORRUPT_BKPT; + } + memcpy(&pWal->hdr, (void*)pHead, sizeof(WalIndexHdr)); + if( nTruncate<pWal->hdr.nPage ){ + /* Do not truncate the database file in this case */ + nTruncate = pWal->hdr.nPage; + } + bUpgrade = 1; + } + } /* See if it is possible to write these frames into the start of the ** log file, instead of appending to it at pWal->hdr.mxFrame. */ if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ @@ -2943,10 +3068,18 @@ if( isCommit ){ walIndexWriteHdr(pWal); pWal->iCallback = iFrame; } } + + if( rc==SQLITE_OK && bUpgrade ){ + /* If this commit required a snapshot upgrade, the pager cache is + ** not currently consistent with the head of the wal file. Zeroing + ** Wal.hdr here forces the next transaction to reset the cache + ** before beginning to read the db. */ + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); + } WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); return rc; } @@ -3144,10 +3277,17 @@ ** WAL module is using shared-memory, return false. 
*/ int sqlite3WalHeapMemory(Wal *pWal){ return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ); } + +/* +** Return true if in a write transaction, false otherwise. +*/ +int sqlite3WalIsInTrans(Wal *pWal){ + return (int)pWal->writeLock; +} #ifdef SQLITE_ENABLE_ZIPVFS /* ** If the argument is not NULL, it points to a Wal object that holds a ** read-lock. This function returns the database page-size if it is known, Index: src/wal.h ================================================================== --- src/wal.h +++ src/wal.h @@ -123,10 +123,15 @@ /* Return true if the argument is non-NULL and the WAL module is using ** heap-memory for the wal-index. Otherwise, if the argument is NULL or the ** WAL module is using shared-memory, return false. */ int sqlite3WalHeapMemory(Wal *pWal); + +/* Return true if the WRITER lock is held. False otherwise. */ +int sqlite3WalIsInTrans(Wal *pWal); +int sqlite3WalLockForCommit(Wal *pWal, PgHdr *pDirtyList, PgHdr *pPage1); +int sqlite3WalCommitRequiresUpgrade(Wal *pWal); #ifdef SQLITE_ENABLE_ZIPVFS /* If the WAL file is not empty, return the number of bytes of content ** stored in each frame (i.e. the db page-size when the WAL was created). */ ADDED test/unlocked.test Index: test/unlocked.test ================================================================== --- /dev/null +++ test/unlocked.test @@ -0,0 +1,307 @@ +# 2015 July 26 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +set ::testprefix unlocked + + +do_execsql_test 1.0 { + PRAGMA journal_mode = wal; +} {wal} + +do_execsql_test 1.1 { + CREATE TABLE t1(k INTEGER PRIMARY KEY, v); + BEGIN UNLOCKED; + INSERT INTO t1 VALUES(1, 'abcd'); + COMMIT; +} + +do_execsql_test 1.2 { + SELECT * FROM t1; +} {1 abcd} + +do_execsql_test 1.3 { + BEGIN UNLOCKED; + INSERT INTO t1 VALUES(2, 'efgh'); + ROLLBACK; +} + +do_execsql_test 1.4 { + SELECT * FROM t1; +} {1 abcd} + + +#------------------------------------------------------------------------- +# UNLOCKED transactions cannot do cache spills. +# +foreach {tn trans spill} { + 1 {BEGIN UNLOCKED} 0 + 2 {BEGIN} 1 +} { + do_test 1.5.$tn { + sqlite3 db2 test.db + set walsz [file size test.db-wal] + + execsql { PRAGMA cache_size = 10 } db2 + execsql $trans db2 + execsql { + WITH cnt(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM cnt WHERE i<50) + INSERT INTO t1(v) SELECT randomblob(900) FROM cnt; + } db2 + + expr {[file size test.db-wal]==$walsz} + } [expr !$spill] + + execsql ROLLBACK db2 + db2 close +} + +#------------------------------------------------------------------------- +# UNLOCKED transactions may not be committed while there are active +# readers. 
+do_execsql_test 1.6.setup { + DROP TABLE t1; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(3, 4); + INSERT INTO t1 VALUES(5, 6); +} +foreach {tn trans commit_ok} { + 1 {BEGIN UNLOCKED} 0 + 2 {BEGIN} 1 +} { + do_test 1.6.$tn.1 { + set stmt [sqlite3_prepare db "SELECT * FROM t1" -1 dummy] + sqlite3_step $stmt + } SQLITE_ROW + do_test 1.6.$tn.2 { + execsql $trans + execsql { INSERT INTO t1 VALUES(7, 8) } + } {} + + if { $commit_ok } { + do_test 1.6.$tn.3 { catchsql COMMIT } {0 {}} + } else { + do_test 1.6.$tn.4 { catchsql COMMIT } {/1 {cannot commit transaction .*}/} + } + + sqlite3_finalize $stmt + catchsql ROLLBACK +} + +#------------------------------------------------------------------------- +# UNLOCKED transactions may not modify the db schema. +# +foreach {tn sql} { + 1 { CREATE TABLE xx(a, b) } + 2 { DROP TABLE t1 } +} { + do_catchsql_test 1.7.$tn.1 " + BEGIN UNLOCKED; + $sql + " {1 {cannot modify database schema - UNLOCKED transaction}} + + do_execsql_test 1.7.$tn.2 ROLLBACK +} + + +do_multiclient_test tn { + + #----------------------------------------------------------------------- + # 1. Start an UNLOCKED transaction using [db1]. + # + # 2. Start and then rollback a regular transaction using [db2]. This + # can be done as the ongoing [db1] transaction is UNLOCKED. + # + # 3. The [db1] transaction can now be committed, as [db2] has relinquished + # the write lock. + # + do_test 2.$tn.1.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(k INTEGER PRIMARY KEY, v); + INSERT INTO t1 VALUES(1, 'one'); + } + sql1 { + BEGIN UNLOCKED; + INSERT INTO t1 VALUES(2, 'two'); + } + code1 { sqlite3_get_autocommit db } + } 0 + + do_test 2.$tn.1.2 { + sql2 { + BEGIN; + INSERT INTO t1 VALUES(3, 'three'); + ROLLBACK; + } + } {} + + do_test 2.$tn.1.3 { + sql1 COMMIT + sql2 { SELECT * FROM t1 } + } {1 one 2 two} + + #----------------------------------------------------------------------- + # 1. Start an UNLOCKED transaction using [db1]. 
+ # + # 2. Commit a transaction using [db2]. + # + # 3. Try to commit with [db1]. Check that SQLITE_BUSY_SNAPSHOT is returned, + # and that the transaction is not rolled back (autocommit stays off). + # + do_test 2.$tn.2.1 { + sql1 { + BEGIN UNLOCKED; + INSERT INTO t1 VALUES(-1, 'hello world'); + } + } {} + + do_test 2.$tn.2.2 { + sql2 { + INSERT INTO t1 VALUES(3, 'three'); + } + } {} + + do_test 2.$tn.2.3.1 { + set rc [catch { sql1 COMMIT } msg] + list $rc $msg + } {1 {database is locked}} + + do_test 2.$tn.2.3.2 { + code1 { list [sqlite3_extended_errcode db] [sqlite3_get_autocommit db] } + } {SQLITE_BUSY_SNAPSHOT 0} + + do_test 2.$tn.2.3.3 { + sql1 { + SELECT * FROM t1; + ROLLBACK; + } + } {-1 {hello world} 1 one 2 two} + + #----------------------------------------------------------------------- + # 1. Start an UNLOCKED transaction using [db1]. + # + # 2. Open a transaction using [db2]. + # + # 3. Try to commit with [db1]. Check that SQLITE_BUSY is returned, + # and that the transaction is not rolled back (autocommit stays off). + # + # 4. Have [db2] roll its transaction back. Then check that [db1] can + # commit. + # + do_test 2.$tn.3.1 { + sql1 { + BEGIN UNLOCKED; + INSERT INTO t1 VALUES(4, 'four'); + } + } {} + + do_test 2.$tn.3.2 { + sql2 { + BEGIN; + INSERT INTO t1 VALUES(-1, 'xyz'); + } + } {} + + do_test 2.$tn.3.3.1 { + set rc [catch { sql1 COMMIT } msg] + list $rc $msg + } {1 {database is locked}} + + do_test 2.$tn.3.3.2 { + code1 { list [sqlite3_extended_errcode db] [sqlite3_get_autocommit db] } + } {SQLITE_BUSY 0} + + do_test 2.$tn.3.3.3 { + sql1 { SELECT * FROM t1; } + } {1 one 2 two 3 three 4 four} + + do_test 2.$tn.3.4 { + sql2 ROLLBACK + sql1 COMMIT + sql1 { SELECT * FROM t1; } + } {1 one 2 two 3 three 4 four} + + #----------------------------------------------------------------------- + # 1. Create a second table - t2. + # + # 2. Write to t1 with [db1] and t2 with [db2], both UNLOCKED. + # + # 3. Check that both writes are visible from both connections.
+ # + do_test 2.$tn.4.1 { + sql1 { CREATE TABLE t2(a, b) } + } {} + do_test 2.$tn.4.2 { + sql2 { + BEGIN UNLOCKED; + INSERT INTO t2 VALUES('i', 'n'); + } + + sql1 { + BEGIN UNLOCKED; + INSERT INTO t1 VALUES(5, 'five'); + COMMIT; + } + + sql2 COMMIT + } {} + + do_test 2.$tn.4.3.1 { + sql2 {SELECT * FROM t1} + } {1 one 2 two 3 three 4 four 5 five} + do_test 2.$tn.4.3.2 { + sql1 {SELECT * FROM t1} + } {1 one 2 two 3 three 4 four 5 five} + + do_test 2.$tn.4.3.3 { sql2 {SELECT * FROM t2} } {i n} + do_test 2.$tn.4.3.4 { sql1 {SELECT * FROM t2} } {i n} + + #----------------------------------------------------------------------- + # The "schema cookie" issue. + # + # 1. Begin an UNLOCKED write to "t1" using [db1]. + # + # 2. Create an index on t1 using [db2]. + # + # 3. Attempt to commit the UNLOCKED write. This fails with SQLITE_BUSY_SNAPSHOT, + # even though there is no page collision. + # + + do_test 2.$tn.5.1 { + sql1 { + BEGIN UNLOCKED; + INSERT INTO t1 VALUES(6, 'six'); + } + } {} + + do_test 2.$tn.5.2 { + sql2 { CREATE INDEX i1 ON t1(v); } + } {} + + do_test 2.$tn.5.3 { + list [catch { sql1 { COMMIT } } msg] $msg [sqlite3_errcode db] + } {1 {database is locked} SQLITE_BUSY_SNAPSHOT} + + do_test 2.$tn.5.4 { + sql2 { PRAGMA integrity_check } + } {ok} + catch { sql1 ROLLBACK } + +} + + + +finish_test Index: tool/mkkeywordhash.c ================================================================== --- tool/mkkeywordhash.c +++ tool/mkkeywordhash.c @@ -260,10 +260,11 @@ { "TO", "TK_TO", ALWAYS }, { "TRANSACTION", "TK_TRANSACTION", ALWAYS }, { "TRIGGER", "TK_TRIGGER", TRIGGER }, { "UNION", "TK_UNION", COMPOUND }, { "UNIQUE", "TK_UNIQUE", ALWAYS }, + { "UNLOCKED", "TK_UNLOCKED", ALWAYS }, { "UPDATE", "TK_UPDATE", ALWAYS }, { "USING", "TK_USING", ALWAYS }, { "VACUUM", "TK_VACUUM", VACUUM }, { "VALUES", "TK_VALUES", ALWAYS }, { "VIEW", "TK_VIEW", VIEW },