Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -7,11 +7,11 @@ ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* -** $Id: btree.c,v 1.161 2004/06/07 16:27:46 drh Exp $ +** $Id: btree.c,v 1.162 2004/06/09 17:37:23 drh Exp $ ** ** This file implements a external (disk-based) database using BTrees. ** For a detailed discussion of BTrees, refer to ** ** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3: @@ -259,11 +259,11 @@ ** walk up the BTree from any leaf to the root. Care must be taken to ** unref() the parent page pointer when this page is no longer referenced. ** The pageDestructor() routine handles that chore. */ struct MemPage { - u8 isInit; /* True if previously initialized */ + u8 isInit; /* True if previously initialized. MUST BE FIRST! */ u8 idxShift; /* True if Cell indices have changed */ u8 nOverflow; /* Number of overflow cell bodies in aCell[] */ u8 intKey; /* True if intkey flag is set */ u8 leaf; /* True if leaf flag is set */ u8 zeroData; /* True if table stores keys only */ @@ -959,10 +959,26 @@ pPage->pParent = 0; releasePage(pParent); } pPage->isInit = 0; } + +/* +** During a rollback, when the pager reloads information into the cache +** so that the cache is restored to its original state at the start of +** the transaction, for each page restored this routine is called. +** +** This routine needs to reset the extra data section at the end of the +** page to agree with the restored data. +*/ +static void pageReinit(void *pData, int pageSize){ + MemPage *pPage = (MemPage*)&((char*)pData)[pageSize]; + if( pPage->isInit ){ + pPage->isInit = 0; + initPage(pPage, pPage->pParent); + } +} /* ** Open a new database. 
** ** Actually, this routine just sets up the internal data structures @@ -1009,10 +1025,11 @@ sqliteFree(pBt); *ppBtree = 0; return rc; } sqlite3pager_set_destructor(pBt->pPager, pageDestructor); + sqlite3pager_set_reiniter(pBt->pPager, pageReinit); pBt->pCursor = 0; pBt->pPage1 = 0; pBt->readOnly = sqlite3pager_isreadonly(pBt->pPager); pBt->pageSize = SQLITE_PAGE_SIZE; /* FIX ME - read from header */ pBt->usableSize = pBt->pageSize; Index: src/os.h ================================================================== --- src/os.h +++ src/os.h @@ -154,18 +154,16 @@ int sqlite3OsWrite(OsFile*, const void*, int amt); int sqlite3OsSeek(OsFile*, off_t offset); int sqlite3OsSync(OsFile*); int sqlite3OsTruncate(OsFile*, off_t size); int sqlite3OsFileSize(OsFile*, off_t *pSize); -int sqlite3OsReadLock(OsFile*); -int sqlite3OsWriteLock(OsFile*); -int sqlite3OsUnlock(OsFile*); int sqlite3OsRandomSeed(char*); int sqlite3OsSleep(int ms); int sqlite3OsCurrentTime(double*); void sqlite3OsEnterMutex(void); void sqlite3OsLeaveMutex(void); char *sqlite3OsFullPathname(const char*); int sqlite3OsLock(OsFile*, int); -int sqlite3OsCheckWriteLock(OsFile *id); +int sqlite3OsUnlock(OsFile*, int); +int sqlite3OsCheckReservedLock(OsFile *id); #endif /* _SQLITE_OS_H_ */ Index: src/os_common.h ================================================================== --- src/os_common.h +++ src/os_common.h @@ -41,19 +41,21 @@ #define TRACE1(X) if( sqlite3_os_trace ) sqlite3DebugPrintf(X) #define TRACE2(X,Y) if( sqlite3_os_trace ) sqlite3DebugPrintf(X,Y) #define TRACE3(X,Y,Z) if( sqlite3_os_trace ) sqlite3DebugPrintf(X,Y,Z) #define TRACE4(X,Y,Z,A) if( sqlite3_os_trace ) sqlite3DebugPrintf(X,Y,Z,A) #define TRACE5(X,Y,Z,A,B) if( sqlite3_os_trace ) sqlite3DebugPrintf(X,Y,Z,A,B) +#define TRACE6(X,Y,Z,A,B,C) if(sqlite3_os_trace) sqlite3DebugPrintf(X,Y,Z,A,B,C) #else #define TIMER_START #define TIMER_END #define SEEK(X) #define TRACE1(X) #define TRACE2(X,Y) #define TRACE3(X,Y,Z) #define TRACE4(X,Y,Z,A) 
#define TRACE5(X,Y,Z,A,B) +#define TRACE6(X,Y,Z,A,B,C) #endif /* ** If we compile with the SQLITE_TEST macro set, then the following block Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -340,29 +340,29 @@ OsFile *id, int *pReadonly ){ int rc; id->dirfd = -1; - id->fd = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY, 0644); - if( id->fd<0 ){ - id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY); - if( id->fd<0 ){ + id->h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY, 0644); + if( id->h<0 ){ + id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY); + if( id->h<0 ){ return SQLITE_CANTOPEN; } *pReadonly = 1; }else{ *pReadonly = 0; } sqlite3OsEnterMutex(); - rc = findLockInfo(id->fd, &id->pLock, &id->pOpen); + rc = findLockInfo(id->h, &id->pLock, &id->pOpen); sqlite3OsLeaveMutex(); if( rc ){ - close(id->fd); + close(id->h); return SQLITE_NOMEM; } id->locktype = 0; - TRACE3("OPEN %-3d %s\n", id->fd, zFilename); + TRACE3("OPEN %-3d %s\n", id->h, zFilename); OpenCounter(+1); return SQLITE_OK; } @@ -384,28 +384,28 @@ int rc; if( access(zFilename, 0)==0 ){ return SQLITE_CANTOPEN; } id->dirfd = -1; - id->fd = open(zFilename, + id->h = open(zFilename, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY, 0600); - if( id->fd<0 ){ + if( id->h<0 ){ return SQLITE_CANTOPEN; } sqlite3OsEnterMutex(); - rc = findLockInfo(id->fd, &id->pLock, &id->pOpen); + rc = findLockInfo(id->h, &id->pLock, &id->pOpen); sqlite3OsLeaveMutex(); if( rc ){ - close(id->fd); + close(id->h); unlink(zFilename); return SQLITE_NOMEM; } id->locktype = 0; if( delFlag ){ unlink(zFilename); } - TRACE3("OPEN-EX %-3d %s\n", id->fd, zFilename); + TRACE3("OPEN-EX %-3d %s\n", id->h, zFilename); OpenCounter(+1); return SQLITE_OK; } /* @@ -416,23 +416,23 @@ ** On failure, return SQLITE_CANTOPEN. 
*/ int sqlite3OsOpenReadOnly(const char *zFilename, OsFile *id){ int rc; id->dirfd = -1; - id->fd = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY); - if( id->fd<0 ){ + id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY); + if( id->h<0 ){ return SQLITE_CANTOPEN; } sqlite3OsEnterMutex(); - rc = findLockInfo(id->fd, &id->pLock, &id->pOpen); + rc = findLockInfo(id->h, &id->pLock, &id->pOpen); sqlite3OsLeaveMutex(); if( rc ){ - close(id->fd); + close(id->h); return SQLITE_NOMEM; } id->locktype = 0; - TRACE3("OPEN-RO %-3d %s\n", id->fd, zFilename); + TRACE3("OPEN-RO %-3d %s\n", id->h, zFilename); OpenCounter(+1); return SQLITE_OK; } /* @@ -453,11 +453,11 @@ */ int sqlite3OsOpenDirectory( const char *zDirname, OsFile *id ){ - if( id->fd<0 ){ + if( id->h<0 ){ /* Do not open the directory if the corresponding file is not already ** open. */ return SQLITE_CANTOPEN; } assert( id->dirfd<0 ); @@ -508,11 +508,11 @@ /* ** Close a file. */ int sqlite3OsClose(OsFile *id){ - sqlite3OsUnlock(id); + sqlite3OsUnlock(id, NO_LOCK); if( id->dirfd>=0 ) close(id->dirfd); id->dirfd = -1; sqlite3OsEnterMutex(); if( id->pOpen->nLock ){ /* If there are outstanding locks, do not actually close the file just @@ -526,20 +526,20 @@ aNew = sqliteRealloc( pOpen->aPending, pOpen->nPending*sizeof(int) ); if( aNew==0 ){ /* If a malloc fails, just leak the file descriptor */ }else{ pOpen->aPending = aNew; - pOpen->aPending[pOpen->nPending-1] = id->fd; + pOpen->aPending[pOpen->nPending-1] = id->h; } }else{ /* There are no outstanding locks so we can close the file immediately */ - close(id->fd); + close(id->h); } releaseLockInfo(id->pLock); releaseOpenCnt(id->pOpen); sqlite3OsLeaveMutex(); - TRACE2("CLOSE %-3d\n", id->fd); + TRACE2("CLOSE %-3d\n", id->h); OpenCounter(-1); return SQLITE_OK; } /* @@ -549,13 +549,13 @@ */ int sqlite3OsRead(OsFile *id, void *pBuf, int amt){ int got; SimulateIOError(SQLITE_IOERR); TIMER_START; - got = read(id->fd, pBuf, amt); + got = read(id->h, pBuf, amt); TIMER_END; - 
TRACE4("READ %-3d %7d %d\n", id->fd, last_page, elapse); + TRACE4("READ %-3d %7d %d\n", id->h, last_page, elapse); SEEK(0); /* if( got<0 ) got = 0; */ if( got==amt ){ return SQLITE_OK; }else{ @@ -569,16 +569,16 @@ */ int sqlite3OsWrite(OsFile *id, const void *pBuf, int amt){ int wrote = 0; SimulateIOError(SQLITE_IOERR); TIMER_START; - while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){ + while( amt>0 && (wrote = write(id->h, pBuf, amt))>0 ){ amt -= wrote; pBuf = &((char*)pBuf)[wrote]; } TIMER_END; - TRACE4("WRITE %-3d %7d %d\n", id->fd, last_page, elapse); + TRACE4("WRITE %-3d %7d %d\n", id->h, last_page, elapse); SEEK(0); if( amt>0 ){ return SQLITE_FULL; } return SQLITE_OK; @@ -587,11 +587,11 @@ /* ** Move the read/write pointer in a file. */ int sqlite3OsSeek(OsFile *id, off_t offset){ SEEK(offset/1024 + 1); - lseek(id->fd, offset, SEEK_SET); + lseek(id->h, offset, SEEK_SET); return SQLITE_OK; } /* ** Make sure all writes to a particular file are committed to disk. @@ -604,12 +604,12 @@ ** the directory entry for the journal was never created) and the transaction ** will not roll back - possibly leading to database corruption. */ int sqlite3OsSync(OsFile *id){ SimulateIOError(SQLITE_IOERR); - TRACE2("SYNC %-3d\n", id->fd); - if( fsync(id->fd) ){ + TRACE2("SYNC %-3d\n", id->h); + if( fsync(id->h) ){ return SQLITE_IOERR; }else{ if( id->dirfd>=0 ){ TRACE2("DIRSYNC %-3d\n", id->dirfd); fsync(id->dirfd); @@ -623,20 +623,20 @@ /* ** Truncate an open file to a specified size */ int sqlite3OsTruncate(OsFile *id, off_t nByte){ SimulateIOError(SQLITE_IOERR); - return ftruncate(id->fd, nByte)==0 ? SQLITE_OK : SQLITE_IOERR; + return ftruncate(id->h, nByte)==0 ? 
SQLITE_OK : SQLITE_IOERR; } /* ** Determine the current size of a file in bytes */ int sqlite3OsFileSize(OsFile *id, off_t *pSize){ struct stat buf; SimulateIOError(SQLITE_IOERR); - if( fstat(id->fd, &buf)!=0 ){ + if( fstat(id->h, &buf)!=0 ){ return SQLITE_IOERR; } *pSize = buf.st_size; return SQLITE_OK; } @@ -645,11 +645,11 @@ ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, return ** non-zero. If the file is unlocked or holds only SHARED locks, then ** return zero. */ -int sqlite3OsCheckWriteLock(OsFile *id){ +int sqlite3OsCheckReservedLock(OsFile *id){ int r = 0; sqlite3OsEnterMutex(); /* Needed because id->pLock is shared across threads */ /* Check if a thread in this process holds such a lock */ @@ -663,18 +663,18 @@ struct flock lock; lock.l_whence = SEEK_SET; lock.l_start = RESERVED_BYTE; lock.l_len = 1; lock.l_type = F_WRLCK; - fcntl(id->fd, F_GETLK, &lock); + fcntl(id->h, F_GETLK, &lock); if( lock.l_type!=F_UNLCK ){ r = 1; } } sqlite3OsLeaveMutex(); - TRACE3("TEST WR-LOCK %d %d\n", id->fd, r); + TRACE3("TEST WR-LOCK %d %d\n", id->h, r); return r; } /* @@ -696,23 +696,21 @@ ** SHARED -> RESERVED ** SHARED -> (PENDING) -> EXCLUSIVE ** RESERVED -> (PENDING) -> EXCLUSIVE ** PENDING -> EXCLUSIVE ** -** This routine will only increase a lock. The sqlite3OsUnlock() routine -** erases all locks at once and returns us immediately to locking level 0. -** It is not possible to lower the locking level one step at a time. You -** must go straight to locking level 0. +** This routine will only increase a lock. Use the sqlite3OsUnlock() +** routine to lower a locking level. 
*/ int sqlite3OsLock(OsFile *id, int locktype){ int rc = SQLITE_OK; struct lockInfo *pLock = id->pLock; struct flock lock; int s; - TRACE5("LOCK %d %d was %d(%d)\n", - id->fd, locktype, id->locktype, pLock->locktype); + TRACE6("LOCK %d %d was %d(%d,%d)\n", + id->h, locktype, id->locktype, pLock->locktype, pLock->cnt); /* If there is already a lock of this type or more restrictive on the ** OsFile, do nothing. Don't use the end_lock: exit path, as ** sqlite3OsEnterMutex() hasn't been called yet. */ @@ -769,26 +767,26 @@ /* Temporarily grab a PENDING lock. This prevents new SHARED locks from ** being formed if a PENDING lock is already held. */ lock.l_type = F_RDLCK; lock.l_start = PENDING_BYTE; - s = fcntl(id->fd, F_SETLK, &lock); + s = fcntl(id->h, F_SETLK, &lock); if( s ){ rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY; goto end_lock; } /* Now get the read-lock */ lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; - s = fcntl(id->fd, F_SETLK, &lock); + s = fcntl(id->h, F_SETLK, &lock); /* Drop the temporary PENDING lock */ lock.l_start = PENDING_BYTE; lock.l_len = 1L; lock.l_type = F_UNLCK; - fcntl(id->fd, F_SETLK, &lock); + fcntl(id->h, F_SETLK, &lock); if( s ){ rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY; }else{ id->locktype = SHARED_LOCK; id->pOpen->nLock++; @@ -813,11 +811,11 @@ lock.l_len = SHARED_SIZE; break; default: assert(0); } - s = fcntl(id->fd, F_SETLK, &lock); + s = fcntl(id->h, F_SETLK, &lock); if( s ){ rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY; } } @@ -826,51 +824,67 @@ pLock->locktype = locktype; } end_lock: sqlite3OsLeaveMutex(); - TRACE4("LOCK %d %d %s\n", id->fd, locktype, rc==SQLITE_OK ? "ok" : "failed"); + TRACE4("LOCK %d %d %s\n", id->h, locktype, rc==SQLITE_OK ? "ok" : "failed"); return rc; } /* -** Unlock the given file descriptor. If the file descriptor was -** not previously locked, then this routine is a no-op. 
If this -** library was compiled with large file support (LFS) but LFS is not -** available on the host, then an SQLITE_NOLFS is returned. +** Lower the locking level on file descriptor id to locktype. locktype +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +** +** It is not possible for this routine to fail. */ -int sqlite3OsUnlock(OsFile *id){ - int rc; - if( !id->locktype ) return SQLITE_OK; - id->locktype = 0; +int sqlite3OsUnlock(OsFile *id, int locktype){ + struct lockInfo *pLock; + struct flock lock; + + TRACE6("UNLOCK %d %d was %d(%d,%d)\n", + id->h, locktype, id->locktype, id->pLock->locktype, id->pLock->cnt); + + assert( locktype<=SHARED_LOCK ); + if( id->locktype<=locktype ){ + return SQLITE_OK; + } sqlite3OsEnterMutex(); - assert( id->pLock->cnt!=0 ); - if( id->pLock->cnt>1 ){ - id->pLock->cnt--; - rc = SQLITE_OK; - }else{ - struct flock lock; - int s; + pLock = id->pLock; + assert( pLock->cnt!=0 ); + if( id->locktype>SHARED_LOCK ){ + assert( pLock->locktype==id->locktype ); lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; - lock.l_start = lock.l_len = 0L; - s = fcntl(id->fd, F_SETLK, &lock); - if( s!=0 ){ - rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY; - }else{ - rc = SQLITE_OK; - id->pLock->cnt = 0; - id->pLock->locktype = 0; - } - } - - if( rc==SQLITE_OK ){ + lock.l_start = PENDING_BYTE; + lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE ); + fcntl(id->h, F_SETLK, &lock); + pLock->locktype = SHARED_LOCK; + } + if( locktype==NO_LOCK ){ + struct openCnt *pOpen; + + /* Decrement the shared lock counter. Release the lock using an + ** OS call only when all threads in this same process have released + ** the lock. 
+ */ + pLock->cnt--; + if( pLock->cnt==0 ){ + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = lock.l_len = 0L; + fcntl(id->h, F_SETLK, &lock); + pLock->locktype = NO_LOCK; + } + /* Decrement the count of locks against this same file. When the ** count reaches zero, close any other file descriptors whose close ** was deferred because of outstanding locks. */ - struct openCnt *pOpen = id->pOpen; + pOpen = id->pOpen; pOpen->nLock--; assert( pOpen->nLock>=0 ); if( pOpen->nLock==0 && pOpen->nPending>0 ){ int i; for(i=0; i<pOpen->nPending; i++){ @@ -880,12 +894,12 @@ pOpen->nPending = 0; pOpen->aPending = 0; } } sqlite3OsLeaveMutex(); - id->locktype = 0; - return rc; + id->locktype = locktype; + return SQLITE_OK; } /* ** Get information to seed the random number generator. The seed ** is written into the buffer zBuf[256]. The calling function must Index: src/os_unix.h ================================================================== --- src/os_unix.h +++ src/os_unix.h @@ -62,11 +62,11 @@ */ typedef struct OsFile OsFile; struct OsFile { struct openCnt *pOpen; /* Info about all open fd's on this inode */ struct lockInfo *pLock; /* Info about locks on this inode */ - int fd; /* The file descriptor */ + int h; /* The file descriptor */ int locktype; /* The type of lock held on this fd */ int dirfd; /* File descriptor for the directory */ }; /* Index: src/os_win.c ================================================================== --- src/os_win.c +++ src/os_win.c @@ -506,11 +506,11 @@ /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, return ** non-zero, otherwise zero. */ -int sqlite3OsCheckWriteLock(OsFile *id){ +int sqlite3OsCheckReservedLock(OsFile *id){ int rc; if( id->locktype>=RESERVED_LOCK ){ rc = 1; TRACE3("TEST WR-LOCK %d %d (local)\n", id->h, rc); }else{ @@ -523,32 +523,37 @@ } return rc; } /* -** Unlock the given file descriptor.
If the file descriptor was -** not previously locked, then this routine is a no-op. If this -** library was compiled with large file support (LFS) but LFS is not -** available on the host, then an SQLITE_NOLFS is returned. +** Lower the locking level on file descriptor id to locktype. locktype +** must be either NO_LOCK or SHARED_LOCK. +** +** If the locking level of the file descriptor is already at or below +** the requested locking level, this routine is a no-op. +** +** It is not possible for this routine to fail. */ -int sqlite3OsUnlock(OsFile *id){ +int sqlite3OsUnlock(OsFile *id, int locktype){ int rc, type; - TRACE4("UNLOCK %d was %d(%d)\n", id->h, id->locktype, id->sharedLockByte); + assert( locktype<=SHARED_LOCK ); + TRACE4("UNLOCK %d to %d was %d(%d)\n", id->h, locktype, + id->locktype, id->sharedLockByte); type = id->locktype; if( type>=EXCLUSIVE_LOCK ){ UnlockFile(id->h, SHARED_FIRST, 0, SHARED_SIZE, 0); } if( type>=RESERVED_LOCK ){ UnlockFile(id->h, RESERVED_BYTE, 0, 1, 0); } - if( type>=SHARED_LOCK && type=SHARED_LOCK && type=PENDING_LOCK ){ UnlockFile(id->h, PENDING_BYTE, 0, 1, 0); } - id->locktype = NO_LOCK; + id->locktype = locktype; return SQLITE_OK; } /* ** Get information to seed the random number generator. The seed Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -16,11 +16,11 @@ ** is separate from the database file. The pager also implements file ** locking to prevent two processes from writing the same database ** file simultaneously, or one process from reading the database while ** another is writing. 
** -** @(#) $Id: pager.c,v 1.114 2004/06/09 14:17:21 drh Exp $ +** @(#) $Id: pager.c,v 1.115 2004/06/09 17:37:28 drh Exp $ */ #include "os.h" /* Must be first to enable large file support */ #include "sqliteInt.h" #include "pager.h" #include @@ -47,39 +47,46 @@ /* ** The page cache as a whole is always in one of the following ** states: ** -** SQLITE_UNLOCK The page cache is not currently reading or +** PAGER_UNLOCK The page cache is not currently reading or ** writing the database file. There is no ** data held in memory. This is the initial ** state. ** -** SQLITE_READLOCK The page cache is reading the database. +** PAGER_SHARED The page cache is reading the database. ** Writing is not permitted. There can be ** multiple readers accessing the same database ** file at the same time. ** -** SQLITE_WRITELOCK The page cache is writing the database. +** PAGER_RESERVED Writing is permitted to the page cache only. +** The original database file has not been modified. +** Other processes may still be reading the on-disk +** database file. +** +** PAGER_EXCLUSIVE The page cache is writing the database. ** Access is exclusive. No other processes or ** threads can be reading or writing while one ** process is writing. ** -** The page cache comes up in SQLITE_UNLOCK. The first time a -** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK. +** The page cache comes up in PAGER_UNLOCK. The first time a +** sqlite_page_get() occurs, the state transitions to PAGER_SHARED. ** After all pages have been released using sqlite_page_unref(), -** the state transitions back to SQLITE_UNLOCK. The first time +** the state transitions back to PAGER_UNLOCK. The first time ** that sqlite_page_write() is called, the state transitions to -** SQLITE_WRITELOCK. (Note that sqlite_page_write() can only be +** PAGER_RESERVED. 
(Note that sqlite_page_write() can only be ** called on an outstanding page which means that the pager must -** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.) +** be in PAGER_SHARED before it transitions to PAGER_RESERVED.) ** The sqlite_page_rollback() and sqlite_page_commit() functions -** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK. +** transition the state from PAGER_RESERVED to PAGER_EXCLUSIVE to +** PAGER_SHARED. */ -#define SQLITE_UNLOCK 0 -#define SQLITE_READLOCK 1 -#define SQLITE_WRITELOCK 2 +#define PAGER_UNLOCK 0 +#define PAGER_SHARED 1 +#define PAGER_RESERVED 2 +#define PAGER_EXCLUSIVE 3 /* ** Each in-memory image of a page begins with the following header. ** This header is only visible to this pager module. The client @@ -180,10 +187,11 @@ int nRec; /* Number of pages written to the journal */ u32 cksumInit; /* Quasi-random value added to every checksum */ int stmtNRec; /* Number of records in stmt subjournal */ int nExtra; /* Add this many bytes to each in-memory page */ void (*xDestructor)(void*,int); /* Call this routine when freeing pages */ + void (*xReiniter)(void*,int); /* Call this routine when reloading pages */ int pageSize; /* Number of bytes in a page */ int nPage; /* Total number of in-memory pages */ int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */ int mxPage; /* Maximum number of pages to hold in cache */ int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */ @@ -195,16 +203,16 @@ u8 stmtOpen; /* True if the statement subjournal is open */ u8 stmtInUse; /* True we are in a statement subtransaction */ u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/ u8 noSync; /* Do not sync the journal if true */ u8 fullSync; /* Do extra syncs of the journal for robustness */ - u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */ + u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. 
*/ u8 errMask; /* One of several kinds of errors */ u8 tempFile; /* zFilename is a temporary file */ u8 readOnly; /* True for a read-only database */ u8 needSync; /* True if an fsync() is needed on the journal */ - u8 dirtyFile; /* True if database file has changed in any way */ + u8 dirtyCache; /* True if cached pages have changed */ u8 alwaysRollback; /* Disable dont_rollback() for all pages */ u8 memDb; /* True to inhibit all file I/O */ u8 *aInJournal; /* One bit for each page in the database file */ u8 *aInStmt; /* One bit for each page in the database */ PgHdr *pFirst, *pLast; /* List of free pages */ @@ -477,34 +485,35 @@ pPager->pFirstSynced = 0; pPager->pLast = 0; pPager->pAll = 0; memset(pPager->aHash, 0, sizeof(pPager->aHash)); pPager->nPage = 0; - if( pPager->state>=SQLITE_WRITELOCK ){ + if( pPager->state>=PAGER_RESERVED ){ sqlite3pager_rollback(pPager); } - sqlite3OsUnlock(&pPager->fd); - pPager->state = SQLITE_UNLOCK; + sqlite3OsUnlock(&pPager->fd, NO_LOCK); + pPager->state = PAGER_UNLOCK; pPager->dbSize = -1; pPager->nRef = 0; assert( pPager->journalOpen==0 ); } /* ** When this routine is called, the pager has the journal file open and -** a write lock on the database. This routine releases the database -** write lock and acquires a read lock in its place. The journal file -** is deleted and closed. +** a RESERVED or EXCLUSIVE lock on the database. This routine releases +** the database lock and acquires a SHARED lock in its place. The journal +** file is deleted and closed. ** ** TODO: Consider keeping the journal file open for temporary databases. ** This might give a performance improvement on windows where opening ** a file is an expensive operation. 
*/ static int pager_unwritelock(Pager *pPager){ - int rc; PgHdr *pPg; - if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK; + if( pPager->state<PAGER_RESERVED ){ + return SQLITE_OK; + } sqlite3pager_stmt_commit(pPager); if( pPager->stmtOpen ){ sqlite3OsClose(&pPager->stfd); pPager->stmtOpen = 0; } @@ -518,23 +527,15 @@ pPg->inJournal = 0; pPg->dirty = 0; pPg->needSync = 0; } }else{ - assert( pPager->dirtyFile==0 || pPager->useJournal==0 ); - } - rc = sqlite3OsLock(&pPager->fd, SHARED_LOCK); - if( rc==SQLITE_OK ){ - pPager->state = SQLITE_READLOCK; - }else{ - /* This can only happen if a process does a BEGIN, then forks and the - ** child process does the COMMIT. Because of the semantics of unix - ** file locking, the unlock will fail. - */ - pPager->state = SQLITE_UNLOCK; - } - return rc; + assert( pPager->dirtyCache==0 || pPager->useJournal==0 ); + } + sqlite3OsUnlock(&pPager->fd, SHARED_LOCK); + pPager->state = PAGER_SHARED; + return SQLITE_OK; } /* ** Compute and return a checksum for the page of data. ** @@ -586,11 +587,11 @@ /* Playback the page. Update the in-memory copy of the page ** at the same time, if there is one. */ pPg = pager_lookup(pPager, pgRec.pgno); - TRACE2("PLAYBACK %d\n", pgRec.pgno); + TRACE2("PLAYBACK page %d\n", pgRec.pgno); sqlite3OsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE); rc = sqlite3OsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE); if( pPg ){ /* No page should ever be rolled back that is in use, except for page ** 1 which is held in use in order to keep the lock on the database @@ -718,10 +719,49 @@ if( master_open ){ sqlite3OsClose(&master); } return rc; } + +/* +** Make every page in the cache agree with what is on disk. In other words, +** reread the disk to reset the state of the cache. +** +** This routine is called after a rollback in which some of the dirty cache +** pages had never been written out to disk. We need to roll back the +** cache content and the easiest way to do that is to reread the old content +** back from the disk.
+*/ +static int pager_reload_cache(Pager *pPager){ + PgHdr *pPg; + int rc = SQLITE_OK; + for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ + char zBuf[SQLITE_PAGE_SIZE]; + if( !pPg->dirty ) continue; + if( (int)pPg->pgno <= pPager->origDbSize ){ + sqlite3OsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1)); + rc = sqlite3OsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE); + TRACE2("REFETCH page %d\n", pPg->pgno); + CODEC(pPager, zBuf, pPg->pgno, 2); + if( rc ) break; + }else{ + memset(zBuf, 0, SQLITE_PAGE_SIZE); + } + if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){ + memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE); + if( pPager->xReiniter ){ + pPager->xReiniter(PGHDR_TO_DATA(pPg), pPager->pageSize); + }else{ + memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra); + } + } + pPg->needSync = 0; + pPg->dirty = 0; + } + return rc; +} + /* ** Playback the journal and thus restore the database file to ** the state it was in before we started making changes. ** @@ -885,30 +925,11 @@ /* Pages that have been written to the journal but never synced ** where not restored by the loop above. We have to restore those ** pages by reading them back from the original database. 
*/ if( rc==SQLITE_OK ){ - PgHdr *pPg; - for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ - char zBuf[SQLITE_PAGE_SIZE]; - if( !pPg->dirty ) continue; - if( (int)pPg->pgno <= pPager->origDbSize ){ - sqlite3OsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1)); - rc = sqlite3OsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE); - TRACE2("REFETCH %d\n", pPg->pgno); - CODEC(pPager, zBuf, pPg->pgno, 2); - if( rc ) break; - }else{ - memset(zBuf, 0, SQLITE_PAGE_SIZE); - } - if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){ - memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE); - memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra); - } - pPg->needSync = 0; - pPg->dirty = 0; - } + pager_reload_cache(pPager); } end_playback: if( zMaster ){ /* If there was a master journal and this routine will return true, @@ -1163,11 +1184,11 @@ pPager->pageSize = SQLITE_PAGE_SIZE; pPager->stmtSize = 0; pPager->stmtJSize = 0; pPager->nPage = 0; pPager->mxPage = mxPage>5 ? mxPage : 10; - pPager->state = SQLITE_UNLOCK; + pPager->state = PAGER_UNLOCK; pPager->errMask = 0; pPager->tempFile = tempFile; pPager->memDb = memDb; pPager->readOnly = readOnly; pPager->needSync = 0; @@ -1191,10 +1212,21 @@ ** Destructors are only called by sqlite3pager_unref(). */ void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*,int)){ pPager->xDestructor = xDesc; } + +/* +** Set the reinitializer for this pager. If not NULL, the reinitializer +** is called when the content of a page in cache is restored to its original +** value as a result of a rollback. The callback gives higher-level code +** an opportunity to restore the EXTRA section to agree with the restored +** page data. +*/ +void sqlite3pager_set_reiniter(Pager *pPager, void (*xReinit)(void*,int)){ + pPager->xReiniter = xReinit; +} /* ** Return the total number of pages in the disk file associated with ** pPager. 
*/ @@ -1207,11 +1239,11 @@ if( sqlite3OsFileSize(&pPager->fd, &n)!=SQLITE_OK ){ pPager->errMask |= PAGER_ERR_DISK; return 0; } n /= SQLITE_PAGE_SIZE; - if( pPager->state!=SQLITE_UNLOCK ){ + if( pPager->state!=PAGER_UNLOCK ){ pPager->dbSize = n; } return n; } @@ -1328,21 +1360,22 @@ ** result in a coredump. */ int sqlite3pager_close(Pager *pPager){ PgHdr *pPg, *pNext; switch( pPager->state ){ - case SQLITE_WRITELOCK: { + case PAGER_RESERVED: + case PAGER_EXCLUSIVE: { sqlite3pager_rollback(pPager); if( !pPager->memDb ){ - sqlite3OsUnlock(&pPager->fd); + sqlite3OsUnlock(&pPager->fd, NO_LOCK); } assert( pPager->journalOpen==0 ); break; } - case SQLITE_READLOCK: { + case PAGER_SHARED: { if( !pPager->memDb ){ - sqlite3OsUnlock(&pPager->fd); + sqlite3OsUnlock(&pPager->fd, NO_LOCK); } break; } default: { /* Do nothing */ @@ -1483,11 +1516,11 @@ #endif if( journal_format>=3 ){ /* Write the nRec value into the journal file header */ off_t szJ; if( pPager->fullSync ){ - TRACE1("SYNC\n"); + TRACE2("SYNC journal of %d\n", pPager->fd.h); rc = sqlite3OsSync(&pPager->jfd); if( rc!=0 ) return rc; } sqlite3OsSeek(&pPager->jfd, sizeof(aJournalMagic1)); rc = write32bits(&pPager->jfd, pPager->nRec); @@ -1504,11 +1537,11 @@ szJ = JOURNAL_HDR_SZ(pPager, journal_format) + pPager->nRec*JOURNAL_PG_SZ(journal_format); sqlite3OsSeek(&pPager->jfd, szJ); } - TRACE1("SYNC\n"); + TRACE2("SYNC journal of %d\n", pPager->fd.h); rc = sqlite3OsSync(&pPager->jfd); if( rc!=0 ) return rc; pPager->journalStarted = 1; } pPager->needSync = 0; @@ -1552,19 +1585,21 @@ /* At this point there may be either a RESERVED or EXCLUSIVE lock on the ** database file. If there is already an EXCLUSIVE lock, the following ** calls to sqlite3OsLock() are no-ops. ** - ** The upgrade from a RESERVED to PENDING might return SQLITE_BUSY on - ** windows because the windows locking mechanism acquires a transient - ** PENDING lock during its attempts to get a SHARED lock. 
So if another - ** process were trying to get a SHARED lock at the same time this process - ** is upgrading from RESERVED to PENDING, the two could collide. + ** Moving the lock from RESERVED to EXCLUSIVE actually involves going + ** through an intermediate state PENDING. A PENDING lock prevents new + ** readers from attaching to the database but is insufficient for us to + ** write. The idea of a PENDING lock is to prevent new readers from + ** coming in while we wait for existing readers to clear. ** - ** The upgrade from PENDING to EXCLUSIVE can return SQLITE_BUSY if there - ** are still active readers that were created before the PENDING lock - ** was acquired. + ** While the pager is in the RESERVED state, the original database file + ** is unchanged and we can rollback without having to playback the + ** journal into the original database file. Once we transition to + ** EXCLUSIVE, it means the database file has been changed and any rollback + ** will require a journal playback. */ do { rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK); }while( rc==SQLITE_BUSY && pPager->pBusyHandler && @@ -1572,16 +1607,17 @@ pPager->pBusyHandler->xFunc(pPager->pBusyHandler->pArg, "", busy++) ); if( rc!=SQLITE_OK ){ return rc; } + pPager->state = PAGER_EXCLUSIVE; while( pList ){ assert( pList->dirty ); sqlite3OsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE); CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6); - TRACE2("STORE %d\n", pList->pgno); + TRACE2("STORE page %d\n", pList->pgno); rc = sqlite3OsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE); CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0); if( rc ) return rc; pList->dirty = 0; pList = pList->pDirty; @@ -1655,31 +1691,29 @@ pPager->pBusyHandler->xFunc(pPager->pBusyHandler->pArg, "", busy++) ); if( rc!=SQLITE_OK ){ return rc; } - pPager->state = SQLITE_READLOCK; + pPager->state = PAGER_SHARED; /* If a journal file exists, and there is no RESERVED lock on the ** database file, then it either
needs to be played back or deleted. */ if( pPager->useJournal && sqlite3OsFileExists(pPager->zJournal) && - !sqlite3OsCheckWriteLock(&pPager->fd) + !sqlite3OsCheckReservedLock(&pPager->fd) ){ int rc; /* Get an EXCLUSIVE lock on the database file. */ rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK); if( rc!=SQLITE_OK ){ - if( sqlite3OsUnlock(&pPager->fd)!=SQLITE_OK ){ - /* This should never happen! */ - rc = SQLITE_INTERNAL; - } + sqlite3OsUnlock(&pPager->fd, NO_LOCK); + pPager->state = PAGER_UNLOCK; return rc; } - pPager->state = SQLITE_WRITELOCK; + pPager->state = PAGER_EXCLUSIVE; /* Open the journal for reading only. Return SQLITE_BUSY if ** we are unable to open the journal file. ** ** The journal file does not need to be locked itself. The @@ -1687,12 +1721,12 @@ ** a write lock, so there is never any chance of two or more ** processes opening the journal at the same time. */ rc = sqlite3OsOpenReadOnly(pPager->zJournal, &pPager->jfd); if( rc!=SQLITE_OK ){ - rc = sqlite3OsUnlock(&pPager->fd); - assert( rc==SQLITE_OK ); + sqlite3OsUnlock(&pPager->fd, NO_LOCK); + pPager->state = PAGER_UNLOCK; return SQLITE_BUSY; } pPager->journalOpen = 1; pPager->journalStarted = 0; @@ -1706,12 +1740,12 @@ } pPg = 0; }else{ /* Search for page in cache */ pPg = pager_lookup(pPager, pgno); - if( pPager->memDb && pPager->state==SQLITE_UNLOCK ){ - pPager->state = SQLITE_READLOCK; + if( pPager->memDb && pPager->state==PAGER_UNLOCK ){ + pPager->state = PAGER_SHARED; } } if( pPg==0 ){ /* The requested page is not in the page cache. 
*/ int h; @@ -1826,11 +1860,11 @@ }else{ int rc; assert( pPager->memDb==0 ); sqlite3OsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE); rc = sqlite3OsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE); - TRACE2("FETCH %d\n", pPg->pgno); + TRACE2("FETCH page %d\n", pPg->pgno); CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3); if( rc!=SQLITE_OK ){ off_t fileSize; if( sqlite3OsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK || fileSize>=pgno*SQLITE_PAGE_SIZE ){ @@ -1925,34 +1959,34 @@ } return SQLITE_OK; } /* -** Create a journal file for pPager. There should already be a write -** lock on the database file when this routine is called. +** Create a journal file for pPager. There should already be a RESERVED +** or EXCLUSIVE lock on the database file when this routine is called. ** ** Return SQLITE_OK if everything. Return an error code and release the ** write lock if anything goes wrong. */ static int pager_open_journal(Pager *pPager){ int rc; - assert( pPager->state==SQLITE_WRITELOCK ); + assert( pPager->state>=PAGER_RESERVED ); assert( pPager->journalOpen==0 ); assert( pPager->useJournal ); sqlite3pager_pagecount(pPager); pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 ); if( pPager->aInJournal==0 ){ - sqlite3OsLock(&pPager->fd, SHARED_LOCK); - pPager->state = SQLITE_READLOCK; + sqlite3OsUnlock(&pPager->fd, SHARED_LOCK); + pPager->state = PAGER_SHARED; return SQLITE_NOMEM; } rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile); if( rc!=SQLITE_OK ){ sqliteFree(pPager->aInJournal); pPager->aInJournal = 0; - sqlite3OsLock(&pPager->fd, SHARED_LOCK); - pPager->state = SQLITE_READLOCK; + sqlite3OsUnlock(&pPager->fd, SHARED_LOCK); + pPager->state = PAGER_SHARED; return SQLITE_CANTOPEN; } sqlite3OsOpenDirectory(pPager->zDirectory, &pPager->jfd); pPager->journalOpen = 1; pPager->journalStarted = 0; @@ -2037,32 +2071,27 @@ ** ** A journal file is opened if this is not a temporary file. 
For temporary ** files, the opening of the journal file is deferred until there is an ** actual need to write to the journal. ** -** If the database is already write-locked, this routine is a no-op. +** If the database is already reserved for writing, this routine is a no-op. */ int sqlite3pager_begin(void *pData, int nMaster){ PgHdr *pPg = DATA_TO_PGHDR(pData); Pager *pPager = pPg->pPager; int rc = SQLITE_OK; assert( pPg->nRef>0 ); assert( nMaster>=0 ); - assert( pPager->state!=SQLITE_UNLOCK ); - if( pPager->state==SQLITE_READLOCK ){ + assert( pPager->state!=PAGER_UNLOCK ); + if( pPager->state==PAGER_SHARED ){ assert( pPager->aInJournal==0 ); if( pPager->memDb ){ - pPager->state = SQLITE_WRITELOCK; + pPager->state = PAGER_EXCLUSIVE; pPager->origDbSize = pPager->dbSize; }else{ int busy = 1; do { - /* If the library grabs an EXCLUSIVE lock here, as in the commented - ** out line, then it exhibits the old locking behaviour - a writer - ** excludes all readers, not just other writers. - */ - /* rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK); */ rc = sqlite3OsLock(&pPager->fd, RESERVED_LOCK); }while( rc==SQLITE_BUSY && pPager->pBusyHandler && pPager->pBusyHandler->xFunc && pPager->pBusyHandler->xFunc(pPager->pBusyHandler->pArg, "", busy++) @@ -2069,13 +2098,13 @@ ); if( rc!=SQLITE_OK ){ return rc; } pPager->nMaster = nMaster; - pPager->state = SQLITE_WRITELOCK; - pPager->dirtyFile = 0; - TRACE1("TRANSACTION\n"); + pPager->state = PAGER_RESERVED; + pPager->dirtyCache = 0; + TRACE3("TRANSACTION %d nMaster=%d\n", pPager->fd.h, nMaster); if( pPager->useJournal && !pPager->tempFile ){ rc = pager_open_journal(pPager); } } } @@ -2086,11 +2115,11 @@ ** Mark a data page as writeable. The page is written into the journal ** if it is not there already. This routine must be called before making ** changes to a page. ** ** The first time this routine is called, the pager creates a new -** journal and acquires a write lock on the database. 
If the write +** journal and acquires a RESERVED lock on the database. If the RESERVED ** lock could not be acquired, this routine returns SQLITE_BUSY. The ** calling routine must check for that return value and be careful not to ** change any page data until this routine returns SQLITE_OK. ** ** If the journal file could not be written because the disk is full, @@ -2116,11 +2145,11 @@ /* Mark the page as dirty. If the page has already been written ** to the journal then we can return right away. */ pPg->dirty = 1; if( pPg->inJournal && (pPg->inStmt || pPager->stmtInUse==0) ){ - pPager->dirtyFile = 1; + pPager->dirtyCache = 1; return SQLITE_OK; } /* If we get this far, it means that the page needs to be ** written to the transaction journal or the ckeckpoint journal @@ -2127,34 +2156,34 @@ ** or both. ** ** First check to see that the transaction journal exists and ** create it if it does not. */ - assert( pPager->state!=SQLITE_UNLOCK ); + assert( pPager->state!=PAGER_UNLOCK ); rc = sqlite3pager_begin(pData, 0); if( rc!=SQLITE_OK ){ return rc; } - assert( pPager->state==SQLITE_WRITELOCK ); + assert( pPager->state>=PAGER_RESERVED ); if( !pPager->journalOpen && pPager->useJournal ){ rc = pager_open_journal(pPager); if( rc!=SQLITE_OK ) return rc; } assert( pPager->journalOpen || !pPager->useJournal ); - pPager->dirtyFile = 1; + pPager->dirtyCache = 1; - /* The transaction journal now exists and we have a write lock on the - ** main database file. Write the current page to the transaction - ** journal if it is not there already. + /* The transaction journal now exists and we have a RESERVED or an + ** EXCLUSIVE lock on the main database file. Write the current page to + ** the transaction journal if it is not there already. 
*/ if( !pPg->inJournal && (pPager->useJournal || pPager->memDb) ){ if( (int)pPg->pgno <= pPager->origDbSize ){ int szPg; u32 saved; if( pPager->memDb ){ PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); - TRACE2("JOURNAL %d\n", pPg->pgno); + TRACE2("JOURNAL page %d\n", pPg->pgno); assert( pHist->pOrig==0 ); pHist->pOrig = sqliteMallocRaw( pPager->pageSize ); if( pHist->pOrig ){ memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize); } @@ -2169,11 +2198,11 @@ szPg = SQLITE_PAGE_SIZE+4; } store32bits(pPg->pgno, pPg, -4); CODEC(pPager, pData, pPg->pgno, 7); rc = sqlite3OsWrite(&pPager->jfd, &((char*)pData)[-4], szPg); - TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync); + TRACE3("JOURNAL page %d needSync=%d\n", pPg->pgno, pPg->needSync); CODEC(pPager, pData, pPg->pgno, 0); if( journal_format>=JOURNAL_FORMAT_3 ){ *(u32*)PGHDR_TO_EXTRA(pPg) = saved; } if( rc!=SQLITE_OK ){ @@ -2191,11 +2220,11 @@ page_add_to_stmt_list(pPg); } } }else{ pPg->needSync = !pPager->journalStarted && !pPager->noSync; - TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync); + TRACE3("APPEND page %d needSync=%d\n", pPg->pgno, pPg->needSync); } if( pPg->needSync ){ pPager->needSync = 1; } } @@ -2212,16 +2241,16 @@ assert( pHist->pStmt==0 ); pHist->pStmt = sqliteMallocRaw( pPager->pageSize ); if( pHist->pStmt ){ memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize); } - TRACE2("STMT-JOURNAL %d\n", pPg->pgno); + TRACE2("STMT-JOURNAL page %d\n", pPg->pgno); }else{ store32bits(pPg->pgno, pPg, -4); CODEC(pPager, pData, pPg->pgno, 7); rc = sqlite3OsWrite(&pPager->stfd, ((char*)pData)-4, SQLITE_PAGE_SIZE+4); - TRACE2("STMT-JOURNAL %d\n", pPg->pgno); + TRACE2("STMT-JOURNAL page %d\n", pPg->pgno); CODEC(pPager, pData, pPg->pgno, 0); if( rc!=SQLITE_OK ){ sqlite3pager_rollback(pPager); pPager->errMask |= PAGER_ERR_FULL; return rc; @@ -2308,11 +2337,11 @@ ** size. 
If you do not write this page and the size of the file ** on the disk ends up being too small, that can lead to database ** corruption during the next transaction. */ }else{ - TRACE2("DONT_WRITE %d\n", pgno); + TRACE3("DONT_WRITE page %d of %d\n", pgno, pPager->fd.h); pPg->dirty = 0; } } } @@ -2324,21 +2353,21 @@ */ void sqlite3pager_dont_rollback(void *pData){ PgHdr *pPg = DATA_TO_PGHDR(pData); Pager *pPager = pPg->pPager; - if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return; + if( pPager->state!=PAGER_EXCLUSIVE || pPager->journalOpen==0 ) return; if( pPg->alwaysRollback || pPager->alwaysRollback || pPager->memDb ) return; if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){ assert( pPager->aInJournal!=0 ); pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7); pPg->inJournal = 1; if( pPager->stmtInUse ){ pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); page_add_to_stmt_list(pPg); } - TRACE2("DONT_ROLLBACK %d\n", pPg->pgno); + TRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, pPager->fd.h); } if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){ assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize ); assert( pPager->aInStmt!=0 ); pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7); @@ -2377,14 +2406,14 @@ } if( pPager->errMask!=0 ){ rc = pager_errcode(pPager); return rc; } - if( pPager->state!=SQLITE_WRITELOCK ){ + if( pPager->state<PAGER_RESERVED ){ return SQLITE_ERROR; } - TRACE1("COMMIT\n"); + TRACE2("COMMIT %d\n", pPager->fd.h); if( pPager->memDb ){ pPg = pager_get_all_dirty_pages(pPager); while( pPg ){ clearHistory(PGHDR_TO_HIST(pPg, pPager)); pPg->dirty = 0; @@ -2392,23 +2421,23 @@ pPg->inStmt = 0; pPg->pPrevStmt = pPg->pNextStmt = 0; pPg = pPg->pDirty; } pPager->pStmt = 0; - pPager->state = SQLITE_READLOCK; + pPager->state = PAGER_SHARED; return SQLITE_OK; } -#if 0 - if( pPager->dirtyFile==0 ){ + if( pPager->dirtyCache==0 ){ /* Exit early (without doing the time-consuming sqlite3OsSync() calls) ** if there have been no changes to the database file.
*/ assert( pPager->needSync==0 ); rc = pager_unwritelock(pPager); pPager->dbSize = -1; return rc; } assert( pPager->journalOpen ); +#if 0 rc = syncJournal(pPager, 0); if( rc!=SQLITE_OK ){ goto commit_abort; } pPg = pager_get_all_dirty_pages(pPager); @@ -2418,12 +2447,13 @@ goto commit_abort; } } #endif rc = sqlite3pager_sync(pPager, 0); - if( rc!=SQLITE_OK ) goto commit_abort; - + if( rc!=SQLITE_OK ){ + goto commit_abort; + } rc = pager_unwritelock(pPager); pPager->dbSize = -1; return rc; /* Jump here if anything goes wrong during the commit process. @@ -2435,11 +2465,11 @@ } return rc; } /* -** Rollback all changes. The database falls back to read-only mode. +** Rollback all changes. The database falls back to PAGER_SHARED mode. ** All in-memory cache pages revert to their original data contents. ** The journal is deleted. ** ** This routine cannot fail unless some other process is not following ** the correct locking protocol (SQLITE_PROTOCOL) or unless some other @@ -2448,11 +2478,11 @@ ** codes are returned for all these occasions. Otherwise, ** SQLITE_OK is returned. 
*/ int sqlite3pager_rollback(Pager *pPager){ int rc; - TRACE1("ROLLBACK\n"); + TRACE2("ROLLBACK %d\n", pPager->fd.h); if( pPager->memDb ){ PgHdr *p; for(p=pPager->pAll; p; p=p->pNextAll){ PgHistory *pHist; if( !p->dirty ) continue; @@ -2471,30 +2501,36 @@ } pPager->pStmt = 0; pPager->dbSize = pPager->origDbSize; memoryTruncate(pPager); pPager->stmtInUse = 0; - pPager->state = SQLITE_READLOCK; + pPager->state = PAGER_SHARED; return SQLITE_OK; } - if( !pPager->dirtyFile || !pPager->journalOpen ){ + if( !pPager->dirtyCache || !pPager->journalOpen ){ rc = pager_unwritelock(pPager); pPager->dbSize = -1; return rc; } if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){ - if( pPager->state>=SQLITE_WRITELOCK ){ + if( pPager->state>=PAGER_EXCLUSIVE ){ pager_playback(pPager, 1); } return pager_errcode(pPager); } - if( pPager->state!=SQLITE_WRITELOCK ){ - return SQLITE_OK; + if( pPager->state==PAGER_RESERVED ){ + int rc2; + rc = pager_reload_cache(pPager); + rc2 = pager_unwritelock(pPager); + if( rc==SQLITE_OK ){ + rc = rc2; + } + }else{ + rc = pager_playback(pPager, 1); } - rc = pager_playback(pPager, 1); if( rc!=SQLITE_OK ){ rc = SQLITE_CORRUPT; pPager->errMask |= PAGER_ERR_CORRUPT; } pPager->dbSize = -1; @@ -2535,11 +2571,11 @@ */ int sqlite3pager_stmt_begin(Pager *pPager){ int rc; char zTemp[SQLITE_TEMPNAME_SIZE]; assert( !pPager->stmtInUse ); - TRACE1("STMT-BEGIN\n"); + TRACE2("STMT-BEGIN %d\n", pPager->fd.h); if( pPager->memDb ){ pPager->stmtInUse = 1; pPager->stmtSize = pPager->dbSize; return SQLITE_OK; } @@ -2584,11 +2620,11 @@ ** Commit a statement. */ int sqlite3pager_stmt_commit(Pager *pPager){ if( pPager->stmtInUse ){ PgHdr *pPg, *pNext; - TRACE1("STMT-COMMIT\n"); + TRACE2("STMT-COMMIT %d\n", pPager->fd.h); if( !pPager->memDb ){ sqlite3OsSeek(&pPager->stfd, 0); /* sqlite3OsTruncate(&pPager->stfd, 0); */ sqliteFree( pPager->aInStmt ); pPager->aInStmt = 0; @@ -2616,11 +2652,11 @@ ** Rollback a statement. 
*/ int sqlite3pager_stmt_rollback(Pager *pPager){ int rc; if( pPager->stmtInUse ){ - TRACE1("STMT-ROLLBACK\n"); + TRACE2("STMT-ROLLBACK %d\n", pPager->fd.h); if( pPager->memDb ){ PgHdr *pPg; for(pPg=pPager->pStmt; pPg; pPg=pPg->pNextStmt){ PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager); if( pHist->pStmt ){ @@ -2680,11 +2716,11 @@ int rc = SQLITE_OK; /* If this is an in-memory db, or no pages have been written to, this ** function is a no-op. */ - if( !pPager->memDb && pPager->dirtyFile ){ + if( !pPager->memDb && pPager->dirtyCache ){ PgHdr *pPg; assert( pPager->journalOpen ); /* Sync the journal file */ rc = syncJournal(pPager, zMaster); Index: src/pager.h ================================================================== --- src/pager.h +++ src/pager.h @@ -11,11 +11,11 @@ ************************************************************************* ** This header file defines the interface that the sqlite page cache ** subsystem. The page cache subsystem reads and writes a file a page ** at a time and provides a journal for rollback. ** -** @(#) $Id: pager.h,v 1.32 2004/06/09 14:17:21 drh Exp $ +** @(#) $Id: pager.h,v 1.33 2004/06/09 17:37:28 drh Exp $ */ /* ** The size of a page. ** @@ -71,10 +71,11 @@ */ int sqlite3pager_open(Pager **ppPager, const char *zFilename, int nPage, int nExtra, int useJournal, void *pBusyHandler); void sqlite3pager_set_destructor(Pager*, void(*)(void*,int)); +void sqlite3pager_set_reiniter(Pager*, void(*)(void*,int)); void sqlite3pager_set_cachesize(Pager*, int); int sqlite3pager_close(Pager *pPager); int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage); void *sqlite3pager_lookup(Pager *pPager, Pgno pgno); int sqlite3pager_ref(void*); Index: src/test1.c ================================================================== --- src/test1.c +++ src/test1.c @@ -11,11 +11,11 @@ ************************************************************************* ** Code for testing the printf() interface to SQLite. 
This code ** is not included in the SQLite library. It is used for automated ** testing of the SQLite library. ** -** $Id: test1.c,v 1.73 2004/06/09 14:01:51 drh Exp $ +** $Id: test1.c,v 1.74 2004/06/09 17:37:28 drh Exp $ */ #include "sqliteInt.h" #include "tcl.h" #include "os.h" #include <stdlib.h> @@ -1780,11 +1780,11 @@ } if( getFilePointer(interp, Tcl_GetString(objv[1]), &pFile) ){ return TCL_ERROR; } - rc = sqlite3OsUnlock(pFile); + rc = sqlite3OsUnlock(pFile, NO_LOCK); if( rc!=SQLITE_OK ){ Tcl_SetResult(interp, (char *)errorName(rc), TCL_STATIC); return TCL_ERROR; } return TCL_OK; Index: src/vdbeaux.c ================================================================== --- src/vdbeaux.c +++ src/vdbeaux.c @@ -932,11 +932,11 @@ /* The simple case - no more than one database file (not counting the TEMP ** database) has a transaction active. There is no need for the ** master-journal. */ - if( nTrans<=1 ){ + if( nTrans<=100 ){ /**** FIX ME ****/ for(i=0; rc==SQLITE_OK && i<db->nDb; i++){ Btree *pBt = db->aDb[i].pBt; if( pBt ){ rc = sqlite3BtreeSync(pBt, 0); } @@ -962,16 +962,14 @@ char const *zMainFile = sqlite3BtreeGetFilename(db->aDb[0].pBt); OsFile master; /* Select a master journal file name */ do { - int random; - if( zMaster ){ - sqliteFree(zMaster); - } + u32 random; + sqliteFree(zMaster); sqlite3Randomness(sizeof(random), &random); - zMaster = sqlite3_mprintf("%s%d", zMainFile, random); + zMaster = sqlite3MPrintf("%s-mj%08X", zMainFile, random); if( !zMaster ){ return SQLITE_NOMEM; } }while( sqlite3OsFileExists(zMaster) ); Index: test/attach2.test ================================================================== --- test/attach2.test +++ test/attach2.test @@ -10,13 +10,12 @@ #*********************************************************************** # This file implements regression tests for SQLite library. The # focus of this script is testing the ATTACH and DETACH commands # and related functionality.
# -# $Id: attach2.test,v 1.13 2004/06/09 14:01:58 drh Exp $ +# $Id: attach2.test,v 1.14 2004/06/09 17:37:29 drh Exp $ # -set sqlite_os_trace 0 set testdir [file dirname $argv0] source $testdir/tester.tcl @@ -145,10 +144,18 @@ db close for {set i 2} {$i<=15} {incr i} { catch {db$i close} } + +# A procedure to verify the status of locks on a database. +# +proc lock_status {testnum db expected_result} { + do_test attach2-$testnum [subst { + execsql {PRAGMA lock_status} $db + }] $expected_result +} set sqlite_os_trace 0 # Tests attach2-4.* test that read-locks work correctly with attached # databases. do_test attach2-4.1 { @@ -156,78 +163,116 @@ sqlite db2 test.db execsql {ATTACH 'test2.db' as file2} execsql {ATTACH 'test2.db' as file2} db2 } {} +lock_status 4.1.1 db {main unlocked temp unlocked file2 unlocked} +lock_status 4.1.2 db2 {main unlocked temp unlocked file2 unlocked} + do_test attach2-4.2 { # Handle 'db' read-locks test.db execsql {BEGIN} execsql {SELECT * FROM t1} # Lock status: # db - shared(main) # db2 - } {} + +lock_status 4.2.1 db {main shared temp shared file2 unlocked} +lock_status 4.2.2 db2 {main unlocked temp unlocked file2 unlocked} + do_test attach2-4.3 { # The read lock held by db does not prevent db2 from reading test.db execsql {SELECT * FROM t1} db2 } {} + +lock_status 4.3.1 db {main shared temp shared file2 unlocked} +lock_status 4.3.2 db2 {main unlocked temp unlocked file2 unlocked} + do_test attach2-4.4 { # db is holding a read lock on test.db, so we should not be able # to commit a write to test.db from db2 catchsql { INSERT INTO t1 VALUES(1, 2) } db2 } {1 {database is locked}} + +lock_status 4.4.1 db {main shared temp shared file2 unlocked} +lock_status 4.4.2 db2 {main unlocked temp unlocked file2 unlocked} + do_test attach2-4.5 { # Handle 'db2' reserves file2. 
execsql {BEGIN} db2 execsql {INSERT INTO file2.t1 VALUES(1, 2)} db2 # Lock status: # db - shared(main) # db2 - reserved(file2) } {} + +lock_status 4.5.1 db {main shared temp shared file2 unlocked} +lock_status 4.5.2 db2 {main unlocked temp reserved file2 reserved} + do_test attach2-4.6.1 { # Reads are allowed against a reserved database. catchsql { SELECT * FROM file2.t1; } # Lock status: # db - shared(main), shared(file2) # db2 - reserved(file2) } {0 {}} + +lock_status 4.6.1.1 db {main shared temp shared file2 shared} +lock_status 4.6.1.2 db2 {main unlocked temp reserved file2 reserved} + do_test attach2-4.6.2 { # Writes against a reserved database are not allowed. catchsql { UPDATE file2.t1 SET a=0; } } {1 {database is locked}} + +lock_status 4.6.2.1 db {main shared temp reserved file2 shared} +lock_status 4.6.2.2 db2 {main unlocked temp reserved file2 reserved} + do_test attach2-4.7 { # Ensure handle 'db' retains the lock on the main file after # failing to obtain a write-lock on file2. catchsql { INSERT INTO t1 VALUES(1, 2) } db2 -} {1 {database is locked}} +} {0 {}} + +lock_status 4.7.1 db {main shared temp reserved file2 shared} +lock_status 4.7.2 db2 {main reserved temp reserved file2 reserved} + do_test attach2-4.8 { - # Read lock the main file with db2. Now both db and db2 have a read lock - # on the main file, db2 has a write-lock on file2. + # We should still be able to read test.db from db2 execsql {SELECT * FROM t1} db2 - # Lock status: - # db - shared(main), shared(file2) - # db2 - shared(main), reserved(file2) -} {} +} {1 2} + +lock_status 4.8.1 db {main shared temp reserved file2 shared} +lock_status 4.8.2 db2 {main reserved temp reserved file2 reserved} + do_test attach2-4.9 { # Try to upgrade the handle 'db' lock. 
catchsql { INSERT INTO t1 VALUES(1, 2) } - list $r $msg } {1 {database is locked}} + +lock_status 4.9.1 db {main shared temp reserved file2 shared} +lock_status 4.9.2 db2 {main reserved temp reserved file2 reserved} + do_test attach2-4.10 { # Release the locks held by handle 'db2' execsql {COMMIT} db2 } {} + +lock_status 4.10.1 db {main shared temp reserved file2 shared} +lock_status 4.10.2 db2 {main unlocked temp unlocked file2 unlocked} + do_test attach2-4.11 { execsql {SELECT * FROM file2.t1} } {1 2} do_test attach2-4.12 { execsql {INSERT INTO t1 VALUES(1, 2)}