Index: src/os.c ================================================================== --- src/os.c +++ src/os.c @@ -99,28 +99,28 @@ return id->pMethods->xDeviceCharacteristics(id); } int sqlite3OsShmOpen(sqlite3_file *id){ return id->pMethods->xShmOpen(id); } -int sqlite3OsShmSize(sqlite3_file *id, int reqSize, int *pNewSize){ - return id->pMethods->xShmSize(id, reqSize, pNewSize); -} -int sqlite3OsShmGet(sqlite3_file *id,int reqSize,int *pSize,void volatile **pp){ - return id->pMethods->xShmGet(id, reqSize, pSize, pp); -} -int sqlite3OsShmRelease(sqlite3_file *id){ - return id->pMethods->xShmRelease(id); -} int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){ return id->pMethods->xShmLock(id, offset, n, flags); } void sqlite3OsShmBarrier(sqlite3_file *id){ id->pMethods->xShmBarrier(id); } int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){ return id->pMethods->xShmClose(id, deleteFlag); } +int sqlite3OsShmMap( + sqlite3_file *id, + int iPage, + int pgsz, + int isWrite, + void volatile **pp +){ + return id->pMethods->xShmMap(id, iPage, pgsz, isWrite, pp); +} /* ** The next group of routines are convenience wrappers around the ** VFS methods. 
*/ Index: src/os.h ================================================================== --- src/os.h +++ src/os.h @@ -246,16 +246,14 @@ int sqlite3OsFileControl(sqlite3_file*,int,void*); #define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0 int sqlite3OsSectorSize(sqlite3_file *id); int sqlite3OsDeviceCharacteristics(sqlite3_file *id); int sqlite3OsShmOpen(sqlite3_file *id); -int sqlite3OsShmSize(sqlite3_file *id, int, int*); -int sqlite3OsShmGet(sqlite3_file *id, int, int*, void volatile**); -int sqlite3OsShmRelease(sqlite3_file *id); int sqlite3OsShmLock(sqlite3_file *id, int, int, int); void sqlite3OsShmBarrier(sqlite3_file *id); int sqlite3OsShmClose(sqlite3_file *id, int); +int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **); /* ** Functions for accessing sqlite3_vfs methods */ int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *); Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -3126,25 +3126,19 @@ ** zFilename ** ** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and ** unixMutexHeld() is true when reading or writing any other field ** in this structure. -** -** To avoid deadlocks, mutex and mutexBuf are always released in the -** reverse order that they are acquired. mutexBuf is always acquired -** first and released last. This invariant is check by asserting -** sqlite3_mutex_notheld() on mutex whenever mutexBuf is acquired or -** released. */ struct unixShmNode { unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ sqlite3_mutex *mutex; /* Mutex to access this object */ - sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */ char *zFilename; /* Name of the mmapped file */ int h; /* Open file descriptor */ - int szMap; /* Size of the mapping into memory */ - char *pMMapBuf; /* Where currently mmapped(). 
NULL if unmapped */ + int szRegion; /* Size of shared-memory regions */ + int nRegion; /* Size of array apRegion */ + char **apRegion; /* Array of mapped shared-memory regions */ int nRef; /* Number of unixShm objects pointing to this */ unixShm *pFirst; /* All unixShm objects pointing to this */ #ifdef SQLITE_DEBUG u8 exclMask; /* Mask of exclusive locks held */ u8 sharedMask; /* Mask of shared locks held */ @@ -3167,11 +3161,10 @@ */ struct unixShm { unixShmNode *pShmNode; /* The underlying unixShmNode object */ unixShm *pNext; /* Next unixShm with the same unixShmNode */ u8 hasMutex; /* True if holding the unixShmNode mutex */ - u8 hasMutexBuf; /* True if holding pFile->mutexBuf */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ #ifdef SQLITE_DEBUG u8 id; /* Id of this connection within its unixShmNode */ #endif @@ -3264,14 +3257,17 @@ */ static void unixShmPurge(unixFile *pFd){ unixShmNode *p = pFd->pInode->pShmNode; assert( unixMutexHeld() ); if( p && p->nRef==0 ){ + int i; assert( p->pInode==pFd->pInode ); if( p->mutex ) sqlite3_mutex_free(p->mutex); - if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf); - if( p->pMMapBuf ) munmap(p->pMMapBuf, p->szMap); + for(i=0; i<p->nRegion; i++){ + munmap(p->apRegion[i], p->szRegion); + } + sqlite3_free(p->apRegion); if( p->h>=0 ) close(p->h); p->pInode->pShmNode = 0; sqlite3_free(p); } } @@ -3343,15 +3339,10 @@ pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); if( pShmNode->mutex==0 ){ rc = SQLITE_NOMEM; goto shm_open_err; } - pShmNode->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); - if( pShmNode->mutexBuf==0 ){ - rc = SQLITE_NOMEM; - goto shm_open_err; - } pShmNode->h = open(pShmNode->zFilename, O_RDWR|O_CREAT, 0664); if( pShmNode->h<0 ){ rc = SQLITE_CANTOPEN_BKPT; goto shm_open_err; @@ -3418,11 +3409,10 @@ sqlite3_mutex_enter(pShmNode->mutex); for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} *pp = p->pNext; /* Free the connection p */ - assert( 
p->hasMutexBuf==0 ); sqlite3_free(p); pDbFd->pShm = 0; sqlite3_mutex_leave(pShmNode->mutex); /* If pShmNode->nRef has reached 0, then close the underlying @@ -3436,152 +3426,10 @@ } unixLeaveMutex(); return SQLITE_OK; } - -/* -** Changes the size of the underlying storage for a shared-memory segment. -** -** The reqSize parameter is the new requested size of the shared memory. -** This implementation is free to increase the shared memory size to -** any amount greater than or equal to reqSize. If the shared memory is -** already as big or bigger as reqSize, this routine is a no-op. -** -** The reqSize parameter is the minimum size requested. The implementation -** is free to expand the storage to some larger amount if it chooses. -*/ -static int unixShmSize( - sqlite3_file *fd, /* The open database file holding SHM */ - int reqSize, /* Requested size. -1 for query only */ - int *pNewSize /* Write new size here */ -){ - unixFile *pDbFd = (unixFile*)fd; - unixShm *p = pDbFd->pShm; - unixShmNode *pShmNode = p->pShmNode; - int rc = SQLITE_OK; - struct stat sStat; - - assert( pShmNode==pDbFd->pInode->pShmNode ); - assert( pShmNode->pInode==pDbFd->pInode ); - - while( 1 ){ - if( fstat(pShmNode->h, &sStat)==0 ){ - *pNewSize = (int)sStat.st_size; - if( reqSize<=(int)sStat.st_size ) break; - }else{ - *pNewSize = 0; - rc = SQLITE_IOERR_SHMSIZE; - break; - } - rc = ftruncate(pShmNode->h, reqSize); - reqSize = -1; - } - return rc; -} - -/* -** Release the lock held on the shared memory segment to that other -** threads are free to resize it if necessary. -** -** If the lock is not currently held, this routine is a harmless no-op. -** -** If the shared-memory object is in lock state RECOVER, then we do not -** really want to release the lock, so in that case too, this routine -** is a no-op. 
-*/ -static int unixShmRelease(sqlite3_file *fd){ - unixFile *pDbFd = (unixFile*)fd; - unixShm *p = pDbFd->pShm; - - if( p->hasMutexBuf ){ - assert( sqlite3_mutex_notheld(p->pShmNode->mutex) ); - sqlite3_mutex_leave(p->pShmNode->mutexBuf); - p->hasMutexBuf = 0; - } - return SQLITE_OK; -} - -/* -** Map the shared storage into memory. -** -** If reqMapSize is positive, then an attempt is made to make the -** mapping at least reqMapSize bytes in size. However, the mapping -** will never be larger than the size of the underlying shared memory -** as set by prior calls to xShmSize(). -** -** *ppBuf is made to point to the memory which is a mapping of the -** underlying storage. A mutex is acquired to prevent other threads -** from running while *ppBuf is in use in order to prevent other threads -** remapping *ppBuf out from under this thread. The unixShmRelease() -** call will release the mutex. However, if the lock state is CHECKPOINT, -** the mutex is not acquired because CHECKPOINT will never remap the -** buffer. RECOVER might remap, though, so CHECKPOINT will acquire -** the mutex if and when it promotes to RECOVER. -** -** RECOVER needs to be atomic. The same mutex that prevents *ppBuf from -** being remapped also prevents more than one thread from being in -** RECOVER at a time. But, RECOVER sometimes wants to remap itself. -** To prevent RECOVER from losing its lock while remapping, the -** mutex is not released by unixShmRelease() when in RECOVER. -** -** *pNewMapSize is set to the size of the mapping. Usually *pNewMapSize -** will be reqMapSize or larger, though it could be smaller if the -** underlying shared memory has never been enlarged to reqMapSize bytes -** by prior calls to xShmSize(). -** -** *ppBuf might be NULL and zero if no space has -** yet been allocated to the underlying storage. -*/ -static int unixShmGet( - sqlite3_file *fd, /* Database file holding shared memory */ - int reqMapSize, /* Requested size of mapping. 
-1 means don't care */ - int *pNewMapSize, /* Write new size of mapping here */ - void volatile **ppBuf /* Write mapping buffer origin here */ -){ - unixFile *pDbFd = (unixFile*)fd; - unixShm *p = pDbFd->pShm; - unixShmNode *pShmNode = p->pShmNode; - int rc = SQLITE_OK; - - assert( pShmNode==pDbFd->pInode->pShmNode ); - assert( pShmNode->pInode==pDbFd->pInode ); - - if( p->hasMutexBuf==0 ){ - assert( sqlite3_mutex_notheld(pShmNode->mutex) ); - sqlite3_mutex_enter(pShmNode->mutexBuf); - p->hasMutexBuf = 1; - } - sqlite3_mutex_enter(pShmNode->mutex); - if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){ - int actualSize; - if( unixShmSize(fd, -1, &actualSize)!=SQLITE_OK ){ - actualSize = 0; - } - reqMapSize = actualSize; - if( pShmNode->pMMapBuf || reqMapSize<=0 ){ - munmap(pShmNode->pMMapBuf, pShmNode->szMap); - } - if( reqMapSize>0 ){ - pShmNode->pMMapBuf = mmap(0, reqMapSize, PROT_READ|PROT_WRITE, MAP_SHARED, - pShmNode->h, 0); - pShmNode->szMap = pShmNode->pMMapBuf ? reqMapSize : 0; - }else{ - pShmNode->pMMapBuf = 0; - pShmNode->szMap = 0; - } - } - *pNewMapSize = pShmNode->szMap; - *ppBuf = pShmNode->pMMapBuf; - sqlite3_mutex_leave(pShmNode->mutex); - if( *ppBuf==0 ){ - /* Do not hold the mutex if a NULL pointer is being returned. */ - unixShmRelease(fd); - } - return rc; -} - /* ** Change the lock state for a shared-memory segment. ** ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little @@ -3698,25 +3546,118 @@ ** ** All loads and stores begun before the barrier must complete before ** any load or store begun after the barrier. */ static void unixShmBarrier( - sqlite3_file *fd /* Database file holding the shared memory */ + sqlite3_file *fd /* Database file holding the shared memory */ ){ unixEnterMutex(); unixLeaveMutex(); } +/* +** This function is called to obtain a pointer to region iRegion of the +** shared-memory associated with the database file fd. Shared-memory regions +** are numbered starting from zero. 
Each shared-memory region is szRegion +** bytes in size. +** +** If an error occurs, an error code is returned and *pp is set to NULL. +** +** Otherwise, if the isWrite parameter is 0 and the requested shared-memory +** region has not been allocated (by any client, including one running in a +** separate process), then *pp is set to NULL and SQLITE_OK returned. If +** isWrite is non-zero and the requested shared-memory region has not yet +** been allocated, it is allocated by this function. +** +** If the shared-memory region has already been allocated or is allocated by +** this call as described above, then it is mapped into this processes +** address space (if it is not already), *pp is set to point to the mapped +** memory and SQLITE_OK returned. +*/ +static int unixShmMap( + sqlite3_file *fd, /* Handle open on database file */ + int iRegion, /* Region to retrieve */ + int szRegion, /* Size of regions */ + int isWrite, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ +){ + unixFile *pDbFd = (unixFile*)fd; + unixShm *p = pDbFd->pShm; + unixShmNode *pShmNode = p->pShmNode; + int rc = SQLITE_OK; + + sqlite3_mutex_enter(pShmNode->mutex); + assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); + + if( pShmNode->nRegion<=iRegion ){ + char **apNew; /* New apRegion[] array */ + int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ + struct stat sStat; /* Used by fstat() */ + + pShmNode->szRegion = szRegion; + + /* The requested region is not mapped into this processes address space. + ** Check to see if it has been allocated (i.e. if the wal-index file is + ** large enough to contain the requested region). + */ + if( fstat(pShmNode->h, &sStat) ){ + rc = SQLITE_IOERR_SHMSIZE; + goto shmpage_out; + } + + if( sStat.st_size<nByte ){ + if( !isWrite ) goto shmpage_out; + if( ftruncate(pShmNode->h, nByte) ){ + rc = SQLITE_IOERR_SHMSIZE; + goto shmpage_out; + } + } + + /* Map the requested memory region into this processes address space. 
*/ + apNew = (char **)sqlite3_realloc( + pShmNode->apRegion, (iRegion+1)*sizeof(char *) + ); + if( !apNew ){ + rc = SQLITE_IOERR_NOMEM; + goto shmpage_out; + } + pShmNode->apRegion = apNew; + while(pShmNode->nRegion<=iRegion){ + void *pMem = mmap(0, szRegion, PROT_READ|PROT_WRITE, + MAP_SHARED, pShmNode->h, iRegion*szRegion + ); + if( pMem==MAP_FAILED ){ + rc = SQLITE_IOERR; + goto shmpage_out; + } + pShmNode->apRegion[pShmNode->nRegion] = pMem; + pShmNode->nRegion++; + } + } + +shmpage_out: + if( pShmNode->nRegion>iRegion ){ + *pp = pShmNode->apRegion[iRegion]; + }else{ + *pp = 0; + } + sqlite3_mutex_leave(pShmNode->mutex); + return rc; +} #else # define unixShmOpen 0 -# define unixShmSize 0 -# define unixShmGet 0 -# define unixShmRelease 0 # define unixShmLock 0 # define unixShmBarrier 0 # define unixShmClose 0 +# define unixShmMap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ /* ** Here ends the implementation of all sqlite3_file methods. ** @@ -3771,16 +3712,14 @@ CKLOCK, /* xCheckReservedLock */ \ unixFileControl, /* xFileControl */ \ unixSectorSize, /* xSectorSize */ \ unixDeviceCharacteristics, /* xDeviceCapabilities */ \ unixShmOpen, /* xShmOpen */ \ - unixShmSize, /* xShmSize */ \ - unixShmGet, /* xShmGet */ \ - unixShmRelease, /* xShmRelease */ \ unixShmLock, /* xShmLock */ \ unixShmBarrier, /* xShmBarrier */ \ - unixShmClose /* xShmClose */ \ + unixShmClose, /* xShmClose */ \ + unixShmMap /* xShmMap */ \ }; \ static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ return &METHOD; \ } \ Index: src/os_win.c ================================================================== --- src/os_win.c +++ src/os_win.c @@ -1214,17 +1214,21 @@ ** sqlite3_mutex_notheld() on mutex whenever mutexBuf is acquired or ** released. 
*/ struct winShmNode { sqlite3_mutex *mutex; /* Mutex to access this object */ - sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */ char *zFilename; /* Name of the file */ winFile hFile; /* File handle from winOpen */ - HANDLE hMap; /* File handle from CreateFileMapping */ + + int szRegion; /* Size of shared-memory regions */ + int nRegion; /* Size of array apRegion */ + struct ShmRegion { + HANDLE hMap; /* File handle from CreateFileMapping */ + void *pMap; + } *aRegion; DWORD lastErrno; /* The Windows errno from the last I/O error */ - int szMap; /* Size of the mapping of file into memory */ - char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */ + int nRef; /* Number of winShm objects pointing to this */ winShm *pFirst; /* All winShm objects pointing to this */ winShmNode *pNext; /* Next in list of all winShmNode objects */ #ifdef SQLITE_DEBUG u8 nextShmId; /* Next available winShm.id value */ @@ -1323,23 +1327,22 @@ winShmNode *p; assert( winShmMutexHeld() ); pp = &winShmNodeList; while( (p = *pp)!=0 ){ if( p->nRef==0 ){ + int i; if( p->mutex ) sqlite3_mutex_free(p->mutex); - if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf); - if( p->pMMapBuf ){ - UnmapViewOfFile(p->pMMapBuf); - } - if( INVALID_HANDLE_VALUE != p->hMap ){ - CloseHandle(p->hMap); + for(i=0; i<p->nRegion; i++){ + UnmapViewOfFile(p->aRegion[i].pMap); + CloseHandle(p->aRegion[i].hMap); } if( p->hFile.h != INVALID_HANDLE_VALUE ) { winClose((sqlite3_file *)&p->hFile); } if( deleteFlag ) winDelete(pVfs, p->zFilename, 0); *pp = p->pNext; + sqlite3_free(p->aRegion); sqlite3_free(p); }else{ pp = &p->pNext; } } @@ -1402,24 +1405,17 @@ if( pShmNode ){ sqlite3_free(pNew); }else{ pShmNode = pNew; pNew = 0; - pShmNode->pMMapBuf = NULL; - pShmNode->hMap = INVALID_HANDLE_VALUE; ((winFile*)(&pShmNode->hFile))->h = INVALID_HANDLE_VALUE; pShmNode->pNext = winShmNodeList; winShmNodeList = pShmNode; pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); if( pShmNode->mutex==0 ){ rc = SQLITE_NOMEM; - 
goto shm_open_err; - } - pShmNode->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); - if( pShmNode->mutexBuf==0 ){ - rc = SQLITE_NOMEM; goto shm_open_err; } rc = winOpen(pDbFd->pVfs, pShmNode->zFilename, /* Name of the file (UTF-8) */ (sqlite3_file*)&pShmNode->hFile, /* File handle here */ @@ -1505,177 +1501,119 @@ return SQLITE_OK; } /* -** Increase the size of the underlying storage for a shared-memory segment. -** -** The reqSize parameter is the new requested minimum size of the underlying -** shared memory. This routine may choose to make the shared memory larger -** than this value (for example to round the shared memory size up to an -** operating-system dependent page size.) -** -** This routine will only grow the size of shared memory. A request for -** a smaller size is a no-op. -*/ -static int winShmSize( - sqlite3_file *fd, /* Database holding the shared memory */ - int reqSize, /* Requested size. -1 for query only */ - int *pNewSize /* Write new size here */ -){ - winFile *pDbFd = (winFile*)fd; - winShm *p = pDbFd->pShm; - winShmNode *pShmNode = p->pShmNode; - int rc = SQLITE_OK; - - *pNewSize = 0; - if( reqSize>=0 ){ - sqlite3_int64 sz; - rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz); - if( SQLITE_OK==rc && reqSize>sz ){ - rc = winTruncate((sqlite3_file *)&pShmNode->hFile, reqSize); - } - } - if( SQLITE_OK==rc ){ - sqlite3_int64 sz; - rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz); - if( SQLITE_OK==rc ){ - *pNewSize = (int)sz; - }else{ - rc = SQLITE_IOERR; - } - } - return rc; -} - - -/* -** Map the shared storage into memory. The minimum size of the -** mapping should be reqMapSize if reqMapSize is positive. If -** reqMapSize is zero or negative, the implementation can choose -** whatever mapping size is convenient. -** -** *ppBuf is made to point to the memory which is a mapping of the -** underlying storage. 
A mutex is acquired to prevent other threads -** from running while *ppBuf is in use in order to prevent other threads -** remapping *ppBuf out from under this thread. The winShmRelease() -** call will release the mutex. However, if the lock state is CHECKPOINT, -** the mutex is not acquired because CHECKPOINT will never remap the -** buffer. RECOVER might remap, though, so CHECKPOINT will acquire -** the mutex if and when it promotes to RECOVER. -** -** RECOVER needs to be atomic. The same mutex that prevents *ppBuf from -** being remapped also prevents more than one thread from being in -** RECOVER at a time. But, RECOVER sometimes wants to remap itself. -** To prevent RECOVER from losing its lock while remapping, the -** mutex is not released by winShmRelease() when in RECOVER. -** -** *pNewMapSize is set to the size of the mapping. -** -** *ppBuf and *pNewMapSize might be NULL and zero if no space has -** yet been allocated to the underlying storage. -*/ -static int winShmGet( - sqlite3_file *fd, /* The database file holding the shared memory */ - int reqMapSize, /* Requested size of mapping. -1 means don't care */ - int *pNewMapSize, /* Write new size of mapping here */ - void volatile **ppBuf /* Write mapping buffer origin here */ +** This function is called to obtain a pointer to region iRegion of the +** shared-memory associated with the database file fd. Shared-memory regions +** are numbered starting from zero. Each shared-memory region is szRegion +** bytes in size. +** +** If an error occurs, an error code is returned and *pp is set to NULL. +** +** Otherwise, if the isWrite parameter is 0 and the requested shared-memory +** region has not been allocated (by any client, including one running in a +** separate process), then *pp is set to NULL and SQLITE_OK returned. If +** isWrite is non-zero and the requested shared-memory region has not yet +** been allocated, it is allocated by this function. 
+** +** If the shared-memory region has already been allocated or is allocated by +** this call as described above, then it is mapped into this processes +** address space (if it is not already), *pp is set to point to the mapped +** memory and SQLITE_OK returned. +*/ +static int winShmMap( + sqlite3_file *fd, /* Handle open on database file */ + int iRegion, /* Region to retrieve */ + int szRegion, /* Size of regions */ + int isWrite, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ ){ winFile *pDbFd = (winFile*)fd; winShm *p = pDbFd->pShm; winShmNode *pShmNode = p->pShmNode; int rc = SQLITE_OK; - if( p->hasMutexBuf==0 ){ - assert( sqlite3_mutex_notheld(pShmNode->mutex) ); - sqlite3_mutex_enter(pShmNode->mutexBuf); - p->hasMutexBuf = 1; - } - sqlite3_mutex_enter(pShmNode->mutex); - if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){ - int actualSize; - if( winShmSize(fd, -1, &actualSize)==SQLITE_OK - && reqMapSize<actualSize - ){ - reqMapSize = actualSize; - } - if( pShmNode->pMMapBuf ){ - if( !UnmapViewOfFile(pShmNode->pMMapBuf) ){ - pShmNode->lastErrno = GetLastError(); - rc = SQLITE_IOERR; - } - CloseHandle(pShmNode->hMap); - pShmNode->hMap = INVALID_HANDLE_VALUE; - } - if( SQLITE_OK == rc ){ - pShmNode->pMMapBuf = 0; - if( reqMapSize == 0 ){ - /* can't create 0 byte file mapping in Windows */ - pShmNode->szMap = 0; - }else{ - /* create the file mapping object */ - if( INVALID_HANDLE_VALUE == pShmNode->hMap ){ - /* TBD provide an object name to each file - ** mapping so it can be re-used across processes. 
- */ - pShmNode->hMap = CreateFileMapping(pShmNode->hFile.h, - NULL, - PAGE_READWRITE, - 0, - reqMapSize, - NULL); - } - if( NULL==pShmNode->hMap ){ - pShmNode->lastErrno = GetLastError(); - rc = SQLITE_IOERR; - pShmNode->szMap = 0; - pShmNode->hMap = INVALID_HANDLE_VALUE; - }else{ - pShmNode->pMMapBuf = MapViewOfFile(pShmNode->hMap, - FILE_MAP_WRITE | FILE_MAP_READ, - 0, - 0, - reqMapSize); - if( !pShmNode->pMMapBuf ){ - pShmNode->lastErrno = GetLastError(); - rc = SQLITE_IOERR; - pShmNode->szMap = 0; - }else{ - pShmNode->szMap = reqMapSize; - } - } - } - } - } - *pNewMapSize = pShmNode->szMap; - *ppBuf = pShmNode->pMMapBuf; + sqlite3_mutex_enter(pShmNode->mutex); + assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); + + if( pShmNode->nRegion<=iRegion ){ + struct ShmRegion *apNew; /* New aRegion[] array */ + int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ + sqlite3_int64 sz; /* Current size of wal-index file */ + + pShmNode->szRegion = szRegion; + + /* The requested region is not mapped into this processes address space. + ** Check to see if it has been allocated (i.e. if the wal-index file is + ** large enough to contain the requested region). + */ + rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz); + if( rc!=SQLITE_OK ){ + goto shmpage_out; + } + + if( sz<nByte ){ + if( !isWrite ) goto shmpage_out; + rc = winTruncate((sqlite3_file *)&pShmNode->hFile, nByte); + if( rc!=SQLITE_OK ){ + goto shmpage_out; + } + } + + /* Map the requested memory region into this processes address space. 
*/ + apNew = (struct ShmRegion *)sqlite3_realloc( + pShmNode->aRegion, (iRegion+1)*sizeof(apNew[0]) + ); + if( !apNew ){ + rc = SQLITE_IOERR_NOMEM; + goto shmpage_out; + } + pShmNode->aRegion = apNew; + + while( pShmNode->nRegion<=iRegion ){ + HANDLE hMap; /* file-mapping handle */ + void *pMap = 0; /* Mapped memory region */ + + hMap = CreateFileMapping(pShmNode->hFile.h, + NULL, PAGE_READWRITE, 0, nByte, NULL + ); + if( hMap ){ + pMap = MapViewOfFile(hMap, FILE_MAP_WRITE | FILE_MAP_READ, + 0, 0, nByte + ); + } + if( !pMap ){ + pShmNode->lastErrno = GetLastError(); + rc = SQLITE_IOERR; + if( hMap ) CloseHandle(hMap); + goto shmpage_out; + } + + pShmNode->aRegion[pShmNode->nRegion].pMap = pMap; + pShmNode->aRegion[pShmNode->nRegion].hMap = hMap; + pShmNode->nRegion++; + } + } + +shmpage_out: + if( pShmNode->nRegion>iRegion ){ + char *p = (char *)pShmNode->aRegion[iRegion].pMap; + *pp = (void *)&p[iRegion*szRegion]; + }else{ + *pp = 0; + } sqlite3_mutex_leave(pShmNode->mutex); return rc; } -/* -** Release the lock held on the shared memory segment so that other -** threads are free to resize it if necessary. -** -** If the lock is not currently held, this routine is a harmless no-op. -** -** If the shared-memory object is in lock state RECOVER, then we do not -** really want to release the lock, so in that case too, this routine -** is a no-op. -*/ -static int winShmRelease(sqlite3_file *fd){ - winFile *pDbFd = (winFile*)fd; - winShm *p = pDbFd->pShm; - if( p->hasMutexBuf ){ - winShmNode *pShmNode = p->pShmNode; - assert( sqlite3_mutex_notheld(pShmNode->mutex) ); - sqlite3_mutex_leave(pShmNode->mutexBuf); - p->hasMutexBuf = 0; - } - return SQLITE_OK; -} - /* ** Change the lock state for a shared-memory segment. 
*/ static int winShmLock( sqlite3_file *fd, /* Database file holding the shared memory */ @@ -1754,16 +1692,14 @@ winCheckReservedLock, winFileControl, winSectorSize, winDeviceCharacteristics, winShmOpen, /* xShmOpen */ - winShmSize, /* xShmSize */ - winShmGet, /* xShmGet */ - winShmRelease, /* xShmRelease */ winShmLock, /* xShmLock */ winShmBarrier, /* xShmBarrier */ - winShmClose /* xShmClose */ + winShmClose, /* xShmClose */ + winShmMap /* xShmMap */ }; /*************************************************************************** ** Here ends the I/O methods that form the sqlite3_io_methods object. ** Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -658,16 +658,14 @@ int (*xFileControl)(sqlite3_file*, int op, void *pArg); int (*xSectorSize)(sqlite3_file*); int (*xDeviceCharacteristics)(sqlite3_file*); /* Methods above are valid for version 1 */ int (*xShmOpen)(sqlite3_file*); - int (*xShmSize)(sqlite3_file*, int reqSize, int *pNewSize); - int (*xShmGet)(sqlite3_file*, int reqSize, int *pSize, void volatile**); - int (*xShmRelease)(sqlite3_file*); int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); void (*xShmBarrier)(sqlite3_file*); int (*xShmClose)(sqlite3_file*, int deleteFlag); + int (*xShmMap)(sqlite3_file*, int iPage, int pgsz, int, void volatile**); /* Methods above are valid for version 2 */ /* Additional methods may be added in future releases */ }; /* Index: src/test6.c ================================================================== --- src/test6.c +++ src/test6.c @@ -524,34 +524,28 @@ ** Pass-throughs for WAL support. 
*/ static int cfShmOpen(sqlite3_file *pFile){ return sqlite3OsShmOpen(((CrashFile*)pFile)->pRealFile); } -static int cfShmSize(sqlite3_file *pFile, int reqSize, int *pNew){ - return sqlite3OsShmSize(((CrashFile*)pFile)->pRealFile, reqSize, pNew); -} -static int cfShmGet( - sqlite3_file *pFile, - int reqSize, - int *pSize, - void volatile **pp -){ - return sqlite3OsShmGet(((CrashFile*)pFile)->pRealFile, reqSize, pSize, pp); -} -static int cfShmRelease(sqlite3_file *pFile){ - return sqlite3OsShmRelease(((CrashFile*)pFile)->pRealFile); -} static int cfShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ return sqlite3OsShmLock(((CrashFile*)pFile)->pRealFile, ofst, n, flags); } static void cfShmBarrier(sqlite3_file *pFile){ sqlite3OsShmBarrier(((CrashFile*)pFile)->pRealFile); } static int cfShmClose(sqlite3_file *pFile, int delFlag){ return sqlite3OsShmClose(((CrashFile*)pFile)->pRealFile, delFlag); } - +static int cfShmMap( + sqlite3_file *pFile, /* Handle open on database file */ + int iRegion, /* Region to retrieve */ + int sz, /* Size of regions */ + int w, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ +){ + return sqlite3OsShmMap(((CrashFile*)pFile)->pRealFile, iRegion, sz, w, pp); +} static const sqlite3_io_methods CrashFileVtab = { 2, /* iVersion */ cfClose, /* xClose */ cfRead, /* xRead */ @@ -564,16 +558,14 @@ cfCheckReservedLock, /* xCheckReservedLock */ cfFileControl, /* xFileControl */ cfSectorSize, /* xSectorSize */ cfDeviceCharacteristics, /* xDeviceCharacteristics */ cfShmOpen, /* xShmOpen */ - cfShmSize, /* xShmSize */ - cfShmGet, /* xShmGet */ - cfShmRelease, /* xShmRelease */ cfShmLock, /* xShmLock */ cfShmBarrier, /* xShmBarrier */ - cfShmClose /* xShmClose */ + cfShmClose, /* xShmClose */ + cfShmMap /* xShmMap */ }; /* ** Application data for the crash VFS */ Index: src/test_devsym.c ================================================================== --- src/test_devsym.c +++ src/test_devsym.c @@ -49,16 
+49,14 @@ static int devsymCheckReservedLock(sqlite3_file*, int *); static int devsymFileControl(sqlite3_file*, int op, void *pArg); static int devsymSectorSize(sqlite3_file*); static int devsymDeviceCharacteristics(sqlite3_file*); static int devsymShmOpen(sqlite3_file*); -static int devsymShmSize(sqlite3_file*,int,int*); -static int devsymShmGet(sqlite3_file*,int,int*,volatile void**); -static int devsymShmRelease(sqlite3_file*); static int devsymShmLock(sqlite3_file*,int,int,int); static void devsymShmBarrier(sqlite3_file*); static int devsymShmClose(sqlite3_file*,int); +static int devsymShmMap(sqlite3_file*,int,int,int, void volatile **); /* ** Method declarations for devsym_vfs. */ static int devsymOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *); @@ -118,16 +116,14 @@ devsymCheckReservedLock, /* xCheckReservedLock */ devsymFileControl, /* xFileControl */ devsymSectorSize, /* xSectorSize */ devsymDeviceCharacteristics, /* xDeviceCharacteristics */ devsymShmOpen, /* xShmOpen */ - devsymShmSize, /* xShmSize */ - devsymShmGet, /* xShmGet */ - devsymShmRelease, /* xShmRelease */ devsymShmLock, /* xShmLock */ devsymShmBarrier, /* xShmBarrier */ - devsymShmClose /* xShmClose */ + devsymShmClose, /* xShmClose */ + devsymShmMap /* xShmMap */ }; struct DevsymGlobal { sqlite3_vfs *pVfs; int iDeviceChar; @@ -244,27 +240,10 @@ */ static int devsymShmOpen(sqlite3_file *pFile){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmOpen(p->pReal); } -static int devsymShmSize(sqlite3_file *pFile, int reqSize, int *pSize){ - devsym_file *p = (devsym_file *)pFile; - return sqlite3OsShmSize(p->pReal, reqSize, pSize); -} -static int devsymShmGet( - sqlite3_file *pFile, - int reqSz, - int *pSize, - void volatile **pp -){ - devsym_file *p = (devsym_file *)pFile; - return sqlite3OsShmGet(p->pReal, reqSz, pSize, pp); -} -static int devsymShmRelease(sqlite3_file *pFile){ - devsym_file *p = (devsym_file *)pFile; - return sqlite3OsShmRelease(p->pReal); -} static int 
devsymShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmLock(p->pReal, ofst, n, flags); } static void devsymShmBarrier(sqlite3_file *pFile){ @@ -273,10 +252,20 @@ } static int devsymShmClose(sqlite3_file *pFile, int delFlag){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmClose(p->pReal, delFlag); } +static int devsymShmMap( + sqlite3_file *pFile, + int iRegion, + int szRegion, + int isWrite, + void volatile **pp +){ + devsym_file *p = (devsym_file *)pFile; + return sqlite3OsShmMap(p->pReal, iRegion, szRegion, isWrite, pp); +} /* ** Open an devsym file handle. Index: src/test_osinst.c ================================================================== --- src/test_osinst.c +++ src/test_osinst.c @@ -98,15 +98,13 @@ #define OS_TRUNCATE 18 #define OS_UNLOCK 19 #define OS_WRITE 20 #define OS_SHMOPEN 21 #define OS_SHMCLOSE 22 -#define OS_SHMGET 23 -#define OS_SHMRELEASE 24 +#define OS_SHMMAP 23 #define OS_SHMLOCK 25 #define OS_SHMBARRIER 26 -#define OS_SHMSIZE 27 #define OS_ANNOTATE 28 #define OS_NUMEVENTS 29 #define VFSLOG_BUFFERSIZE 8192 @@ -150,16 +148,14 @@ static int vfslogFileControl(sqlite3_file*, int op, void *pArg); static int vfslogSectorSize(sqlite3_file*); static int vfslogDeviceCharacteristics(sqlite3_file*); static int vfslogShmOpen(sqlite3_file *pFile); -static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize); -static int vfslogShmGet(sqlite3_file *pFile, int,int*,volatile void **); -static int vfslogShmRelease(sqlite3_file *pFile); static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags); static void vfslogShmBarrier(sqlite3_file*); static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag); +static int vfslogShmMap(sqlite3_file *pFile,int,int,int,volatile void **); /* ** Method declarations for vfslog_vfs. 
*/ static int vfslogOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *); @@ -214,16 +210,14 @@ vfslogCheckReservedLock, /* xCheckReservedLock */ vfslogFileControl, /* xFileControl */ vfslogSectorSize, /* xSectorSize */ vfslogDeviceCharacteristics, /* xDeviceCharacteristics */ vfslogShmOpen, /* xShmOpen */ - vfslogShmSize, /* xShmSize */ - vfslogShmGet, /* xShmGet */ - vfslogShmRelease, /* xShmRelease */ vfslogShmLock, /* xShmLock */ vfslogShmBarrier, /* xShmBarrier */ - vfslogShmClose /* xShmClose */ + vfslogShmClose, /* xShmClose */ + vfslogShmMap /* xShmMap */ }; #if defined(SQLITE_OS_UNIX) && !defined(NO_GETTOD) #include static sqlite3_uint64 vfslog_time(){ @@ -439,45 +433,10 @@ rc = p->pReal->pMethods->xShmOpen(p->pReal); t = vfslog_time() - t; vfslog_call(p->pVfslog, OS_SHMOPEN, p->iFileId, t, rc, 0, 0); return rc; } -static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize){ - int rc; - sqlite3_uint64 t; - VfslogFile *p = (VfslogFile *)pFile; - t = vfslog_time(); - rc = p->pReal->pMethods->xShmSize(p->pReal, reqSize, pNewSize); - t = vfslog_time() - t; - vfslog_call(p->pVfslog, OS_SHMSIZE, p->iFileId, t, rc, 0, 0); - return rc; -} -static int vfslogShmGet( - sqlite3_file *pFile, - int req, - int *pSize, - volatile void **pp -){ - int rc; - sqlite3_uint64 t; - VfslogFile *p = (VfslogFile *)pFile; - t = vfslog_time(); - rc = p->pReal->pMethods->xShmGet(p->pReal, req, pSize, pp); - t = vfslog_time() - t; - vfslog_call(p->pVfslog, OS_SHMGET, p->iFileId, t, rc, 0, 0); - return rc; -} -static int vfslogShmRelease(sqlite3_file *pFile){ - int rc; - sqlite3_uint64 t; - VfslogFile *p = (VfslogFile *)pFile; - t = vfslog_time(); - rc = p->pReal->pMethods->xShmRelease(p->pReal); - t = vfslog_time() - t; - vfslog_call(p->pVfslog, OS_SHMRELEASE, p->iFileId, t, rc, 0, 0); - return rc; -} static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ int rc; sqlite3_uint64 t; VfslogFile *p = (VfslogFile *)pFile; t = vfslog_time(); @@ 
-501,10 +460,26 @@ t = vfslog_time(); rc = p->pReal->pMethods->xShmClose(p->pReal, deleteFlag); t = vfslog_time() - t; vfslog_call(p->pVfslog, OS_SHMCLOSE, p->iFileId, t, rc, 0, 0); return rc; +} +static int vfslogShmMap( + sqlite3_file *pFile, + int iRegion, + int szRegion, + int isWrite, + volatile void **pp +){ + int rc; + sqlite3_uint64 t; + VfslogFile *p = (VfslogFile *)pFile; + t = vfslog_time(); + rc = p->pReal->pMethods->xShmMap(p->pReal, iRegion, szRegion, isWrite, pp); + t = vfslog_time() - t; + vfslog_call(p->pVfslog, OS_SHMMAP, p->iFileId, t, rc, 0, 0); + return rc; } /* ** Open an vfslog file handle. @@ -824,15 +799,13 @@ case OS_SLEEP: zEvent = "xSleep"; break; case OS_CURRENTTIME: zEvent = "xCurrentTime"; break; case OS_SHMCLOSE: zEvent = "xShmClose"; break; case OS_SHMOPEN: zEvent = "xShmOpen"; break; - case OS_SHMGET: zEvent = "xShmGet"; break; - case OS_SHMSIZE: zEvent = "xShmSize"; break; - case OS_SHMRELEASE: zEvent = "xShmRelease"; break; case OS_SHMLOCK: zEvent = "xShmLock"; break; case OS_SHMBARRIER: zEvent = "xShmBarrier"; break; + case OS_SHMMAP: zEvent = "xShmMap"; break; case OS_ANNOTATE: zEvent = "annotation"; break; } return zEvent; Index: src/test_vfs.c ================================================================== --- src/test_vfs.c +++ src/test_vfs.c @@ -67,30 +67,31 @@ ** ** + Simulating IO errors, and ** + Invoking the Tcl callback script. 
*/ #define TESTVFS_SHMOPEN_MASK 0x00000001 -#define TESTVFS_SHMSIZE_MASK 0x00000002 -#define TESTVFS_SHMGET_MASK 0x00000004 -#define TESTVFS_SHMRELEASE_MASK 0x00000008 #define TESTVFS_SHMLOCK_MASK 0x00000010 #define TESTVFS_SHMBARRIER_MASK 0x00000020 #define TESTVFS_SHMCLOSE_MASK 0x00000040 +#define TESTVFS_SHMPAGE_MASK 0x00000080 -#define TESTVFS_OPEN_MASK 0x00000080 -#define TESTVFS_SYNC_MASK 0x00000100 -#define TESTVFS_ALL_MASK 0x000001FF +#define TESTVFS_OPEN_MASK 0x00000100 +#define TESTVFS_SYNC_MASK 0x00000200 +#define TESTVFS_ALL_MASK 0x000003FF + + +#define TESTVFS_MAX_PAGES 256 /* ** A shared-memory buffer. There is one of these objects for each shared ** memory region opened by clients. If two clients open the same file, ** there are two TestvfsFile structures but only one TestvfsBuffer structure. */ struct TestvfsBuffer { char *zFile; /* Associated file name */ - int n; /* Size of allocated buffer in bytes */ - u8 *a; /* Buffer allocated using ckalloc() */ + int pgsz; /* Page size */ + u8 *aPage[TESTVFS_MAX_PAGES]; /* Array of ckalloc'd pages */ TestvfsFile *pFile; /* List of open handles */ TestvfsBuffer *pNext; /* Next in linked list of all buffers */ }; @@ -131,16 +132,14 @@ static int tvfsRandomness(sqlite3_vfs*, int nByte, char *zOut); static int tvfsSleep(sqlite3_vfs*, int microseconds); static int tvfsCurrentTime(sqlite3_vfs*, double*); static int tvfsShmOpen(sqlite3_file*); -static int tvfsShmSize(sqlite3_file*, int , int *); -static int tvfsShmGet(sqlite3_file*, int , int *, volatile void **); -static int tvfsShmRelease(sqlite3_file*); static int tvfsShmLock(sqlite3_file*, int , int, int); static void tvfsShmBarrier(sqlite3_file*); static int tvfsShmClose(sqlite3_file*, int); +static int tvfsShmPage(sqlite3_file*,int,int,int, void volatile **); static sqlite3_io_methods tvfs_io_methods = { 2, /* iVersion */ tvfsClose, /* xClose */ tvfsRead, /* xRead */ @@ -153,16 +152,14 @@ tvfsCheckReservedLock, /* xCheckReservedLock */ tvfsFileControl, /* 
xFileControl */ tvfsSectorSize, /* xSectorSize */ tvfsDeviceCharacteristics, /* xDeviceCharacteristics */ tvfsShmOpen, /* xShmOpen */ - tvfsShmSize, /* xShmSize */ - tvfsShmGet, /* xShmGet */ - tvfsShmRelease, /* xShmRelease */ tvfsShmLock, /* xShmLock */ tvfsShmBarrier, /* xShmBarrier */ - tvfsShmClose /* xShmClose */ + tvfsShmClose, /* xShmClose */ + tvfsShmPage /* xShmPage */ }; static int tvfsResultCode(Testvfs *p, int *pRc){ struct errcode { int eCode; @@ -441,16 +438,14 @@ sqlite3_io_methods *pMethods; pMethods = (sqlite3_io_methods *)ckalloc(sizeof(sqlite3_io_methods)); memcpy(pMethods, &tvfs_io_methods, sizeof(sqlite3_io_methods)); if( ((Testvfs *)pVfs->pAppData)->isNoshm ){ pMethods->xShmOpen = 0; - pMethods->xShmGet = 0; - pMethods->xShmSize = 0; - pMethods->xShmRelease = 0; pMethods->xShmClose = 0; pMethods->xShmLock = 0; pMethods->xShmBarrier = 0; + pMethods->xShmMap = 0; } pFile->pMethods = pMethods; } return rc; @@ -545,20 +540,10 @@ */ static int tvfsCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ return PARENTVFS(pVfs)->xCurrentTime(PARENTVFS(pVfs), pTimeOut); } -static void tvfsGrowBuffer(TestvfsFile *pFd, int reqSize, int *pNewSize){ - TestvfsBuffer *pBuffer = pFd->pShm; - if( reqSize>pBuffer->n ){ - pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, reqSize); - memset(&pBuffer->a[pBuffer->n], 0x55, reqSize-pBuffer->n); - pBuffer->n = reqSize; - } - *pNewSize = pBuffer->n; -} - static int tvfsInjectIoerr(Testvfs *p){ int ret = 0; if( p->ioerr ){ p->iIoerrCnt--; if( p->iIoerrCnt==0 || (p->iIoerrCnt<0 && p->ioerr==2) ){ @@ -617,74 +602,54 @@ pBuffer->pFile = pFd; pFd->pShm = pBuffer; return SQLITE_OK; } -static int tvfsShmSize( - sqlite3_file *pFile, - int reqSize, - int *pNewSize -){ - int rc = SQLITE_OK; - TestvfsFile *pFd = (TestvfsFile *)pFile; - Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); - - if( p->pScript && p->mask&TESTVFS_SHMSIZE_MASK ){ - tvfsExecTcl(p, "xShmSize", - Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0 - ); - 
tvfsResultCode(p, &rc); - } - if( rc==SQLITE_OK && p->mask&TESTVFS_SHMSIZE_MASK && tvfsInjectIoerr(p) ){ - rc = SQLITE_IOERR; - } - if( rc==SQLITE_OK ){ - tvfsGrowBuffer(pFd, reqSize, pNewSize); - } - return rc; -} - -static int tvfsShmGet( - sqlite3_file *pFile, - int reqMapSize, - int *pMapSize, - volatile void **pp -){ - int rc = SQLITE_OK; - TestvfsFile *pFd = (TestvfsFile *)pFile; - Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); - - if( p->pScript && p->mask&TESTVFS_SHMGET_MASK ){ - tvfsExecTcl(p, "xShmGet", - Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, - Tcl_NewIntObj(reqMapSize) - ); - tvfsResultCode(p, &rc); - } - if( rc==SQLITE_OK && p->mask&TESTVFS_SHMGET_MASK && tvfsInjectIoerr(p) ){ - rc = SQLITE_IOERR; - } - - *pMapSize = pFd->pShm->n; - *pp = pFd->pShm->a; - return rc; -} - -static int tvfsShmRelease(sqlite3_file *pFile){ - int rc = SQLITE_OK; - TestvfsFile *pFd = (TestvfsFile *)pFile; - Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); - - if( p->pScript && p->mask&TESTVFS_SHMRELEASE_MASK ){ - tvfsExecTcl(p, "xShmRelease", - Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0 - ); - tvfsResultCode(p, &rc); - } - - return rc; -} +static void tvfsAllocPage(TestvfsBuffer *p, int iPage, int pgsz){ + assert( iPageaPage[iPage]==0 ){ + p->aPage[iPage] = (u8 *)ckalloc(pgsz); + memset(p->aPage[iPage], 0, pgsz); + p->pgsz = pgsz; + } +} + +static int tvfsShmPage( + sqlite3_file *pFile, /* Handle open on database file */ + int iPage, /* Page to retrieve */ + int pgsz, /* Size of pages */ + int isWrite, /* True to extend file if necessary */ + void volatile **pp /* OUT: Mapped memory */ +){ + int rc = SQLITE_OK; + TestvfsFile *pFd = (TestvfsFile *)pFile; + Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); + + if( p->pScript && p->mask&TESTVFS_SHMPAGE_MASK ){ + Tcl_Obj *pArg = Tcl_NewObj(); + Tcl_IncrRefCount(pArg); + Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(iPage)); + Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(pgsz)); + 
Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(isWrite)); + tvfsExecTcl(p, "xShmPage", + Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, pArg + ); + tvfsResultCode(p, &rc); + Tcl_DecrRefCount(pArg); + } + if( rc==SQLITE_OK && p->mask&TESTVFS_SHMPAGE_MASK && tvfsInjectIoerr(p) ){ + rc = SQLITE_IOERR; + } + + if( rc==SQLITE_OK && isWrite && !pFd->pShm->aPage[iPage] ){ + tvfsAllocPage(pFd->pShm, iPage, pgsz); + } + *pp = (void volatile *)pFd->pShm->aPage[iPage]; + + return rc; +} + static int tvfsShmLock( sqlite3_file *pFile, int ofst, int n, @@ -780,14 +745,17 @@ for(ppFd=&pBuffer->pFile; *ppFd!=pFd; ppFd=&((*ppFd)->pNext)); assert( (*ppFd)==pFd ); *ppFd = pFd->pNext; if( pBuffer->pFile==0 ){ + int i; TestvfsBuffer **pp; for(pp=&p->pBuffer; *pp!=pBuffer; pp=&((*pp)->pNext)); *pp = (*pp)->pNext; - ckfree((char *)pBuffer->a); + for(i=0; pBuffer->aPage[i]; i++){ + ckfree((char *)pBuffer->aPage[i]); + } ckfree((char *)pBuffer); } pFd->pShm = 0; return rc; @@ -819,47 +787,63 @@ } Tcl_ResetResult(interp); switch( (enum DB_enum)i ){ case CMD_SHM: { + Tcl_Obj *pObj; + int i; TestvfsBuffer *pBuffer; char *zName; if( objc!=3 && objc!=4 ){ Tcl_WrongNumArgs(interp, 2, objv, "FILE ?VALUE?"); return TCL_ERROR; } - zName = Tcl_GetString(objv[2]); + zName = ckalloc(p->pParent->mxPathname); + p->pParent->xFullPathname( + p->pParent, Tcl_GetString(objv[2]), + p->pParent->mxPathname, zName + ); for(pBuffer=p->pBuffer; pBuffer; pBuffer=pBuffer->pNext){ if( 0==strcmp(pBuffer->zFile, zName) ) break; } + ckfree(zName); if( !pBuffer ){ - Tcl_AppendResult(interp, "no such file: ", zName, 0); + Tcl_AppendResult(interp, "no such file: ", Tcl_GetString(objv[2]), 0); return TCL_ERROR; } if( objc==4 ){ int n; u8 *a = Tcl_GetByteArrayFromObj(objv[3], &n); - pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, n); - pBuffer->n = n; - memcpy(pBuffer->a, a, n); + assert( pBuffer->pgsz==0 || pBuffer->pgsz==32768 ); + for(i=0; i*32768aPage[i], &a[i*32768], nByte); + } + } + + pObj = 
Tcl_NewObj(); + for(i=0; pBuffer->aPage[i]; i++){ + Tcl_AppendObjToObj(pObj, Tcl_NewByteArrayObj(pBuffer->aPage[i], 32768)); } - Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(pBuffer->a, pBuffer->n)); + Tcl_SetObjResult(interp, pObj); break; } case CMD_FILTER: { static struct VfsMethod { char *zName; int mask; } vfsmethod [] = { { "xShmOpen", TESTVFS_SHMOPEN_MASK }, - { "xShmSize", TESTVFS_SHMSIZE_MASK }, - { "xShmGet", TESTVFS_SHMGET_MASK }, - { "xShmRelease", TESTVFS_SHMRELEASE_MASK }, { "xShmLock", TESTVFS_SHMLOCK_MASK }, { "xShmBarrier", TESTVFS_SHMBARRIER_MASK }, { "xShmClose", TESTVFS_SHMCLOSE_MASK }, + { "xShmPage", TESTVFS_SHMPAGE_MASK }, { "xSync", TESTVFS_SYNC_MASK }, { "xOpen", TESTVFS_OPEN_MASK }, }; Tcl_Obj **apElem = 0; int nElem = 0; @@ -897,10 +881,11 @@ if( p->pScript ){ Tcl_DecrRefCount(p->pScript); ckfree((char *)p->apScript); p->apScript = 0; p->nScript = 0; + p->pScript = 0; } Tcl_GetStringFromObj(objv[2], &nByte); if( nByte>0 ){ p->pScript = Tcl_DuplicateObj(objv[2]); Tcl_IncrRefCount(p->pScript); @@ -1069,10 +1054,17 @@ zVfs = Tcl_GetString(objv[1]); nByte = sizeof(Testvfs) + strlen(zVfs)+1; p = (Testvfs *)ckalloc(nByte); memset(p, 0, nByte); + /* Create the new object command before querying SQLite for a default VFS + ** to use for 'real' IO operations. This is because creating the new VFS + ** may delete an existing [testvfs] VFS of the same name. If such a VFS + ** is currently the default, the new [testvfs] may end up calling the + ** methods of a deleted object. 
+ */ + Tcl_CreateObjCommand(interp, zVfs, testvfs_obj_cmd, p, testvfs_obj_del); p->pParent = sqlite3_vfs_find(0); p->interp = interp; p->zName = (char *)&p[1]; memcpy(p->zName, zVfs, strlen(zVfs)+1); @@ -1085,11 +1077,10 @@ pVfs->szOsFile += p->pParent->szOsFile; p->pVfs = pVfs; p->isNoshm = isNoshm; p->mask = TESTVFS_ALL_MASK; - Tcl_CreateObjCommand(interp, zVfs, testvfs_obj_cmd, p, testvfs_obj_del); sqlite3_vfs_register(pVfs, isDefault); return TCL_OK; bad_args: Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -139,25 +139,37 @@ ** ** The wal-index consists of a header region, followed by an one or ** more index blocks. ** ** The wal-index header contains the total number of frames within the WAL -** in the the mxFrame field. Each index block contains information on -** HASHTABLE_NPAGE frames. Each index block contains two sections, a -** mapping which is a database page number for each frame, and a hash -** table used to look up frames by page number. The mapping section is -** an array of HASHTABLE_NPAGE 32-bit page numbers. The first entry on the -** array is the page number for the first frame; the second entry is the -** page number for the second frame; and so forth. The last index block -** holds a total of (mxFrame%HASHTABLE_NPAGE) page numbers. All index -** blocks other than the last are completely full with HASHTABLE_NPAGE -** page numbers. All index blocks are the same size; the mapping section -** of the last index block merely contains unused entries if mxFrame is -** not an even multiple of HASHTABLE_NPAGE. +** in the the mxFrame field. +** +** Each index block except for the first contains information on +** HASHTABLE_NPAGE frames. The first index block contains information on +** HASHTABLE_NPAGE_ONE frames. 
The values of HASHTABLE_NPAGE_ONE and +** HASHTABLE_NPAGE are selected so that together the wal-index header and +** first index block are the same size as all other index blocks in the +** wal-index. +** +** Each index block contains two sections, a page-mapping that contains the +** database page number associated with each wal frame, and a hash-table +** that allows users to query an index block for a specific page number. +** The page-mapping is an array of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE +** for the first index block) 32-bit page numbers. The first entry in the +** first index-block contains the database page number corresponding to the +** first frame in the WAL file. The first entry in the second index block +** in the WAL file corresponds to the (HASHTABLE_NPAGE_ONE+1)th frame in +** the log, and so on. +** +** The last index block in a wal-index usually contains less than the full +** complement of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE) page-numbers, +** depending on the contents of the WAL file. This does not change the +** allocated size of the page-mapping array - the page-mapping array merely +** contains unused entries. ** ** Even without using the hash table, the last frame for page P -** can be found by scanning the mapping sections of each index block +** can be found by scanning the page-mapping sections of each index block ** starting with the last index block and moving toward the first, and ** within each index block, starting at the end and moving toward the ** beginning. The first entry that equals P corresponds to the frame ** holding the content for that page. 
** @@ -368,12 +380,12 @@ struct Wal { sqlite3_vfs *pVfs; /* The VFS used to create pDbFd */ sqlite3_file *pDbFd; /* File handle for the database file */ sqlite3_file *pWalFd; /* File handle for WAL file */ u32 iCallback; /* Value to pass to log callback (or 0) */ - int szWIndex; /* Size of the wal-index that is mapped in mem */ - volatile u32 *pWiData; /* Pointer to wal-index content in memory */ + int nWiData; /* Size of array apWiData */ + volatile u32 **apWiData; /* Pointer to wal-index content in memory */ u16 szPage; /* Database page size */ i16 readLock; /* Which read lock is being held. -1 for none */ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 isWIndexOpen; /* True if ShmOpen() called on pDbFd */ u8 writeLock; /* True if in a write transaction */ @@ -385,17 +397,14 @@ u8 lockError; /* True if a locking error has occurred */ #endif }; /* -** Return a pointer to the WalCkptInfo structure in the wal-index. +** Each page of the wal-index mapping contains a hash-table made up of +** an array of HASHTABLE_NSLOT elements of the following type. */ -static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ - assert( pWal->pWiData!=0 ); - return (volatile WalCkptInfo*)&pWal->pWiData[sizeof(WalIndexHdr)/2]; -} - +typedef u16 ht_slot; /* ** This structure is used to implement an iterator that loops through ** all frames in the WAL in database page order. Where two or more frames ** correspond to the same database page, the iterator visits only the @@ -409,19 +418,98 @@ ** walIteratorFree() - Free an iterator. ** ** This functionality is used by the checkpoint code (see walCheckpoint()). 
*/ struct WalIterator { - int iPrior; /* Last result returned from the iterator */ - int nSegment; /* Size of the aSegment[] array */ - int nFinal; /* Elements in aSegment[nSegment-1] */ + int iPrior; /* Last result returned from the iterator */ + int nSegment; /* Size of the aSegment[] array */ struct WalSegment { - int iNext; /* Next slot in aIndex[] not previously returned */ - u8 *aIndex; /* i0, i1, i2... such that aPgno[iN] ascending */ - u32 *aPgno; /* 256 page numbers. Pointer to Wal.pWiData */ - } aSegment[1]; /* One for every 256 entries in the WAL */ + int iNext; /* Next slot in aIndex[] not yet returned */ + ht_slot *aIndex; /* i0, i1, i2... such that aPgno[iN] ascend */ + u32 *aPgno; /* Array of page numbers. */ + int nEntry; /* Max size of aPgno[] and aIndex[] arrays */ + int iZero; /* Frame number associated with aPgno[0] */ + } aSegment[1]; /* One for every 32KB page in the WAL */ }; + +/* +** Define the parameters of the hash tables in the wal-index file. There +** is a hash-table following every HASHTABLE_NPAGE page numbers in the +** wal-index. +** +** Changing any of these constants will alter the wal-index format and +** create incompatibilities. +*/ +#define HASHTABLE_NPAGE 4096 /* Must be power of 2 */ +#define HASHTABLE_HASH_1 383 /* Should be prime */ +#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ + +/* +** The block of page numbers associated with the first hash-table in a +** wal-index is smaller than usual. This is so that there is a complete +** hash-table on each aligned 32KB page of the wal-index. +*/ +#define HASHTABLE_NPAGE_ONE (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32))) + +/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */ +#define WALINDEX_PGSZ ( \ + sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \ +) + +/* +** Obtain a pointer to the iPage'th page of the wal-index. The wal-index +** is broken into pages of WALINDEX_PGSZ bytes. 
Wal-index pages are +** numbered from zero. +** +** If this call is successful, *ppPage is set to point to the wal-index +** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs, +** then an SQLite error code is returned and *ppPage is set to 0. +*/ +static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ + int rc = SQLITE_OK; + + /* Enlarge the pWal->apWiData[] array if required */ + if( pWal->nWiData<=iPage ){ + int nByte = sizeof(u32 *)*(iPage+1); + volatile u32 **apNew; + apNew = (volatile u32 **)sqlite3_realloc(pWal->apWiData, nByte); + if( !apNew ){ + *ppPage = 0; + return SQLITE_NOMEM; + } + memset(&apNew[pWal->nWiData], 0, sizeof(u32 *)*(iPage+1-pWal->nWiData)); + pWal->apWiData = apNew; + pWal->nWiData = iPage+1; + } + + /* Request a pointer to the required page from the VFS */ + if( pWal->apWiData[iPage]==0 ){ + rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, + pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] + ); + } + + *ppPage = pWal->apWiData[iPage]; + assert( iPage==0 || *ppPage || rc!=SQLITE_OK ); + return rc; +} + +/* +** Return a pointer to the WalCkptInfo structure in the wal-index. +*/ +static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]); +} + +/* +** Return a pointer to the WalIndexHdr structure in the wal-index. +*/ +static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + return (volatile WalIndexHdr*)pWal->apWiData[0]; +} /* ** The argument to this macro must be of type u32. On a little-endian ** architecture, it returns the u32 value that results from interpreting ** the 4 bytes as a big-endian value. On a big-endian architecture, it @@ -484,20 +572,19 @@ ** Write the header information in pWal->hdr into the wal-index. ** ** The checksum on pWal->hdr is updated before it is written. 
*/ static void walIndexWriteHdr(Wal *pWal){ - WalIndexHdr *aHdr; + volatile WalIndexHdr *aHdr = walIndexHdr(pWal); + const int nCksum = offsetof(WalIndexHdr, aCksum); assert( pWal->writeLock ); pWal->hdr.isInit = 1; - walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum), - 0, pWal->hdr.aCksum); - aHdr = (WalIndexHdr*)pWal->pWiData; - memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr)); + walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); + memcpy((void *)&aHdr[1], (void *)&pWal->hdr, sizeof(WalIndexHdr)); sqlite3OsShmBarrier(pWal->pDbFd); - memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr)); + memcpy((void *)&aHdr[0], (void *)&pWal->hdr, sizeof(WalIndexHdr)); } /* ** This function encodes a single frame header and writes it to a buffer ** supplied by the caller. A frame-header is made up of a series of @@ -584,23 +671,10 @@ *piPage = pgno; *pnTruncate = sqlite3Get4byte(&aFrame[4]); return 1; } -/* -** Define the parameters of the hash tables in the wal-index file. There -** is a hash-table following every HASHTABLE_NPAGE page numbers in the -** wal-index. -** -** Changing any of these constants will alter the wal-index format and -** create incompatibilities. -*/ -#define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */ -#define HASHTABLE_DATATYPE u16 -#define HASHTABLE_HASH_1 383 /* Should be prime */ -#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ -#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT) #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) /* ** Names of locks. This routine is used to provide debugging output and is not ** a part of an ordinary build. @@ -661,100 +735,10 @@ SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE); WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal, walLockName(lockIdx), n)); } -/* -** Return the index in the Wal.pWiData array that corresponds to -** frame iFrame. -** -** Wal.pWiData is an array of u32 elements that is the wal-index. 
-** The array begins with a header and is then followed by alternating -** "map" and "hash-table" blocks. Each "map" block consists of -** HASHTABLE_NPAGE u32 elements which are page numbers corresponding -** to frames in the WAL file. -** -** This routine returns an index X such that Wal.pWiData[X] is part -** of a "map" block that contains the page number of the iFrame-th -** frame in the WAL file. -*/ -static int walIndexEntry(u32 iFrame){ - return ( - (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32) - + (((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NBYTE)/sizeof(u32) - + (iFrame-1) - ); -} - -/* -** Return the minimum size of the shared-memory, in bytes, that is needed -** to support a wal-index containing frame iFrame. The value returned -** includes the wal-index header and the complete "block" containing iFrame, -** including the hash table segment that follows the block. -*/ -static int walMappingSize(u32 iFrame){ - const int nByte = (sizeof(u32)*HASHTABLE_NPAGE + HASHTABLE_NBYTE) ; - return ( WALINDEX_LOCK_OFFSET - + WALINDEX_LOCK_RESERVED - + nByte * ((iFrame + HASHTABLE_NPAGE - 1)/HASHTABLE_NPAGE) - ); -} - -/* -** Release our reference to the wal-index memory map, if we are holding -** it. -*/ -static void walIndexUnmap(Wal *pWal){ - if( pWal->pWiData ){ - sqlite3OsShmRelease(pWal->pDbFd); - } - pWal->pWiData = 0; - pWal->szWIndex = -1; -} - -/* -** Map the wal-index file into memory if it isn't already. -** -** The reqSize parameter is the requested size of the mapping. The -** mapping will be at least this big if the underlying storage is -** that big. But the mapping will never grow larger than the underlying -** storage. Use the walIndexRemap() to enlarget the storage space. 
-*/ -static int walIndexMap(Wal *pWal, int reqSize){ - int rc = SQLITE_OK; - if( pWal->pWiData==0 || reqSize>pWal->szWIndex ){ - walIndexUnmap(pWal); - rc = sqlite3OsShmGet(pWal->pDbFd, reqSize, &pWal->szWIndex, - (void volatile**)(char volatile*)&pWal->pWiData); - if( rc!=SQLITE_OK ){ - walIndexUnmap(pWal); - } - } - return rc; -} - -/* -** Enlarge the wal-index to be at least enlargeTo bytes in size and -** Remap the wal-index so that the mapping covers the full size -** of the underlying file. -** -** If enlargeTo is non-negative, then increase the size of the underlying -** storage to be at least as big as enlargeTo before remapping. -*/ -static int walIndexRemap(Wal *pWal, int enlargeTo){ - int rc; - int sz; - assert( pWal->writeLock ); - rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &sz); - if( rc==SQLITE_OK && sz>pWal->szWIndex ){ - walIndexUnmap(pWal); - rc = walIndexMap(pWal, sz); - } - assert( pWal->szWIndex>=enlargeTo || rc!=SQLITE_OK ); - return rc; -} - /* ** Compute a hash on a page number. The resulting hash value must land ** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances ** the hash to the next value in the event of a collision. */ @@ -765,53 +749,82 @@ } static int walNextHash(int iPriorHash){ return (iPriorHash+1)&(HASHTABLE_NSLOT-1); } - /* -** Find the hash table and (section of the) page number array used to -** store data for WAL frame iFrame. +** Return pointers to the hash table and page number array stored on +** page iHash of the wal-index. The wal-index is broken into 32KB pages +** numbered starting from 0. ** ** Set output variable *paHash to point to the start of the hash table ** in the wal-index file. Set *piZero to one less than the frame ** number of the first frame indexed by this hash table. If a ** slot in the hash table is set to N, it refers to frame number ** (*piZero+N) in the log. 
** -** Finally, set *paPgno such that for all frames F between (*piZero+1) and -** (*piZero+HASHTABLE_NPAGE), (*paPgno)[F] is the database page number -** associated with frame F. +** Finally, set *paPgno so that *paPgno[1] is the page number of the +** first frame indexed by the hash table, frame (*piZero+1). */ -static void walHashFind( +static int walHashGet( Wal *pWal, /* WAL handle */ - u32 iFrame, /* Find the hash table indexing this frame */ - volatile HASHTABLE_DATATYPE **paHash, /* OUT: Pointer to hash index */ + int iHash, /* Find the iHash'th table */ + volatile ht_slot **paHash, /* OUT: Pointer to hash index */ volatile u32 **paPgno, /* OUT: Pointer to page number array */ u32 *piZero /* OUT: Frame associated with *paPgno[0] */ ){ - u32 iZero; + int rc; /* Return code */ volatile u32 *aPgno; - volatile HASHTABLE_DATATYPE *aHash; - - iZero = ((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NPAGE; - aPgno = &pWal->pWiData[walIndexEntry(iZero+1)-iZero-1]; - aHash = (HASHTABLE_DATATYPE *)&aPgno[iZero+HASHTABLE_NPAGE+1]; - - /* Assert that: - ** - ** + the mapping is large enough for this hash-table, and - ** - ** + that aPgno[iZero+1] really is the database page number associated - ** with the first frame indexed by this hash table. 
- */ - assert( (u32*)(&aHash[HASHTABLE_NSLOT])<=&pWal->pWiData[pWal->szWIndex/4] ); - assert( walIndexEntry(iZero+1)==(&aPgno[iZero+1] - pWal->pWiData) ); - - *paHash = aHash; - *paPgno = aPgno; - *piZero = iZero; + + rc = walIndexPage(pWal, iHash, &aPgno); + assert( rc==SQLITE_OK || iHash>0 ); + + if( rc==SQLITE_OK ){ + u32 iZero; + volatile ht_slot *aHash; + + aHash = (volatile ht_slot *)&aPgno[HASHTABLE_NPAGE]; + if( iHash==0 ){ + aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)]; + iZero = 0; + }else{ + iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE; + } + + *paPgno = &aPgno[-1]; + *paHash = aHash; + *piZero = iZero; + } + return rc; +} + +/* +** Return the number of the wal-index page that contains the hash-table +** and page-number array that contain entries corresponding to WAL frame +** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages +** are numbered starting from 0. +*/ +static int walFramePage(u32 iFrame){ + int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE; + assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE) + && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE) + && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)) + && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE) + && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE)) + ); + return iHash; +} + +/* +** Return the page number associated with frame iFrame in this WAL. +*/ +static u32 walFramePgno(Wal *pWal, u32 iFrame){ + int iHash = walFramePage(iFrame); + if( iHash==0 ){ + return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1]; + } + return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE]; } /* ** Remove entries from the hash table that point to WAL slots greater ** than pWal->hdr.mxFrame. @@ -823,49 +836,58 @@ ** updated. Any later hash tables will be automatically cleared when ** pWal->hdr.mxFrame advances to the point where those hash tables are ** actually needed. 
*/ static void walCleanupHash(Wal *pWal){ - volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table to clear */ - volatile u32 *aPgno; /* Unused return from walHashFind() */ - u32 iZero; /* frame == (aHash[x]+iZero) */ - int iLimit = 0; /* Zero values greater than this */ + volatile ht_slot *aHash; /* Pointer to hash table to clear */ + volatile u32 *aPgno; /* Page number array for hash table */ + u32 iZero; /* frame == (aHash[x]+iZero) */ + int iLimit = 0; /* Zero values greater than this */ + int nByte; /* Number of bytes to zero in aPgno[] */ + int i; /* Used to iterate through aHash[] */ assert( pWal->writeLock ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 ); - if( (pWal->hdr.mxFrame % HASHTABLE_NPAGE)>0 ){ - int nByte; /* Number of bytes to zero in aPgno[] */ - int i; /* Used to iterate through aHash[] */ - - walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero); - iLimit = pWal->hdr.mxFrame - iZero; - assert( iLimit>0 ); - for(i=0; iiLimit ){ - aHash[i] = 0; - } - } - - /* Zero the entries in the aPgno array that correspond to frames with - ** frame numbers greater than pWal->hdr.mxFrame. - */ - nByte = sizeof(u32) * (HASHTABLE_NPAGE-iLimit); - memset((void *)&aPgno[iZero+iLimit+1], 0, nByte); - assert( &((u8 *)&aPgno[iZero+iLimit+1])[nByte]==(u8 *)aHash ); - } + + if( pWal->hdr.mxFrame==0 ) return; + + /* Obtain pointers to the hash-table and page-number array containing + ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed + ** that the page said hash-table and array reside on is already mapped. + */ + assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) ); + assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] ); + walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &aHash, &aPgno, &iZero); + + /* Zero all hash-table entries that correspond to frame numbers greater + ** than pWal->hdr.mxFrame. 
+ */ + iLimit = pWal->hdr.mxFrame - iZero; + assert( iLimit>0 ); + for(i=0; i<HASHTABLE_NSLOT; i++){ + if( aHash[i]>iLimit ){ + aHash[i] = 0; + } + } + + /* Zero the entries in the aPgno array that correspond to frames with + ** frame numbers greater than pWal->hdr.mxFrame. + */ + nByte = ((char *)aHash - (char *)&aPgno[iLimit+1]); + memset((void *)&aPgno[iLimit+1], 0, nByte); #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* Verify that the every entry in the mapping region is still reachable ** via the hash table even after the cleanup. */ if( iLimit ){ int i; /* Loop counter */ int iKey; /* Hash key */ for(i=1; i<=iLimit; i++){ - for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){ + for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){ if( aHash[iKey]==i ) break; } assert( aHash[iKey]==i ); } } @@ -877,54 +899,51 @@ ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. */ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ int rc; /* Return code */ - int nMapping; /* Required mapping size in bytes */ - - /* Make sure the wal-index is mapped. Enlarge the mapping if required. */ - nMapping = walMappingSize(iFrame); - rc = walIndexMap(pWal, nMapping); - while( rc==SQLITE_OK && nMapping>pWal->szWIndex ){ - rc = walIndexRemap(pWal, nMapping); - } - - /* Assuming the wal-index file was successfully mapped, find the hash - ** table and section of of the page number array that pertain to frame - ** iFrame of the WAL. Then populate the page number array and the hash - ** table entry. + u32 iZero; /* One less than frame number of aPgno[1] */ + volatile u32 *aPgno; /* Page number array */ + volatile ht_slot *aHash; /* Hash table */ + + rc = walHashGet(pWal, walFramePage(iFrame), &aHash, &aPgno, &iZero); + + /* Assuming the wal-index file was successfully mapped, populate the + ** page number array and hash table entry.
*/ if( rc==SQLITE_OK ){ int iKey; /* Hash table key */ - u32 iZero; /* One less than frame number of aPgno[1] */ - volatile u32 *aPgno; /* Page number array */ - volatile HASHTABLE_DATATYPE *aHash; /* Hash table */ - int idx; /* Value to write to hash-table slot */ - TESTONLY( int nCollide = 0; /* Number of hash collisions */ ) + int idx; /* Value to write to hash-table slot */ + TESTONLY( int nCollide = 0; /* Number of hash collisions */ ) - walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero); idx = iFrame - iZero; + assert( idx <= HASHTABLE_NSLOT/2 + 1 ); + + /* If this is the first entry to be added to this hash-table, zero the + ** entire hash table and aPgno[] array before proceding. + */ if( idx==1 ){ - memset((void*)&aPgno[iZero+1], 0, HASHTABLE_NPAGE*sizeof(u32)); - memset((void*)aHash, 0, HASHTABLE_NBYTE); - } - assert( idx <= HASHTABLE_NSLOT/2 + 1 ); - - if( aPgno[iFrame] ){ - /* If the entry in aPgno[] is already set, then the previous writer - ** must have exited unexpectedly in the middle of a transaction (after - ** writing one or more dirty pages to the WAL to free up memory). - ** Remove the remnants of that writers uncommitted transaction from - ** the hash-table before writing any new entries. - */ + int nByte = (u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]; + memset((void*)&aPgno[1], 0, nByte); + } + + /* If the entry in aPgno[] is already set, then the previous writer + ** must have exited unexpectedly in the middle of a transaction (after + ** writing one or more dirty pages to the WAL to free up memory). + ** Remove the remnants of that writers uncommitted transaction from + ** the hash-table before writing any new entries. + */ + if( aPgno[idx] ){ walCleanupHash(pWal); - assert( !aPgno[iFrame] ); + assert( !aPgno[idx] ); } - aPgno[iFrame] = iPage; + + /* Write the aPgno[] array entry and the hash-table slot. 
*/ for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){ assert( nCollide++ < idx ); } + aPgno[idx] = iPage; aHash[iKey] = idx; #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* Verify that the number of entries in the hash table exactly equals ** the number of entries in the mapping region. @@ -942,11 +961,11 @@ ** iteration. */ if( (idx&0x3ff)==0 ){ int i; /* Loop counter */ for(i=1; i<=idx; i++){ - for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){ + for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){ if( aHash[iKey]==i ) break; } assert( aHash[iKey]==i ); } } @@ -1074,13 +1093,10 @@ sqlite3_free(aFrame); } finished: - if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){ - rc = walIndexRemap(pWal, walMappingSize(1)); - } if( rc==SQLITE_OK ){ volatile WalCkptInfo *pInfo; int i; pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; @@ -1162,11 +1178,10 @@ } pRet->pVfs = pVfs; pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pDbFd = pDbFd; - pRet->szWIndex = -1; pRet->readLock = -1; sqlite3_randomness(8, &pRet->hdr.aSalt); pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd; sqlite3_snprintf(nWal, zWal, "%s-wal", zDbName); rc = sqlite3OsShmOpen(pDbFd); @@ -1205,57 +1220,55 @@ u32 *piFrame /* OUT: Wal frame index of next page */ ){ u32 iMin; /* Result pgno must be greater than iMin */ u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */ int i; /* For looping through segments */ - int nBlock = p->nFinal; /* Number of entries in current segment */ iMin = p->iPrior; assert( iMin<0xffffffff ); for(i=p->nSegment-1; i>=0; i--){ struct WalSegment *pSegment = &p->aSegment[i]; - while( pSegment->iNext<nBlock ){ + while( pSegment->iNext<pSegment->nEntry ){ u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]]; if( iPg>iMin ){ if( iPg<iRet ){ iRet = iPg; - *piFrame = i*256 + 1 + pSegment->aIndex[pSegment->iNext]; + *piFrame = pSegment->iZero + pSegment->aIndex[pSegment->iNext]; } break; } pSegment->iNext++; } - nBlock = 256; } *piPage = p->iPrior = iRet; return (iRet==0xFFFFFFFF); }
-static void walMergesort8( - Pgno *aContent, /* Pages in wal */ - u8 *aBuffer, /* Buffer of at least *pnList items to use */ - u8 *aList, /* IN/OUT: List to sort */ +static void walMergesort( + u32 *aContent, /* Pages in wal */ + ht_slot *aBuffer, /* Buffer of at least *pnList items to use */ + ht_slot *aList, /* IN/OUT: List to sort */ int *pnList /* IN/OUT: Number of elements in aList[] */ ){ int nList = *pnList; if( nList>1 ){ int nLeft = nList / 2; /* Elements in left list */ int nRight = nList - nLeft; /* Elements in right list */ - u8 *aLeft = aList; /* Left list */ - u8 *aRight = &aList[nLeft]; /* Right list */ int iLeft = 0; /* Current index in aLeft */ int iRight = 0; /* Current index in aright */ int iOut = 0; /* Current index in output buffer */ + ht_slot *aLeft = aList; /* Left list */ + ht_slot *aRight = aList+nLeft;/* Right list */ /* TODO: Change to non-recursive version. */ - walMergesort8(aContent, aBuffer, aLeft, &nLeft); - walMergesort8(aContent, aBuffer, aRight, &nRight); + walMergesort(aContent, aBuffer, aLeft, &nLeft); + walMergesort(aContent, aBuffer, aRight, &nRight); while( iRight=nRight || aContent[aLeft[iLeft]] aContent[aList[i-1]] ); } } #endif } + +/* +** Free an iterator allocated by walIteratorInit(). +*/ +static void walIteratorFree(WalIterator *p){ + sqlite3_free(p); +} /* ** Map the wal-index into memory owned by this thread, if it is not ** mapped already. Then construct a WalInterator object that can be ** used to loop over all pages in the WAL in ascending order. @@ -1298,75 +1318,75 @@ ** WalIterator object when it has finished with it. The caller must ** also unmap the wal-index. But the wal-index must not be unmapped ** prior to the WalIterator object being destroyed. 
*/ static int walIteratorInit(Wal *pWal, WalIterator **pp){ - u32 *aData; /* Content of the wal-index file */ - WalIterator *p; /* Return value */ - int nSegment; /* Number of segments to merge */ - u32 iLast; /* Last frame in log */ - int nByte; /* Number of bytes to allocate */ - int i; /* Iterator variable */ - int nFinal; /* Number of unindexed entries */ - u8 *aTmp; /* Temp space used by merge-sort */ - u8 *aSpace; /* Surplus space on the end of the allocation */ - - /* Make sure the wal-index is mapped into local memory */ - assert( pWal->pWiData && pWal->szWIndex>=walMappingSize(pWal->hdr.mxFrame) ); + WalIterator *p; /* Return value */ + int nSegment; /* Number of segments to merge */ + u32 iLast; /* Last frame in log */ + int nByte; /* Number of bytes to allocate */ + int i; /* Iterator variable */ + ht_slot *aTmp; /* Temp space used by merge-sort */ + ht_slot *aSpace; /* Space at the end of the allocation */ /* This routine only runs while holding SQLITE_SHM_CHECKPOINT. No other ** thread is able to write to shared memory while this routine is ** running (or, indeed, while the WalIterator object exists). Hence, - ** we can cast off the volatile qualifacation from shared memory + ** we can cast off the volatile qualification from shared memory */ assert( pWal->ckptLock ); - aData = (u32*)pWal->pWiData; + iLast = pWal->hdr.mxFrame; /* Allocate space for the WalIterator object */ - iLast = pWal->hdr.mxFrame; - nSegment = (iLast >> 8) + 1; - nFinal = (iLast & 0x000000FF); - nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256); + nSegment = walFramePage(iLast) + 1; + nByte = sizeof(WalIterator) + + nSegment*(sizeof(struct WalSegment)) + + (nSegment+1)*(HASHTABLE_NPAGE * sizeof(ht_slot)); p = (WalIterator *)sqlite3_malloc(nByte); if( !p ){ return SQLITE_NOMEM; } memset(p, 0, nByte); - /* Initialize the WalIterator object. Each 256-entry segment is - ** presorted in order to make iterating through all entries much - ** faster. 
- */ + /* Allocate space for the WalIterator object */ p->nSegment = nSegment; - aSpace = (u8 *)&p->aSegment[nSegment]; - aTmp = &aSpace[nSegment*256]; + aSpace = (ht_slot *)&p->aSegment[nSegment]; + aTmp = &aSpace[HASHTABLE_NPAGE*nSegment]; for(i=0; iaSegment[i].aPgno = &aData[walIndexEntry(i*256+1)]; - p->aSegment[i].aIndex = aSpace; - for(j=0; jaSegment[i].aPgno, aTmp, aSpace, &nIndex); - memset(&aSpace[nIndex], aSpace[nIndex-1], 256-nIndex); - aSpace += 256; - p->nFinal = nIndex; + walMergesort((u32 *)aPgno, aTmp, aSpace, &nEntry); + p->aSegment[i].iZero = iZero; + p->aSegment[i].nEntry = nEntry; + p->aSegment[i].aIndex = aSpace; + p->aSegment[i].aPgno = (u32 *)aPgno; + aSpace += HASHTABLE_NPAGE; } + assert( aSpace==aTmp ); - /* Return the fully initializd WalIterator object */ + /* Return the fully initialized WalIterator object */ *pp = p; return SQLITE_OK ; } -/* -** Free an iterator allocated by walIteratorInit(). -*/ -static void walIteratorFree(WalIterator *p){ - sqlite3_free(p); -} - /* ** Copy as much content as we can from the WAL back into the database file ** in response to an sqlite3_wal_checkpoint() request or the equivalent. ** ** The amount of information copies from WAL to database might be limited @@ -1407,11 +1427,10 @@ WalIterator *pIter = 0; /* Wal iterator context */ u32 iDbpage = 0; /* Next database page to write */ u32 iFrame = 0; /* Wal frame containing data for iDbpage */ u32 mxSafeFrame; /* Max frame that can be backfilled */ int i; /* Loop counter */ - volatile WalIndexHdr *pHdr; /* The actual wal-index header in SHM */ volatile WalCkptInfo *pInfo; /* The checkpoint status information */ /* Allocate the iterator */ rc = walIteratorInit(pWal, &pIter); if( rc!=SQLITE_OK || pWal->hdr.mxFrame==0 ){ @@ -1428,13 +1447,11 @@ ** safe to write into the database. Frames beyond mxSafeFrame might ** overwrite database pages that are in use by active readers and thus ** cannot be backfilled from the WAL. 
*/ mxSafeFrame = pWal->hdr.mxFrame; - pHdr = (volatile WalIndexHdr*)pWal->pWiData; - pInfo = (volatile WalCkptInfo*)&pHdr[2]; - assert( pInfo==walCkptInfo(pWal) ); + pInfo = walCkptInfo(pWal); for(i=1; i<WAL_NREADER; i++){ u32 y = pInfo->aReadMark[i]; if( mxSafeFrame>=y ){ assert( y<=pWal->hdr.mxFrame ); rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); @@ -1459,10 +1476,11 @@ rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } /* Iterate through the contents of the WAL, copying data to the db file. */ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ + assert( walFramePgno(pWal, iFrame)==iDbpage ); if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue; rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE ); if( rc!=SQLITE_OK ) break; @@ -1470,11 +1488,11 @@ if( rc!=SQLITE_OK ) break; } /* If work was actually accomplished... */ if( rc==SQLITE_OK ){ - if( mxSafeFrame==pHdr[0].mxFrame ){ + if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage)); if( rc==SQLITE_OK && sync_flags ){ rc = sqlite3OsSync(pWal->pDbFd, sync_flags); } } @@ -1523,19 +1541,19 @@ pWal->exclusiveMode = 1; rc = sqlite3WalCheckpoint(pWal, sync_flags, nBuf, zBuf); if( rc==SQLITE_OK ){ isDelete = 1; } - walIndexUnmap(pWal); } walIndexClose(pWal, isDelete); sqlite3OsClose(pWal->pWalFd); if( isDelete ){ sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); } WALTRACE(("WAL%p: closed\n", pWal)); + sqlite3_free(pWal->apWiData); sqlite3_free(pWal); } return rc; } @@ -1555,20 +1573,16 @@ ** ** If the checksum cannot be verified return non-zero. If the header ** is read successfully and the checksum verified, return zero.
*/ int walIndexTryHdr(Wal *pWal, int *pChanged){ - u32 aCksum[2]; /* Checksum on the header content */ - WalIndexHdr h1, h2; /* Two copies of the header content */ - WalIndexHdr *aHdr; /* Header in shared memory */ - - if( pWal->szWIndex < WALINDEX_HDR_SIZE ){ - /* The wal-index is not large enough to hold the header, then assume - ** header is invalid. */ - return 1; - } - assert( pWal->pWiData ); + u32 aCksum[2]; /* Checksum on the header content */ + WalIndexHdr h1, h2; /* Two copies of the header content */ + WalIndexHdr volatile *aHdr; /* Header in shared memory */ + + /* The first page of the wal-index must be mapped at this point. */ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); /* Read the header. This might happen currently with a write to the ** same area of shared memory on a different CPU in a SMP, ** meaning it is possible that an inconsistent snapshot is read ** from the file. If this happens, return non-zero. @@ -1576,14 +1590,14 @@ ** There are two copies of the header at the beginning of the wal-index. ** When reading, read [0] first then [1]. Writes are in the reverse order. ** Memory barriers are used to prevent the compiler or the hardware from ** reordering the reads and writes. */ - aHdr = (WalIndexHdr*)pWal->pWiData; - memcpy(&h1, &aHdr[0], sizeof(h1)); + aHdr = walIndexHdr(pWal); + memcpy(&h1, (void *)&aHdr[0], sizeof(h1)); sqlite3OsShmBarrier(pWal->pDbFd); - memcpy(&h2, &aHdr[1], sizeof(h2)); + memcpy(&h2, (void *)&aHdr[1], sizeof(h2)); if( memcmp(&h1, &h2, sizeof(h1))!=0 ){ return 1; /* Dirty read */ } if( h1.isInit==0 ){ @@ -1623,50 +1637,48 @@ ** Otherwise an SQLite error code. */ static int walIndexReadHdr(Wal *pWal, int *pChanged){ int rc; /* Return code */ int badHdr; /* True if a header read failed */ + volatile u32 *page0; + /* Ensure that page 0 of the wal-index (the page that contains the + ** wal-index header) is mapped. Return early if an error occurs here. 
+ */ assert( pChanged ); - rc = walIndexMap(pWal, walMappingSize(1)); + rc = walIndexPage(pWal, 0, &page0); if( rc!=SQLITE_OK ){ return rc; - } + }; + assert( page0 || pWal->writeLock==0 ); - /* Try once to read the header straight out. This works most of the - ** time. + /* If the first page of the wal-index has been mapped, try to read the + ** wal-index header immediately, without holding any lock. This usually + ** works, but may fail if the wal-index header is corrupt or currently + ** being modified by another user. */ - badHdr = walIndexTryHdr(pWal, pChanged); + badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1); /* If the first attempt failed, it might have been due to a race ** with a writer. So get a WRITE lock and try again. */ assert( badHdr==0 || pWal->writeLock==0 ); - if( badHdr ){ - rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); - if( rc==SQLITE_OK ){ - pWal->writeLock = 1; + if( badHdr && SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ + pWal->writeLock = 1; + if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ badHdr = walIndexTryHdr(pWal, pChanged); if( badHdr ){ /* If the wal-index header is still malformed even while holding ** a WRITE lock, it can only mean that the header is corrupted and ** needs to be reconstructed. So run recovery to do exactly that. */ rc = walIndexRecover(pWal); *pChanged = 1; } - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - pWal->writeLock = 0; - } - } - - /* Make sure the mapping is large enough to cover the entire wal-index */ - if( rc==SQLITE_OK ){ - int szWanted = walMappingSize(pWal->hdr.mxFrame); - if( pWal->szWIndexwriteLock = 0; + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); } return rc; } @@ -1703,16 +1715,15 @@ ** update values of the aReadMark[] array in the header, but if it does ** so it takes care to hold an exclusive lock on the corresponding ** WAL_READ_LOCK() while changing values. 
*/ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ - volatile WalIndexHdr *pHdr; /* Header of the wal-index */ volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ u32 mxReadMark; /* Largest aReadMark[] value */ int mxI; /* Index of largest aReadMark[] value */ int i; /* Loop counter */ - int rc; /* Return code */ + int rc = SQLITE_OK; /* Return code */ assert( pWal->readLock<0 ); /* Not currently locked */ /* Take steps to avoid spinning forever if there is a protocol error. */ if( cnt>5 ){ @@ -1737,28 +1748,24 @@ rc = WAL_RETRY; }else if( rc==SQLITE_BUSY ){ rc = SQLITE_BUSY_RECOVERY; } } - }else{ - rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); } if( rc!=SQLITE_OK ){ return rc; } - pHdr = (volatile WalIndexHdr*)pWal->pWiData; - pInfo = (volatile WalCkptInfo*)&pHdr[2]; - assert( pInfo==walCkptInfo(pWal) ); + pInfo = walCkptInfo(pWal); if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){ /* The WAL has been completely backfilled (or it is empty). ** and can be safely ignored. */ rc = walLockShared(pWal, WAL_READ_LOCK(0)); sqlite3OsShmBarrier(pWal->pDbFd); if( rc==SQLITE_OK ){ - if( memcmp((void *)pHdr, &pWal->hdr, sizeof(WalIndexHdr)) ){ + if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ /* It is not safe to allow the reader to continue here if frames ** may have been appended to the log before READ_LOCK(0) was obtained. ** When holding READ_LOCK(0), the reader ignores the entire log file, ** which implies that the database file contains a trustworthy ** snapshoT. Since holding READ_LOCK(0) prevents a checkpoint from @@ -1848,11 +1855,11 @@ ** log-wrap (either of which would require an exclusive lock on ** WAL_READ_LOCK(mxI)) has not occurred since the snapshot was valid. 
*/ sqlite3OsShmBarrier(pWal->pDbFd); if( pInfo->aReadMark[mxI]!=mxReadMark - || memcmp((void *)pHdr, &pWal->hdr, sizeof(WalIndexHdr)) + || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ walUnlockShared(pWal, WAL_READ_LOCK(mxI)); return WAL_RETRY; }else{ assert( mxReadMark<=pWal->hdr.mxFrame ); @@ -1881,11 +1888,10 @@ int cnt = 0; /* Number of TryBeginRead attempts */ do{ rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); }while( rc==WAL_RETRY ); - walIndexUnmap(pWal); return rc; } /* ** Finish with a read transaction. All this does is release the @@ -1911,11 +1917,10 @@ Pgno pgno, /* Database page number to read data for */ int *pInWal, /* OUT: True if data is read from WAL */ int nOut, /* Size of buffer pOut in bytes */ u8 *pOut /* Buffer to write page data to */ ){ - int rc; /* Return code */ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ /* This routine is only be called from within a read transaction. */ @@ -1930,16 +1935,10 @@ if( iLast==0 || pWal->readLock==0 ){ *pInWal = 0; return SQLITE_OK; } - /* Ensure the wal-index is mapped. */ - rc = walIndexMap(pWal, walMappingSize(iLast)); - if( rc!=SQLITE_OK ){ - return rc; - } - /* Search the hash table or tables for an entry matching page number ** pgno. Each iteration of the following for() loop searches one ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). ** ** This code may run concurrently to the code in walIndexAppend() @@ -1961,39 +1960,39 @@ ** ** (iFrame<=iLast): ** This condition filters out entries that were added to the hash ** table after the current read-transaction had started. 
*/ - for(iHash=iLast; iHash>0 && iRead==0; iHash-=HASHTABLE_NPAGE){ - volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table */ - volatile u32 *aPgno; /* Pointer to array of page numbers */ + for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){ + volatile ht_slot *aHash; /* Pointer to hash table */ + volatile u32 *aPgno; /* Pointer to array of page numbers */ u32 iZero; /* Frame number corresponding to aPgno[0] */ int iKey; /* Hash slot index */ - int mxHash; /* upper bound on aHash[] values */ + int rc; - walHashFind(pWal, iHash, &aHash, &aPgno, &iZero); - mxHash = iLast - iZero; - if( mxHash > HASHTABLE_NPAGE ) mxHash = HASHTABLE_NPAGE; + rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero); + if( rc!=SQLITE_OK ){ + return rc; + } for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ u32 iFrame = aHash[iKey] + iZero; - if( iFrame<=iLast && aPgno[iFrame]==pgno ){ + if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){ assert( iFrame>iRead ); iRead = iFrame; } } } - assert( iRead==0 || pWal->pWiData[walIndexEntry(iRead)]==pgno ); #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* If expensive assert() statements are available, do a linear search ** of the wal-index file content. Make sure the results agree with the ** result obtained using the hash indexes above. */ { u32 iRead2 = 0; u32 iTest; for(iTest=iLast; iTest>0; iTest--){ - if( pWal->pWiData[walIndexEntry(iTest)]==pgno ){ + if( walFramePgno(pWal, iTest)==pgno ){ iRead2 = iTest; break; } } assert( iRead==iRead2 ); @@ -2001,11 +2000,10 @@ #endif /* If iRead is non-zero, then it is the log frame number that contains the ** required page. Read and return data from the log file. 
*/ - walIndexUnmap(pWal); if( iRead ){ i64 iOffset = walFrameOffset(iRead, pWal->hdr.szPage) + WAL_FRAME_HDRSIZE; *pInWal = 1; return sqlite3OsRead(pWal->pWalFd, pOut, nOut, iOffset); } @@ -2055,23 +2053,16 @@ /* If another connection has written to the database file since the ** time the read transaction on this connection was started, then ** the write is disallowed. */ - rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); - if( rc ){ - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - pWal->writeLock = 0; - return rc; - } - if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){ + if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); pWal->writeLock = 0; rc = SQLITE_BUSY; } - walIndexUnmap(pWal); return rc; } /* ** End a write transaction. The commit has already been done. This @@ -2096,43 +2087,39 @@ ** function returns SQLITE_OK. */ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ int rc = SQLITE_OK; if( pWal->writeLock ){ - int unused; Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; - assert( pWal->pWiData==0 ); - rc = walIndexReadHdr(pWal, &unused); - if( rc==SQLITE_OK ){ - rc = walIndexMap(pWal, walMappingSize(iMax)); - } - if( rc==SQLITE_OK ){ - for(iFrame=pWal->hdr.mxFrame+1; - ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; - iFrame++ - ){ - /* This call cannot fail. Unless the page for which the page number - ** is passed as the second argument is (a) in the cache and - ** (b) has an outstanding reference, then xUndo is either a no-op - ** (if (a) is false) or simply expels the page from the cache (if (b) - ** is false). - ** - ** If the upper layer is doing a rollback, it is guaranteed that there - ** are no outstanding references to any page other than page 1. And - ** page 1 is never written to the log until the transaction is - ** committed. As a result, the call to xUndo may not fail. 
- */ - assert( pWal->writeLock ); - assert( pWal->pWiData[walIndexEntry(iFrame)]!=1 ); - rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]); - } - walCleanupHash(pWal); - } - walIndexUnmap(pWal); - } + /* Restore the clients cache of the wal-index header to the state it + ** was in before the client began writing to the database. + */ + memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); + + for(iFrame=pWal->hdr.mxFrame+1; + ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; + iFrame++ + ){ + /* This call cannot fail. Unless the page for which the page number + ** is passed as the second argument is (a) in the cache and + ** (b) has an outstanding reference, then xUndo is either a no-op + ** (if (a) is false) or simply expels the page from the cache (if (b) + ** is false). + ** + ** If the upper layer is doing a rollback, it is guaranteed that there + ** are no outstanding references to any page other than page 1. And + ** page 1 is never written to the log until the transaction is + ** committed. As a result, the call to xUndo may not fail. 
+ */ + assert( walFramePgno(pWal, iFrame)!=1 ); + rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); + } + walCleanupHash(pWal); + } + assert( rc==SQLITE_OK ); return rc; } /* ** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 @@ -2168,20 +2155,16 @@ aWalData[0] = 0; aWalData[3] = pWal->nCkpt; } if( aWalData[0]<pWal->hdr.mxFrame ){ - rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); pWal->hdr.mxFrame = aWalData[0]; pWal->hdr.aFrameCksum[0] = aWalData[1]; pWal->hdr.aFrameCksum[1] = aWalData[2]; - if( rc==SQLITE_OK ){ - walCleanupHash(pWal); - } + walCleanupHash(pWal); } - walIndexUnmap(pWal); return rc; } /* ** This function is called just before writing a set of frames to the log @@ -2197,13 +2180,11 @@ */ static int walRestartLog(Wal *pWal){ int rc = SQLITE_OK; int cnt; - if( pWal->readLock==0 - && SQLITE_OK==(rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame))) - ){ + if( pWal->readLock==0 ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ @@ -2235,15 +2216,10 @@ cnt = 0; do{ int notUsed; rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt); }while( rc==WAL_RETRY ); - - /* Unmap the wal-index before returning. Otherwise the VFS layer may - ** hold a mutex for the duration of the IO performed by WalFrames(). - */ - walIndexUnmap(pWal); } return rc; } /* @@ -2265,11 +2241,10 @@ PgHdr *pLast = 0; /* Last frame in list */ int nLast = 0; /* Number of extra copies of last page */ assert( pList ); assert( pWal->writeLock ); - assert( pWal->pWiData==0 ); #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", pWal, cnt, pWal->hdr.mxFrame, isCommit ?
"Commit" : "Spill")); @@ -2278,14 +2253,12 @@ /* See if it is possible to write these frames into the start of the ** log file, instead of appending to it at pWal->hdr.mxFrame. */ if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ - assert( pWal->pWiData==0 ); return rc; } - assert( pWal->pWiData==0 && pWal->readLock>0 ); /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. See comments at the top of ** this source file for a description of the WAL header format. */ @@ -2356,11 +2329,10 @@ iOffset += szPage; } rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } - assert( pWal->pWiData==0 ); /* Append data to the wal-index. It is not necessary to lock the ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index ** guarantees that there are no other writers, and no data that may ** be in use by existing readers is being overwritten. @@ -2389,11 +2361,10 @@ walIndexWriteHdr(pWal); pWal->iCallback = iFrame; } } - walIndexUnmap(pWal); WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); return rc; } /* @@ -2410,11 +2381,10 @@ u8 *zBuf /* Temporary buffer to use */ ){ int rc; /* Return code */ int isChanged = 0; /* True if a new wal-index header is loaded */ - assert( pWal->pWiData==0 ); assert( pWal->ckptLock==0 ); WALTRACE(("WAL%p: checkpoint begins\n", pWal)); rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); if( rc ){ @@ -2439,11 +2409,10 @@ */ memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); } /* Release the locks. */ - walIndexUnmap(pWal); walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); pWal->ckptLock = 0; WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? 
"failed" : "ok")); return rc; } Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -11,10 +11,11 @@ # # $Id: permutations.test,v 1.51 2009/07/01 18:09:02 danielk1977 Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl +db close # Argument processing. # #puts "PERM-DEBUG: argv=$argv" namespace eval ::perm { Index: test/wal2.test ================================================================== --- test/wal2.test +++ test/wal2.test @@ -73,13 +73,18 @@ # # 3. Check that the reader recovers the wal-index and reads the correct # database content. # do_test wal2-1.0 { - proc tvfs_cb {method args} { return SQLITE_OK } + proc tvfs_cb {method filename args} { + set ::filename $filename + return SQLITE_OK + } + testvfs tvfs tvfs script tvfs_cb + tvfs filter xShmOpen sqlite3 db test.db -vfs tvfs sqlite3 db2 test.db -vfs tvfs execsql { @@ -121,25 +126,19 @@ 12 15 {15 120} -1 {$READ} " { do_test wal2-1.$tn.1 { execsql { INSERT INTO t1 VALUES($iInsert) } - set ::locks [list] - set ::cb_done 0 - proc tvfs_cb {method args} { - if {$::cb_done == 0 && $method == "xShmGet"} { - set ::cb_done 1 - if {$::wal_index_hdr_mod >= 0} { - incr_tvfs_hdr [lindex $args 0] $::wal_index_hdr_mod 1 - } - } - if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] } + lappend ::locks [lindex $args 2] return SQLITE_OK } - + tvfs filter xShmLock + if {$::wal_index_hdr_mod >= 0} { + incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1 + } execsql { SELECT count(a), sum(a) FROM t1 } db2 } $res do_test wal2-1.$tn.2 { set ::locks @@ -172,12 +171,13 @@ ] do_test wal2-2.0 { testvfs tvfs tvfs script tvfs_cb + tvfs filter xShmOpen proc tvfs_cb {method args} { - if {$method == "xShmOpen"} { set ::shm_file [lindex $args 0] } + set ::filename [lindex $args 0] return SQLITE_OK } sqlite3 db test.db -vfs tvfs sqlite3 db2 test.db -vfs tvfs @@ -206,60 +206,50 @@ 6 9 {8 36} {9 45} 4 7 10 {9 45} {10 
55} 5 8 11 {10 55} {11 66} 6 9 12 {11 66} {12 78} 7 } { + tvfs filter xShmLock + do_test wal2-2.$tn.1 { - set oldhdr [set_tvfs_hdr $::shm_file] + set oldhdr [set_tvfs_hdr $::filename] execsql { INSERT INTO t1 VALUES($iInsert) } execsql { SELECT count(a), sum(a) FROM t1 } } $res1 do_test wal2-2.$tn.2 { set ::locks [list] - set ::cb_done 0 proc tvfs_cb {method args} { - if {$::cb_done == 0 && $method == "xShmGet"} { - set ::cb_done 1 - if {$::wal_index_hdr_mod >= 0} { - incr_tvfs_hdr $::shm_file $::wal_index_hdr_mod 1 - } - } - if {$method == "xShmLock"} { - set lock [lindex $args 2] - lappend ::locks $lock - if {$lock == $::WRITER} { - set_tvfs_hdr $::shm_file $::oldhdr - } + set lock [lindex $args 2] + lappend ::locks $lock + if {$lock == $::WRITER} { + set_tvfs_hdr $::filename $::oldhdr } return SQLITE_OK } + if {$::wal_index_hdr_mod >= 0} { + incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1 + } execsql { SELECT count(a), sum(a) FROM t1 } db2 } $res0 do_test wal2-2.$tn.3 { set ::locks } $LOCKS do_test wal2-2.$tn.4 { set ::locks [list] - set ::cb_done 0 proc tvfs_cb {method args} { - if {$::cb_done == 0 && $method == "xShmGet"} { - set ::cb_done 1 - if {$::wal_index_hdr_mod >= 0} { - incr_tvfs_hdr $::shm_file $::wal_index_hdr_mod 1 - } - } - if {$method == "xShmLock"} { - set lock [lindex $args 2] - lappend ::locks $lock - } + set lock [lindex $args 2] + lappend ::locks $lock return SQLITE_OK } + if {$::wal_index_hdr_mod >= 0} { + incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1 + } execsql { SELECT count(a), sum(a) FROM t1 } db2 } $res1 } db close db2 close Index: test/wal3.test ================================================================== --- test/wal3.test +++ test/wal3.test @@ -351,11 +351,11 @@ T script method_callback proc method_callback {method args} { if {$method == "xShmBarrier"} { incr ::barrier_count - if {$::barrier_count == 1} { + if {$::barrier_count == 2} { # This code is executed within the xShmBarrier() callback invoked # by the client 
running recovery as part of writing the recovered # wal-index header. If a second client attempts to access the # database now, it reads a corrupt (partially written) wal-index # header. But it cannot even get that far, as the first client Index: test/walfault.test ================================================================== --- test/walfault.test +++ test/walfault.test @@ -117,10 +117,11 @@ PRAGMA wal_checkpoint; } } -test { faultsim_test_result {0 {}} } + #-------------------------------------------------------------------------- # faultsim_delete_and_reopen faultsim_save_and_close @@ -150,11 +151,11 @@ faultsim_save_and_close } {} do_faultsim_test walfault-5 -faults shmerr* -prep { faultsim_restore_and_reopen execsql { PRAGMA wal_autocheckpoint = 0 } - shmfault filter xShmSize + shmfault filter xShmPage } -body { execsql { CREATE TABLE t1(x); BEGIN; INSERT INTO t1 VALUES(randomblob(400)); /* 1 */ @@ -209,11 +210,11 @@ } faultsim_save_and_close } {} do_faultsim_test walfault-6 -faults shmerr* -prep { faultsim_restore_and_reopen - shmfault filter xShmSize + shmfault filter xShmPage } -body { execsql { SELECT count(*) FROM t1 } } -test { faultsim_test_result {0 16384} faultsim_integrity_check @@ -324,11 +325,11 @@ do_test walfault-10-pre1 { faultsim_delete_and_reopen execsql { PRAGMA journal_mode = WAL; - PRAGMA wal_checkpoint = 0; + PRAGMA wal_autocheckpoint = 0; CREATE TABLE z(zz INTEGER PRIMARY KEY, zzz BLOB); CREATE INDEX zzzz ON z(zzz); INSERT INTO z VALUES(NULL, randomblob(800)); INSERT INTO z VALUES(NULL, randomblob(800)); INSERT INTO z SELECT NULL, randomblob(800) FROM z; @@ -360,8 +361,87 @@ faultsim_integrity_check set n [db eval {SELECT count(*), sum(length(zzz)) FROM z}] if {$n != "64 51200"} { error "Incorrect data: $n" } } + +#-------------------------------------------------------------------------- +# Test fault injection while checkpointing a large WAL file, if the +# checkpoint is the first operation run after opening the database. 
+# This means that some of the required wal-index pages are mapped as part of +# the checkpoint process, which means there are a few more opportunities +# for IO errors. +# +# To speed this up, IO errors are only simulated within xShmPage() calls. +# +do_test walfault-11-pre-1 { + sqlite3 db test.db + execsql { + PRAGMA journal_mode = WAL; + PRAGMA wal_autocheckpoint = 0; + BEGIN; + CREATE TABLE abc(a PRIMARY KEY); + INSERT INTO abc VALUES(randomblob(1500)); + INSERT INTO abc VALUES(randomblob(1500)); + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 4 + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 8 + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 16 + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 32 + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 64 + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 128 + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 256 + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 512 + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 1024 + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 2048 + INSERT INTO abc SELECT randomblob(1500) FROM abc; -- 4096 + COMMIT; + } + faultsim_save_and_close +} {} +do_faultsim_test walfault-11 -faults shmerr* -prep { + catch { db2 close } + faultsim_restore_and_reopen + shmfault filter xShmPage +} -body { + db eval { SELECT count(*) FROM abc } + sqlite3 db2 test.db -vfs shmfault + db2 eval { PRAGMA wal_checkpoint } +} -test { + faultsim_test_result {0 {}} +} + +#------------------------------------------------------------------------- +# Test the handling of the various IO/OOM/SHM errors that may occur during +# a log recovery operation undertaken as part of a call to +# sqlite3_wal_checkpoint(). 
+# +do_test walfault-12-pre-1 { + faultsim_delete_and_reopen + execsql { + PRAGMA journal_mode = WAL; + PRAGMA wal_autocheckpoint = 0; + BEGIN; + CREATE TABLE abc(a PRIMARY KEY); + INSERT INTO abc VALUES(randomblob(1500)); + INSERT INTO abc VALUES(randomblob(1500)); + COMMIT; + } + faultsim_save_and_close +} {} +do_faultsim_test walfault-12 -prep { + if {[info commands shmfault] == ""} { + testvfs shmfault -default true + } + faultsim_restore_and_reopen + db eval { SELECT * FROM sqlite_master } + shmfault shm test.db [string repeat "\000" 40] +} -body { + set rc [sqlite3_wal_checkpoint db] + if {$rc != "SQLITE_OK"} { error [sqlite3_errmsg db] } +} -test { + db close + faultsim_test_result {0 {}} +} + finish_test