/* ** 2008 Jan 22 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains code for a VFS layer that acts as a wrapper around ** an existing VFS. The code in this file attempts to verify that SQLite ** correctly populates and syncs a journal file before writing to a ** corresponding database file. */ #if SQLITE_TEST /* This file is used for testing only */ #include "sqlite3.h" #include "sqliteInt.h" /* ** INTERFACE ** ** The public interface to this wrapper VFS is two functions: ** ** jt_register() ** jt_unregister() ** ** See header comments associated with those two functions below for ** details. ** ** LIMITATIONS ** ** This wrapper will not work if "PRAGMA synchronous = off" is used. ** ** OPERATION ** ** Starting a Transaction: ** ** When a write-transaction is started, the contents of the database is ** inspected and the following data stored as part of the database file ** handle (type struct jt_file): ** ** a) The page-size of the database file. ** b) The number of pages that are in the database file. ** c) The set of page numbers corresponding to free-list leaf pages. ** d) A check-sum for every page in the database file. ** ** The start of a write-transaction is deemed to have occurred when a ** 28-byte journal header is written to byte offset 0 of the journal ** file. ** ** Syncing the Journal File: ** ** Whenever the xSync method is invoked to sync a journal-file, the ** contents of the journal file are read. For each page written to ** the journal file, a check-sum is calculated and compared to the ** check-sum calculated for the corresponding database page when the ** write-transaction was initialized. 
The success of the comparison ** is assert()ed. So if SQLite has written something other than the ** original content to the database file, an assert() will fail. ** ** Additionally, the set of page numbers for which records exist in ** the journal file is added to (unioned with) the set of page numbers ** corresponding to free-list leaf pages collected when the ** write-transaction was initialized. This set comprises the page-numbers ** corresponding to those pages that SQLite may now safely modify. ** ** Writing to the Database File: ** ** When a block of data is written to a database file, the following ** invariants are asserted: ** ** a) That the block of data is an aligned block of page-size bytes. ** ** b) That if the page being written did not exist when the ** transaction was started (i.e. the database file is growing), then ** the journal-file must have been synced at least once since ** the start of the transaction. ** ** c) That if the page being written did exist when the transaction ** was started, then the page must have either been a free-list ** leaf page at the start of the transaction, or else must have ** been stored in the journal file prior to the most recent sync. ** ** Closing a Transaction: ** ** When a transaction is closed, all data collected at the start of ** the transaction, or following an xSync of a journal-file, is ** discarded. The end of a transaction is recognized when any one ** of the following occur: ** ** a) A block of zeroes (or anything else that is not a valid ** journal-header) is written to the start of the journal file. ** ** b) A journal file is truncated to zero bytes in size using xTruncate. ** ** c) The journal file is deleted using xDelete. */ /* ** Maximum pathname length supported by the jt backend. */ #define JT_MAX_PATHNAME 512 /* ** Name used to identify this VFS. 
*/
#define JT_VFS_NAME "jt"

/*
** File-handle object used by the jt VFS. The sqlite3_file base object is
** the first member, which is what allows the jt methods to cast the
** sqlite3_file pointers they are passed to jt_file pointers.
*/
typedef struct jt_file jt_file;
struct jt_file {
  sqlite3_file base;
  const char *zName;       /* Name of open file */
  int flags;               /* Flags the file was opened with */

  /* The following are only used by database file-handles */
  int eLock;               /* Current lock held on the file */
  u32 nPage;               /* Size of file in pages when transaction started */
  u32 nPagesize;           /* Page size when transaction started */
  Bitvec *pWritable;       /* Bitvec of pages that may be written to the file */
  u32 *aCksum;             /* Checksum for first nPage pages */
  int nSync;               /* Number of times journal file has been synced */

  /* Only used by journal file-handles */
  sqlite3_int64 iMaxOff;   /* Maximum offset written to this transaction */

  /* Used by all file-handles */
  jt_file *pNext;          /* Next handle in the list of open (named) files */
  sqlite3_file *pReal;     /* The file handle for the underlying vfs */
};

/*
** Method declarations for jt_file. These are the functions installed in
** the jt_io_methods table.
*/
static int jtClose(sqlite3_file*);
static int jtRead(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst);
static int jtWrite(sqlite3_file*,const void*,int iAmt, sqlite3_int64 iOfst);
static int jtTruncate(sqlite3_file*, sqlite3_int64 size);
static int jtSync(sqlite3_file*, int flags);
static int jtFileSize(sqlite3_file*, sqlite3_int64 *pSize);
static int jtLock(sqlite3_file*, int);
static int jtUnlock(sqlite3_file*, int);
static int jtCheckReservedLock(sqlite3_file*, int *);
static int jtFileControl(sqlite3_file*, int op, void *pArg);
static int jtSectorSize(sqlite3_file*);
static int jtDeviceCharacteristics(sqlite3_file*);

/*
** Method declarations for jt_vfs.
*/
static int jtOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *);
static int jtDelete(sqlite3_vfs*, const char *zName, int syncDir);
static int jtAccess(sqlite3_vfs*, const char *zName, int flags, int *);
static int jtFullPathname(sqlite3_vfs*, const char *zName, int, char *zOut);
static void *jtDlOpen(sqlite3_vfs*, const char *zFilename);
static void jtDlError(sqlite3_vfs*, int nByte, char *zErrMsg);
static void (*jtDlSym(sqlite3_vfs*,void*, const char *zSymbol))(void);
static void jtDlClose(sqlite3_vfs*, void*);
static int jtRandomness(sqlite3_vfs*, int nByte, char *zOut);
static int jtSleep(sqlite3_vfs*, int microseconds);
static int jtCurrentTime(sqlite3_vfs*, double*);

/*
** The jt VFS object. The szOsFile value below is only a placeholder:
** jt_register() overwrites it with sizeof(jt_file) plus the szOsFile of
** the wrapped VFS before the object is registered.
*/
static sqlite3_vfs jt_vfs = {
  1,                             /* iVersion */
  sizeof(jt_file),               /* szOsFile */
  JT_MAX_PATHNAME,               /* mxPathname */
  0,                             /* pNext */
  JT_VFS_NAME,                   /* zName */
  0,                             /* pAppData */
  jtOpen,                        /* xOpen */
  jtDelete,                      /* xDelete */
  jtAccess,                      /* xAccess */
  jtFullPathname,                /* xFullPathname */
  jtDlOpen,                      /* xDlOpen */
  jtDlError,                     /* xDlError */
  jtDlSym,                       /* xDlSym */
  jtDlClose,                     /* xDlClose */
  jtRandomness,                  /* xRandomness */
  jtSleep,                       /* xSleep */
  jtCurrentTime,                 /* xCurrentTime */
};

/*
** I/O method table installed on every file-handle that jtOpen() opens
** successfully.
*/
static sqlite3_io_methods jt_io_methods = {
  1,                             /* iVersion */
  jtClose,                       /* xClose */
  jtRead,                        /* xRead */
  jtWrite,                       /* xWrite */
  jtTruncate,                    /* xTruncate */
  jtSync,                        /* xSync */
  jtFileSize,                    /* xFileSize */
  jtLock,                        /* xLock */
  jtUnlock,                      /* xUnlock */
  jtCheckReservedLock,           /* xCheckReservedLock */
  jtFileControl,                 /* xFileControl */
  jtSectorSize,                  /* xSectorSize */
  jtDeviceCharacteristics        /* xDeviceCharacteristics */
};

/*
** Global state of the jt VFS: the VFS being wrapped and the list of
** file-handles currently open through the wrapper.
*/
struct JtGlobal {
  sqlite3_vfs *pVfs;             /* Parent VFS */
  jt_file *pList;                /* List of all open files */
};
static struct JtGlobal g = {0, 0};

/*
** Functions to obtain and relinquish a mutex to protect g.pList. The
** STATIC_PRNG mutex is reused, purely for the sake of convenience.
*/ static void enterJtMutex(void){ sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_PRNG)); } static void leaveJtMutex(void){ sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_PRNG)); } extern int sqlite3_io_error_pending; extern int sqlite3_io_error_hit; static void stop_ioerr_simulation(int *piSave, int *piSave2){ *piSave = sqlite3_io_error_pending; *piSave2 = sqlite3_io_error_hit; sqlite3_io_error_pending = -1; sqlite3_io_error_hit = 0; } static void start_ioerr_simulation(int iSave, int iSave2){ sqlite3_io_error_pending = iSave; sqlite3_io_error_hit = iSave2; } /* ** The jt_file pointed to by the argument may or may not be a file-handle ** open on a main database file. If it is, and a transaction is currently ** opened on the file, then discard all transaction related data. */ static void closeTransaction(jt_file *p){ sqlite3BitvecDestroy(p->pWritable); sqlite3_free(p->aCksum); p->pWritable = 0; p->aCksum = 0; p->nSync = 0; } /* ** Close an jt-file. */ static int jtClose(sqlite3_file *pFile){ jt_file **pp; jt_file *p = (jt_file *)pFile; closeTransaction(p); enterJtMutex(); if( p->zName ){ for(pp=&g.pList; *pp!=p; pp=&(*pp)->pNext); *pp = p->pNext; } leaveJtMutex(); return sqlite3OsClose(p->pReal); } /* ** Read data from an jt-file. */ static int jtRead( sqlite3_file *pFile, void *zBuf, int iAmt, sqlite_int64 iOfst ){ jt_file *p = (jt_file *)pFile; return sqlite3OsRead(p->pReal, zBuf, iAmt, iOfst); } /* ** Parameter zJournal is the name of a journal file that is currently ** open. This function locates and returns the handle opened on the ** corresponding database file by the pager that currently has the ** journal file opened. This file-handle is identified by the ** following properties: ** ** a) SQLITE_OPEN_MAIN_DB was specified when the file was opened. ** ** b) The file-name specified when the file was opened matches ** all but the final 8 characters of the journal file name. ** ** c) There is currently a reserved lock on the file. 
**/ static jt_file *locateDatabaseHandle(const char *zJournal){ jt_file *pMain = 0; enterJtMutex(); for(pMain=g.pList; pMain; pMain=pMain->pNext){ int nName = strlen(zJournal) - strlen("-journal"); if( (pMain->flags&SQLITE_OPEN_MAIN_DB) && (strlen(pMain->zName)==nName) && 0==memcmp(pMain->zName, zJournal, nName) && (pMain->eLock>=SQLITE_LOCK_RESERVED) ){ break; } } leaveJtMutex(); return pMain; } /* ** Parameter z points to a buffer of 4 bytes in size containing a ** unsigned 32-bit integer stored in big-endian format. Decode the ** integer and return its value. */ static u32 decodeUint32(const unsigned char *z){ return (z[0]<<24) + (z[1]<<16) + (z[2]<<8) + z[3]; } /* ** Calculate a checksum from the buffer of length n bytes pointed to ** by parameter z. */ static u32 genCksum(const unsigned char *z, int n){ int i; u32 cksum = 0; for(i=0; ipReal; int rc = SQLITE_OK; aData = sqlite3_malloc(pMain->nPagesize); pMain->pWritable = sqlite3BitvecCreate(pMain->nPage); pMain->aCksum = sqlite3_malloc(sizeof(u32) * (pMain->nPage + 1)); pJournal->iMaxOff = 0; if( !pMain->pWritable || !pMain->aCksum || !aData ){ rc = SQLITE_IOERR_NOMEM; }else if( pMain->nPage>0 ){ u32 iTrunk; int iSave; int iSave2; stop_ioerr_simulation(&iSave, &iSave2); /* Read the database free-list. Add the page-number for each free-list ** leaf to the jt_file.pWritable bitvec. */ rc = sqlite3OsRead(p, aData, pMain->nPagesize, 0); iTrunk = decodeUint32(&aData[32]); while( rc==SQLITE_OK && iTrunk>0 ){ u32 nLeaf; u32 iLeaf; sqlite3_int64 iOff = (iTrunk-1)*pMain->nPagesize; rc = sqlite3OsRead(p, aData, pMain->nPagesize, iOff); nLeaf = decodeUint32(&aData[4]); for(iLeaf=0; rc==SQLITE_OK && iLeafpWritable, pgno); } iTrunk = decodeUint32(aData); } /* Calculate and store a checksum for each page in the database file. 
*/ if( rc==SQLITE_OK ){ int ii; for(ii=0; rc==SQLITE_OK && iinPage; ii++){ i64 iOff = (i64)(pMain->nPagesize) * (i64)ii; if( iOff==PENDING_BYTE ) continue; rc = sqlite3OsRead(pMain->pReal, aData, pMain->nPagesize, iOff); pMain->aCksum[ii] = genCksum(aData, pMain->nPagesize); } } start_ioerr_simulation(iSave, iSave2); } sqlite3_free(aData); return rc; } /* ** The first argument to this function is a handle open on a journal file. ** This function reads the journal file and adds the page number for each ** page in the journal to the Bitvec object passed as the second argument. */ static int readJournalFile(jt_file *p, jt_file *pMain){ int rc = SQLITE_OK; unsigned char zBuf[28]; sqlite3_file *pReal = p->pReal; sqlite3_int64 iOff = 0; sqlite3_int64 iSize = p->iMaxOff; unsigned char *aPage; int iSave; int iSave2; aPage = sqlite3_malloc(pMain->nPagesize); if( !aPage ){ return SQLITE_IOERR_NOMEM; } stop_ioerr_simulation(&iSave, &iSave2); while( rc==SQLITE_OK && iOff=(iOff+nSector) ){ rc = sqlite3OsRead(pReal, zBuf, 28, iOff); if( rc!=SQLITE_OK || 0==decodeJournalHdr(zBuf, 0, 0, 0, 0) ){ continue; } } nRec = (iSize-iOff) / (pMain->nPagesize+8); } /* Read all the records that follow the journal-header just read. */ for(ii=0; rc==SQLITE_OK && ii0 && pgno<=pMain->nPage ){ if( 0==sqlite3BitvecTest(pMain->pWritable, pgno) ){ rc = sqlite3OsRead(pReal, aPage, pMain->nPagesize, iOff+4); if( rc==SQLITE_OK ){ u32 cksum = genCksum(aPage, pMain->nPagesize); assert( cksum==pMain->aCksum[pgno-1] ); } } sqlite3BitvecSet(pMain->pWritable, pgno); } iOff += (8 + pMain->nPagesize); } } iOff = ((iOff + (nSector-1)) / nSector) * nSector; } finish_rjf: start_ioerr_simulation(iSave, iSave2); sqlite3_free(aPage); if( rc==SQLITE_IOERR_SHORT_READ ){ rc = SQLITE_OK; } return rc; } /* ** Write data to an jt-file. 
*/ static int jtWrite( sqlite3_file *pFile, const void *zBuf, int iAmt, sqlite_int64 iOfst ){ int rc; jt_file *p = (jt_file *)pFile; if( p->flags&SQLITE_OPEN_MAIN_JOURNAL ){ if( iOfst==0 ){ jt_file *pMain = locateDatabaseHandle(p->zName); assert( pMain ); if( iAmt==28 ){ /* Zeroing the first journal-file header. This is the end of a ** transaction. */ closeTransaction(pMain); }else if( iAmt!=12 ){ /* Writing the first journal header to a journal file. This happens ** when a transaction is first started. */ u8 *z = (u8 *)zBuf; pMain->nPage = decodeUint32(&z[16]); pMain->nPagesize = decodeUint32(&z[24]); if( SQLITE_OK!=(rc=openTransaction(pMain, p)) ){ return rc; } } } if( p->iMaxOff<(iOfst + iAmt) ){ p->iMaxOff = iOfst + iAmt; } } if( p->flags&SQLITE_OPEN_MAIN_DB && p->pWritable ){ if( iAmtnPagesize && p->nPagesize%iAmt==0 && iOfst>=(PENDING_BYTE+512) && iOfst+iAmt<=PENDING_BYTE+p->nPagesize ){ /* No-op. This special case is hit when the backup code is copying a ** to a database with a larger page-size than the source database and ** it needs to fill in the non-locking-region part of the original ** pending-byte page. */ }else{ u32 pgno = iOfst/p->nPagesize + 1; assert( (iAmt==1||iAmt==p->nPagesize) && ((iOfst+iAmt)%p->nPagesize)==0 ); assert( pgno<=p->nPage || p->nSync>0 ); assert( pgno>p->nPage || sqlite3BitvecTest(p->pWritable, pgno) ); } } rc = sqlite3OsWrite(p->pReal, zBuf, iAmt, iOfst); if( (p->flags&SQLITE_OPEN_MAIN_JOURNAL) && iAmt==12 ){ jt_file *pMain = locateDatabaseHandle(p->zName); int rc2 = readJournalFile(p, pMain); if( rc==SQLITE_OK ) rc = rc2; } return rc; } /* ** Truncate an jt-file. */ static int jtTruncate(sqlite3_file *pFile, sqlite_int64 size){ jt_file *p = (jt_file *)pFile; if( p->flags&SQLITE_OPEN_MAIN_JOURNAL && size==0 ){ /* Truncating a journal file. This is the end of a transaction. 
*/ jt_file *pMain = locateDatabaseHandle(p->zName); closeTransaction(pMain); } if( p->flags&SQLITE_OPEN_MAIN_DB && p->pWritable ){ u32 pgno; u32 locking_page = (u32)(PENDING_BYTE/p->nPagesize+1); for(pgno=size/p->nPagesize+1; pgno<=p->nPage; pgno++){ assert( pgno==locking_page || sqlite3BitvecTest(p->pWritable, pgno) ); } } return sqlite3OsTruncate(p->pReal, size); } /* ** Sync an jt-file. */ static int jtSync(sqlite3_file *pFile, int flags){ jt_file *p = (jt_file *)pFile; if( p->flags&SQLITE_OPEN_MAIN_JOURNAL ){ int rc; jt_file *pMain; /* The associated database file */ /* The journal file is being synced. At this point, we inspect the ** contents of the file up to this point and set each bit in the ** jt_file.pWritable bitvec of the main database file associated with ** this journal file. */ pMain = locateDatabaseHandle(p->zName); assert(pMain); /* Set the bitvec values */ if( pMain->pWritable ){ pMain->nSync++; rc = readJournalFile(p, pMain); if( rc!=SQLITE_OK ){ return rc; } } } return sqlite3OsSync(p->pReal, flags); } /* ** Return the current file-size of an jt-file. */ static int jtFileSize(sqlite3_file *pFile, sqlite_int64 *pSize){ jt_file *p = (jt_file *)pFile; return sqlite3OsFileSize(p->pReal, pSize); } /* ** Lock an jt-file. */ static int jtLock(sqlite3_file *pFile, int eLock){ int rc; jt_file *p = (jt_file *)pFile; rc = sqlite3OsLock(p->pReal, eLock); if( rc==SQLITE_OK && eLock>p->eLock ){ p->eLock = eLock; } return rc; } /* ** Unlock an jt-file. */ static int jtUnlock(sqlite3_file *pFile, int eLock){ int rc; jt_file *p = (jt_file *)pFile; rc = sqlite3OsUnlock(p->pReal, eLock); if( rc==SQLITE_OK && eLockeLock ){ p->eLock = eLock; } return rc; } /* ** Check if another file-handle holds a RESERVED lock on an jt-file. */ static int jtCheckReservedLock(sqlite3_file *pFile, int *pResOut){ jt_file *p = (jt_file *)pFile; return sqlite3OsCheckReservedLock(p->pReal, pResOut); } /* ** File control method. For custom operations on an jt-file. 
*/ static int jtFileControl(sqlite3_file *pFile, int op, void *pArg){ jt_file *p = (jt_file *)pFile; return sqlite3OsFileControl(p->pReal, op, pArg); } /* ** Return the sector-size in bytes for an jt-file. */ static int jtSectorSize(sqlite3_file *pFile){ jt_file *p = (jt_file *)pFile; return sqlite3OsSectorSize(p->pReal); } /* ** Return the device characteristic flags supported by an jt-file. */ static int jtDeviceCharacteristics(sqlite3_file *pFile){ jt_file *p = (jt_file *)pFile; return sqlite3OsDeviceCharacteristics(p->pReal); } /* ** Open an jt file handle. */ static int jtOpen( sqlite3_vfs *pVfs, const char *zName, sqlite3_file *pFile, int flags, int *pOutFlags ){ int rc; jt_file *p = (jt_file *)pFile; pFile->pMethods = 0; p->pReal = (sqlite3_file *)&p[1]; p->pReal->pMethods = 0; rc = sqlite3OsOpen(g.pVfs, zName, p->pReal, flags, pOutFlags); assert( rc==SQLITE_OK || p->pReal->pMethods==0 ); if( rc==SQLITE_OK ){ pFile->pMethods = &jt_io_methods; p->eLock = 0; p->zName = zName; p->flags = flags; p->pNext = 0; p->pWritable = 0; p->aCksum = 0; enterJtMutex(); if( zName ){ p->pNext = g.pList; g.pList = p; } leaveJtMutex(); } return rc; } /* ** Delete the file located at zPath. If the dirSync argument is true, ** ensure the file-system modifications are synced to disk before ** returning. */ static int jtDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){ int nPath = strlen(zPath); if( nPath>8 && 0==strcmp("-journal", &zPath[nPath-8]) ){ /* Deleting a journal file. The end of a transaction. */ jt_file *pMain = locateDatabaseHandle(zPath); if( pMain ){ closeTransaction(pMain); } } return sqlite3OsDelete(g.pVfs, zPath, dirSync); } /* ** Test for access permissions. Return true if the requested permission ** is available, or false otherwise. 
*/ static int jtAccess( sqlite3_vfs *pVfs, const char *zPath, int flags, int *pResOut ){ return sqlite3OsAccess(g.pVfs, zPath, flags, pResOut); } /* ** Populate buffer zOut with the full canonical pathname corresponding ** to the pathname in zPath. zOut is guaranteed to point to a buffer ** of at least (JT_MAX_PATHNAME+1) bytes. */ static int jtFullPathname( sqlite3_vfs *pVfs, const char *zPath, int nOut, char *zOut ){ return sqlite3OsFullPathname(g.pVfs, zPath, nOut, zOut); } /* ** Open the dynamic library located at zPath and return a handle. */ static void *jtDlOpen(sqlite3_vfs *pVfs, const char *zPath){ return g.pVfs->xDlOpen(g.pVfs, zPath); } /* ** Populate the buffer zErrMsg (size nByte bytes) with a human readable ** utf-8 string describing the most recent error encountered associated ** with dynamic libraries. */ static void jtDlError(sqlite3_vfs *pVfs, int nByte, char *zErrMsg){ g.pVfs->xDlError(g.pVfs, nByte, zErrMsg); } /* ** Return a pointer to the symbol zSymbol in the dynamic library pHandle. */ static void (*jtDlSym(sqlite3_vfs *pVfs, void *p, const char *zSym))(void){ return g.pVfs->xDlSym(g.pVfs, p, zSym); } /* ** Close the dynamic library handle pHandle. */ static void jtDlClose(sqlite3_vfs *pVfs, void *pHandle){ g.pVfs->xDlClose(g.pVfs, pHandle); } /* ** Populate the buffer pointed to by zBufOut with nByte bytes of ** random data. */ static int jtRandomness(sqlite3_vfs *pVfs, int nByte, char *zBufOut){ return sqlite3OsRandomness(g.pVfs, nByte, zBufOut); } /* ** Sleep for nMicro microseconds. Return the number of microseconds ** actually slept. */ static int jtSleep(sqlite3_vfs *pVfs, int nMicro){ return sqlite3OsSleep(g.pVfs, nMicro); } /* ** Return the current time as a Julian Day number in *pTimeOut. */ static int jtCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ return g.pVfs->xCurrentTime(g.pVfs, pTimeOut); } /************************************************************************** ** Start of public API. 
*/ /* ** Configure the jt VFS as a wrapper around the VFS named by parameter ** zWrap. If the isDefault parameter is true, then the jt VFS is installed ** as the new default VFS for SQLite connections. If isDefault is not ** true, then the jt VFS is installed as non-default. In this case it ** is available via its name, "jt". */ int jt_register(char *zWrap, int isDefault){ g.pVfs = sqlite3_vfs_find(zWrap); if( g.pVfs==0 ){ return SQLITE_ERROR; } jt_vfs.szOsFile = sizeof(jt_file) + g.pVfs->szOsFile; sqlite3_vfs_register(&jt_vfs, isDefault); return SQLITE_OK; } /* ** Uninstall the jt VFS, if it is installed. */ void jt_unregister(void){ sqlite3_vfs_unregister(&jt_vfs); } #endif