SQLite: Check-in [62db5fa3b6]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

Comment:	Refactor wal.c to use the VFS. This check-in compiles and links and works ok as long as you leave WAL turned off, but WAL does not work.
Downloads:	Tarball \| ZIP archive
Timelines:	family \| ancestors \| descendants \| both \| wal
Files:	files \| file ages \| folders
SHA1:	62db5fa3b61be885b2d94e9b9ce3877b2c588350
User & Date:	drh 2010-04-29 22:34:08.000

Context

2010-04-30
02:13		The first 6 WAL tests now work. It's a start. (check-in: a92c1851da user: drh tags: wal)
2010-04-29
22:34		Refactor wal.c to use the VFS. This check-in compiles and links and works ok as long as you leave WAL turned off, but WAL does not work. (check-in: 62db5fa3b6 user: drh tags: wal)
16:40		Untested implementation of the shared-memory dead-man-switch. (check-in: 706611283e user: drh tags: wal)

Changes

Changes to src/btree.c.

Changes to src/main.c.

Changes to src/os_unix.c.

Changes to src/pager.c.

Changes to src/pager.h.

Changes to src/sqlite.h.in.

Changes to src/sqliteInt.h.

Changes to src/test1.c.

Changes to src/vdbe.c.

Changes to src/vdbeapi.c.

Changes to src/wal.c.

Changes to src/wal.h.

︙			︙
5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058	/ static int unixShmClose(sqlite3_shm pSharedMem){ unixShm p; / The connection to be closed / unixShmFile pFile; /* The underlying shared-memory file / unixShm pp; / For looping over sibling connections / int nRef; / Number of connections to pFile / p = (struct unixShm)pSharedMem; pFile = p->pFile; /* Verify that the connection being closed holds no locks */ assert( p->exclMask==0 ); assert( p->sharedMask==0 );	>	5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059	/ static int unixShmClose(sqlite3_shm pSharedMem){ unixShm p; / The connection to be closed / unixShmFile pFile; /* The underlying shared-memory file / unixShm pp; / For looping over sibling connections / int nRef; / Number of connections to pFile / if( pSharedMem==0 ) return SQLITE_OK; p = (struct unixShm)pSharedMem; pFile = p->pFile; /* Verify that the connection being closed holds no locks */ assert( p->exclMask==0 ); assert( p->sharedMask==0 );
︙			︙
5088 5089 5090 5091 5092 5093 5094 ~~5095~~ 5096 5097 5098 5099 5100 5101 5102	shared memory segment. xShmRelease() must be called to release the lock. / static int unixShmSize( sqlite3_shm pSharedMem, /* Pointer returned by unixShmOpen() / int reqSize, / Requested size. -1 for query only / int pNewSize, /* Write new size here / ~~~~char~~ ppBuf / Write new buffer origin here /~~ ){ unixShm p = (unixShm)pSharedMem; unixShmFile pFile = p->pFile; int rc = SQLITE_OK; sqlite3_mutex_enter(pFile->mutexBuf); sqlite3_mutex_enter(pFile->mutex);	\|	5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103	shared memory segment. xShmRelease() must be called to release the lock. / static int unixShmSize( sqlite3_shm pSharedMem, /* Pointer returned by unixShmOpen() / int reqSize, / Requested size. -1 for query only / int pNewSize, /* Write new size here / void ppBuf / Write new buffer origin here / ){ unixShm p = (unixShm)pSharedMem; unixShmFile pFile = p->pFile; int rc = SQLITE_OK; sqlite3_mutex_enter(pFile->mutexBuf); sqlite3_mutex_enter(pFile->mutex);
︙			︙
5161 5162 5163 5164 5165 5166 5167 ~~5168~~ 5169 5170 5171 5172 5173 5174 5175	/* Return directly if this is just a lock state query, or if ** the connection is already in the desired locking state. / if( desiredLock==SQLITE_SHM_QUERY \|\| desiredLock==p->lockState \|\| (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL) ){ pGotLock = p->lockState; return SQLITE_OK; } sqlite3_mutex_enter(pFile->mutex); switch( desiredLock ){ case SQLITE_SHM_UNLOCK: { assert( p->lockState!=SQLITE_SHM_RECOVER );	\|	5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176	/* Return directly if this is just a lock state query, or if ** the connection is already in the desired locking state. / if( desiredLock==SQLITE_SHM_QUERY \|\| desiredLock==p->lockState \|\| (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL) ){ if( pGotLock ) pGotLock = p->lockState; return SQLITE_OK; } sqlite3_mutex_enter(pFile->mutex); switch( desiredLock ){ case SQLITE_SHM_UNLOCK: { assert( p->lockState!=SQLITE_SHM_RECOVER );
︙			︙
5253 5254 5255 5256 5257 5258 5259 ~~5260~~ 5261 5262 5263 5264 5265 5266 5267	if( rc==SQLITE_OK ){ p->lockState = SQLITE_SHM_RECOVER; } break; } } sqlite3_mutex_leave(pFile->mutex); pGotLock = p->lockState; return rc; } / ** Delete a shared-memory segment from the system. / static int unixShmDelete(sqlite3_vfs pVfs, const char *zName){	\|	5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268	if( rc==SQLITE_OK ){ p->lockState = SQLITE_SHM_RECOVER; } break; } } sqlite3_mutex_leave(pFile->mutex); if( pGotLock ) pGotLock = p->lockState; return rc; } / ** Delete a shared-memory segment from the system. / static int unixShmDelete(sqlite3_vfs pVfs, const char *zName){
︙			︙

︙			︙
5109 5110 5111 5112 5113 5114 5115 ~~5116~~ 5117 5118 5119 5120 5121 5122 5123	extern int sqlite3WhereTrace; extern int sqlite3OSTrace; extern int sqlite3VdbeAddopTrace; #endif #ifdef SQLITE_TEST extern char sqlite3_query_plan[]; static char query_plan = sqlite3_query_plan; ~~extern int sqlite3_walsummary_mmap_incr; / In wal.c */~~ #ifdef SQLITE_ENABLE_FTS3 extern int sqlite3_fts3_enable_parentheses; #endif #endif for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){ Tcl_CreateCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);	\|	5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123	extern int sqlite3WhereTrace; extern int sqlite3OSTrace; extern int sqlite3VdbeAddopTrace; #endif #ifdef SQLITE_TEST extern char sqlite3_query_plan[]; static char query_plan = sqlite3_query_plan; / extern int sqlite3_walsummary_mmap_incr; // In wal.c */ #ifdef SQLITE_ENABLE_FTS3 extern int sqlite3_fts3_enable_parentheses; #endif #endif for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){ Tcl_CreateCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
︙			︙
5169 5170 5171 5172 5173 5174 5175 ~~5176 5177~~ 5178 5179 5180 5181 5182 5183 5184	#if SQLITE_OS_WIN Tcl_LinkVar(interp, "sqlite_os_type", (char)&sqlite3_os_type, TCL_LINK_INT); #endif #ifdef SQLITE_TEST Tcl_LinkVar(interp, "sqlite_query_plan", (char)&query_plan, TCL_LINK_STRING\|TCL_LINK_READ_ONLY); ~~Tcl_LinkVar(interp, "sqlite_walsummary_mmap_incr", (char)&sqlite3_walsummary_mmap_incr, TCL_LINK_INT);~~ #endif #ifdef SQLITE_DEBUG Tcl_LinkVar(interp, "sqlite_addop_trace", (char)&sqlite3VdbeAddopTrace, TCL_LINK_INT); Tcl_LinkVar(interp, "sqlite_where_trace", (char*)&sqlite3WhereTrace, TCL_LINK_INT); Tcl_LinkVar(interp, "sqlite_os_trace",	\| \|	5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184	#if SQLITE_OS_WIN Tcl_LinkVar(interp, "sqlite_os_type", (char)&sqlite3_os_type, TCL_LINK_INT); #endif #ifdef SQLITE_TEST Tcl_LinkVar(interp, "sqlite_query_plan", (char)&query_plan, TCL_LINK_STRING\|TCL_LINK_READ_ONLY); /* Tcl_LinkVar(interp, "sqlite_walsummary_mmap_incr", (char)&sqlite3_walsummary_mmap_incr, TCL_LINK_INT); / #endif #ifdef SQLITE_DEBUG Tcl_LinkVar(interp, "sqlite_addop_trace", (char)&sqlite3VdbeAddopTrace, TCL_LINK_INT); Tcl_LinkVar(interp, "sqlite_where_trace", (char)&sqlite3WhereTrace, TCL_LINK_INT); Tcl_LinkVar(interp, "sqlite_os_trace",
︙			︙

︙			︙
302 303 304 305 306 307 308 ~~309~~ 310 311 312 313 314 ~~315 316 317~~ 318 319 320 321 322 323 324	void sqlite3_result_error_nomem(sqlite3_context pCtx){ assert( sqlite3_mutex_held(pCtx->s.db->mutex) ); sqlite3VdbeMemSetNull(&pCtx->s); pCtx->isError = SQLITE_NOMEM; pCtx->s.db->mallocFailed = 1; } ~~static int do~~Log~~Callbacks(sqlite3 db){~~ int i; int rc = SQLITE_OK; for(i=0; i<db->nDb; i++){ Btree *pBt = db->aDb[i].pBt; if( pBt ){ int nEntry = sqlite3Pager~~Log~~Callback(sqlite3BtreePager(pBt)); if( db->x~~Log~~Callback && nEntry>0 && rc==SQLITE_OK && db->x~~Log~~Callback(db->p~~Log~~Arg, db, db->aDb[i].zName, nEntry) ){ rc = sqlite3PagerCheckpoint(sqlite3BtreePager(pBt)); } } } return rc; }	\| \| \| \|	302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324	void sqlite3_result_error_nomem(sqlite3_context pCtx){ assert( sqlite3_mutex_held(pCtx->s.db->mutex) ); sqlite3VdbeMemSetNull(&pCtx->s); pCtx->isError = SQLITE_NOMEM; pCtx->s.db->mallocFailed = 1; } static int doWalCallbacks(sqlite3 db){ int i; int rc = SQLITE_OK; for(i=0; i<db->nDb; i++){ Btree *pBt = db->aDb[i].pBt; if( pBt ){ int nEntry = sqlite3PagerWalCallback(sqlite3BtreePager(pBt)); if( db->xWalCallback && nEntry>0 && rc==SQLITE_OK && db->xWalCallback(db->pWalArg, db, db->aDb[i].zName, nEntry) ){ rc = sqlite3PagerCheckpoint(sqlite3BtreePager(pBt)); } } } return rc; }
︙			︙
402 403 404 405 406 407 408 ~~409~~ 410 411 412 413 414 415 416	elapseTime -= p->startTime; db->xProfile(db->pProfileArg, p->zSql, elapseTime); } #endif if( rc==SQLITE_DONE ){ assert( p->rc==SQLITE_OK ); ~~p->rc = do~~Log~~Callbacks(db);~~ if( p->rc!=SQLITE_OK ){ rc = SQLITE_ERROR; } } db->errCode = rc; if( SQLITE_NOMEM==sqlite3ApiExit(p->db, p->rc) ){	\|	402 403 404 405 406 407 408 409 410 411 412 413 414 415 416	elapseTime -= p->startTime; db->xProfile(db->pProfileArg, p->zSql, elapseTime); } #endif if( rc==SQLITE_DONE ){ assert( p->rc==SQLITE_OK ); p->rc = doWalCallbacks(db); if( p->rc!=SQLITE_OK ){ rc = SQLITE_ERROR; } } db->errCode = rc; if( SQLITE_NOMEM==sqlite3ApiExit(p->db, p->rc) ){
︙			︙

1 2 3 4 5 6 7 8 9 10 ~~11 12~~ 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 ~~34 35~~ 36 37 38 39 40 41 ~~42 43~~ 44 45 46 ~~47 48~~ 49 ~~50 51~~ 52 53 ~~54 55 56~~ 57 58 59 60 ~~61 62 63 64 65 66 67 68 69 70~~ 71 72 73 ~~74 75~~ 76 77 78 79 80 81 82 83 84 85 86 87 88 89 ~~90 91~~ 92 93 94 95 96 97 ~~98 99~~ 100 ~~101 102~~ 103 ~~104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145~~ 146 ~~147 148 149 150~~ 151 ~~152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173~~ ~~174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190~~ ~~191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237~~ 238 239 ~~240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268~~ 269 ~~270 271 272 273 274 275 276 277 278 279~~ ~~280 281 282 283~~ 284 285 286 287 288 289 290 291 292 293 294 ~~295 296 297~~ 298 ~~299~~ 300 ~~301 302~~ 303 ~~304~~ 305 306 307 308 309 310 311 ~~312 313 314 315 316 317 318 319 320~~ 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 ~~339~~ 340 341 342 343 344 345 346	/* This file contains the implementation of a log file used in "journal_mode=wal" mode. / / ~~LOG~~ FILE FORMAT A ~~log~~ file consists of a header followed by zero or more ~~log~~ frames. The ~~log~~ header is 12 bytes in size and consists of the following three big-endian 32-bit unsigned integer values: 0: Database page size, 4: Randomly selected salt value 1, 8: Randomly selected salt value 2. Immediately following the ~~log~~ header are zero or more ~~log~~ frames. Each frame itself consists of a 16-byte header followed by a <page-size> bytes of page data. The header is broken into 4 big-endian 32-bit unsigned integer values, as follows: 0: Page number. 4: For commit records, the size of the database image in pages after the commit. For all other records, zero. 8: Checksum value 1. 12: Checksum value 2. / / L~~OG SUMMARY~~ FILE FORMAT The log-~~summary~~ file consists of a header region, followed by an region that contains no useful data (used to apply byte-range locks to), followed by the data region. The contents of both the header and data region are specified in terms of 1, 2 and 4 byte unsigned integers. All integers are stored in machine-endian order. A log-~~summary~~ file is essentially a shadow-pager map. It contains a mapping from database page number to the set of locations in the ~~log~~ file that contain versions of the database page. When a database client needs to read a page of data, it first queries the log-~~summary~~ file to determine if the required version of the page is stored in the ~~log~~. If so, it is read from the ~~log file~~. If not, it is ~~read from~~ the database file. Whenever a transaction is appended to the ~~log~~ or a checkpoint transfers data from the ~~log file~~ into the database file, the log-~~summary~~ is updated accordingly. The fields in the log-~~summary~~ file header are described in the comment directly above the definition of struct ~~LogSumm~~aryHdr (see below). Immediately following the fields in the ~~LogSumm~~aryHdr structure is an 8 byte checksum based on the contents of the header. This field is ** not the same as the iCheck1 and iCheck2 fields of the ~~LogSumm~~aryHdr. / ~~#include "wal.h"~~ ~~#include <unistd.h>~~ ~~#include <fcntl.h>~~ ~~#include <sys/mman.h>~~ ~~typedef struct LogSummaryHdr LogSummaryHdr;~~ typedef struct ~~LogSumm~~ary ~~LogSumm~~ary; ~~typedef struct LogIterator LogIterator;~~ typedef struct LogLo~~ck LogLock~~; / The following ~~structure may be used to~~ store the same d~~ata that~~ is stored in the log-summary header. Member variables iCheck1 and iCheck2 contain the checksum for the last frame written to the ~~log~~, or 2 and 3 respectively if the log is currently empty. / ~~struct ~~LogSumm~~aryHdr {~~ u32 iChange; / Counter incremented each transaction / u32 pgsz; / Database page size in bytes / u32 iLastPg; / Address of last valid frame in log / u32 nPage; / Size of database in pages / u32 iCheck1; / Checkpoint value 1 / u32 iCheck2; / Checkpoint value 2 / }; ~~/ Size of serialized ~~LogSumm~~aryHdr object. / #define L~~OGSUMMARY~~_HDR_NFIELD (sizeof(~~LogSumm~~aryHdr) / sizeof(u32))~~ ~~/ A block of 16 bytes beginning at L~~OGSUMMARY~~_LOCK_OFFSET is reserved~~ for locks. Since some systems only feature mandatory file-locks, we do not read or write data from the region of the file on which locks ** are applied. / ~~#define L~~OGSUMMARY~~_LOCK_OFFSET ((sizeof(~~LogSumm~~aryHdr))+2sizeof(u32)) #define L~~OGSUMMARY~~_LOCK_RESERVED 16~~ ~~/* Size of header before each frame in ~~log file~~ / #define ~~LOG~~_FRAME_HDRSIZE 16~~ / Size of log header / #define ~~LOG~~_HDRSIZE 12 / Return the offset of frame iFrame in the log file, assuming a database page size of pgsz bytes. The offset returned is to the start of the ** log frame-header. / ~~#define logFrameOffset(iFrame, pgsz) ( \~~ ~~LOG_HDRSIZE + ((iFrame)-1)((pgsz)+LOG_FRAME_HDRSIZE) \~~ ) /* If using mmap() to access a shared (or otherwise) log-summary file, then the mapping size is incremented in units of the following size. A 64 KB log-summary mapping corresponds to a log file containing over ** 13000 frames, so the mapping size does not need to be increased often. / ~~#ifdef SQLITE_TEST~~ ~~int sqlite3_walsummary_mmap_incr = 128;~~ ~~# define LOGSUMMARY_MMAP_INCREMENT sqlite3_walsummary_mmap_incr~~ ~~#else~~ ~~# define LOGSUMMARY_MMAP_INCREMENT (641024)~~ ~~#endif~~ /* There is one instance of this structure for each log-summary object that this process has a connection to. They are stored in a linked list starting at pLogSummary (global variable). TODO: LogSummary.fd is a unix file descriptor. Unix APIs are used directly in this implementation because the VFS does not support ** the required blocking file-locks. / ~~struct LogSummary {~~ ~~sqlite3_mutex mutex; /* Mutex used to protect this object /~~ ~~int nRef; / Number of pointers to this structure /~~ ~~int fd; / File descriptor open on log-summary /~~ ~~char zPath; /* Path to associated WAL file /~~ ~~LogLock pLock; /* Linked list of locks on this object /~~ ~~LogSummary pNext; /* Next in global list /~~ ~~~~int nData; / Size of aData allocation/mapping /~~ ~~u32 aData; /* File body /~~ };~~ / This module uses three different types of file-locks. All are taken on the log-summary file. The three types of locks are as follows: MUTEX: The MUTEX lock is used as a robust inter-process mutex. It is held while the log-summary header is modified, and sometimes when it is read. It is also held while a new client obtains the DMH lock (see below), and while log recovery is being run. DMS: The DMS (Dead Mans Switch mechanism) lock is used to ensure that log-recovery is always run following a system restart. When it first opens a log-summary file, a process takes a SHARED lock on the DMH region. This lock is not released until the log-summary file is closed. The process then attempts to upgrade to an EXCLUSIVE lock. If successful, then the contents of the log-summary file are deemed suspect and the log-summary header zeroed. This forces the first process that reads the log-summary file to run log recovery. After zeroing the log-summary header, the process ~~downgrades to a SHARED lock o~~n the ~~DMH~~ re~~gion.~~ If the atte~~mpt to obtain the EXCLUSIVE~~ lock f~~ails, t~~he~~n the~~ process concludes that some other process is already using the log-summary file, and it can therefore be trusted. The procedure described in the previous three paragraphs (taking a ~~SHARED~~ ~~lock~~ ~~and~~ ~~then~~ ~~upgrading to an EXCLUSIVE lock to check~~ if the process is the only one to have an open connection to the log file) is protected by holding the MUTEX lock. This avoids the race condition wherein the first two clients connect almost simultaneously following a system restart and each prevents ~~the other from obtaining the EXCLUSIVE lock.~~ REGION: There are 4 different region locks, regions A, B, C and D. Various EXCLUSIVE and SHARED locks on these regions are obtained when a client reads, writes or checkpoints the database. To obtain a reader lock: 1. Attempt a SHARED lock on regions A and B. 2. If step 1 is successful, drop the lock on region B. Or, if it is unsuccessful, attempt a SHARED lock on region D. 3. Repeat the above until the lock attempt in step 1 or 2 is successful. The reader lock is released when the read transaction is finished. To obtain a writer lock: 1. Take (wait for) an EXCLUSIVE lock on regions C and D. The locks are released after the write transaction is finished and, if any frames were committed to the log, the log-summary file updated. To obtain a checkpointer lock: 1. Take (wait for) an EXCLUSIVE lock on regions B and C. 2. Take (wait for) an EXCLUSIVE lock on region A. Step 1 waits until any existing writer has finished. And forces all new readers to become "region D" readers. Step 2 causes the checkpointer to wait until all existing region A readers have finished their transactions. Once the exclusive lock on region A has been obtained, only "region D" readers exist. These readers are operating on the snapshot at the head of the log. As such, the log can be safely copied into the database file without interfering with the readers. Once the checkpoint has finished and the log-summary header updated (to indicate the log contents can now be ignored), all locks are released. However, there may still exist region D readers using data in the body of the log file, so the log file itself cannot be truncated or overwritten until all region D readers have finished. That requirement is satisfied, because writers (the clients that write to the log file) require an exclusive lock on region D. Which they cannot get until all region D readers have finished. / ~~#define LOG_LOCK_MUTEX (LOGSUMMARY_LOCK_OFFSET)~~ ~~#define LOG_LOCK_DMH (LOG_LOCK_MUTEX+1)~~ ~~#define LOG_LOCK_REGION (LOG_LOCK_DMH+1)~~ / The four lockable regions associated with each log-summary. A connection may take either a SHARED or EXCLUSIVE lock on each. An ORed combination of the following bitmasks is passed as the second argument to the logLockRegion() function. / ~~#define LOG_REGION_A 0x01~~ ~~#define LOG_REGION_B 0x02~~ ~~#define LOG_REGION_C 0x04~~ ~~#define LOG_REGION_D 0x08~~ / ** Values for the third parameter to logLockRegion(). / ~~#define LOG_UNLOCK 0 / Unlock a range of bytes /~~ ~~#define LOG_RDLOCK 1 / Put a SHARED lock on a range of bytes /~~ ~~#define LOG_WRLOCK 2 / Put an EXCLUSIVE lock on a byte-range /~~ ~~#define LOG_WRLOCKW 3 / Block on EXCLUSIVE lock on a byte-range /~~ / A single instance of this structure is allocated as part of each connection to a database log. All structures associated with the same log file are linked together into a list using LogLock.pNext starting at LogSummary.pLock. The mLock field of the structure describes the locks (if any) currently held by the connection. If a SHARED lock is held on any of the four locking regions, then the associated LOG_REGION_X bit (see above) is set. If an EXCLUSIVE lock is held on the region, then the (LOG_REGION_X << 8) bit is set. / ~~struct LogLock {~~ ~~LogLock pNext; /* Next lock on the same log /~~ ~~u32 mLock; / Mask of locks /~~ }; struct ~~Log~~ { ~~LogSummary pSummary; /* Log file summary data /~~ sqlite3_vfs pVfs; /* The VFS used to create pFd / sqlite3_file pFd; /* File handle for ~~log~~ file / ~~int~~ i~~sLo~~ck~~ed;~~ / ~~Non-zer~~o ~~if a snapshot i~~s hel~~d open~~ / int isW~~riteLocked~~; / ~~Tru~~e ~~if this is~~ the wri~~ter connection~~ / u32 i~~Callback;~~ / ~~Value~~ to pa~~ss to log~~ c~~allback (~~or 0) / LogS~~ummaryHdr hdr~~; / L~~og summary header for~~ c~~urre~~nt s~~napshot~~ / ~~LogLock~~ ~~lock~~; / Lock ~~held by this connection (if any)~~ / }; / This structure is used to implement an iterator that iterates through all frames in the log in database page order. Where two or more frames correspond to the same database page, the iterator visits only the frame most recently written to the log. The internals of this structure are only accessed by: ~~log~~IteratorInit() - Create a new iterator, ~~log~~IteratorNext() - Step an iterator, ~~log~~IteratorFree() - Free an iterator. This functionality is used by the checkpoint code (see ~~log~~Checkpoint()). / ~~struct ~~Log~~Iterator { int nSegment; / Size of ~~Log~~Iterator.aSegment[] array /~~ int nFinal; / Elements in segment nSegment-1 / ~~struct ~~Log~~Segment {~~ int iNext; / Next aIndex index / u8 aIndex; /* Pointer to index array / u32 aDbPage; /* Pointer to db page array / } aSegment[1]; }; / List of all LogSummary objects created by this process. Protected by static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex ** here instead of borrowing the LRU mutex. / ~~#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU~~ ~~static LogSummary pLogSummary = 0;~~ /* Generate an 8 byte checksum based on the data in array aByte[] and the initial values of aCksum[0] and aCksum[1]. The checksum is written into aCksum[] before returning. The range of bytes to checksum is treated as an array of 32-bit little-endian unsigned integers. For each integer X in the array, from start to finish, do the following: aCksum[0] += X; aCksum[1] += aCksum[0]; For the calculation above, use 64-bit unsigned accumulators. Before returning, truncate the values to 32-bits as follows: aCksum[0] = (u32)(aCksum[0] + (aCksum[0]>>24)); aCksum[1] = (u32)(aCksum[1] + (aCksum[1]>>24)); / ~~static void ~~log~~ChecksumBytes(u8 aByte, int nByte, u32 aCksum){~~ u64 sum1 = aCksum[0]; u64 sum2 = aCksum[1]; u32 a32 = (u32 )aByte; u32 aEnd = (u32 *)&aByte[nByte]; assert( (nByte&0x00000003)==0 );	< > > > > > > > > > > > \| > \| \| \| \| \| \| \| \| > \| \| \| \| \| \| \| \| \| \| \| < \| < < < < < \| < \| \| < \| \| \| \| \| \| \| \| \| \| \| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < \| < > \| < < \| < \| < < < < \| < < < < < < > < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < \| < < < < < < < \| < < < < < < < < < < < < < < < < \| < \| \| \| > \| \| \| \| > \| \| \| \| \| \| \| < < < < < < < < < \|	1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187	/* 2010 February 1 The author disclaims copyright to this source code. In place of a legal notice, here is a blessing: May you do good and not evil. May you find forgiveness for yourself and forgive others. May you share freely, never taking more than you give. ********************************************************************* This file contains the implementation of a write-ahead log file used in "journal_mode=wal" mode. / #include "wal.h" / WRITE-AHEAD LOG (WAL) FILE FORMAT A wal file consists of a header followed by zero or more "frames". The header is 12 bytes in size and consists of the following three big-endian 32-bit unsigned integer values: 0: Database page size, 4: Randomly selected salt value 1, 8: Randomly selected salt value 2. Immediately following the header are zero or more frames. Each frame itself consists of a 16-byte header followed by a <page-size> bytes of page data. The header is broken into 4 big-endian 32-bit unsigned integer values, as follows: 0: Page number. 4: For commit records, the size of the database image in pages after the commit. For all other records, zero. 8: Checksum value 1. 12: Checksum value 2. / / WAL-INDEX FILE FORMAT The wal-index file consists of a 32-byte header region, followed by an 8-byte region that contains no useful data (used to apply byte-range locks to), followed by the data region. The contents of both the header and data region are specified in terms of 1, 2 and 4 byte unsigned integers. All integers are stored in machine-endian order. The wal-index is not a persistent file and so it does not need to be portable across archtectures. A wal-index file is essentially a shadow-pager map. It contains a mapping from database page number to the set of locations in the wal file that contain versions of the database page. When a database client needs to read a page of data, it first queries the wal-index file to determine if the required version of the page is stored in the wal. If so, the page is read from the wal. If not, the page is read from the database file. Whenever a transaction is appended to the wal or a checkpoint transfers data from the wal into the database file, the wal-index is updated accordingly. The fields in the wal-index file header are described in the comment directly above the definition of struct WalIndexHdr (see below). Immediately following the fields in the WalIndexHdr structure is an 8 byte checksum based on the contents of the header. This field is not the same as the iCheck1 and iCheck2 fields of the WalIndexHdr. / / Object declarations / typedef struct WalIndexHdr WalIndexHdr; typedef struct WalIterator WalIterator; / The following object stores a copy of the wal-index header. Member variables iCheck1 and iCheck2 contain the checksum for the last frame written to the wal, or 2 and 3 respectively if the log ** is currently empty. / struct WalIndexHdr { u32 iChange; / Counter incremented each transaction / u32 pgsz; / Database page size in bytes / u32 iLastPg; / Address of last valid frame in log / u32 nPage; / Size of database in pages / u32 iCheck1; / Checkpoint value 1 / u32 iCheck2; / Checkpoint value 2 / }; / Size of serialized WalIndexHdr object. / #define WALINDEX_HDR_NFIELD (sizeof(WalIndexHdr) / sizeof(u32)) / A block of 16 bytes beginning at WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems only feature mandatory file-locks, we do not read or write data from the region of the file on which locks ** are applied. / #define WALINDEX_LOCK_OFFSET ((sizeof(WalIndexHdr))+2sizeof(u32)) #define WALINDEX_LOCK_RESERVED 8 /* Size of header before each frame in wal / #define WAL_FRAME_HDRSIZE 16 / Size of write ahead log header / #define WAL_HDRSIZE 12 / Return the offset of frame iFrame in the write-ahead log file, assuming a database page size of pgsz bytes. The offset returned ** is to the start of the write-ahead log frame-header. / #define walFrameOffset(iFrame, pgsz) ( \ WAL_HDRSIZE + ((iFrame)-1)((pgsz)+WAL_FRAME_HDRSIZE) \ ) /* An open write-ahead log file is represented by an instance of the following object. / struct Wal { sqlite3_vfs pVfs; /* The VFS used to create pFd / sqlite3_file pFd; /* File handle for WAL file / u32 iCallback; / Value to pass to log callback (or 0) / sqlite3_shm pWIndex; /* The open wal-index file / int szWIndex; / Size of the wal-index / u32 pWiData; /* Pointer to wal-index content in memory / u8 lockState; / SQLITE_SHM_xxxx constant showing lock state / u8 readerType; / SQLITE_SHM_READ or SQLITE_SHM_READ_FULL / WalIndexHdr hdr; / Wal-index for current snapshot / }; / This structure is used to implement an iterator that iterates through all frames in the log in database page order. Where two or more frames correspond to the same database page, the iterator visits only the frame most recently written to the log. The internals of this structure are only accessed by: walIteratorInit() - Create a new iterator, walIteratorNext() - Step an iterator, walIteratorFree() - Free an iterator. This functionality is used by the checkpoint code (see walCheckpoint()). / struct WalIterator { int nSegment; / Size of WalIterator.aSegment[] array / int nFinal; / Elements in segment nSegment-1 / struct WalSegment { int iNext; / Next aIndex index / u8 aIndex; /* Pointer to index array / u32 aDbPage; /* Pointer to db page array / } aSegment[1]; }; / Generate an 8 byte checksum based on the data in array aByte[] and the initial values of aCksum[0] and aCksum[1]. The checksum is written into aCksum[] before returning. The range of bytes to checksum is treated as an array of 32-bit little-endian unsigned integers. For each integer X in the array, from start to finish, do the following: aCksum[0] += X; aCksum[1] += aCksum[0]; For the calculation above, use 64-bit unsigned accumulators. Before returning, truncate the values to 32-bits as follows: aCksum[0] = (u32)(aCksum[0] + (aCksum[0]>>24)); aCksum[1] = (u32)(aCksum[1] + (aCksum[1]>>24)); / static void walChecksumBytes(u8 aByte, int nByte, u32 aCksum){ u64 sum1 = aCksum[0]; u64 sum2 = aCksum[1]; u32 a32 = (u32 )aByte; u32 aEnd = (u32 *)&aByte[nByte]; assert( (nByte&0x00000003)==0 );
︙			︙
362 363 364 365 366 367 368 ~~369 370 371 372 373~~ 374 ~~375~~ ~~376 377~~ 378 ~~379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424~~ 425 ~~426 427 428 429~~ 430 431 432 433 ~~434 435 436~~ 437 ~~438~~ ~~439~~ 440 ~~441~~ 442 443 444 445 ~~446~~ 447 448 449 450 451 452 453 454 ~~455~~ 456 457 458 459 460 461 462 ~~463~~ 464 465 466 467 ~~468 469~~ 470 471 472 473 474 475 476 477 478 ~~479~~ 480 481 482 483 484 485 486 ~~487~~ 488 ~~489 490~~ 491 492 493 494 495 496 497 498 499 500 501 502 503 ~~504 505~~ 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 ~~521 522~~ 523 524 525 526 527 528 529	} aCksum[0] = sum1 + (sum1>>24); aCksum[1] = sum2 + (sum2>>24); } /* Argument zPath must be a nul-terminated string containing a path-name. This function modifies the string in-place by removing any "./" or "../" elements in the path. For example, the following input: "/home/user/plans/good/../evil/./world_domination.txt" is overwritten with the 'normalized' version: ** "/home/user/plans/evil/world_domination.txt" / static ~~void logNorm~~al~~izeP~~at~~h(char zPath~~){ int ~~i, j~~; ~~char z = zPath;~~ ~~int n = strlen(z);~~ ~~while( n>1 && z[n-1]=='/' ){ n--; }~~ ~~for(i=j=0; i<n; i++){~~ ~~if( z[i]=='/' ){~~ ~~if( z[i+1]=='/' ) continue;~~ ~~if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){~~ ~~i += 1;~~ ~~continue;~~ } ~~if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){~~ ~~while( j>0 && z[j-1]!='/' ){ j--; }~~ ~~if( j>0 ){ j--; }~~ ~~i += 2;~~ ~~continue;~~ } } ~~z[j++] = z[i];~~ } ~~z[j] = 0;~~ } / Unmap the log-summary mapping and close the file-descriptor. If the isTruncate argument is non-zero, truncate the log-summary file region to zero bytes. Regardless of the value of isTruncate, close the file-descriptor opened on the log-summary file. / ~~static int logSummaryUnmap(LogSummary pSummary, int isUnlink){~~ int rc = SQLITE_OK; ~~if( pSummary->aData ){~~ ~~assert( pSummary->fd>0 );~~ ~~munmap(pSummary->aData, pSummary->nData);~~ p~~Summary~~->~~aData~~ = 0; ~~if( isUnlink ){~~ ~~char zFile = sqlite3_mprintf("%s-summary", pSummary->zPath);~~ ~~if( !zFile ){~~ r~~c =~~ SQLITE_NOM~~EM;~~ } ~~unlink(zFile);~~ sql~~ite3_f~~ree~~(zFil~~e); } ~~} ~~if( pSummary->fd>0 ){~~ ~~close(pSummary->fd);~~ ~~pSummary->fd = -1;~~~~ } return rc; } static void l~~ogSummary~~WriteHdr(~~LogSummary~~ p~~Summary~~, ~~LogSumm~~aryHdr pHdr){ u32 aHdr = p~~Summary~~->aData; /* Write header here / u32 aCksum = &aHdr[L~~OGSUMMARY~~_HDR_NFIELD]; /* Write header cksum here / ~~assert( L~~OGSUMMARY~~_HDR_NFIELD==sizeof(~~LogSumm~~aryHdr)/4 );~~ ~~memcpy(aHdr, pHdr, sizeof(~~LogSumm~~aryHdr));~~ aCksum[0] = aCksum[1] = 1; ~~~~log~~ChecksumBytes((u8 )aHdr, sizeof(~~LogSumm~~aryHdr), aCksum);~~ } /* This function encodes a single frame header and writes it to a buffer supplied by the caller. A ~~log~~ frame-header is made up of a series of 4-byte big-endian integers, as follows: 0: Database page size in bytes. 4: Page number. 8: New database size (for commit frames, otherwise zero). 12: Frame checksum 1. ** 16: Frame checksum 2. / ~~static void ~~log~~EncodeFrame(~~ u32 aCksum, /* IN/OUT: Checksum values / u32 iPage, / Database page number for frame / u32 nTruncate, / New db size (or 0 for non-commit frames) / int nData, / Database page size (size of aData[]) / u8 aData, /* Pointer to page data (for checksum) / u8 aFrame /* OUT: Write encoded frame here / ){ ~~assert( ~~LOG~~_FRAME_HDRSIZE==16 );~~ sqlite3Put4byte(&aFrame[0], iPage); sqlite3Put4byte(&aFrame[4], nTruncate); ~~~~log~~ChecksumBytes(aFrame, 8, aCksum); ~~log~~ChecksumBytes(aData, nData, aCksum);~~ sqlite3Put4byte(&aFrame[8], aCksum[0]); sqlite3Put4byte(&aFrame[12], aCksum[1]); } / ** Return 1 and populate piPage, pnTruncate and aCksum if the ** frame checksum looks Ok. Otherwise return 0. / ~~static int ~~log~~DecodeFrame(~~ u32 aCksum, /* IN/OUT: Checksum values / u32 piPage, /* OUT: Database page number for frame / u32 pnTruncate, /* OUT: New db size (or 0 if not commit) / int nData, / Database page size (size of aData[]) / u8 aData, /* Pointer to page data (for checksum) / u8 aFrame /* Frame data / ){ ~~assert( ~~LOG~~_FRAME_HDRSIZE==16 );~~ ~~~~log~~ChecksumBytes(aFrame, 8, aCksum); ~~log~~ChecksumBytes(aData, nData, aCksum);~~ if( aCksum[0]!=sqlite3Get4byte(&aFrame[8]) \|\| aCksum[1]!=sqlite3Get4byte(&aFrame[12]) ){ / Checksum failed. / return 0; } piPage = sqlite3Get4byte(&aFrame[0]); pnTruncate = sqlite3Get4byte(&aFrame[4]); return 1; } ~~static void ~~log~~Mergesort8( Pgno aContent, /* Pages in ~~log~~ /~~ u8 aBuffer, /* Buffer of at least pnList items to use / u8 aList, / IN/OUT: List to sort / int pnList /* IN/OUT: Number of elements in aList[] / ){ int nList = pnList; if( nList>1 ){ int nLeft = nList / 2; /* Elements in left list / int nRight = nList - nLeft; / Elements in right list / u8 aLeft = aList; /* Left list / u8 aRight = &aList[nLeft]; /* Right list / int iLeft = 0; / Current index in aLeft / int iRight = 0; / Current index in aright / int iOut = 0; / Current index in output buffer / / TODO: Change to non-recursive version. */ ~~~~log~~Mergesort8(aContent, aBuffer, aLeft, &nLeft); ~~log~~Mergesort8(aContent, aBuffer, aRight, &nRight);~~ while( iRight<nRight \|\| iLeft<nLeft ){ u8 logpage; Pgno dbpage; if( (iLeft<nLeft) && (iRight>=nRight \|\| aContent[aLeft[iLeft]]<aContent[aRight[iRight]])	< < < \| < < > > \| < \| \| < < \| < < < < < < < < < < < < < < < < < < < \| < < < < < < < < < \| < < < \| < < < \| < < \| < < < < > > > \| \| \| \| > \| \| \| \| \| \| \| \| \| \| \| \| \| \| \|	203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328	} aCksum[0] = sum1 + (sum1>>24); aCksum[1] = sum2 + (sum2>>24); } /* Attempt to change the lock status. When changing the lock status to SQLITE_SHM_READ, store the type of reader lock (either SQLITE_SHM_READ or SQLITE_SHM_READ_FULL) ** in pWal->readerType. / static int walSetLock(Wal pWal, int desiredStatus){ int rc, got; if( pWal->lockState==desiredStatus ) return SQLITE_OK; rc = pWal->pVfs->xShmLock(pWal->pWIndex, desiredStatus, &got); if( rc==SQLITE_OK ){ pWal->lockState = desiredStatus; if( desiredStatus==SQLITE_SHM_READ ){ pWal->readerType = got; } } return rc; } /* ** Update the header of the wal-index file. / static void walIndexWriteHdr(Wal pWal, WalIndexHdr pHdr){ u32 aHdr = pWal->pWiData; /* Write header here / u32 aCksum = &aHdr[WALINDEX_HDR_NFIELD]; /* Write header cksum here / assert( WALINDEX_HDR_NFIELD==sizeof(WalIndexHdr)/4 ); assert( aHdr!=0 ); memcpy(aHdr, pHdr, sizeof(WalIndexHdr)); aCksum[0] = aCksum[1] = 1; walChecksumBytes((u8 )aHdr, sizeof(WalIndexHdr), aCksum); } /* This function encodes a single frame header and writes it to a buffer supplied by the caller. A frame-header is made up of a series of 4-byte big-endian integers, as follows: 0: Database page size in bytes. 4: Page number. 8: New database size (for commit frames, otherwise zero). 12: Frame checksum 1. ** 16: Frame checksum 2. / static void walEncodeFrame( u32 aCksum, /* IN/OUT: Checksum values / u32 iPage, / Database page number for frame / u32 nTruncate, / New db size (or 0 for non-commit frames) / int nData, / Database page size (size of aData[]) / u8 aData, /* Pointer to page data (for checksum) / u8 aFrame /* OUT: Write encoded frame here / ){ assert( WAL_FRAME_HDRSIZE==16 ); sqlite3Put4byte(&aFrame[0], iPage); sqlite3Put4byte(&aFrame[4], nTruncate); walChecksumBytes(aFrame, 8, aCksum); walChecksumBytes(aData, nData, aCksum); sqlite3Put4byte(&aFrame[8], aCksum[0]); sqlite3Put4byte(&aFrame[12], aCksum[1]); } / ** Return 1 and populate piPage, pnTruncate and aCksum if the ** frame checksum looks Ok. Otherwise return 0. / static int walDecodeFrame( u32 aCksum, /* IN/OUT: Checksum values / u32 piPage, /* OUT: Database page number for frame / u32 pnTruncate, /* OUT: New db size (or 0 if not commit) / int nData, / Database page size (size of aData[]) / u8 aData, /* Pointer to page data (for checksum) / u8 aFrame /* Frame data / ){ assert( WAL_FRAME_HDRSIZE==16 ); walChecksumBytes(aFrame, 8, aCksum); walChecksumBytes(aData, nData, aCksum); if( aCksum[0]!=sqlite3Get4byte(&aFrame[8]) \|\| aCksum[1]!=sqlite3Get4byte(&aFrame[12]) ){ / Checksum failed. / return 0; } piPage = sqlite3Get4byte(&aFrame[0]); pnTruncate = sqlite3Get4byte(&aFrame[4]); return 1; } static void walMergesort8( Pgno aContent, /* Pages in wal / u8 aBuffer, /* Buffer of at least pnList items to use / u8 aList, / IN/OUT: List to sort / int pnList /* IN/OUT: Number of elements in aList[] / ){ int nList = pnList; if( nList>1 ){ int nLeft = nList / 2; /* Elements in left list / int nRight = nList - nLeft; / Elements in right list / u8 aLeft = aList; /* Left list / u8 aRight = &aList[nLeft]; /* Right list / int iLeft = 0; / Current index in aLeft / int iRight = 0; / Current index in aright / int iOut = 0; / Current index in output buffer / / TODO: Change to non-recursive version. */ walMergesort8(aContent, aBuffer, aLeft, &nLeft); walMergesort8(aContent, aBuffer, aRight, &nRight); while( iRight<nRight \|\| iLeft<nLeft ){ u8 logpage; Pgno dbpage; if( (iLeft<nLeft) && (iRight>=nRight \|\| aContent[aLeft[iLeft]]<aContent[aRight[iRight]])
︙			︙
552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 ~~617~~ 618 ~~619~~ 620 621 622 623 624 ~~625~~ 626 ~~627 628~~ 629 630 631 632 ~~633 634 635~~ ~~636~~ 637 ~~638~~ 639 ~~640 641~~ 642 643 644 645 646 ~~647 648~~ 649 650 651 652 653 654 655 656 657 658 659 ~~660 661~~ 662 663 664 665 ~~666~~ 667 668 669 670 671 672 673 674 ~~675 676~~ 677 ~~678~~ 679 680 ~~681~~ 682 683 684 ~~685~~ 686 687 688 689 ~~690 691~~ 692 693 694 695 696 697 698 699 700 701 702 ~~703~~ 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 ~~719~~ 720 721 722 723 ~~724~~ 725 726 727 ~~728~~ 729 730 731 732 733 ~~734~~ 735 ~~736~~ 737 ~~738~~ 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 ~~1030~~ 1031 1032 1033 1034 ~~1035~~ 1036 ~~1037~~ 1038 1039 ~~1040 1041 1042~~ 1043 1044 1045 1046 1047 1048 ~~1049 1050~~ ~~1051 1052~~ 1053 1054 1055 ~~1056 1057 1058 1059~~ 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 ~~1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118~~ 1119 ~~1120~~ 1121 1122 1123 1124 ~~1125 1126~~ 1127 ~~1128~~ 1129 1130 1131 ~~1132 1133 1134 1135~~ 1136 1137 1138 1139 1140 1141 1142 ~~1143~~ 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 ~~1163 1164 1165~~ 1166 1167 1168 1169 1170 ~~1171~~ 1172 1173 ~~1174~~ 1175 1176 1177 ~~1178 1179~~ 1180 1181 1182 1183 1184 1185 1186 ~~1187 1188~~ 1189 1190 1191 ~~1192~~ 1193 1194 1195 1196 1197 ~~1198~~ 1199 1200 1201 1202 1203 1204 ~~1205~~ 1206 ~~1207~~ 1208 1209 1210 1211 1212 1213 ~~1214 1215~~ 1216 1217 1218 1219 1220 ~~1221 1222~~ 1223 ~~1224~~ 1225 ~~1226~~ 1227 1228 1229 1230 ~~1231~~ 1232 1233 1234 1235 ~~1236~~ 1237 1238 1239 1240 ~~1241 1242 1243~~ 1244 1245 1246 1247 1248 1249 1250 ~~1251~~ 1252 1253 ~~1254~~ 1255 1256 1257 1258 ~~1259 1260 1261 1262~~ 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 ~~1278 1279~~ 1280 ~~1281~~ 1282 1283 1284 ~~1285~~ 1286 1287 1288 1289 1290 1291 1292 ~~1293~~ 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 ~~1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395~~ 1396 1397 1398 1399 1400 ~~1401~~ 1402 ~~1403~~ 1404 ~~1405~~ 1406 1407 1408 1409 1410 ~~1411 1412 1413 1414~~ 1415 1416 1417 1418 ~~1419~~ 1420 1421 1422 ~~1423~~ 1424 1425 1426 1427 1428 ~~1429 1430~~ 1431 1432 ~~1433~~ 1434 1435 ~~1436~~ 1437 1438 1439 1440 ~~1441 1442 1443 1444~~ 1445 ~~1446 1447~~ 1448 1449 ~~1450~~ 1451 ~~1452~~ 1453 1454 1455 ~~1456 1457~~ 1458 1459 1460 ~~1461~~ 1462 ~~1463~~ 1464 1465 ~~1466~~ 1467 1468 1469 1470 1471 1472 1473 1474 1475 ~~1476~~ 1477 1478 1479 1480 ~~1481 1482 1483 1484~~ 1485 ~~1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514~~ 1515 1516 ~~1517~~ 1518 1519 ~~1520 1521 1522 1523 1524 1525 1526~~ 1527 1528 1529 1530 1531 1532 1533 1534 1535 ~~1536 1537 1538 1539~~ 1540 ~~1541~~ 1542 1543 1544 1545 1546 ~~1547 1548~~ 1549 1550 ~~1551~~ 1552 ~~1553 1554~~ 1555 1556 ~~1557~~ 1558 1559 1560 ~~1561 1562 1563 1564~~ 1565 1566 1567 1568 1569 ~~1570~~ 1571 1572 1573 1574 1575 1576 1577 1578 ~~1579~~ 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 ~~1597 1598 1599~~ 1600 1601 1602 1603 1604 ~~1605 1606 1607~~ 1608 1609 ~~1610~~ 1611 1612 1613 1614 1615 1616 1617 ~~1618 1619~~ ~~1620~~ 1621 1622 1623 1624 1625 1626 1627 ~~1628 1629~~ 1630 ~~1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669~~ 1670 ~~1671~~ 1672 1673 1674 ~~1675~~ 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 ~~1688~~ 1689 ~~1690~~ 1691 1692 ~~1693 1694 1695 1696~~ 1697 1698 1699 1700 ~~1701 1702 1703~~ 1704 1705 ~~1706~~ 1707 1708 ~~1709~~ 1710 ~~1711~~ 1712 ~~1713 1714 1715 1716~~ 1717 1718 1719 1720 1721 1722 1723 1724 ~~1725 1726 1727~~ 1728 1729 1730 1731 1732 1733 1734 ~~1735~~ 1736 1737 1738 1739 1740 1741 1742 1743 ~~1744~~ 1745 1746 1747 1748 1749 ~~1750~~ 1751 1752 1753 1754 1755 1756 ~~1757 1758~~ 1759 1760 1761 ~~1762 1763 1764~~ 1765 1766 1767 1768 1769 ~~1770 1771~~ 1772 1773 1774 1775 1776 1777 ~~1778~~ 1779 1780 1781 ~~1782 1783~~ 1784 1785 1786 1787 1788 ~~1789~~ 1790 1791 1792 1793 1794 1795 1796 1797 ~~1798 1799~~ 1800 1801 1802 1803 1804 1805 1806 1807 ~~1808 1809~~ 1810 1811 1812 1813 ~~1814 1815~~ 1816 1817 1818 1819 1820 1821 1822 ~~1823~~ 1824 1825 1826 1827 1828 1829 ~~1830~~ 1831 1832 1833 ~~1834~~ 1835 1836 ~~1837~~ 1838 1839 1840 1841 ~~1842~~ 1843 1844 1845 ~~1846 1847~~ 1848 ~~1849 1850~~ 1851 ~~1852 1853~~ 1854 ~~1855 1856 1857 1858 1859~~ 1860 1861 1862 1863 1864 1865 1866 1867 ~~1868 1869 1870 1871 1872~~ 1873 1874 ~~1875~~ 1876 1877 1878 1879 1880 1881 1882 1883 1884 ~~1885~~ 1886 ~~1887~~ 1888 ~~1889 1890 1891 1892 1893 1894 1895~~ 1896 1897 ~~1898~~ 1899 1900 1901 1902 ~~1903~~ 1904 ~~1905~~ 1906 1907 1908 ~~1909~~ 1910 1911 1912 1913 ~~1914~~ 1915 1916 1917 ~~1918~~ 1919 1920 1921 ~~1922~~ 1923 ~~1924 1925 1926~~ 1927 1928 1929	} } #endif } /* Memory map the first nByte bytes of the summary file opened with pSummary->fd at pSummary->aData. If the summary file is smaller than nByte bytes in size when this function is called, ftruncate() is used to expand it before it is mapped. It is assumed that an exclusive lock is held on the summary file ** by the caller (to protect the ftruncate()). / ~~static int logSummaryMap(LogSummary pSummary, int nByte){~~ ~~struct stat sStat;~~ ~~int rc;~~ ~~int fd = pSummary->fd;~~ ~~void pMap;~~ ~~assert( pSummary->aData==0 );~~ ~~/ If the file is less than nByte bytes in size, cause it to grow. /~~ ~~rc = fstat(fd, &sStat);~~ ~~if( rc!=0 ) return SQLITE_IOERR;~~ ~~if( sStat.st_size<nByte ){~~ ~~rc = ftruncate(fd, nByte);~~ ~~if( rc!=0 ) return SQLITE_IOERR;~~ ~~}else{~~ ~~nByte = sStat.st_size;~~ } ~~/ Map the file. /~~ ~~pMap = mmap(0, nByte, PROT_READ\|PROT_WRITE, MAP_SHARED, fd, 0);~~ ~~if( pMap==MAP_FAILED ){~~ ~~return SQLITE_IOERR;~~ } ~~pSummary->aData = (u32 )pMap;~~ ~~pSummary->nData = nByte/4;~~ ~~return SQLITE_OK;~~ } /* The log-summary file is already mapped to pSummary->aData[], but the mapping needs to be resized. Unmap and remap the file so that the mapping is at least nByte bytes in size, or the size of the entire file if it is larger than nByte bytes. / ~~static int logSummaryRemap(LogSummary pSummary, int nByte){~~ ~~int rc;~~ ~~sqlite3_mutex_enter(pSummary->mutex);~~ ~~munmap(pSummary->aData, pSummary->nData4);~~ ~~pSummary->aData = 0;~~ ~~rc = logSummaryMap(pSummary, nByte);~~ ~~sqlite3_mutex_leave(pSummary->mutex);~~ ~~return rc;~~ } / Return the index in the ~~LogSumm~~ary.aData array that corresponds to frame iFrame. The log-~~summary~~ file consists of a header, followed by ** alternating "map" and "index" blocks. / ~~static int l~~ogSummary~~Entry(u32 iFrame){~~ return ( ~~(L~~OGSUMMARY~~_LOCK_OFFSET+L~~OGSUMMARY~~_LOCK_RESERVED)/sizeof(u32)~~ + (((iFrame-1)>>8)<<6) / Indexes that occur before iFrame / + iFrame-1 / Db page numbers that occur before iFrame / ); } / Set an entry in the log-~~summary~~ map to map log frame iFrame to db page iPage. Values are always appended to the log-~~summary~~ (i.e. the value of iFrame is always exactly one more than the value passed to the previous call), but that restriction is not enforced or asserted ** here. / ~~static int l~~ogSummary~~Append(~~LogSummary~~ p~~Summary~~, u32 iFrame, u32 iPage){ u32 iSlot = l~~ogSummary~~Entry(iFrame);~~ ~~while( (iSlot+128)>=p~~Summary~~->~~nData~~ ){~~ int rc; ~~int nByte = p~~Summary~~->~~nData~~4 + L~~OGSUMMARY~~_MMAP_INCREMENT;~~ ~~/ Unmap and remap the log-~~summary~~ file. / rc = l~~ogSummary~~Remap(p~~Summary~~, nByte);~~ if( rc!=SQLITE_OK ){ return rc; } } ~~/ Set the log-~~summary~~ entry itself / p~~Summary~~->aData[iSlot] = iPage;~~ / If the frame number is a multiple of 256 (frames are numbered starting ** at 1), build an index of the most recently added 256 frames. / if( (iFrame&0x000000FF)==0 ){ int i; / Iterator used while initializing aIndex / u32 aFrame; /* Pointer to array of 256 frames / int nIndex; / Number of entries in index / u8 aIndex; /* 256 bytes to build index in / u8 aTmp; /* Scratch space to use while sorting / ~~aFrame = &p~~Summary~~->aData[iSlot-255]; aIndex = (u8 )&p~~Summary~~->aData[iSlot+1];~~ aTmp = &aIndex[256]; nIndex = 256; for(i=0; i<256; i++) aIndex[i] = (u8)i; ~~~~log~~Mergesort8(aFrame, aTmp, aIndex, &nIndex);~~ memset(&aIndex[nIndex], aIndex[nIndex-1], 256-nIndex); } return SQLITE_OK; } /* Recover the log-~~summary~~ by reading the log file. ~~The caller must hold~~ an ~~excl~~us~~ive~~ lock on the log-~~summary~~ file. / ~~static int l~~ogSummary~~Recover(~~LogSummary~~ p~~Summary, sqlite3_file pFd~~){~~ int rc; / Return Code / i64 nSize; / Size of log file / ~~~~LogSumm~~aryHdr hdr; / Recovered log-~~summary~~ header /~~ memset(&hdr, 0, sizeof(hdr)); ~~rc = sqlite3OsFileSize(pFd, &nSize);~~ if( rc!=SQLITE_OK ){ return rc; } ~~if( nSize>~~LOG~~_FRAME_HDRSIZE ){ u8 aBuf[~~LOG~~_FRAME_HDRSIZE]; / Buffer to load first frame header into /~~ u8 aFrame = 0; /* Malloc'd buffer to load entire frame / int nFrame; / Number of bytes at aFrame / u8 aData; /* Pointer to data part of aFrame buffer / int iFrame; / Index of last frame read / i64 iOffset; / Next offset to read from log file / int nPgsz; / Page size according to the log / u32 aCksum[2]; / Running checksum / / Read in the first frame header in the file (to determine the ** database page size). / ~~rc = sqlite3OsRead(pFd, aBuf, ~~LOG~~_HDRSIZE, 0);~~ if( rc!=SQLITE_OK ){ return rc; } / If the database page size is not a power of two, or is greater than ** SQLITE_MAX_PAGE_SIZE, conclude that the log file contains no valid data. / nPgsz = sqlite3Get4byte(&aBuf[0]); if( nPgsz&(nPgsz-1) \|\| nPgsz>SQLITE_MAX_PAGE_SIZE \|\| nPgsz<512 ){ goto finished; } aCksum[0] = sqlite3Get4byte(&aBuf[4]); aCksum[1] = sqlite3Get4byte(&aBuf[8]); / Malloc a buffer to read frames into. / ~~nFrame = nPgsz + ~~LOG~~_FRAME_HDRSIZE;~~ aFrame = (u8 )sqlite3_malloc(nFrame); if( !aFrame ){ return SQLITE_NOMEM; } ~~aData = &aFrame[~~LOG~~_FRAME_HDRSIZE];~~ /* Read all frames from the log file. / iFrame = 0; ~~for(iOffset=~~LOG~~_HDRSIZE; (iOffset+nFrame)<=nSize; iOffset+=nFrame){~~ u32 pgno; / Database page number for frame / u32 nTruncate; / dbsize field from frame header / int isValid; / True if this frame is valid / / Read and decode the next log frame. / ~~rc = sqlite3OsRead(pFd, aFrame, nFrame, iOffset);~~ if( rc!=SQLITE_OK ) break; ~~isValid = ~~log~~DecodeFrame(aCksum, &pgno, &nTruncate, nPgsz, aData, aFrame);~~ if( !isValid ) break; ~~l~~ogSummary~~Append(p~~Summary~~, ++iFrame, pgno);~~ / If nTruncate is non-zero, this is a commit record. / if( nTruncate ){ hdr.iCheck1 = aCksum[0]; hdr.iCheck2 = aCksum[1]; hdr.iLastPg = iFrame; hdr.nPage = nTruncate; hdr.pgsz = nPgsz; } } sqlite3_free(aFrame); }else{ hdr.iCheck1 = 2; hdr.iCheck2 = 3; } finished: ~~logSummaryWriteHdr(pSummary, &hdr);~~ ~~return rc;~~ } / Place, modify or remove a lock on the log-summary file associated with pSummary. The locked byte-range should be inside the region dedicated to ** locking. This region of the log-summary file is never read or written. / ~~static int logLockFd(~~ ~~LogSummary pSummary, /* The log-summary object to lock /~~ ~~int iStart, / First byte to lock /~~ ~~int nByte, / Number of bytes to lock /~~ ~~int op / LOG_UNLOCK, RDLOCK, WRLOCK or WRLOCKW /~~ ){ ~~int aType[4] = {~~ ~~F_UNLCK, / LOG_UNLOCK /~~ ~~F_RDLCK, / LOG_RDLOCK /~~ ~~F_WRLCK, / LOG_WRLOCK /~~ ~~F_WRLCK / LOG_WRLOCKW /~~ }; ~~int aOp[4] = {~~ ~~F_SETLK, / LOG_UNLOCK /~~ ~~F_SETLK, / LOG_RDLOCK /~~ ~~F_SETLK, / LOG_WRLOCK /~~ ~~F_SETLKW / LOG_WRLOCKW /~~ }; ~~struct flock f; / Locking operation /~~ ~~int rc; / Value returned by fcntl() /~~ ~~assert( ArraySize(aType)==ArraySize(aOp) );~~ ~~assert( op>=0 && op<ArraySize(aType) );~~ ~~assert( nByte>0 );~~ ~~assert( iStart>=LOGSUMMARY_LOCK_OFFSET~~ ~~&& iStart+nByte<=LOGSUMMARY_LOCK_OFFSET+LOGSUMMARY_LOCK_RESERVED~~ ); ~~#if defined(SQLITE_DEBUG) && defined(SQLITE_OS_UNIX)~~ ~~if( pSummary->aData ) memset(&((u8)pSummary->aData)[iStart], op, nByte);~~ ~~#endif~~ ~~memset(&f, 0, sizeof(f));~~ ~~f.l_type = aType[op];~~ ~~f.l_whence = SEEK_SET;~~ ~~f.l_start = iStart;~~ ~~f.l_len = nByte;~~ ~~rc = fcntl(pSummary->fd, aOp[op], &f);~~ ~~return (rc==0) ? SQLITE_OK : SQLITE_BUSY;~~ } ~~static int logLockRegion(Log pLog, u32 mRegion, int op){~~ ~~LogSummary pSummary = pLog->pSummary;~~ ~~LogLock p; / Used to iterate through in-process locks /~~ ~~u32 mOther; / Locks held by other connections /~~ ~~u32 mNew; / New mask for pLog /~~ ~~assert(~~ ~~/ Writer lock operations /~~ ~~(op==LOG_WRLOCK && mRegion==(LOG_REGION_C\|LOG_REGION_D))~~ ~~\|\| (op==LOG_UNLOCK && mRegion==(LOG_REGION_C\|LOG_REGION_D))~~ ~~/ Normal reader lock operations /~~ ~~\|\| (op==LOG_RDLOCK && mRegion==(LOG_REGION_A\|LOG_REGION_B))~~ ~~\|\| (op==LOG_UNLOCK && mRegion==(LOG_REGION_A))~~ ~~\|\| (op==LOG_UNLOCK && mRegion==(LOG_REGION_B))~~ ~~/ Region D reader lock operations /~~ ~~\|\| (op==LOG_RDLOCK && mRegion==(LOG_REGION_D))~~ ~~\|\| (op==LOG_RDLOCK && mRegion==(LOG_REGION_A))~~ ~~\|\| (op==LOG_UNLOCK && mRegion==(LOG_REGION_D))~~ ~~/ Checkpointer lock operations /~~ ~~\|\| (op==LOG_WRLOCK && mRegion==(LOG_REGION_B\|LOG_REGION_C))~~ ~~\|\| (op==LOG_WRLOCK && mRegion==(LOG_REGION_A))~~ ~~\|\| (op==LOG_UNLOCK && mRegion==(LOG_REGION_B\|LOG_REGION_C))~~ ~~\|\| (op==LOG_UNLOCK && mRegion==(LOG_REGION_A\|LOG_REGION_B\|LOG_REGION_C))~~ ); ~~/ Assert that a connection never tries to go from an EXCLUSIVE to a~~ SHARED lock on a region. Moving from SHARED to EXCLUSIVE sometimes happens though (when a region D reader upgrades to a writer). / ~~assert( op!=LOG_RDLOCK \|\| 0==(pLog->lock.mLock & (mRegion<<8)) );~~ ~~sqlite3_mutex_enter(pSummary->mutex);~~ ~~/ Calculate a mask of logs held by all connections in this process apart~~ from this one. The least significant byte of the mask contains a mask of the SHARED logs held. The next least significant byte of the mask indicates the EXCLUSIVE locks held. For example, to test if some other connection is holding a SHARED lock on region A, or an EXCLUSIVE lock on region C, do: hasSharedOnA = (mOther & (LOG_REGION_A<<0)); hasExclusiveOnC = (mOther & (LOG_REGION_C<<8)); In all masks, if the bit in the EXCLUSIVE byte mask is set, so is the ** corresponding bit in the SHARED mask. / ~~mOther = 0;~~ ~~for(p=pSummary->pLock; p; p=p->pNext){~~ ~~assert( (p->mLock & (p->mLock<<8))==(p->mLock&0x0000FF00) );~~ ~~if( p!=&pLog->lock ){~~ ~~mOther \|= p->mLock;~~ } } ~~/ If this call is to lock a region (not to unlock one), test if locks held~~ by any other connection in this process prevent the new locks from begin granted. If so, exit the summary mutex and return SQLITE_BUSY. / ~~if( op && (mOther & (mRegion << (op==LOG_RDLOCK ? 8 : 0))) ){~~ ~~sqlite3_mutex_leave(pSummary->mutex);~~ ~~return SQLITE_BUSY;~~ } ~~/ Figure out the new log mask for this connection. /~~ ~~switch( op ){~~ ~~case LOG_UNLOCK:~~ ~~mNew = (pLog->lock.mLock & ~(mRegion\|(mRegion<<8)));~~ ~~break;~~ ~~case LOG_RDLOCK:~~ ~~mNew = (pLog->lock.mLock \| mRegion);~~ ~~break;~~ ~~default:~~ ~~assert( op==LOG_WRLOCK );~~ ~~mNew = (pLog->lock.mLock \| (mRegion<<8) \| mRegion);~~ ~~break;~~ } ~~/ Now modify the locks held on the log-summary file descriptor. This~~ file descriptor is shared by all log connections in this process. Therefore: + If one or more log connections in this process hold a SHARED lock on a region, the file-descriptor should hold a SHARED lock on the file region. + If a log connection in this process holds an EXCLUSIVE lock on a region, the file-descriptor should also hold an EXCLUSIVE lock on the region in question. If this is an LOG_UNLOCK operation, only regions for which no other connection holds a lock should actually be unlocked. And if this is a LOG_RDLOCK operation and other connections already hold all ** the required SHARED locks, then no system call is required. / ~~if( op==LOG_UNLOCK ){~~ ~~mRegion = (mRegion & ~mOther);~~ } ~~if( (op==LOG_WRLOCK)~~ ~~\|\| (op==LOG_UNLOCK && mRegion)~~ ~~\|\| (op==LOG_RDLOCK && (mOther&mRegion)!=mRegion)~~ ){ ~~struct LockMap {~~ ~~int iStart; / Byte offset to start locking operation /~~ ~~int iLen; / Length field for locking operation /~~ ~~} aMap[] = {~~ ~~/ 0000 / {0, 0}, / 0001 / {3+LOG_LOCK_REGION, 1},~~ ~~/ 0010 / {2+LOG_LOCK_REGION, 1}, / 0011 / {2+LOG_LOCK_REGION, 2},~~ ~~/ 0100 / {1+LOG_LOCK_REGION, 1}, / 0101 / {0, 0},~~ ~~/ 0110 / {1+LOG_LOCK_REGION, 2}, / 0111 / {1+LOG_LOCK_REGION, 3},~~ ~~/ 1000 / {0+LOG_LOCK_REGION, 1}, / 1001 / {0, 0},~~ ~~/ 1010 / {0, 0}, / 1011 / {0, 0},~~ ~~/ 1100 / {0+LOG_LOCK_REGION, 2}, / 1101 / {0, 0},~~ ~~/ 1110 / {0, 0}, / 1111 / {0, 0}~~ }; ~~int rc; / Return code of logLockFd() /~~ ~~assert( mRegion<ArraySize(aMap) && aMap[mRegion].iStart!=0 );~~ ~~rc = logLockFd(pSummary, aMap[mRegion].iStart, aMap[mRegion].iLen, op);~~ ~~if( rc!=0 ){~~ ~~sqlite3_mutex_leave(pSummary->mutex);~~ ~~return rc;~~ } } ~~pLog->lock.mLock = mNew;~~ ~~sqlite3_mutex_leave(pSummary->mutex);~~ ~~return SQLITE_OK;~~ } / Lock the DMH region, either with an EXCLUSIVE or SHARED lock. This function is never called with LOG_UNLOCK - the only way the DMH region ** is every completely unlocked is by by closing the file descriptor. / ~~static int logLockDMH(LogSummary pSummary, int eLock){~~ ~~assert( sqlite3_mutex_held(pSummary->mutex) );~~ ~~assert( eLock==LOG_RDLOCK \|\| eLock==LOG_WRLOCK );~~ ~~return logLockFd(pSummary, LOG_LOCK_DMH, 1, eLock);~~ } /* Lock (or unlock) the MUTEX region. It is always locked using an EXCLUSIVE, blocking lock. / ~~static int logLockMutex(LogSummary pSummary, int eLock){~~ ~~assert( sqlite3_mutex_held(pSummary->mutex) );~~ ~~assert( eLock==LOG_WRLOCKW \|\| eLock==LOG_UNLOCK );~~ ~~logLockFd(pSummary, LOG_LOCK_MUTEX, 1, eLock);~~ ~~return SQLITE_OK;~~ } /* This function intializes the connection to the log-summary identified by struct pSummary. / ~~static int logSummaryInit(~~ ~~LogSummary pSummary, /* Log summary object to initialize /~~ ~~sqlite3_file pFd /* File descriptor open on log file /~~ ){ ~~int rc; / Return Code /~~ ~~char zFile; /* File name for summary file /~~ ~~assert( pSummary->fd<0 );~~ ~~assert( pSummary->aData==0 );~~ ~~assert( pSummary->nRef>0 );~~ ~~assert( pSummary->zPath );~~ ~~/ Open a file descriptor on the summary file. /~~ ~~zFile = sqlite3_mprintf("%s-summary", pSummary->zPath);~~ ~~if( !zFile ){~~ ~~return SQLITE_NOMEM;~~ } ~~pSummary->fd = open(zFile, O_RDWR\|O_CREAT, S_IWUSR\|S_IRUSR);~~ ~~sqlite3_free(zFile);~~ ~~if( pSummary->fd<0 ){~~ ~~return SQLITE_IOERR;~~ } ~~/ Grab an exclusive lock the summary file. Then mmap() it.~~ TODO: This code needs to be enhanced to support a growable mapping. For now, just make the mapping very large to start with. The pages should not be allocated until they are first accessed anyhow, so using a large mapping consumes no more resources than a smaller one would. / ~~assert( sqlite3_mutex_held(pSummary->mutex) );~~ ~~rc = logLockMutex(pSummary, LOG_WRLOCKW);~~ ~~if( rc!=SQLITE_OK ) return rc;~~ ~~rc = logSummaryMap(pSummary, LOGSUMMARY_MMAP_INCREMENT);~~ ~~if( rc!=SQLITE_OK ) goto out;~~ ~~/ Try to obtain an EXCLUSIVE lock on the dead-mans-hand region. If this~~ is possible, the contents of the log-summary file (if any) may not be trusted. Zero the log-summary header before continuing. / ~~rc = logLockDMH(pSummary, LOG_WRLOCK);~~ ~~if( rc==SQLITE_OK ){~~ ~~memset(pSummary->aData, 0, (LOGSUMMARY_HDR_NFIELD+2)sizeof(u32) );~~ } ~~rc = logLockDMH(pSummary, LOG_RDLOCK);~~ ~~if( rc!=SQLITE_OK ){~~ ~~rc = SQLITE_IOERR;~~ } ~~out:~~ ~~logLockMutex(pSummary, LOG_UNLOCK);~~ return rc; } /* Open a connection to the log file associated with database zDb. The database file does not actually have to exist. zDb is used only to figure out the name of the log file to open. If the log file does not exist it is created by this call. A SHARED lock should be held on the database file when this function is called. The purpose of this SHARED lock is to prevent any other client from unlinking the log or log-~~summary~~ file. If another process were to do this just after this client opened one of these files, the system would be badly broken. / int sqlite3WalOpen( ~~sqlite3_vfs pVfs, /* vfs module to open l~~og file~~ w~~ith~~ /~~ const char zDb, /* Name of database file / ~~~~Log~~ pp~~Log~~ / OUT: Allocated ~~Log~~ handle /~~ ){ int rc = SQLITE_OK; / Return Code / ~~Log~~ pRet; /* Object to allocate and return / ~~LogSummary pSummary = 0; /* Summary object /~~ ~~sqlite3_mutex mutex = 0; /* LOG_SUMMARY_MUTEX mutex /~~ int flags; / Flags passed to OsOpen() / char zWal = 0; /* Path to WAL file / int nWal; / Length of zWal in bytes / assert( zDb ); ~~/ Allocate an instance of struct ~~Log~~ to return. / pp~~Log~~ = 0;~~ ~~pRet = (~~Log~~ )sqlite3MallocZero(sizeof(~~Log~~) + pVfs->szOsFile); if( !pRet ) goto out;~~ pRet->pVfs = pVfs; pRet->pFd = (sqlite3_file )&pRet[1]; ~~/* No~~rmalize~~ the path ~~nam~~e. / ~~zWal = sqlite3_mprintf("%s-wal", zDb);~~ ~~if( !~~zWal ~~) goto out~~; ~~logNormalizePath(zWal);~~~~ flags = (SQLITE_OPEN_READWRITE\|SQLITE_OPEN_CREATE\|SQLITE_OPEN_MAIN_JOURNAL); ~~nWal = sqlite3Strlen30(zWal);~~ ~~/ Enter the mutex that protects the linked-list of LogSummary structures /~~ ~~if( sqlite3GlobalConfig.bCoreMutex ){~~ ~~mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX);~~ } ~~sqlite3_mutex_enter(mutex);~~ ~~/ Search for an existing log summary object in the linked list. If one~~ ** cannot be found, allocate and initialize a new object. / ~~for(pSummary=pLogSummary; pSummary; pSummary=pSummary->pNext){~~ ~~int nPath = sqlite3Strlen30(pSummary->zPath);~~ ~~if( nWal==nPath && 0==memcmp(pSummary->zPath, zWal, nPath) ) break;~~ } ~~if( !pSummary ){~~ ~~int nByte = sizeof(LogSummary) + nWal + 1;~~ ~~pSummary = (LogSummary )sqlite3MallocZero(nByte);~~ ~~if( !pSummary ){~~ ~~rc = SQLITE_NOMEM;~~ ~~goto out;~~ } ~~if( sqlite3GlobalConfig.bCoreMutex ){~~ ~~pSummary->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE);~~ } ~~pSummary->zPath = (char )&pSummary[1];~~ ~~pSummary->fd = -1;~~ ~~memcpy(pSummary->zPath, zWal, nWal);~~ ~~pSummary->pNext = pLogSummary;~~ ~~pLogSummary = pSummary;~~ } ~~pSummary->nRef++;~~ ~~pRet->pSummary = pSummary;~~ ~~/ Exit the mutex protecting the linked-list of LogSummary objects. /~~ ~~sqlite3_mutex_leave(mutex);~~ ~~mutex = 0;~~ ~~/ Open file handle on the log file. /~~ rc = sqlite3OsOpen(pVfs, ~~pSummary->zPath~~, pRet->pFd, flags, &flags); ~~if( rc!=SQLITE_OK ) goto out;~~ ~~/ Object pSummary is shared between all connections to the database made~~ by this process. So at this point it may or may not be connected to the log-summary. If it is not, connect it. / ~~sqlite3_mutex_enter(pSummary->mutex);~~ ~~mutex = pSummary->mutex;~~ ~~if( pSummary->fd<0 ){~~ ~~rc = logSummaryInit(pSummary, pRet->pFd);~~ } ~~pRet->lock.pNext = pSummary->pLock;~~ ~~pSummary->pLock = &pRet->lock;~~ out: ~~sqlite3_mutex_leave(mutex);~~ ~~sqlite3_free(zWal);~~ if( rc!=SQLITE_OK ){ ~~assert(0);~~ if( pRet ){ sqlite3OsClose(pRet->pFd); sqlite3_free(pRet); } ~~assert( !pSummary \|\| pSummary->nRef==0 );~~ ~~sqlite3_free(pSummary);~~ } pp~~Log~~ = pRet; return rc; } static int ~~log~~IteratorNext( ~~Log~~Iterator p, / Iterator / u32 piPage, /* OUT: Next db page to write / u32 piFrame /* OUT: ~~Log~~ frame to read from / ){ u32 iMin = piPage; u32 iRet = 0xFFFFFFFF; int i; int nBlock = p->nFinal; for(i=p->nSegment-1; i>=0; i--){ ~~struct ~~Log~~Segment pSegment = &p->aSegment[i];~~ while( pSegment->iNext<nBlock ){ u32 iPg = pSegment->aDbPage[pSegment->aIndex[pSegment->iNext]]; if( iPg>iMin ){ if( iPg<iRet ){ iRet = iPg; piFrame = i256 + 1 + pSegment->aIndex[pSegment->iNext]; } break; } pSegment->iNext++; } nBlock = 256; } piPage = iRet; return (iRet==0xFFFFFFFF); } ~~static ~~Log~~Iterator ~~log~~IteratorInit(~~Log~~ p~~Log~~){ u32 aData = pLog-~~>pSummary->aData;~~ ~~Log~~Iterator p; /* Return value /~~ int nSegment; / Number of segments to merge / u32 iLast; / Last frame in log / int nByte; / Number of bytes to allocate / int i; / Iterator variable / int nFinal; / Number of unindexed entries / ~~struct ~~Log~~Segment pFinal; /* Final (unindexed) segment /~~ u8 aTmp; /* Temp space used by merge-sort / ~~iLast = p~~Log~~->hdr.iLastPg;~~ nSegment = (iLast >> 8) + 1; nFinal = (iLast & 0x000000FF); ~~nByte = sizeof(~~Log~~Iterator) + (nSegment-1)sizeof(struct ~~Log~~Segment) + 512; p = (~~Log~~Iterator )sqlite3_malloc(nByte);~~ if( p ){ memset(p, 0, nByte); p->nSegment = nSegment; p->nFinal = nFinal; } for(i=0; i<nSegment-1; i++){ ~~p->aSegment[i].aDbPage = &aData[l~~ogSummary~~Entry(i256+1)]; p->aSegment[i].aIndex = (u8 )&aData[l~~ogSummary~~Entry(i256+1)+256];~~ } pFinal = &p->aSegment[nSegment-1]; ~~pFinal->aDbPage = &aData[l~~ogSummary~~Entry((nSegment-1)256+1)];~~ pFinal->aIndex = (u8 )&pFinal[1]; aTmp = &pFinal->aIndex[256]; for(i=0; i<nFinal; i++){ pFinal->aIndex[i] = i; } ~~~~log~~Mergesort8(pFinal->aDbPage, aTmp, pFinal->aIndex, &nFinal);~~ p->nFinal = nFinal; return p; } /* ** Free a log iterator allocated by ~~log~~IteratorInit(). / ~~static void ~~log~~IteratorFree(~~Log~~Iterator p){~~ sqlite3_free(p); } /* ** Checkpoint the contents of the log file. / ~~static int ~~log~~Checkpoint( ~~Log~~ p~~Log~~, /* ~~Log~~ connection /~~ sqlite3_file pFd, /* File descriptor open on db file / int sync_flags, / Flags for OsSync() (or 0) / u8 zBuf /* Temporary buffer to use / ){ int rc; / Return code / ~~int pgsz = p~~Log~~->hdr.pgsz; / Database page-size / ~~Log~~Iterator pIter = 0; /* ~~Log~~ iterator context /~~ u32 iDbpage = 0; / Next database page to write / ~~u32 iFrame = 0; / ~~Log~~ frame containing data for iDbpage /~~ ~~if( p~~Log~~->hdr.iLastPg==0 ){~~ return SQLITE_OK; } / Allocate the iterator / ~~pIter = ~~log~~IteratorInit(p~~Log~~);~~ if( !pIter ) return SQLITE_NOMEM; / Sync the log file to disk / if( sync_flags ){ ~~rc = sqlite3OsSync(p~~Log~~->pFd, sync_flags);~~ if( rc!=SQLITE_OK ) goto out; } / Iterate through the contents of the log, copying data to the db file. / ~~while( 0==~~log~~IteratorNext(pIter, &iDbpage, &iFrame) ){ rc = sqlite3OsRead(p~~Log~~->pFd, zBuf, pgsz, ~~log~~FrameOffset(iFrame, pgsz) + ~~LOG~~_FRAME_HDRSIZE~~ ); if( rc!=SQLITE_OK ) goto out; rc = sqlite3OsWrite(pFd, zBuf, pgsz, (iDbpage-1)pgsz); if( rc!=SQLITE_OK ) goto out; } /* Truncate the database file / ~~rc = sqlite3OsTruncate(pFd, ((i64)p~~Log~~->hdr.nPage(i64)pgsz));~~ if( rc!=SQLITE_OK ) goto out; ~~/* Sync the database file. If successful, update the log-~~summary~~. /~~ if( sync_flags ){ rc = sqlite3OsSync(pFd, sync_flags); if( rc!=SQLITE_OK ) goto out; } ~~p~~Log~~->hdr.iLastPg = 0; p~~Log~~->hdr.iCheck1 = 2; p~~Log~~->hdr.iCheck2 = 3; l~~ogSummary~~WriteHdr(p~~Log->pSummary~~, &p~~Log~~->hdr);~~ / TODO: If a crash occurs and the current log is copied into the database there is no problem. However, if a crash occurs while writing the next transaction into the start of the log, such that: * The first transaction currently in the log is left intact, but ** * The second (or subsequent) transaction is damaged, then the database could become corrupt. The easiest thing to do would be to write and sync a dummy header into the log at this point. Unfortunately, that turns out to be an unwelcome performance hit. Alternatives are... / #if 0 ~~memset(zBuf, 0, ~~LOG~~_FRAME_HDRSIZE); rc = sqlite3OsWrite(p~~Log~~->pFd, zBuf, ~~LOG~~_FRAME_HDRSIZE, 0);~~ if( rc!=SQLITE_OK ) goto out; ~~rc = sqlite3OsSync(p~~Log~~->pFd, p~~Log~~->sync_flags);~~ #endif out: ~~~~log~~IteratorFree(pIter);~~ return rc; } / ** Close a connection to a log file. / int sqlite3WalClose( ~~~~Log~~ p~~Log~~, /* ~~Log~~ to close /~~ sqlite3_file pFd, /* Database file / int sync_flags, / Flags to pass to OsSync() (or 0) / u8 zBuf /* Buffer of at least page-size bytes / ){ int rc = SQLITE_OK; if( p~~Log~~ ){ ~~LogLock ppL;~~ ~~LogSummary pSummary = pLog->pSummary;~~ ~~sqlite3_mutex mutex = 0;~~ ~~sqlite3_mutex_enter(pSummary->mutex);~~ ~~for(ppL=&pSummary->pLock; ppL!=&pLog->lock; ppL=&(ppL)->pNext);~~ ppL = pLog->lock.pNext; ~~sqlite3_mutex_leave(pSummary->mutex);~~ ~~if( sqlite3GlobalConfig.bCoreMutex ){~~ ~~mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX);~~ } ~~sqlite3_mutex_enter(mutex);~~ ~~/* Decrement the reference count on the log summary. If this is the last~~ reference to the log summary object in this process, the object will be freed. If this is also the last connection to the database, then checkpoint the database and truncate the log and log-summary files to zero bytes in size. / ~~pSummary->nRef--;~~ ~~if( pSummary->nRef==0 ){~~ ~~int rc;~~ ~~LogSummary pp;~~ ~~for(pp=&pLogSummary; pp!=pSummary; pp=&(pp)->pNext);~~ pp = (pp)->pNext; ~~sqlite3_mutex_leave(mutex);~~ ~~rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE);~~ ~~if( rc==SQLITE_OK ){~~ ~~/* This is the last connection to the database (including other~~ processes). Do three things: 1. Checkpoint the db. 2. Truncate the log file. ** 3. Unlink the log-summary file. / ~~rc = logCheckpoint(pLog, pFd, sync_flags, zBuf);~~ ~~if( rc==SQLITE_OK ){~~ ~~rc = sqlite3OsDelete(pLog->pVfs, pSummary->zPath, 0);~~ } ~~logSummaryUnmap(pSummary, 1);~~ ~~}else{~~ ~~if( rc==SQLITE_BUSY ){~~ ~~rc = SQLITE_OK;~~ } ~~logSummaryUnmap(pSummary, 0);~~ } ~~sqlite3_mutex_free(pSummary->mutex);~~ ~~sqlite3_free(pSummary);~~ ~~}else{~~ ~~sqlite3_mutex_leave(mutex);~~ } ~~/ Close the connection to the log file and free the Log handle. /~~ sqlite3OsClose(p~~Log~~->pFd); sqlite3_free(p~~Log~~); } return rc; } / Enter and leave the log-summary mutex. In this context, entering the log-summary mutex means: 1. Obtaining mutex pLog->pSummary->mutex, and 2. Taking an exclusive lock on the log-summary file. i.e. this mutex locks out other processes as well as other threads hosted in this address space. / ~~static int logEnterMutex(Log pLog){~~ ~~LogSummary pSummary = pLog->pSummary;~~ ~~int rc;~~ ~~sqlite3_mutex_enter(pSummary->mutex);~~ ~~rc = logLockMutex(pSummary, LOG_WRLOCKW);~~ ~~if( rc!=SQLITE_OK ){~~ ~~sqlite3_mutex_leave(pSummary->mutex);~~ } ~~return rc;~~ } ~~static void logLeaveMutex(Log pLog){~~ ~~LogSummary pSummary = pLog->pSummary;~~ ~~logLockMutex(pSummary, LOG_UNLOCK);~~ ~~sqlite3_mutex_leave(pSummary->mutex);~~ } / Try to read the log-~~summary~~ header. Attempt to verify the header checksum. If the checksum can be verified, copy the log-~~summary~~ header into structure p~~Log~~->hdr. If the contents of p~~Log~~->hdr are modified by this and pChanged is not NULL, set pChanged to 1. * Otherwise leave pChanged unmodified. * ** If the checksum cannot be verified return SQLITE_ERROR. / ~~int l~~ogSummary~~TryHdr(~~Log~~ p~~Log~~, int pChanged){~~ u32 aCksum[2] = {1, 1}; ~~u32 aHdr[L~~OGSUMMARY~~_HDR_NFIELD+2];~~ ~~/ Read the header. The caller may or may not have locked the log-~~summary~~~~ file, meaning it is possible that an inconsistent snapshot is read from the file. If this happens, return SQLITE_ERROR. The caller will retry. Or, if the caller has already locked the file and the header still looks inconsistent, it will run recovery. / memcpy(aHdr, p~~Log~~->p~~Summary->a~~Data, sizeof(aHdr)); ~~log~~ChecksumBytes((u8)aHdr, sizeof(u32)L~~OGSUMMARY~~_HDR_NFIELD, aCksum); if( aCksum[0]!=aHdr[L~~OGSUMMARY~~_HDR_NFIELD] \|\| aCksum[1]!=aHdr[L~~OGSUMMARY~~_HDR_NFIELD+1] ){ return SQLITE_ERROR; } ~~if( memcmp(&p~~Log~~->hdr, aHdr, sizeof(~~LogSumm~~aryHdr)) ){~~ if( pChanged ){ pChanged = 1; } ~~memcpy(&p~~Log~~->hdr, aHdr, sizeof(~~LogSumm~~aryHdr));~~ } return SQLITE_OK; } /* Read the log-~~summary~~ header from the log-~~summary~~ file into structure p~~Log~~->hdr. If attempting to verify the header checksum fails, try to recover the log before returning. If the log-~~summary~~ header is successfully read, return SQLITE_OK. Otherwise an SQLite error code. / ~~int l~~ogSummary~~ReadHdr(~~Log~~ p~~Log~~, int pChanged){~~ int rc; / First try to read the header without a lock. Verify the checksum before returning. This will almost always work. TODO: Doing this causes a race-condition with the code that resizes the mapping. Unless Log.pSummary->mutex is held, it is possible that ** LogSummary.aData is invalid. / ~~~~#if 0~~ if( SQLITE_OK==l~~ogSummary~~TryHdr(p~~Log~~, pChanged) ){~~ return SQLITE_OK; } ~~#endif~~ ~~/ If the first attempt to read the header failed, lock the log-~~summary~~~~ file and try again. If the header checksum verification fails this time as well, run log recovery. / ~~if( SQLITE_OK==(rc = lo~~gEnterMutex~~(p~~Log~~)) ){ if( SQLITE_OK!=l~~ogSummary~~TryHdr(p~~Log~~, pChanged) ){~~ if( pChanged ){ pChanged = 1; } ~~rc = l~~ogSummary~~Recover(p~~Log->pSummary, pLog->pFd~~);~~ if( rc==SQLITE_OK ){ ~~rc = l~~ogSummary~~TryHdr(p~~Log~~, 0);~~ } } ~~lo~~gLeaveMutex~~(p~~Log~~);~~ } return rc; } /* Lock a snapshot. If this call obtains a new read-lock and the database contents have been modified since the most recent call to ~~Log~~CloseSnapshot() on this ~~Log~~ ** connection, then pChanged is set to 1 before returning. Otherwise, it * is left unmodified. This is used by the pager layer to determine whether ** or not any cached pages may be safely reused. / ~~int sqlite3WalOpenSnapshot(~~Log~~ p~~Log~~, int pChanged){ int rc ~~= SQLITE_OK~~; ~~if( pLog->isLocked==0 ){~~ ~~int nAttempt;~~~~ ~~/ Obtain a snapshot-lock on the log-summary file. The procedure~~ for obtaining the snapshot log is: 1. Attempt a SHARED lock on regions A and B. 2a. If step 1 is successful, drop the lock on region B. 2b. If step 1 is unsuccessful, attempt a SHARED lock on region D. 3. Repeat the above until the lock attempt in step 1 or 2b is successful. If neither of the locks can be obtained after 5 tries, presumably something is wrong (i.e. a process not following the locking protocol). ** Return an error code in this case. / rc = SQLITE_~~BUSY~~; ~~for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){~~ ~~rc = logLockRegion(pLog, LOG_REGION_A\|LOG_REGION_B, LOG_RDLOCK);~~ if( rc==SQLITE_~~BUSY~~ ){ ~~rc = logLockRegion(pLog, LOG_REGION_D, LOG_RDLOCK);~~ ~~if( rc==SQLITE_OK ) pLog->isLocked = LOG_REGION_D;~~ ~~}else{~~ ~~logLockRegion(pLog, LOG_REGION_B, LOG_UNLOCK);~~ ~~pLog->isLocked = LOG_REGION_A;~~ } } ~~if(~~ ~~rc!=~~SQLITE_~~OK ){~~ ~~return rc;~~ } ~~rc = logSummaryReadHdr(pLog, pChanged);~~ if( rc!=SQLITE_OK ){ / An error occured while attempting log recovery. / ~~sqlite3WalCloseSnapshot(p~~Log~~);~~ }else{ / Check if the mapping needs to grow. / ~~LogSummary pSummary = pLog->pSummary;~~ if( p~~Log~~->hdr.iLastPg && l~~ogSummary~~Entry(p~~Log~~->hdr.iLastPg)>=p~~Summary~~->n~~Data~~ ){ rc = l~~ogSummary~~Remap(p~~Summary~~, 0); assert( rc \|\| l~~ogSummary~~Entry(p~~Log~~->hdr.iLastPg)<p~~Summary~~->~~nData~~ ); } } } return rc; } /* ** Unlock the current snapshot. / void sqlite3WalCloseSnapshot(~~Log~~ p~~Log~~){ if( p~~Log~~->~~isL~~ocked ){ assert( p~~Log~~->~~isL~~ocked==~~LOG~~_RE~~GION_A \|\| pLog->isLocked==LOG_REGION_~~D ); ~~log~~Lock~~Region~~(p~~Log~~, pL~~og->isLocked, LOG~~_UNLOCK); } ~~pLog->isLocked = 0;~~ } /* ** Read a page from the log, if it is present. / ~~int sqlite3WalRead(~~Log~~ p~~Log~~, Pgno pgno, int pIn~~Log~~, u8 pOut){ ~~LogSummary pSummary = pLog->pSummary;~~~~ u32 iRead = 0; u32 aData; ~~int iFrame = (p~~Log~~->hdr.iLastPg & 0xFFFFFF00);~~ ~~assert( p~~Log~~->~~isL~~ocked ); ~~sqlite3_mutex_enter(pSummary->mutex);~~~~ /* Do a linear search of the unindexed block of page-numbers (if any) at the end of the log-~~summary~~. An alternative to this would be to build an index in private memory each time a read transaction is ** opened on a new snapshot. / ~~aData = p~~Summary~~->aData; if( p~~Log~~->hdr.iLastPg ){ u32 pi = &aData[l~~ogSummary~~Entry(p~~Log~~->hdr.iLastPg)]; u32 piStop = pi - (p~~Log~~->hdr.iLastPg & 0xFF);~~ while( pi!=pgno && pi!=piStop ) pi--; if( pi!=piStop ){ iRead = (pi-piStop) + iFrame; } } ~~assert( iRead==0 \|\| aData[l~~ogSummary~~Entry(iRead)]==pgno );~~ while( iRead==0 && iFrame>0 ){ int iLow = 0; int iHigh = 255; u32 aFrame; u8 aIndex; iFrame -= 256; ~~aFrame = &aData[l~~ogSummary~~Entry(iFrame+1)];~~ aIndex = (u8 )&aFrame[256]; while( iLow<=iHigh ){ int iTest = (iLow+iHigh)>>1; u32 iPg = aFrame[aIndex[iTest]]; if( iPg==pgno ){ iRead = iFrame + 1 + aIndex[iTest]; break; } else if( iPg<pgno ){ iLow = iTest+1; }else{ iHigh = iTest-1; } } } ~~assert( iRead==0 \|\| aData[l~~ogSummary~~Entry(iRead)]==pgno ); ~~sqlite3_mutex_leave(pLog->pSummary->mutex);~~~~ / If iRead is non-zero, then it is the log frame number that contains the ** required page. Read and return data from the log file. / if( iRead ){ ~~i64 iOffset = ~~log~~FrameOffset(iRead, p~~Log~~->hdr.pgsz) + ~~LOG~~_FRAME_HDRSIZE; pIn~~Log~~ = 1; return sqlite3OsRead(p~~Log~~->pFd, pOut, p~~Log~~->hdr.pgsz, iOffset);~~ } pIn~~Log~~ = 0; return SQLITE_OK; } / ** Set pPgno to the size of the database file (or zero, if unknown). / ~~void sqlite3WalDbsize(~~Log~~ p~~Log~~, Pgno pPgno){ assert( p~~Log~~->~~isL~~ock~~ed );~~~~ pPgno = p~~Log~~->hdr.nPage; } / This function returns SQLITE_OK if the caller may write to the database. Otherwise, if the caller is operating on a snapshot that has already ** been overwritten by another writer, SQLITE_BUSY is returned. / ~~int sqlite3WalWriteLock(~~Log~~ p~~Log~~, int op){ ~~asse~~r~~t( pLog->isLocked )~~;~~ if( op ){ ~~/* Obtain the writer lock /~~ ~~int rc = logLockRegion(pLog, LOG_REGION_C\|LOG_REGION_D, LOG_WRLOCK);~~ ~~if( rc!=SQLITE_OK ){~~ ~~return rc;~~ } ~~/ If this is connection is a region D reader, then the SHARED lock on~~ region D has just been upgraded to EXCLUSIVE. But no lock at all is held on region A. This means that if the write-transaction is committed and this connection downgrades to a reader, it will be left with no lock at all. And so its snapshot could get clobbered by a checkpoint operation. To stop this from happening, grab a SHARED lock on region A now. This should always be successful, as the only time a client holds an EXCLUSIVE lock on region A, it must also be holding an EXCLUSIVE lock on region C (a checkpointer does this). This is not possible, ** as this connection currently has the EXCLUSIVE lock on region C. / ~~if( pLog->isLocked==LOG_REGION_D ){~~ ~~logLockRegion(pLog, LOG_REGION_A, LOG_RDLOCK);~~ ~~pLog->isLocked = LOG_REGION_A;~~ } ~~/ If this connection is not reading the most recent database snapshot,~~ it is not possible to write to the database. In this case release the write locks and return SQLITE_BUSY. / ~~if( memcmp(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)) ){~~ ~~logLockRegion(pLog, LOG_REGION_C\|LOG_REGION_D, LOG_UNLOCK);~~ ~~return SQLITE_BUSY;~~ } ~~pLog->isWriteLocked = 1;~~ ~~}else if( pLog->isWriteLocked ){~~ ~~logLockRegion(pLog, LOG_REGION_C\|LOG_REGION_D, LOG_UNLOCK);~~ ~~memcpy(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr));~~ ~~pLog->isWriteLocked = 0;~~ } ~~return ~~SQLITE_OK~~;~~ } / The lo~~g handle~~ passed to this function must be holding the write-lock. If any data has been written (but not committed) to the log file, this function moves the write-pointer back to the start of the transaction. Additionally, the callback function is invoked for each frame written to the log since the start of the transaction. If the callback returns other than SQLITE_OK, it is not invoked again and the error code is returned to the caller. Otherwise, if the callback function does not return an error, this function returns SQLITE_OK. / ~~int sqlite3WalUndo(~~Log~~ p~~Log~~, int (xUndo)(void , Pgno), void pUndoCtx){~~ int rc = SQLITE_OK; ~~Pgno iMax = p~~Log~~->hdr.iLastPg;~~ Pgno iFrame; assert( p~~Log~~->~~isWriteL~~ocked ); l~~ogSummary~~ReadHdr(p~~Log~~, 0); for(iFrame=p~~Log~~->hdr.iLastPg+1; iFrame<=iMax && rc==SQLITE_OK; iFrame++){ rc = xUndo(pUndoCtx, p~~Log~~->p~~Summary->a~~Data[l~~ogSummary~~Entry(iFrame)]); } return rc; } ~~u32 sqlite3WalSavepoint(~~Log~~ p~~Log~~){ assert( p~~Log~~->~~isWriteL~~ocked ); return p~~Log~~->hdr.iLastPg;~~ } ~~int sqlite3WalSavepointUndo(~~Log~~ p~~Log~~, u32 iFrame){~~ int rc = SQLITE_OK; u8 aCksum[8]; ~~assert( p~~Log~~->~~isWriteL~~ocked );~~ ~~p~~Log~~->hdr.iLastPg = iFrame;~~ if( iFrame>0 ){ i64 iOffset = ~~log~~FrameOffset(iFrame, p~~Log~~->hdr.pgsz) + sizeof(u32)2; rc = sqlite3OsRead(p~~Log~~->pFd, aCksum, sizeof(aCksum), iOffset); p~~Log~~->hdr.iCheck1 = sqlite3Get4byte(&aCksum[0]); p~~Log~~->hdr.iCheck2 = sqlite3Get4byte(&aCksum[4]); } return rc; } /* ** Return true if data has been written but not committed to the log file. / ~~int sqlite3WalDirty(~~Log~~ p~~Log~~){ assert( p~~Log~~->~~isWriteL~~ocked ); return( p~~Log~~->hdr.iLastPg!=((~~LogSumm~~aryHdr)p~~Log~~->p~~Summary->a~~Data)->iLastPg );~~ } / Write a set of frames to the log. The caller must hold the write-lock on the log file (obtained using sqlite3WalWriteLock()). / int sqlite3WalFrames( ~~~~Log~~ p~~Log~~, /* ~~Log~~ handle to write to /~~ int nPgsz, / Database page-size in bytes / PgHdr pList, /* List of dirty pages to write / Pgno nTruncate, / Database size after this commit / int isCommit, / True if this is a commit / int sync_flags / Flags to pass to OsSync() (or 0) / ){ int rc; / Used to catch return codes / u32 iFrame; / Next frame address / ~~u8 aFrame[~~LOG~~_FRAME_HDRSIZE]; / Buffer to assemble frame-header in /~~ PgHdr p; /* Iterator to run through pList with. / u32 aCksum[2]; / Checksums / PgHdr pLast; /* Last frame in list / int nLast = 0; / Number of extra copies of last page / ~~assert( ~~LOG~~_FRAME_HDRSIZE==(4 2 + 2sizeof(u32)) );~~ assert( pList ); / If this is the first frame written into the log, write the log header to the start of the log file. See comments at the top of this file for a description of the log-header format. / ~~assert( ~~LOG~~_FRAME_HDRSIZE>=~~LOG~~_HDRSIZE ); iFrame = p~~Log~~->hdr.iLastPg;~~ if( iFrame==0 ){ sqlite3Put4byte(aFrame, nPgsz); sqlite3_randomness(8, &aFrame[4]); ~~p~~Log~~->hdr.iCheck1 = sqlite3Get4byte(&aFrame[4]); p~~Log~~->hdr.iCheck2 = sqlite3Get4byte(&aFrame[8]); rc = sqlite3OsWrite(p~~Log~~->pFd, aFrame, ~~LOG~~_HDRSIZE, 0);~~ if( rc!=SQLITE_OK ){ return rc; } } ~~aCksum[0] = p~~Log~~->hdr.iCheck1; aCksum[1] = p~~Log~~->hdr.iCheck2;~~ / Write the log file. / for(p=pList; p; p=p->pDirty){ u32 nDbsize; / Db-size field for frame header / i64 iOffset; / Write offset in log file / ~~iOffset = ~~log~~FrameOffset(++iFrame, nPgsz);~~ / Populate and write the frame header / nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0; ~~~~log~~EncodeFrame(aCksum, p->pgno, nDbsize, nPgsz, p->pData, aFrame); rc = sqlite3OsWrite(p~~Log~~->pFd, aFrame, sizeof(aFrame), iOffset);~~ if( rc!=SQLITE_OK ){ return rc; } / Write the page data / ~~rc = sqlite3OsWrite(p~~Log~~->pFd, p->pData, nPgsz, iOffset + sizeof(aFrame));~~ if( rc!=SQLITE_OK ){ return rc; } pLast = p; } / Sync the log file if the 'isSync' flag was specified. / if( sync_flags ){ ~~i64 iSegment = sqlite3OsSectorSize(p~~Log~~->pFd); i64 iOffset = ~~log~~FrameOffset(iFrame+1, nPgsz);~~ assert( isCommit ); if( iSegment<SQLITE_DEFAULT_SECTOR_SIZE ){ iSegment = SQLITE_DEFAULT_SECTOR_SIZE; } iSegment = (((iOffset+iSegment-1)/iSegment) iSegment); while( iOffset<iSegment ){ ~~~~log~~EncodeFrame(aCksum,pLast->pgno,nTruncate,nPgsz,pLast->pData,aFrame); rc = sqlite3OsWrite(p~~Log~~->pFd, aFrame, sizeof(aFrame), iOffset);~~ if( rc!=SQLITE_OK ){ return rc; } ~~iOffset += ~~LOG~~_FRAME_HDRSIZE; rc = sqlite3OsWrite(p~~Log~~->pFd, pLast->pData, nPgsz, iOffset);~~ if( rc!=SQLITE_OK ){ return rc; } nLast++; iOffset += nPgsz; } ~~rc = sqlite3OsSync(p~~Log~~->pFd, sync_flags);~~ if( rc!=SQLITE_OK ){ return rc; } } /* Append data to the log summary. It is not necessary to lock the log-~~summary~~ to do this as the RESERVED lock held on the db file guarantees that there are no other writers, and no data that may ** be in use by existing readers is being overwritten. / ~~iFrame = p~~Log~~->hdr.iLastPg;~~ for(p=pList; p; p=p->pDirty){ iFrame++; ~~l~~ogSummary~~Append(p~~Log->pSummary~~, iFrame, p->pgno);~~ } while( nLast>0 ){ iFrame++; nLast--; ~~l~~ogSummary~~Append(p~~Log->pSummary~~, iFrame, pLast->pgno);~~ } / Update the private copy of the header. / ~~p~~Log~~->hdr.pgsz = nPgsz; p~~Log~~->hdr.iLastPg = iFrame;~~ if( isCommit ){ ~~p~~Log~~->hdr.iChange++; p~~Log~~->hdr.nPage = nTruncate;~~ } ~~p~~Log~~->hdr.iCheck1 = aCksum[0]; p~~Log~~->hdr.iCheck2 = aCksum[1];~~ / If this is a commit, update the log-~~summary~~ header too. / if( isCommit ~~&& SQLITE_OK==(rc = logEnterMutex(pLog))~~ ){ l~~ogSummary~~WriteHdr(p~~Log->pSummary~~, &p~~Log~~->hdr); ~~logLeaveMutex(pLog);~~ p~~Log~~->iCallback = iFrame; } return rc; } / Checkpoint the database: 1. Wai~~t for~~ an EXC~~LUSIVE~~ lock ~~on regions B and C.~~ 2. Wait for an EXCLUSIVE lock on region A. 3. Copy the contents of the log into the database file. 4. Zero the log-~~summary~~ header (so new readers will ignore the log). ** 5. Drop the lock~~s obtained in steps 1 and 2~~. / int sqlite3WalCheckpoint( ~~~~Log~~ p~~Log~~, /* ~~Log~~ connection /~~ sqlite3_file pFd, /* File descriptor open on db file / int sync_flags, / Flags to sync db file with (or 0) / u8 zBuf, /* Temporary buffer to use / int (xBusyHandler)(void ), / Pointer to busy-handler function / void pBusyHandlerArg /* Argument to pass to xBusyHandler / ){ int rc; / Return code / int isChanged = 0; / True if a new wal-index header is loaded / ~~assert( ~~!pLog~~->~~isL~~ocked );~~ ~~/ ~~Wai~~t ~~for~~ an EXC~~LUS~~IVE lock ~~on regions B and C.~~ /~~ do { ~~rc = logLockRegion(pLog, LOG_REGION_B\|LOG_REGION_C, LOG_WRLOCK);~~ ~~}while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) );~~ ~~if( rc!=SQLITE_OK ) return rc;~~ ~~/ Wait for an EXCLUSIVE lock on region A. /~~ ~~do {~~ ~~rc = logLockRegion(pLog, LOG_REGION_A, LOG_WRLOCK);~~ }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) ); if( rc!=SQLITE_OK ){ ~~~~log~~Lock~~Region~~(p~~Log~~, LOG_~~REGION_B\|LOG_REGION_C, LOG~~_UNLOCK);~~ return rc; } / Copy data from the log to the database file. / ~~rc = l~~ogSummary~~ReadHdr(p~~Log~~, &isChanged);~~ if( rc==SQLITE_OK ){ ~~rc = ~~log~~Checkpoint(p~~Log~~, pFd, sync_flags, zBuf);~~ } if( isChanged ){ / If a new wal-index header was loaded before the checkpoint was performed, then the pager-cache associated with log p~~Log~~ is now out of date. So zero the cached wal-index header to ensure that next time the pager opens a snapshot on this database it knows that the cache needs to be reset. / ~~memset(&p~~Log~~->hdr, 0, sizeof(~~LogSumm~~aryHdr));~~ } / Release the locks. / ~~~~log~~Lock~~Region~~(p~~Log~~, LOG_~~REGION_A\|LOG_REGION_B\|LOG_REGION_C, LOG~~_UNLOCK);~~ return rc; } ~~int sqlite3WalCallback(~~Log~~ p~~Log~~){~~ u32 ret = 0; ~~if( p~~Log~~ ){ ret = p~~Log~~->iCallback; p~~Log~~->iCallback = 0;~~ } return (int)ret; }	< < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < \| \| \| \| > > > > > > > \| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > \| \| \| \| \| > \| \| \| \| \| \| \| \| \| \| \| \| \| > \| \| \| \| \| \| \| \| \| \| < < < \| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < \| \| \| \| < < > \| \| > \| \| > > > > \| < \| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < \| < < < < < < < < < < < < < < \| < < < > < < \| \| \| \| \| \| \| \| \| \| > > \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| < < < \| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < \| \| < < < < < < < < < < < < < < < < < < < < < < < < < < < \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| > > > < < < < < \| < \| \| \| \| \| \| \| \| \| < < < < < < < < < < < < < < < \| < < \| < < < < < < < \| < \| \| < \| < < \| \| \| \| \| \| \| \| \| < \| < \| \| < > \| \| \| \| \| \| \| \| \| < \| \| \| \| \| \| > \| \| \| \| < < < < < \| < < < < < < < < < < < < < < < < < \| < < < < < < < < < \| < < < < \| \| \| \| \| \| \| \| > > > > \| \| \| > > > \| \| \| \| \| \| \| \| \| \| \| \| \| > \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| \| < \| > \| < \| \| \| \| \| \| < < < \| < < < \| \| \| \| \| \| > > > > > \| \| \| \|	351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252	} } #endif } /* Return the index in the WalIndex.aData array that corresponds to frame iFrame. The wal-index file consists of a header, followed by ** alternating "map" and "index" blocks. / static int walIndexEntry(u32 iFrame){ return ( (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32) + (((iFrame-1)>>8)<<6) / Indexes that occur before iFrame / + iFrame-1 / Db page numbers that occur before iFrame / ); } / ** Release our reference to the wal-index memory map. / static void walIndexUnmap(Wal pWal){ if( pWal->pWiData ){ pWal->pVfs->xShmRelease(pWal->pWIndex); pWal->pWiData = 0; } } /* ** Map the wal-index file into memory if it isn't already. / static int walIndexMap(Wal pWal){ int rc = SQLITE_OK; if( pWal->pWiData==0 ){ rc = pWal->pVfs->xShmSize(pWal->pWIndex, -1, &pWal->szWIndex, (void*)(char)&pWal->pWiData); } return rc; } /* ** Resize the wal-index file. / static int walIndexRemap(Wal pWal, int newSize){ int rc; walIndexUnmap(pWal); rc = pWal->pVfs->xShmSize(pWal->pWIndex, newSize, &pWal->szWIndex, (void*)(char)&pWal->pWiData); return rc; } /* ** Increment by which to increase the wal-index file size. / #define WALINDEX_MMAP_INCREMENT (641024) /* Set an entry in the wal-index map to map log frame iFrame to db page iPage. Values are always appended to the wal-index (i.e. the value of iFrame is always exactly one more than the value passed to the previous call), but that restriction is not enforced or asserted ** here. / static int walIndexAppend(Wal pWal, u32 iFrame, u32 iPage){ u32 iSlot = walIndexEntry(iFrame); walIndexMap(pWal); while( (iSlot+128)>=pWal->szWIndex ){ int rc; int nByte = pWal->szWIndex4 + WALINDEX_MMAP_INCREMENT; / Unmap and remap the wal-index file. / rc = walIndexRemap(pWal, nByte); if( rc!=SQLITE_OK ){ return rc; } } / Set the wal-index entry itself / pWal->pWiData[iSlot] = iPage; / If the frame number is a multiple of 256 (frames are numbered starting ** at 1), build an index of the most recently added 256 frames. / if( (iFrame&0x000000FF)==0 ){ int i; / Iterator used while initializing aIndex / u32 aFrame; /* Pointer to array of 256 frames / int nIndex; / Number of entries in index / u8 aIndex; /* 256 bytes to build index in / u8 aTmp; /* Scratch space to use while sorting / aFrame = &pWal->pWiData[iSlot-255]; aIndex = (u8 )&pWal->pWiData[iSlot+1]; aTmp = &aIndex[256]; nIndex = 256; for(i=0; i<256; i++) aIndex[i] = (u8)i; walMergesort8(aFrame, aTmp, aIndex, &nIndex); memset(&aIndex[nIndex], aIndex[nIndex-1], 256-nIndex); } return SQLITE_OK; } /* Recover the wal-index by reading the write-ahead log file. The caller must hold RECOVER lock on the wal-index file. / static int walIndexRecover(Wal pWal){ int rc; /* Return Code / i64 nSize; / Size of log file / WalIndexHdr hdr; / Recovered wal-index header / assert( pWal->lockState==SQLITE_SHM_RECOVER ); memset(&hdr, 0, sizeof(hdr)); rc = sqlite3OsFileSize(pWal->pFd, &nSize); if( rc!=SQLITE_OK ){ return rc; } if( nSize>WAL_FRAME_HDRSIZE ){ u8 aBuf[WAL_FRAME_HDRSIZE]; / Buffer to load first frame header into / u8 aFrame = 0; /* Malloc'd buffer to load entire frame / int nFrame; / Number of bytes at aFrame / u8 aData; /* Pointer to data part of aFrame buffer / int iFrame; / Index of last frame read / i64 iOffset; / Next offset to read from log file / int nPgsz; / Page size according to the log / u32 aCksum[2]; / Running checksum / / Read in the first frame header in the file (to determine the ** database page size). / rc = sqlite3OsRead(pWal->pFd, aBuf, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ return rc; } / If the database page size is not a power of two, or is greater than ** SQLITE_MAX_PAGE_SIZE, conclude that the log file contains no valid data. / nPgsz = sqlite3Get4byte(&aBuf[0]); if( nPgsz&(nPgsz-1) \|\| nPgsz>SQLITE_MAX_PAGE_SIZE \|\| nPgsz<512 ){ goto finished; } aCksum[0] = sqlite3Get4byte(&aBuf[4]); aCksum[1] = sqlite3Get4byte(&aBuf[8]); / Malloc a buffer to read frames into. / nFrame = nPgsz + WAL_FRAME_HDRSIZE; aFrame = (u8 )sqlite3_malloc(nFrame); if( !aFrame ){ return SQLITE_NOMEM; } aData = &aFrame[WAL_FRAME_HDRSIZE]; /* Read all frames from the log file. / iFrame = 0; for(iOffset=WAL_HDRSIZE; (iOffset+nFrame)<=nSize; iOffset+=nFrame){ u32 pgno; / Database page number for frame / u32 nTruncate; / dbsize field from frame header / int isValid; / True if this frame is valid / / Read and decode the next log frame. / rc = sqlite3OsRead(pWal->pFd, aFrame, nFrame, iOffset); if( rc!=SQLITE_OK ) break; isValid = walDecodeFrame(aCksum, &pgno, &nTruncate, nPgsz, aData, aFrame); if( !isValid ) break; walIndexAppend(pWal, ++iFrame, pgno); / If nTruncate is non-zero, this is a commit record. / if( nTruncate ){ hdr.iCheck1 = aCksum[0]; hdr.iCheck2 = aCksum[1]; hdr.iLastPg = iFrame; hdr.nPage = nTruncate; hdr.pgsz = nPgsz; } } sqlite3_free(aFrame); }else{ hdr.iCheck1 = 2; hdr.iCheck2 = 3; } finished: walIndexWriteHdr(pWal, &hdr); return rc; } / Open a connection to the log file associated with database zDb. The database file does not actually have to exist. zDb is used only to figure out the name of the log file to open. If the log file does not exist it is created by this call. A SHARED lock should be held on the database file when this function is called. The purpose of this SHARED lock is to prevent any other client from unlinking the log or wal-index file. If another process were to do this just after this client opened one of these files, the system would be badly broken. / int sqlite3WalOpen( sqlite3_vfs pVfs, /* vfs module to open wal and wal-index / const char zDb, /* Name of database file / Wal ppWal / OUT: Allocated Wal handle / ){ int rc = SQLITE_OK; / Return Code / Wal pRet; /* Object to allocate and return / int flags; / Flags passed to OsOpen() / char zWal = 0; /* Path to WAL file / int nWal; / Length of zWal in bytes / assert( zDb ); if( pVfs->xShmOpen==0 ) return SQLITE_CANTOPEN; / Allocate an instance of struct Wal to return. / ppWal = 0; nWal = strlen(zDb); pRet = (Wal)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile + nWal+11); if( !pRet ) goto wal_open_out; pRet->pVfs = pVfs; pRet->pFd = (sqlite3_file )&pRet[1]; zWal = pVfs->szOsFile + (char)pRet->pFd; sqlite3_snprintf(nWal, zWal, "%s-wal-index", zDb); rc = pVfs->xShmOpen(pVfs, zWal, &pRet->pWIndex); if( rc ) goto wal_open_out; / Open file handle on the write-ahead log file. / zWal[nWal-6] = 0; flags = (SQLITE_OPEN_READWRITE\|SQLITE_OPEN_CREATE\|SQLITE_OPEN_MAIN_JOURNAL); rc = sqlite3OsOpen(pVfs, zWal, pRet->pFd, flags, &flags); wal_open_out: if( rc!=SQLITE_OK ){ if( pRet ){ pVfs->xShmClose(pRet->pWIndex); sqlite3OsClose(pRet->pFd); sqlite3_free(pRet); } } ppWal = pRet; return rc; } static int walIteratorNext( WalIterator p, / Iterator / u32 piPage, /* OUT: Next db page to write / u32 piFrame /* OUT: Wal frame to read from / ){ u32 iMin = piPage; u32 iRet = 0xFFFFFFFF; int i; int nBlock = p->nFinal; for(i=p->nSegment-1; i>=0; i--){ struct WalSegment pSegment = &p->aSegment[i]; while( pSegment->iNext<nBlock ){ u32 iPg = pSegment->aDbPage[pSegment->aIndex[pSegment->iNext]]; if( iPg>iMin ){ if( iPg<iRet ){ iRet = iPg; piFrame = i256 + 1 + pSegment->aIndex[pSegment->iNext]; } break; } pSegment->iNext++; } nBlock = 256; } piPage = iRet; return (iRet==0xFFFFFFFF); } static WalIterator walIteratorInit(Wal pWal){ u32 aData; / Content of the wal-index file / WalIterator p; /* Return value / int nSegment; / Number of segments to merge / u32 iLast; / Last frame in log / int nByte; / Number of bytes to allocate / int i; / Iterator variable / int nFinal; / Number of unindexed entries / struct WalSegment pFinal; /* Final (unindexed) segment / u8 aTmp; /* Temp space used by merge-sort / walIndexMap(pWal); aData = pWal->pWiData; iLast = pWal->hdr.iLastPg; nSegment = (iLast >> 8) + 1; nFinal = (iLast & 0x000000FF); nByte = sizeof(WalIterator) + (nSegment-1)sizeof(struct WalSegment) + 512; p = (WalIterator )sqlite3_malloc(nByte); if( p ){ memset(p, 0, nByte); p->nSegment = nSegment; p->nFinal = nFinal; } for(i=0; i<nSegment-1; i++){ p->aSegment[i].aDbPage = &aData[walIndexEntry(i256+1)]; p->aSegment[i].aIndex = (u8 )&aData[walIndexEntry(i256+1)+256]; } pFinal = &p->aSegment[nSegment-1]; pFinal->aDbPage = &aData[walIndexEntry((nSegment-1)256+1)]; pFinal->aIndex = (u8 )&pFinal[1]; aTmp = &pFinal->aIndex[256]; for(i=0; i<nFinal; i++){ pFinal->aIndex[i] = i; } walMergesort8(pFinal->aDbPage, aTmp, pFinal->aIndex, &nFinal); p->nFinal = nFinal; return p; } /* ** Free a log iterator allocated by walIteratorInit(). / static void walIteratorFree(WalIterator p){ sqlite3_free(p); } /* ** Checkpoint the contents of the log file. / static int walCheckpoint( Wal pWal, /* Wal connection / sqlite3_file pFd, /* File descriptor open on db file / int sync_flags, / Flags for OsSync() (or 0) / u8 zBuf /* Temporary buffer to use / ){ int rc; / Return code / int pgsz = pWal->hdr.pgsz; / Database page-size / WalIterator pIter = 0; /* Wal iterator context / u32 iDbpage = 0; / Next database page to write / u32 iFrame = 0; / Wal frame containing data for iDbpage / if( pWal->hdr.iLastPg==0 ){ return SQLITE_OK; } / Allocate the iterator / pIter = walIteratorInit(pWal); if( !pIter ) return SQLITE_NOMEM; / Sync the log file to disk / if( sync_flags ){ rc = sqlite3OsSync(pWal->pFd, sync_flags); if( rc!=SQLITE_OK ) goto out; } / Iterate through the contents of the log, copying data to the db file. / while( 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ rc = sqlite3OsRead(pWal->pFd, zBuf, pgsz, walFrameOffset(iFrame, pgsz) + WAL_FRAME_HDRSIZE ); if( rc!=SQLITE_OK ) goto out; rc = sqlite3OsWrite(pFd, zBuf, pgsz, (iDbpage-1)pgsz); if( rc!=SQLITE_OK ) goto out; } /* Truncate the database file / rc = sqlite3OsTruncate(pFd, ((i64)pWal->hdr.nPage(i64)pgsz)); if( rc!=SQLITE_OK ) goto out; /* Sync the database file. If successful, update the wal-index. / if( sync_flags ){ rc = sqlite3OsSync(pFd, sync_flags); if( rc!=SQLITE_OK ) goto out; } pWal->hdr.iLastPg = 0; pWal->hdr.iCheck1 = 2; pWal->hdr.iCheck2 = 3; walIndexWriteHdr(pWal, &pWal->hdr); / TODO: If a crash occurs and the current log is copied into the database there is no problem. However, if a crash occurs while writing the next transaction into the start of the log, such that: * The first transaction currently in the log is left intact, but ** * The second (or subsequent) transaction is damaged, then the database could become corrupt. The easiest thing to do would be to write and sync a dummy header into the log at this point. Unfortunately, that turns out to be an unwelcome performance hit. Alternatives are... / #if 0 memset(zBuf, 0, WAL_FRAME_HDRSIZE); rc = sqlite3OsWrite(pWal->pFd, zBuf, WAL_FRAME_HDRSIZE, 0); if( rc!=SQLITE_OK ) goto out; rc = sqlite3OsSync(pWal->pFd, pWal->sync_flags); #endif out: walIteratorFree(pIter); return rc; } / ** Close a connection to a log file. / int sqlite3WalClose( Wal pWal, /* Wal to close / sqlite3_file pFd, /* Database file / int sync_flags, / Flags to pass to OsSync() (or 0) / u8 zBuf /* Buffer of at least page-size bytes / ){ int rc = SQLITE_OK; if( pWal ){ pWal->pVfs->xShmClose(pWal->pWIndex); sqlite3OsClose(pWal->pFd); sqlite3_free(pWal); } return rc; } / Try to read the wal-index header. Attempt to verify the header checksum. If the checksum can be verified, copy the wal-index header into structure pWal->hdr. If the contents of pWal->hdr are modified by this and pChanged is not NULL, set pChanged to 1. * Otherwise leave pChanged unmodified. * ** If the checksum cannot be verified return SQLITE_ERROR. / int walIndexTryHdr(Wal pWal, int pChanged){ u32 aCksum[2] = {1, 1}; u32 aHdr[WALINDEX_HDR_NFIELD+2]; / Read the header. The caller may or may not have locked the wal-index file, meaning it is possible that an inconsistent snapshot is read from the file. If this happens, return SQLITE_ERROR. The caller will retry. Or, if the caller has already locked the file and the header still looks inconsistent, it will run recovery. / memcpy(aHdr, pWal->pWiData, sizeof(aHdr)); walChecksumBytes((u8)aHdr, sizeof(u32)WALINDEX_HDR_NFIELD, aCksum); if( aCksum[0]!=aHdr[WALINDEX_HDR_NFIELD] \|\| aCksum[1]!=aHdr[WALINDEX_HDR_NFIELD+1] ){ return SQLITE_ERROR; } if( memcmp(&pWal->hdr, aHdr, sizeof(WalIndexHdr)) ){ if( pChanged ){ pChanged = 1; } memcpy(&pWal->hdr, aHdr, sizeof(WalIndexHdr)); } return SQLITE_OK; } /* Read the wal-index header from the wal-index file into structure pWal->hdr. If attempting to verify the header checksum fails, try to recover the log before returning. If the wal-index header is successfully read, return SQLITE_OK. Otherwise an SQLite error code. / static int walIndexReadHdr(Wal pWal, int pChanged){ int rc; assert( pWal->lockState==SQLITE_SHM_READ ); walIndexMap(pWal); / First try to read the header without a lock. Verify the checksum ** before returning. This will almost always work. / if( SQLITE_OK==walIndexTryHdr(pWal, pChanged) ){ return SQLITE_OK; } / If the first attempt to read the header failed, lock the wal-index file and try again. If the header checksum verification fails this time as well, run log recovery. / if( SQLITE_OK==(rc = walSetLock(pWal, SQLITE_SHM_RECOVER)) ){ if( SQLITE_OK!=walIndexTryHdr(pWal, pChanged) ){ if( pChanged ){ pChanged = 1; } rc = walIndexRecover(pWal); if( rc==SQLITE_OK ){ rc = walIndexTryHdr(pWal, 0); } } walSetLock(pWal, SQLITE_SHM_READ); } return rc; } /* Lock a snapshot. If this call obtains a new read-lock and the database contents have been modified since the most recent call to WalCloseSnapshot() on this Wal ** connection, then pChanged is set to 1 before returning. Otherwise, it * is left unmodified. This is used by the pager layer to determine whether ** or not any cached pages may be safely reused. / int sqlite3WalOpenSnapshot(Wal pWal, int pChanged){ int rc; rc = walSetLock(pWal, SQLITE_SHM_READ); if( rc==SQLITE_OK ){ pWal->lockState = SQLITE_SHM_READ; rc = walIndexReadHdr(pWal, pChanged); if( rc!=SQLITE_OK ){ / An error occured while attempting log recovery. / sqlite3WalCloseSnapshot(pWal); }else{ / Check if the mapping needs to grow. / if( pWal->hdr.iLastPg && walIndexEntry(pWal->hdr.iLastPg)>=pWal->szWIndex ){ rc = walIndexRemap(pWal, 0); assert( rc \|\| walIndexEntry(pWal->hdr.iLastPg)<pWal->szWIndex ); } } } return rc; } / ** Unlock the current snapshot. / void sqlite3WalCloseSnapshot(Wal pWal){ if( pWal->lockState!=SQLITE_SHM_UNLOCK ){ assert( pWal->lockState==SQLITE_SHM_READ ); walSetLock(pWal, SQLITE_SHM_UNLOCK); } } /* ** Read a page from the log, if it is present. / int sqlite3WalRead(Wal pWal, Pgno pgno, int pInWal, u8 pOut){ u32 iRead = 0; u32 aData; int iFrame = (pWal->hdr.iLastPg & 0xFFFFFF00); assert( pWal->lockState==SQLITE_SHM_READ ); walIndexMap(pWal); / Do a linear search of the unindexed block of page-numbers (if any) at the end of the wal-index. An alternative to this would be to build an index in private memory each time a read transaction is ** opened on a new snapshot. / aData = pWal->pWiData; if( pWal->hdr.iLastPg ){ u32 pi = &aData[walIndexEntry(pWal->hdr.iLastPg)]; u32 piStop = pi - (pWal->hdr.iLastPg & 0xFF); while( pi!=pgno && pi!=piStop ) pi--; if( pi!=piStop ){ iRead = (pi-piStop) + iFrame; } } assert( iRead==0 \|\| aData[walIndexEntry(iRead)]==pgno ); while( iRead==0 && iFrame>0 ){ int iLow = 0; int iHigh = 255; u32 aFrame; u8 aIndex; iFrame -= 256; aFrame = &aData[walIndexEntry(iFrame+1)]; aIndex = (u8 )&aFrame[256]; while( iLow<=iHigh ){ int iTest = (iLow+iHigh)>>1; u32 iPg = aFrame[aIndex[iTest]]; if( iPg==pgno ){ iRead = iFrame + 1 + aIndex[iTest]; break; } else if( iPg<pgno ){ iLow = iTest+1; }else{ iHigh = iTest-1; } } } assert( iRead==0 \|\| aData[walIndexEntry(iRead)]==pgno ); walIndexUnmap(pWal); / If iRead is non-zero, then it is the log frame number that contains the ** required page. Read and return data from the log file. / if( iRead ){ i64 iOffset = walFrameOffset(iRead, pWal->hdr.pgsz) + WAL_FRAME_HDRSIZE; pInWal = 1; return sqlite3OsRead(pWal->pFd, pOut, pWal->hdr.pgsz, iOffset); } pInWal = 0; return SQLITE_OK; } / ** Set pPgno to the size of the database file (or zero, if unknown). / void sqlite3WalDbsize(Wal pWal, Pgno pPgno){ assert( pWal->lockState==SQLITE_SHM_READ \|\| pWal->lockState==SQLITE_SHM_WRITE ); pPgno = pWal->hdr.nPage; } / This function returns SQLITE_OK if the caller may write to the database. Otherwise, if the caller is operating on a snapshot that has already ** been overwritten by another writer, SQLITE_BUSY is returned. / int sqlite3WalWriteLock(Wal pWal, int op){ int rc; if( op ){ assert( pWal->lockState == SQLITE_SHM_READ ); rc = walSetLock(pWal, SQLITE_SHM_WRITE); }else if( pWal->lockState==SQLITE_SHM_WRITE ){ rc = walSetLock(pWal, SQLITE_SHM_READ); } return rc; } /* The Wal object passed to this function must be holding the write-lock. If any data has been written (but not committed) to the log file, this function moves the write-pointer back to the start of the transaction. Additionally, the callback function is invoked for each frame written to the log since the start of the transaction. If the callback returns other than SQLITE_OK, it is not invoked again and the error code is returned to the caller. Otherwise, if the callback function does not return an error, this function returns SQLITE_OK. / int sqlite3WalUndo(Wal pWal, int (xUndo)(void , Pgno), void pUndoCtx){ int rc = SQLITE_OK; Pgno iMax = pWal->hdr.iLastPg; Pgno iFrame; assert( pWal->lockState==SQLITE_SHM_WRITE ); walIndexReadHdr(pWal, 0); for(iFrame=pWal->hdr.iLastPg+1; iFrame<=iMax && rc==SQLITE_OK; iFrame++){ rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]); } walIndexUnmap(pWal); return rc; } / Return an integer that records the current (uncommitted) write ** position in the WAL / u32 sqlite3WalSavepoint(Wal pWal){ assert( pWal->lockState==SQLITE_SHM_WRITE ); return pWal->hdr.iLastPg; } /* Move the write position of the WAL back to iFrame. Called in ** response to a ROLLBACK TO command. / int sqlite3WalSavepointUndo(Wal pWal, u32 iFrame){ int rc = SQLITE_OK; u8 aCksum[8]; assert( pWal->lockState==SQLITE_SHM_WRITE ); pWal->hdr.iLastPg = iFrame; if( iFrame>0 ){ i64 iOffset = walFrameOffset(iFrame, pWal->hdr.pgsz) + sizeof(u32)2; rc = sqlite3OsRead(pWal->pFd, aCksum, sizeof(aCksum), iOffset); pWal->hdr.iCheck1 = sqlite3Get4byte(&aCksum[0]); pWal->hdr.iCheck2 = sqlite3Get4byte(&aCksum[4]); } return rc; } / ** Return true if data has been written but not committed to the log file. / int sqlite3WalDirty(Wal pWal){ assert( pWal->lockState==SQLITE_SHM_WRITE ); return( pWal->hdr.iLastPg!=((WalIndexHdr)pWal->pWiData)->iLastPg ); } / Write a set of frames to the log. The caller must hold the write-lock on the log file (obtained using sqlite3WalWriteLock()). / int sqlite3WalFrames( Wal pWal, /* Wal handle to write to / int nPgsz, / Database page-size in bytes / PgHdr pList, /* List of dirty pages to write / Pgno nTruncate, / Database size after this commit / int isCommit, / True if this is a commit / int sync_flags / Flags to pass to OsSync() (or 0) / ){ int rc; / Used to catch return codes / u32 iFrame; / Next frame address / u8 aFrame[WAL_FRAME_HDRSIZE]; / Buffer to assemble frame-header in / PgHdr p; /* Iterator to run through pList with. / u32 aCksum[2]; / Checksums / PgHdr pLast; /* Last frame in list / int nLast = 0; / Number of extra copies of last page / assert( WAL_FRAME_HDRSIZE==(4 2 + 2sizeof(u32)) ); assert( pList ); assert( pWal->lockState==SQLITE_SHM_WRITE ); / If this is the first frame written into the log, write the log header to the start of the log file. See comments at the top of this file for a description of the log-header format. / assert( WAL_FRAME_HDRSIZE>=WAL_HDRSIZE ); iFrame = pWal->hdr.iLastPg; if( iFrame==0 ){ sqlite3Put4byte(aFrame, nPgsz); sqlite3_randomness(8, &aFrame[4]); pWal->hdr.iCheck1 = sqlite3Get4byte(&aFrame[4]); pWal->hdr.iCheck2 = sqlite3Get4byte(&aFrame[8]); rc = sqlite3OsWrite(pWal->pFd, aFrame, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ return rc; } } aCksum[0] = pWal->hdr.iCheck1; aCksum[1] = pWal->hdr.iCheck2; / Write the log file. / for(p=pList; p; p=p->pDirty){ u32 nDbsize; / Db-size field for frame header / i64 iOffset; / Write offset in log file / iOffset = walFrameOffset(++iFrame, nPgsz); / Populate and write the frame header / nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0; walEncodeFrame(aCksum, p->pgno, nDbsize, nPgsz, p->pData, aFrame); rc = sqlite3OsWrite(pWal->pFd, aFrame, sizeof(aFrame), iOffset); if( rc!=SQLITE_OK ){ return rc; } / Write the page data / rc = sqlite3OsWrite(pWal->pFd, p->pData, nPgsz, iOffset + sizeof(aFrame)); if( rc!=SQLITE_OK ){ return rc; } pLast = p; } / Sync the log file if the 'isSync' flag was specified. / if( sync_flags ){ i64 iSegment = sqlite3OsSectorSize(pWal->pFd); i64 iOffset = walFrameOffset(iFrame+1, nPgsz); assert( isCommit ); if( iSegment<SQLITE_DEFAULT_SECTOR_SIZE ){ iSegment = SQLITE_DEFAULT_SECTOR_SIZE; } iSegment = (((iOffset+iSegment-1)/iSegment) iSegment); while( iOffset<iSegment ){ walEncodeFrame(aCksum,pLast->pgno,nTruncate,nPgsz,pLast->pData,aFrame); rc = sqlite3OsWrite(pWal->pFd, aFrame, sizeof(aFrame), iOffset); if( rc!=SQLITE_OK ){ return rc; } iOffset += WAL_FRAME_HDRSIZE; rc = sqlite3OsWrite(pWal->pFd, pLast->pData, nPgsz, iOffset); if( rc!=SQLITE_OK ){ return rc; } nLast++; iOffset += nPgsz; } rc = sqlite3OsSync(pWal->pFd, sync_flags); if( rc!=SQLITE_OK ){ return rc; } } /* Append data to the log summary. It is not necessary to lock the wal-index to do this as the RESERVED lock held on the db file guarantees that there are no other writers, and no data that may ** be in use by existing readers is being overwritten. / iFrame = pWal->hdr.iLastPg; for(p=pList; p; p=p->pDirty){ iFrame++; walIndexAppend(pWal, iFrame, p->pgno); } while( nLast>0 ){ iFrame++; nLast--; walIndexAppend(pWal, iFrame, pLast->pgno); } / Update the private copy of the header. / pWal->hdr.pgsz = nPgsz; pWal->hdr.iLastPg = iFrame; if( isCommit ){ pWal->hdr.iChange++; pWal->hdr.nPage = nTruncate; } pWal->hdr.iCheck1 = aCksum[0]; pWal->hdr.iCheck2 = aCksum[1]; / If this is a commit, update the wal-index header too. / if( isCommit ){ walIndexWriteHdr(pWal, &pWal->hdr); pWal->iCallback = iFrame; } walIndexUnmap(pWal); return rc; } / Checkpoint the database: 1. Acquire a CHECKPOINT lock 2. Copy the contents of the log into the database file. 3. Zero the wal-index header (so new readers will ignore the log). 4. Drop the CHECKPOINT lock. / int sqlite3WalCheckpoint( Wal pWal, /* Wal connection / sqlite3_file pFd, /* File descriptor open on db file / int sync_flags, / Flags to sync db file with (or 0) / u8 zBuf, /* Temporary buffer to use / int (xBusyHandler)(void ), / Pointer to busy-handler function / void pBusyHandlerArg /* Argument to pass to xBusyHandler / ){ int rc; / Return code / int isChanged = 0; / True if a new wal-index header is loaded / assert( pWal->lockState==SQLITE_SHM_UNLOCK ); / Get the CHECKPOINT lock / do { rc = walSetLock(pWal, SQLITE_SHM_CHECKPOINT); }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) ); if( rc!=SQLITE_OK ){ walSetLock(pWal, SQLITE_SHM_UNLOCK); return rc; } / Copy data from the log to the database file. / rc = walIndexReadHdr(pWal, &isChanged); if( rc==SQLITE_OK ){ rc = walCheckpoint(pWal, pFd, sync_flags, zBuf); } if( isChanged ){ / If a new wal-index header was loaded before the checkpoint was performed, then the pager-cache associated with log pWal is now out of date. So zero the cached wal-index header to ensure that next time the pager opens a snapshot on this database it knows that the cache needs to be reset. / memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); } / Release the locks. / walSetLock(pWal, SQLITE_SHM_UNLOCK); return rc; } / Return the value to pass to a sqlite3_wal_hook callback, the number of frames in the WAL at the point of the last commit since sqlite3WalCallback() was called. If no commits have occurred since ** the last call, then return 0. / int sqlite3WalCallback(Wal pWal){ u32 ret = 0; if( pWal ){ ret = pWal->iCallback; pWal->iCallback = 0; } return (int)ret; }

︙			︙
395 396 397 398 399 400 401 ~~402~~ 403 404 405 406 407 408 409	void (xCodecSizeChng)(void,int,int); /* Notify of page size changes / void (xCodecFree)(void); / Destructor for the codec / void pCodec; /* First argument to xCodec... methods / #endif char pTmpSpace; /* Pager.pageSize bytes of space for tmp use / PCache pPCache; /* Pointer to page cache object / sqlite3_backup pBackup; /* Pointer to list of ongoing backup processes / ~~~~Log~~ p~~Log~~; /* Log used by "journal_mode=wal" /~~ }; / The following global variables hold counters used for testing purposes only. These variables do not exist in ** a non-testing build. These variables are not thread-safe. */	\|	395 396 397 398 399 400 401 402 403 404 405 406 407 408 409	void (xCodecSizeChng)(void,int,int); /* Notify of page size changes / void (xCodecFree)(void); / Destructor for the codec / void pCodec; /* First argument to xCodec... methods / #endif char pTmpSpace; /* Pager.pageSize bytes of space for tmp use / PCache pPCache; /* Pointer to page cache object / sqlite3_backup pBackup; /* Pointer to list of ongoing backup processes / Wal pWal; /* Write-ahead log used by "journal_mode=wal" / }; / The following global variables hold counters used for testing purposes only. These variables do not exist in ** a non-testing build. These variables are not thread-safe. */
︙			︙
1188 1189 1190 1191 1192 1193 1194 ~~1195 1196~~ 1197 1198 1199 1200 1201 1202 1203	return rc; } /* Return true if this pager uses a write-ahead log instead of the usual rollback journal. Otherwise false. / ~~static int pagerUse~~Log~~(Pager pPager){ return (pPager->p~~Log~~!=0);~~ } /* Unlock the database file. This function is a no-op if the pager is in exclusive mode. If the pager is currently in error state, discard the contents of	\| \|	1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203	return rc; } /* Return true if this pager uses a write-ahead log instead of the usual rollback journal. Otherwise false. / static int pagerUseWal(Pager pPager){ return (pPager->pWal!=0); } /* Unlock the database file. This function is a no-op if the pager is in exclusive mode. If the pager is currently in error state, discard the contents of
︙			︙
1223 1224 1225 1226 1227 1228 1229 ~~1230 1231~~ 1232 1233 1234 1235 1236 1237 1238	values stored in Pager.dbSize etc. might become invalid if this happens. One can argue that this doesn't need to be cleared until the change-counter check fails in PagerSharedLock(). Clearing the page size cache here is being conservative. */ pPager->dbSizeValid = 0; ~~if( pagerUse~~Log~~(pPager) ){ sqlite3WalCloseSnapshot(pPager->p~~Log~~);~~ }else{ rc = osUnlock(pPager->fd, NO_LOCK); } if( rc ){ pPager->errCode = rc; } IOTRACE(("UNLOCK %p\n", pPager))	\| \|	1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238	values stored in Pager.dbSize etc. might become invalid if this happens. One can argue that this doesn't need to be cleared until the change-counter check fails in PagerSharedLock(). Clearing the page size cache here is being conservative. */ pPager->dbSizeValid = 0; if( pagerUseWal(pPager) ){ sqlite3WalCloseSnapshot(pPager->pWal); }else{ rc = osUnlock(pPager->fd, NO_LOCK); } if( rc ){ pPager->errCode = rc; } IOTRACE(("UNLOCK %p\n", pPager))
︙			︙
1376 1377 1378 1379 1380 1381 1382 ~~1383~~ 1384 1385 1386 1387 1388 1389 1390	if( pPager->state<PAGER_RESERVED ){ return SQLITE_OK; } releaseAllSavepoints(pPager); assert( isOpen(pPager->jfd) \|\| pPager->pInJournal==0 ); if( isOpen(pPager->jfd) ){ ~~assert( !pagerUse~~Log~~(pPager) );~~ /* Finalize the journal file. */ if( sqlite3IsMemJournal(pPager->jfd) ){ assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); sqlite3OsClose(pPager->jfd); }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){ if( pPager->journalOff==0 ){	\|	1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390	if( pPager->state<PAGER_RESERVED ){ return SQLITE_OK; } releaseAllSavepoints(pPager); assert( isOpen(pPager->jfd) \|\| pPager->pInJournal==0 ); if( isOpen(pPager->jfd) ){ assert( !pagerUseWal(pPager) ); /* Finalize the journal file. */ if( sqlite3IsMemJournal(pPager->jfd) ){ assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); sqlite3OsClose(pPager->jfd); }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){ if( pPager->journalOff==0 ){
︙			︙
1422 1423 1424 1425 1426 1427 1428 ~~1429 1430~~ 1431 1432 1433 1434 1435 1436 1437	#endif } sqlite3BitvecDestroy(pPager->pInJournal); pPager->pInJournal = 0; pPager->nRec = 0; sqlite3PcacheCleanAll(pPager->pPCache); ~~if( pagerUse~~Log~~(pPager) ){ rc2 = sqlite3WalWriteLock(pPager->p~~Log~~, 0);~~ pPager->state = PAGER_SHARED; }else if( !pPager->exclusiveMode ){ rc2 = osUnlock(pPager->fd, SHARED_LOCK); pPager->state = PAGER_SHARED; pPager->changeCountDone = 0; }else if( pPager->state==PAGER_SYNCED ){ pPager->state = PAGER_EXCLUSIVE;	\| \|	1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437	#endif } sqlite3BitvecDestroy(pPager->pInJournal); pPager->pInJournal = 0; pPager->nRec = 0; sqlite3PcacheCleanAll(pPager->pPCache); if( pagerUseWal(pPager) ){ rc2 = sqlite3WalWriteLock(pPager->pWal, 0); pPager->state = PAGER_SHARED; }else if( !pPager->exclusiveMode ){ rc2 = osUnlock(pPager->fd, SHARED_LOCK); pPager->state = PAGER_SHARED; pPager->changeCountDone = 0; }else if( pPager->state==PAGER_SYNCED ){ pPager->state = PAGER_EXCLUSIVE;
︙			︙
1536 1537 1538 1539 1540 1541 1542 ~~1543~~ 1544 1545 1546 1547 1548 1549 1550	assert( (isMainJrnl&~1)==0 ); /* isMainJrnl is 0 or 1 / assert( (isSavepnt&~1)==0 ); / isSavepnt is 0 or 1 / assert( isMainJrnl \|\| pDone ); / pDone always used on sub-journals / assert( isSavepnt \|\| pDone==0 ); / pDone never used on non-savepoint / aData = pPager->pTmpSpace; assert( aData ); / Temp storage must have already been allocated / ~~assert( pagerUse~~Log~~(pPager)==0 \|\| (!isMainJrnl && isSavepnt) );~~ / Read the page number and page data from the journal or sub-journal ** file. Return an error code to the caller if an IO error occurs. / jfd = isMainJrnl ? pPager->jfd : pPager->sjfd; rc = read32bits(jfd, pOffset, &pgno); if( rc!=SQLITE_OK ) return rc;	\|	1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550	assert( (isMainJrnl&~1)==0 ); /* isMainJrnl is 0 or 1 / assert( (isSavepnt&~1)==0 ); / isSavepnt is 0 or 1 / assert( isMainJrnl \|\| pDone ); / pDone always used on sub-journals / assert( isSavepnt \|\| pDone==0 ); / pDone never used on non-savepoint / aData = pPager->pTmpSpace; assert( aData ); / Temp storage must have already been allocated / assert( pagerUseWal(pPager)==0 \|\| (!isMainJrnl && isSavepnt) ); / Read the page number and page data from the journal or sub-journal ** file. Return an error code to the caller if an IO error occurs. / jfd = isMainJrnl ? pPager->jfd : pPager->sjfd; rc = read32bits(jfd, pOffset, &pgno); if( rc!=SQLITE_OK ) return rc;
︙			︙
1606 1607 1608 1609 1610 1611 1612 ~~1613~~ 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 ~~1634~~ 1635 1636 1637 1638 1639 1640 1641	in the main journal either because the page is not in cache or else the page is marked as needSync==0. 2008-04-14: When attempting to vacuum a corrupt database file, it is possible to fail a statement on a database that does not yet exist. Do not attempt to write if database file has never been opened. / ~~if( pagerUse~~Log~~(pPager) ){~~ pPg = 0; }else{ pPg = pager_lookup(pPager, pgno); } assert( pPg \|\| !MEMDB ); PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n", PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8)aData), (isMainJrnl?"main-journal":"sub-journal") )); if( isMainJrnl ){ isSynced = pPager->noSync \|\| (pOffset <= pPager->journalHdr); }else{ isSynced = (pPg==0 \|\| 0==(pPg->flags & PGHDR_NEED_SYNC)); } if( (pPager->state>=PAGER_EXCLUSIVE) && isOpen(pPager->fd) && isSynced ){ i64 ofst = (pgno-1)(i64)pPager->pageSize; testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); ~~assert( !pagerUse~~Log~~(pPager) );~~ rc = sqlite3OsWrite(pPager->fd, (u8)aData, pPager->pageSize, ofst); if( pgno>pPager->dbFileSize ){ pPager->dbFileSize = pgno; } if( pPager->pBackup ){ CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM); sqlite3BackupUpdate(pPager->pBackup, pgno, (u8)aData);	\| \|	1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641	in the main journal either because the page is not in cache or else the page is marked as needSync==0. 2008-04-14: When attempting to vacuum a corrupt database file, it is possible to fail a statement on a database that does not yet exist. Do not attempt to write if database file has never been opened. / if( pagerUseWal(pPager) ){ pPg = 0; }else{ pPg = pager_lookup(pPager, pgno); } assert( pPg \|\| !MEMDB ); PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n", PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8)aData), (isMainJrnl?"main-journal":"sub-journal") )); if( isMainJrnl ){ isSynced = pPager->noSync \|\| (pOffset <= pPager->journalHdr); }else{ isSynced = (pPg==0 \|\| 0==(pPg->flags & PGHDR_NEED_SYNC)); } if( (pPager->state>=PAGER_EXCLUSIVE) && isOpen(pPager->fd) && isSynced ){ i64 ofst = (pgno-1)(i64)pPager->pageSize; testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); assert( !pagerUseWal(pPager) ); rc = sqlite3OsWrite(pPager->fd, (u8)aData, pPager->pageSize, ofst); if( pgno>pPager->dbFileSize ){ pPager->dbFileSize = pgno; } if( pPager->pBackup ){ CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM); sqlite3BackupUpdate(pPager->pBackup, pgno, (u8)aData);
︙			︙
1692 1693 1694 1695 1696 1697 1698 ~~1699~~ 1700 1701 1702 1703 1704 1705 1706	the PGHDR_NEED_SYNC flag is cleared, if the page is written to again within this transaction, it will be marked as dirty but the PGHDR_NEED_SYNC flag will not be set. It could then potentially be written out into the database file before its journal file segment is synced. If a crash occurs during or following this, database corruption may ensue. / ~~assert( !pagerUse~~Log~~(pPager) );~~ sqlite3PcacheMakeClean(pPg); } #ifdef SQLITE_CHECK_PAGES pPg->pageHash = pager_pagehash(pPg); #endif / If this was page 1, then restore the value of Pager.dbFileVers. ** Do this before any decoding. */	\|	1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706	the PGHDR_NEED_SYNC flag is cleared, if the page is written to again within this transaction, it will be marked as dirty but the PGHDR_NEED_SYNC flag will not be set. It could then potentially be written out into the database file before its journal file segment is synced. If a crash occurs during or following this, database corruption may ensue. / assert( !pagerUseWal(pPager) ); sqlite3PcacheMakeClean(pPg); } #ifdef SQLITE_CHECK_PAGES pPg->pageHash = pager_pagehash(pPg); #endif / If this was page 1, then restore the value of Pager.dbFileVers. ** Do this before any decoding. */
︙			︙
2184 2185 2186 2187 2188 2189 2190 ~~2191~~ 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 ~~2202~~ 2203 ~~2204~~ 2205 ~~2206~~ 2207 2208 2209 2210 2211 2212 2213	** Otherwise, SQLITE_OK is returned. / static int readDbPage(PgHdr pPg){ Pager pPager = pPg->pPager; / Pager object associated with page pPg / Pgno pgno = pPg->pgno; / Page number to read / int rc = SQLITE_OK; / Return code / i64 iOffset; / Byte offset of file to read from / ~~int isIn~~Log~~ = 0; / True if page is in log file /~~ assert( pPager->state>=PAGER_SHARED && !MEMDB ); assert( isOpen(pPager->fd) ); if( NEVER(!isOpen(pPager->fd)) ){ assert( pPager->tempFile ); memset(pPg->pData, 0, pPager->pageSize); return SQLITE_OK; } ~~if( pagerUse~~Log~~(pPager) ){~~ / Try to pull the page from the write-ahead log. / ~~rc = sqlite3WalRead(pPager->p~~Log~~, pgno, &isIn~~Log~~, pPg->pData);~~ } ~~if( rc==SQLITE_OK && !isIn~~Log~~ ){~~ iOffset = (pgno-1)(i64)pPager->pageSize; rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); if( rc==SQLITE_IOERR_SHORT_READ ){ rc = SQLITE_OK; } }	\| \| \| \|	2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213	** Otherwise, SQLITE_OK is returned. / static int readDbPage(PgHdr pPg){ Pager pPager = pPg->pPager; / Pager object associated with page pPg / Pgno pgno = pPg->pgno; / Page number to read / int rc = SQLITE_OK; / Return code / i64 iOffset; / Byte offset of file to read from / int isInWal = 0; / True if page is in log file / assert( pPager->state>=PAGER_SHARED && !MEMDB ); assert( isOpen(pPager->fd) ); if( NEVER(!isOpen(pPager->fd)) ){ assert( pPager->tempFile ); memset(pPg->pData, 0, pPager->pageSize); return SQLITE_OK; } if( pagerUseWal(pPager) ){ / Try to pull the page from the write-ahead log. / rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pPg->pData); } if( rc==SQLITE_OK && !isInWal ){ iOffset = (pgno-1)(i64)pPager->pageSize; rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); if( rc==SQLITE_IOERR_SHORT_READ ){ rc = SQLITE_OK; } }
︙			︙
2274 2275 2276 2277 2278 2279 2280 ~~2281~~ 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 ~~2293~~ 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 ~~2305~~ 2306 2307 2308 2309 2310 2311 2312	return rc; } /* ** This function is called to rollback a transaction on a WAL database. / ~~static int pagerRollback~~Log~~(Pager pPager){~~ int rc; /* Return Code / PgHdr pList; /* List of dirty pages to revert / / Normally, if a transaction is rolled back, any backup processes are updated as data is copied out of the rollback journal and into the database. This is not generally possible with a WAL database, as rollback involves simply truncating the log file. Therefore, if one or more frames have already been written to the log (and therefore also copied into the backup databases) as part of this transaction, the backups must be restarted. / ~~if( sqlite3WalDirty(pPager->p~~Log~~) ){~~ sqlite3BackupRestart(pPager->pBackup); } / For all pages in the cache that are currently dirty or have already been written (but not committed) to the log file, do one of the following: + Discard the cached page (if refcount==0), or ** + Reload page content from the database (if refcount>0). / pPager->dbSize = pPager->dbOrigSize; ~~rc = sqlite3WalUndo(pPager->p~~Log~~, pagerUndoCallback, (void )pPager);~~ pList = sqlite3PcacheDirtyList(pPager->pPCache); while( pList && rc==SQLITE_OK ){ PgHdr pNext = pList->pDirty; rc = pagerUndoCallback((void )pPager, pList->pgno); pList = pNext; }	\| \| \|	2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312	return rc; } /* ** This function is called to rollback a transaction on a WAL database. / static int pagerRollbackWal(Pager pPager){ int rc; /* Return Code / PgHdr pList; /* List of dirty pages to revert / / Normally, if a transaction is rolled back, any backup processes are updated as data is copied out of the rollback journal and into the database. This is not generally possible with a WAL database, as rollback involves simply truncating the log file. Therefore, if one or more frames have already been written to the log (and therefore also copied into the backup databases) as part of this transaction, the backups must be restarted. / if( sqlite3WalDirty(pPager->pWal) ){ sqlite3BackupRestart(pPager->pBackup); } / For all pages in the cache that are currently dirty or have already been written (but not committed) to the log file, do one of the following: + Discard the cached page (if refcount==0), or ** + Reload page content from the database (if refcount>0). / pPager->dbSize = pPager->dbOrigSize; rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void )pPager); pList = sqlite3PcacheDirtyList(pPager->pPCache); while( pList && rc==SQLITE_OK ){ PgHdr pNext = pList->pDirty; rc = pagerUndoCallback((void )pPager, pList->pgno); pList = pNext; }
︙			︙
2366 2367 2368 2369 2370 2371 2372 ~~2373 2374~~ 2375 2376 2377 2378 2379 2380 2381 2382 ~~2383~~ 2384 2385 2386 2387 2388 2389 2390 2391 ~~2392~~ 2393 2394 2395 2396 2397 2398 2399	} /* Set the database size back to the value it was before the savepoint ** being reverted was opened. / pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize; ~~if( !pSavepoint && pagerUse~~Log~~(pPager) ){ return pagerRollback~~Log~~(pPager);~~ } / Use pPager->journalOff as the effective size of the main rollback journal. The actual file might be larger than this in PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything ** past pPager->journalOff is off-limits to us. / szJ = pPager->journalOff; ~~assert( pagerUse~~Log~~(pPager)==0 \|\| szJ==0 );~~ / Begin by rolling back records from the main journal starting at PagerSavepoint.iOffset and continuing to the next journal header. There might be records in the main journal that have a page number greater than the current database size (pPager->dbSize) but those will be skipped automatically. Pages are added to pDone as they ** are played back. */ ~~if( pSavepoint && !pagerUse~~Log~~(pPager) ){~~ iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ; pPager->journalOff = pSavepoint->iOffset; while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){ rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1); } assert( rc!=SQLITE_DONE ); }else{	\| \| \| \|	2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399	} /* Set the database size back to the value it was before the savepoint ** being reverted was opened. / pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize; if( !pSavepoint && pagerUseWal(pPager) ){ return pagerRollbackWal(pPager); } / Use pPager->journalOff as the effective size of the main rollback journal. The actual file might be larger than this in PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything ** past pPager->journalOff is off-limits to us. / szJ = pPager->journalOff; assert( pagerUseWal(pPager)==0 \|\| szJ==0 ); / Begin by rolling back records from the main journal starting at PagerSavepoint.iOffset and continuing to the next journal header. There might be records in the main journal that have a page number greater than the current database size (pPager->dbSize) but those will be skipped automatically. Pages are added to pDone as they ** are played back. */ if( pSavepoint && !pagerUseWal(pPager) ){ iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ; pPager->journalOff = pSavepoint->iOffset; while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){ rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1); } assert( rc!=SQLITE_DONE ); }else{
︙			︙
2433 2434 2435 2436 2437 2438 2439 ~~2440 2441~~ 2442 2443 2444 2445 2446 2447 2448	previously rolled back out of the main journal (and are hence in pDone) will be skipped. Out-of-range pages are also skipped. / if( pSavepoint ){ u32 ii; / Loop counter / i64 offset = pSavepoint->iSubRec(4+pPager->pageSize); ~~if( pagerUse~~Log~~(pPager) ){ rc = sqlite3WalSavepointUndo(pPager->p~~Log~~, pSavepoint->iFrame);~~ } for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && ii<pPager->nSubRec; ii++){ assert( offset==ii*(4+pPager->pageSize) ); rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1); } assert( rc!=SQLITE_DONE ); }	\| \|	2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448	previously rolled back out of the main journal (and are hence in pDone) will be skipped. Out-of-range pages are also skipped. / if( pSavepoint ){ u32 ii; / Loop counter / i64 offset = pSavepoint->iSubRec(4+pPager->pageSize); if( pagerUseWal(pPager) ){ rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->iFrame); } for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && ii<pPager->nSubRec; ii++){ assert( offset==ii*(4+pPager->pageSize) ); rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1); } assert( rc!=SQLITE_DONE ); }
︙			︙
2746 2747 2748 2749 2750 2751 2752 ~~2753 2754~~ 2755 2756 2757 2758 2759 2760 2761	/* Determine the number of pages in the file. Store this in nPage. / if( pPager->dbSizeValid ){ nPage = pPager->dbSize; }else{ int rc; / Error returned by OsFileSize() / i64 n = 0; / File size in bytes returned by OsFileSize() */ ~~if( pagerUse~~Log~~(pPager) ){ sqlite3WalDbsize(pPager->p~~Log~~, &nPage);~~ } if( nPage==0 ){ assert( isOpen(pPager->fd) \|\| pPager->tempFile ); if( isOpen(pPager->fd) ){ if( SQLITE_OK!=(rc = sqlite3OsFileSize(pPager->fd, &n)) ){ pager_error(pPager, rc);	\| \|	2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761	/* Determine the number of pages in the file. Store this in nPage. / if( pPager->dbSizeValid ){ nPage = pPager->dbSize; }else{ int rc; / Error returned by OsFileSize() / i64 n = 0; / File size in bytes returned by OsFileSize() */ if( pagerUseWal(pPager) ){ sqlite3WalDbsize(pPager->pWal, &nPage); } if( nPage==0 ){ assert( isOpen(pPager->fd) \|\| pPager->tempFile ); if( isOpen(pPager->fd) ){ if( SQLITE_OK!=(rc = sqlite3OsFileSize(pPager->fd, &n)) ){ pager_error(pPager, rc);
︙			︙
2932 2933 2934 2935 2936 2937 2938 ~~2939~~ 2940 2941 ~~2942~~ 2943 2944 2945 2946 2947 2948 2949	int sqlite3PagerClose(Pager pPager){ u8 pTmp = (u8 )pPager->pTmpSpace; disable_simulated_io_errors(); sqlite3BeginBenignMalloc(); pPager->errCode = 0; pPager->exclusiveMode = 0; ~~sqlite3WalClose(pPager->p~~Log~~, pPager->fd,~~ (pPager->noSync ? 0 : pPager->sync_flags), pTmp ); ~~pPager->p~~Log~~ = 0;~~ pager_reset(pPager); if( MEMDB ){ pager_unlock(pPager); }else{ / Set Pager.journalHdr to -1 for the benefit of the pager_playback() call which may be made from within pagerUnlockAndRollback(). If it is not -1, then the unsynced portion of an open journal file may	\| \|	2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949	int sqlite3PagerClose(Pager pPager){ u8 pTmp = (u8 )pPager->pTmpSpace; disable_simulated_io_errors(); sqlite3BeginBenignMalloc(); pPager->errCode = 0; pPager->exclusiveMode = 0; sqlite3WalClose(pPager->pWal, pPager->fd, (pPager->noSync ? 0 : pPager->sync_flags), pTmp ); pPager->pWal = 0; pager_reset(pPager); if( MEMDB ){ pager_unlock(pPager); }else{ / Set Pager.journalHdr to -1 for the benefit of the pager_playback() call which may be made from within pagerUnlockAndRollback(). If it is not -1, then the unsynced portion of an open journal file may
︙			︙
3172 3173 3174 3175 3176 3177 3178 ~~3179~~ 3180 3181 3182 3183 3184 3185 3186	While the pager is in the RESERVED state, the original database file is unchanged and we can rollback without having to playback the journal into the original database file. Once we transition to EXCLUSIVE, it means the database file has been changed and any rollback will require a journal playback. / ~~assert( !pagerUse~~Log~~(pList->pPager) );~~ assert( pPager->state>=PAGER_RESERVED ); rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); / If the file is a temp-file has not yet been opened, open it now. It is not possible for rc to be other than SQLITE_OK if this branch is taken, as pager_wait_on_lock() is a no-op for temp-files. */	\|	3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186	While the pager is in the RESERVED state, the original database file is unchanged and we can rollback without having to playback the journal into the original database file. Once we transition to EXCLUSIVE, it means the database file has been changed and any rollback will require a journal playback. / assert( !pagerUseWal(pList->pPager) ); assert( pPager->state>=PAGER_RESERVED ); rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); / If the file is a temp-file has not yet been opened, open it now. It is not possible for rc to be other than SQLITE_OK if this branch is taken, as pager_wait_on_lock() is a no-op for temp-files. */
︙			︙
3261 3262 3263 3264 3265 3266 3267 ~~3268~~ 3269 3270 3271 3272 3273 3274 3275	void pData = pPg->pData; i64 offset = pPager->nSubRec(4+pPager->pageSize); char *pData2; CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); ~~assert( pagerUse~~Log~~(pPager)~~ \|\| pageInJournal(pPg) \|\| pPg->pgno>pPager->dbOrigSize ); rc = write32bits(pPager->sjfd, offset, pPg->pgno); if( rc==SQLITE_OK ){ rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4); }	\|	3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275	void pData = pPg->pData; i64 offset = pPager->nSubRec(4+pPager->pageSize); char *pData2; CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); assert( pagerUseWal(pPager) \|\| pageInJournal(pPg) \|\| pPg->pgno>pPager->dbOrigSize ); rc = write32bits(pPager->sjfd, offset, pPg->pgno); if( rc==SQLITE_OK ){ rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4); }
︙			︙
3284 3285 3286 3287 3288 3289 3290 ~~3291~~ 3292 3293 3294 3295 3296 3297 3298 3299 ~~3300 3301~~ 3302 3303 3304 3305 3306 3307 3308	/* This function is a wrapper around sqlite3WalFrames(). As well as logging the contents of the list of pages headed by pList (connected by pDirty), this function notifies any active backup processes that the pages have changed. / ~~static int pager~~Log~~Frames(~~ Pager pPager, /* Pager object / PgHdr pList, /* List of frames to log / Pgno nTruncate, / Database size after this commit / int isCommit, / True if this is a commit / int sync_flags / Flags to pass to OsSync() (or 0) / ){ int rc; / Return code / ~~assert( pPager->p~~Log~~ ); rc = sqlite3WalFrames(pPager->p~~Log~~,~~ pPager->pageSize, pList, nTruncate, isCommit, sync_flags ); if( rc==SQLITE_OK && pPager->pBackup ){ PgHdr p; for(p=pList; p; p=p->pDirty){ sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); }	\| \| \|	3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308	/* This function is a wrapper around sqlite3WalFrames(). As well as logging the contents of the list of pages headed by pList (connected by pDirty), this function notifies any active backup processes that the pages have changed. / static int pagerWalFrames( Pager pPager, /* Pager object / PgHdr pList, /* List of frames to log / Pgno nTruncate, / Database size after this commit / int isCommit, / True if this is a commit / int sync_flags / Flags to pass to OsSync() (or 0) / ){ int rc; / Return code / assert( pPager->pWal ); rc = sqlite3WalFrames(pPager->pWal, pPager->pageSize, pList, nTruncate, isCommit, sync_flags ); if( rc==SQLITE_OK && pPager->pBackup ){ PgHdr p; for(p=pList; p; p=p->pDirty){ sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); }
︙			︙
3333 3334 3335 3336 3337 3338 3339 ~~3340~~ 3341 3342 3343 3344 3345 ~~3346~~ 3347 3348 3349 3350 3351 3352 3353	Pager pPager = (Pager )p; int rc = SQLITE_OK; assert( pPg->pPager==pPager ); assert( pPg->flags&PGHDR_DIRTY ); pPg->pDirty = 0; ~~if( pagerUse~~Log~~(pPager) ){~~ /* Write a single frame for this page to the log. / if( subjRequiresPage(pPg) ){ rc = subjournalPage(pPg); } if( rc==SQLITE_OK ){ ~~rc = pager~~Log~~Frames(pPager, pPg, 0, 0, 0);~~ } }else{ / The doNotSync flag is set by the sqlite3PagerWrite() function while it is journalling a set of two or more database pages that are stored on the same disk sector. Syncing the journal is not allowed while this is happening as it is important that all members of such a set of pages are synced to disk together. So, if the page this function	\| \|	3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353	Pager pPager = (Pager )p; int rc = SQLITE_OK; assert( pPg->pPager==pPager ); assert( pPg->flags&PGHDR_DIRTY ); pPg->pDirty = 0; if( pagerUseWal(pPager) ){ /* Write a single frame for this page to the log. / if( subjRequiresPage(pPg) ){ rc = subjournalPage(pPg); } if( rc==SQLITE_OK ){ rc = pagerWalFrames(pPager, pPg, 0, 0, 0); } }else{ / The doNotSync flag is set by the sqlite3PagerWrite() function while it is journalling a set of two or more database pages that are stored on the same disk sector. Syncing the journal is not allowed while this is happening as it is important that all members of such a set of pages are synced to disk together. So, if the page this function
︙			︙
3832 3833 3834 3835 3836 3837 3838 ~~3839 3840~~ 3841 3842 ~~3843 3844~~ 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 ~~3857~~ 3858 ~~3859~~ 3860 3861 3862 3863 3864 3865 3866	a WAL, this ensures there is no race condition between the xAccess() below and an xDelete() being executed by some other connection. / static int pagerHasWAL(Pager pPager, int pExists){ int rc; / Return code / if( !pPager->tempFile ){ ~~char z~~Log~~ = sqlite3_mprintf("%s-wal", pPager->zFilename); if( !z~~Log~~ ){~~ rc = SQLITE_NOMEM; }else{ ~~rc = sqlite3OsAccess(pPager->pVfs, z~~Log~~, SQLITE_ACCESS_EXISTS, pExists); sqlite3_free(z~~Log~~);~~ } }else{ rc = SQLITE_OK; pExists = 0; } return rc; } static int pagerOpenSnapshot(Pager pPager){ int rc; /* Return code / int changed = 0; / True if cache must be reset */ ~~assert( pagerUse~~Log~~(pPager) );~~ ~~rc = sqlite3WalOpenSnapshot(pPager->p~~Log~~, &changed);~~ if( rc==SQLITE_OK ){ int dummy; if( changed ){ pager_reset(pPager); assert( pPager->errCode \|\| pPager->dbSizeValid==0 ); } rc = sqlite3PagerPagecount(pPager, &dummy);	\| \| \| \| \| \|	3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866	a WAL, this ensures there is no race condition between the xAccess() below and an xDelete() being executed by some other connection. / static int pagerHasWAL(Pager pPager, int pExists){ int rc; / Return code / if( !pPager->tempFile ){ char zWal = sqlite3_mprintf("%s-wal", pPager->zFilename); if( !zWal ){ rc = SQLITE_NOMEM; }else{ rc = sqlite3OsAccess(pPager->pVfs, zWal, SQLITE_ACCESS_EXISTS, pExists); sqlite3_free(zWal); } }else{ rc = SQLITE_OK; pExists = 0; } return rc; } static int pagerOpenSnapshot(Pager pPager){ int rc; /* Return code / int changed = 0; / True if cache must be reset */ assert( pagerUseWal(pPager) ); rc = sqlite3WalOpenSnapshot(pPager->pWal, &changed); if( rc==SQLITE_OK ){ int dummy; if( changed ){ pager_reset(pPager); assert( pPager->errCode \|\| pPager->dbSizeValid==0 ); } rc = sqlite3PagerPagecount(pPager, &dummy);
︙			︙
3919 3920 3921 3922 3923 3924 3925 ~~3926~~ 3927 3928 3929 3930 3931 3932 3933	if( isOpen(pPager->jfd) \|\| pPager->zJournal ){ isErrorReset = 1; } pPager->errCode = SQLITE_OK; pager_reset(pPager); } ~~if( pagerUse~~Log~~(pPager) ){~~ rc = pagerOpenSnapshot(pPager); }else if( pPager->state==PAGER_UNLOCK \|\| isErrorReset ){ sqlite3_vfs * const pVfs = pPager->pVfs; int isHotJournal = 0; int isWal = 0; assert( !MEMDB ); assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );	\|	3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933	if( isOpen(pPager->jfd) \|\| pPager->zJournal ){ isErrorReset = 1; } pPager->errCode = SQLITE_OK; pager_reset(pPager); } if( pagerUseWal(pPager) ){ rc = pagerOpenSnapshot(pPager); }else if( pPager->state==PAGER_UNLOCK \|\| isErrorReset ){ sqlite3_vfs * const pVfs = pPager->pVfs; int isHotJournal = 0; int isWal = 0; assert( !MEMDB ); assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );