Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Initial code for incremental checkpoint in WAL mode. This check-in compiles on unix and runs as long as you do not engage WAL mode. WAL mode crashes and burns. Consider this check-in a baseline implementation for getting the new capability up and running. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | wal-incr-ckpt |
Files: | files | file ages | folders |
SHA1: |
ef3ba7a17ff90674d702e5694b9e7928 |
User & Date: | drh 2010-05-30 19:55:16.000 |
Context
2010-05-31
| ||
01:41 | WAL runs but quickly deadlocks. (check-in: ace58acbf1 user: drh tags: wal-incr-ckpt) | |
2010-05-30
| ||
19:55 | Initial code for incremental checkpoint in WAL mode. This check-in compiles on unix and runs as long as you do not engage WAL mode. WAL mode crashes and burns. Consider this check-in a baseline implementation for getting the new capability up and running. (check-in: ef3ba7a17f user: drh tags: wal-incr-ckpt) | |
2010-05-29
| ||
08:40 | Add tests to fkey2.test to check that ON CONFLICT clauses do not affect SQLite's behaviour when an FK constraint is violated. (check-in: e9e5b10019 user: dan tags: trunk) | |
Changes
Changes to src/os.c.
︙ | ︙ | |||
106 107 108 109 110 111 112 | } int sqlite3OsShmGet(sqlite3_file *id,int reqSize,int *pSize,void volatile **pp){ return id->pMethods->xShmGet(id, reqSize, pSize, pp); } int sqlite3OsShmRelease(sqlite3_file *id){ return id->pMethods->xShmRelease(id); } | | | | 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | } int sqlite3OsShmGet(sqlite3_file *id,int reqSize,int *pSize,void volatile **pp){ return id->pMethods->xShmGet(id, reqSize, pSize, pp); } int sqlite3OsShmRelease(sqlite3_file *id){ return id->pMethods->xShmRelease(id); } int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){ return id->pMethods->xShmLock(id, offset, n, flags); } void sqlite3OsShmBarrier(sqlite3_file *id){ id->pMethods->xShmBarrier(id); } int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){ return id->pMethods->xShmClose(id, deleteFlag); } |
︙ | ︙ |
Changes to src/os.h.
︙ | ︙ | |||
243 244 245 246 247 248 249 | #define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0 int sqlite3OsSectorSize(sqlite3_file *id); int sqlite3OsDeviceCharacteristics(sqlite3_file *id); int sqlite3OsShmOpen(sqlite3_file *id); int sqlite3OsShmSize(sqlite3_file *id, int, int*); int sqlite3OsShmGet(sqlite3_file *id, int, int*, void volatile**); int sqlite3OsShmRelease(sqlite3_file *id); | | | 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 | #define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0 int sqlite3OsSectorSize(sqlite3_file *id); int sqlite3OsDeviceCharacteristics(sqlite3_file *id); int sqlite3OsShmOpen(sqlite3_file *id); int sqlite3OsShmSize(sqlite3_file *id, int, int*); int sqlite3OsShmGet(sqlite3_file *id, int, int*, void volatile**); int sqlite3OsShmRelease(sqlite3_file *id); int sqlite3OsShmLock(sqlite3_file *id, int, int, int); void sqlite3OsShmBarrier(sqlite3_file *id); int sqlite3OsShmClose(sqlite3_file *id, int); /* ** Functions for accessing sqlite3_vfs methods */ int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *); |
︙ | ︙ |
Changes to src/os_unix.c.
︙ | ︙ | |||
3164 3165 3166 3167 3168 3169 3170 | ** ** All other fields are read/write. The unixShm.pFile->mutex must be held ** while accessing any read/write fields. */ struct unixShm { unixShmNode *pShmNode; /* The underlying unixShmNode object */ unixShm *pNext; /* Next unixShm with the same unixShmNode */ | < | | < < < < < < < < < | | | > > | | | | | | > | > > | | < < < > | < < | < > > > > > > < < < < < < < < < < | | < < < < < < < < < < < | > > | | | | | | | | | | | | < | < > < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 | ** ** All other fields are read/write. The unixShm.pFile->mutex must be held ** while accessing any read/write fields. */ struct unixShm { unixShmNode *pShmNode; /* The underlying unixShmNode object */ unixShm *pNext; /* Next unixShm with the same unixShmNode */ u8 hasMutex; /* True if holding the unixShmNode mutex */ u8 hasMutexBuf; /* True if holding pFile->mutexBuf */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ #ifdef SQLITE_DEBUG u8 id; /* Id of this connection within its unixShmNode */ #endif }; /* ** Constants used for locking */ #define UNIX_SHM_BASE 80 /* Byte offset of the first lock byte */ #define UNIX_SHM_DMS 80 /* The deadman switch lock */ #ifdef SQLITE_DEBUG /* ** Return a pointer to a nul-terminated string in static memory that ** describes a locking mask. The string is of the form "MSABCD" with ** each character representing a lock. "M" for MUTEX, "S" for DMS, ** and "A" through "D" for the region locks. If a lock is held, the ** letter is shown. If the lock is not held, the letter is converted ** to ".". ** ** This routine is for debugging purposes only and does not appear ** in a production build. */ static const char *unixShmLockString(u16 maskShared, u16 maskExclusive){ static char zBuf[52]; static int iBuf = 0; int i; u16 mask; char *z; z = &zBuf[iBuf]; iBuf += 16; if( iBuf>=sizeof(zBuf) ) iBuf = 0; for(i=0, mask=1; i<SQLITE_SHM_NLOCK; i++, mask += mask){ if( mask & maskShared ){ z[i] = 's'; }else if( mask & maskExclusive ){ z[i] = 'E'; }else{ z[i] = '.'; } } z[i] = 0; return z; } #endif /* SQLITE_DEBUG */ /* ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. ** ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking ** otherwise. */ static int unixShmSystemLock( unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */ int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ int ofst, /* First byte of the locking range */ int n /* Number of bytes to lock */ ){ struct flock f; /* The posix advisory locking structure */ int rc = SQLITE_OK; /* Result code form fcntl() */ /* Access to the unixShmNode object is serialized by the caller */ assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 ); /* Shared locks never span more than one byte */ assert( n==1 || lockType!=F_RDLCK ); /* Locks are within range */ assert( n>=1 && ofst>=0 && ofst+n<SQLITE_SHM_NLOCK ); /* Initialize the locking parameters */ memset(&f, 0, sizeof(f)); f.l_type = lockType; f.l_whence = SEEK_SET; f.l_start = ofst+UNIX_SHM_BASE; f.l_len = n; rc = fcntl(pShmNode->h, F_SETLK, &f); rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY; /* Update the global lock state and do debug tracing */ #ifdef SQLITE_DEBUG { u16 mask; OSTRACE(("SHM-LOCK ")); mask = (1<<(ofst+n)) - (1<<ofst); if( rc==SQLITE_OK ){ if( lockType==F_UNLCK ){ OSTRACE(("unlock %d ok", ofst)); pShmNode->exclMask &= ~mask; pShmNode->sharedMask &= ~mask; }else if( lockType==F_RDLCK ){ OSTRACE(("read-lock %d ok", ofst)); pShmNode->exclMask &= ~mask; pShmNode->sharedMask |= mask; }else{ assert( lockType==F_WRLCK ); OSTRACE(("write-lock %d ok", ofst)); pShmNode->exclMask |= mask; pShmNode->sharedMask &= ~mask; } }else{ if( lockType==F_UNLCK ){ OSTRACE(("unlock %d failed", ofst)); }else if( lockType==F_RDLCK ){ OSTRACE(("read-lock failed")); }else{ assert( lockType==F_WRLCK ); OSTRACE(("write-lock %d failed", ofst)); } } OSTRACE((" - afterwards %s\n", unixShmLockString(pShmNode->sharedMask, pShmNode->exclMask))); } #endif return rc; } /* ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. ** ** This is not a VFS shared-memory method; it is a utility function called ** by VFS shared-memory methods. */ |
︙ | ︙ | |||
3516 3517 3518 3519 3520 3521 3522 | goto shm_open_err; } /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. */ rc = SQLITE_OK; | | | | 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 | goto shm_open_err; } /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. */ rc = SQLITE_OK; if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ if( ftruncate(pShmNode->h, 0) ){ rc = SQLITE_IOERR; } } if( rc==SQLITE_OK ){ rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1); } if( rc ) goto shm_open_err; } /* Make the new connection a child of the unixShmNode */ p->pShmNode = pShmNode; p->pNext = pShmNode->pFirst; |
︙ | ︙ | |||
3683 3684 3685 3686 3687 3688 3689 | unixShm *p = pDbFd->pShm; unixShmNode *pShmNode = p->pShmNode; int rc = SQLITE_OK; assert( pShmNode==pDbFd->pInode->pShmNode ); assert( pShmNode->pInode==pDbFd->pInode ); | | | 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 | unixShm *p = pDbFd->pShm; unixShmNode *pShmNode = p->pShmNode; int rc = SQLITE_OK; assert( pShmNode==pDbFd->pInode->pShmNode ); assert( pShmNode->pInode==pDbFd->pInode ); if( p->hasMutexBuf==0 ){ assert( sqlite3_mutex_notheld(pShmNode->mutex) ); sqlite3_mutex_enter(pShmNode->mutexBuf); p->hasMutexBuf = 1; } sqlite3_mutex_enter(pShmNode->mutex); if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){ int actualSize; |
︙ | ︙ | |||
3727 3728 3729 3730 3731 3732 3733 | ** really want to release the lock, so in that case too, this routine ** is a no-op. */ static int unixShmRelease(sqlite3_file *fd){ unixFile *pDbFd = (unixFile*)fd; unixShm *p = pDbFd->pShm; | | < < < < < < < < < < < < < < < | > | | | > | | > | < < < < | | | | > | < < < > | > | > | < < > > | > | | > > | | > | | | | | > | > > | < > > | < > | > > | < | | < < < < < < | > | | | | < > | < | | | < < < < < < | | < < < < < | | | > > > > > > > > | | < < < > < < < < < | < < | > > | < < < < < < < < < < < | | > < | < | 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 | ** really want to release the lock, so in that case too, this routine ** is a no-op. */ static int unixShmRelease(sqlite3_file *fd){ unixFile *pDbFd = (unixFile*)fd; unixShm *p = pDbFd->pShm; if( p->hasMutexBuf ){ assert( sqlite3_mutex_notheld(p->pShmNode->mutex) ); sqlite3_mutex_leave(p->pShmNode->mutexBuf); p->hasMutexBuf = 0; } return SQLITE_OK; } /* ** Change the lock state for a shared-memory segment. */ static int unixShmLock( sqlite3_file *fd, /* Database file holding the shared memory */ int ofst, /* First lock to acquire or release */ int n, /* Number of locks to acquire or release */ int flags /* What to do with the lock */ ){ unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */ unixShm *p = pDbFd->pShm; /* The shared memory being locked */ unixShm *pX; /* For looping over all siblings */ unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ int rc = SQLITE_OK; /* Result code */ u16 mask; /* Mask of locks to take or release */ assert( pShmNode==pDbFd->pInode->pShmNode ); assert( pShmNode->pInode==pDbFd->pInode ); assert( ofst>=0 && ofst+n<SQLITE_SHM_NLOCK ); assert( n>=1 ); assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); mask = (1<<(ofst+n+1)) - (1<<(ofst+1)); assert( n>1 || mask==(1<<ofst) ); sqlite3_mutex_enter(pShmNode->mutex); if( flags & SQLITE_SHM_UNLOCK ){ u16 allMask = 0; /* Mask of locks held by siblings */ /* See if any siblings hold this same lock */ for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ if( pX==p ) continue; assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); allMask |= pX->sharedMask; } /* Unlock the system-level locks */ if( (mask & allMask)==0 ){ rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+1, n); }else{ rc = SQLITE_OK; } /* Undo the local locks */ if( rc==SQLITE_OK ){ p->exclMask &= ~mask; p->sharedMask &= ~mask; } }else if( flags & SQLITE_SHM_SHARED ){ u16 allShared = 0; /* Union of locks held by connections other than "p" */ /* Find out which shared locks are already held by sibling connections. ** If any sibling already holds an exclusive lock, go ahead and return ** SQLITE_BUSY. */ for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ if( pX==p ) continue; if( (pX->exclMask & mask)!=0 ){ rc = SQLITE_BUSY; break; } allShared |= pX->sharedMask; } /* Get shared locks at the system level, if necessary */ if( rc==SQLITE_OK ){ if( (allShared & mask)==0 ){ rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+1, n); }else{ rc = SQLITE_OK; } } /* Get the local shared locks */ if( rc==SQLITE_OK ){ p->sharedMask |= mask; } }else{ /* Make sure no sibling connections hold locks that will block this ** lock. If any do, return SQLITE_BUSY right away. */ for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ if( pX==p ) continue; if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){ rc = SQLITE_BUSY; break; } } /* Get the exclusive locks at the system level. Then if successful ** also mark the local connection as being locked. */ if( rc==SQLITE_OK ){ rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+1, n); if( rc==SQLITE_OK ){ p->sharedMask &= ~mask; p->exclMask |= mask; } } } sqlite3_mutex_leave(pShmNode->mutex); OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %s\n", p->id, getpid(), unixShmLockString(p->sharedMask, p->exclMask))); return rc; } /* ** Implement a memory barrier or memory fence on shared memory. ** ** All loads and stores begun before the barrier must complete before |
︙ | ︙ |
Changes to src/pager.c.
︙ | ︙ | |||
1199 1200 1201 1202 1203 1204 1205 | return (pPager->pWal!=0); } #else # define pagerUseWal(x) 0 # define pagerRollbackWal(x) 0 # define pagerWalFrames(v,w,x,y,z) 0 # define pagerOpenWalIfPresent(z) SQLITE_OK | | | 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 | return (pPager->pWal!=0); } #else # define pagerUseWal(x) 0 # define pagerRollbackWal(x) 0 # define pagerWalFrames(v,w,x,y,z) 0 # define pagerOpenWalIfPresent(z) SQLITE_OK # define pagerBeginReadTransaction(z) SQLITE_OK #endif /* ** Unlock the database file. This function is a no-op if the pager ** is in exclusive mode. ** ** If the pager is currently in error state, discard the contents of |
︙ | ︙ | |||
1234 1235 1236 1237 1238 1239 1240 | ** this happens. One can argue that this doesn't need to be cleared ** until the change-counter check fails in PagerSharedLock(). ** Clearing the page size cache here is being conservative. */ pPager->dbSizeValid = 0; if( pagerUseWal(pPager) ){ | | | 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 | ** this happens. One can argue that this doesn't need to be cleared ** until the change-counter check fails in PagerSharedLock(). ** Clearing the page size cache here is being conservative. */ pPager->dbSizeValid = 0; if( pagerUseWal(pPager) ){ sqlite3WalEndReadTransaction(pPager->pWal); }else{ rc = osUnlock(pPager->fd, NO_LOCK); } if( rc ){ pPager->errCode = rc; } IOTRACE(("UNLOCK %p\n", pPager)) |
︙ | ︙ | |||
1433 1434 1435 1436 1437 1438 1439 | } sqlite3BitvecDestroy(pPager->pInJournal); pPager->pInJournal = 0; pPager->nRec = 0; sqlite3PcacheCleanAll(pPager->pPCache); if( pagerUseWal(pPager) ){ | | | 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 | } sqlite3BitvecDestroy(pPager->pInJournal); pPager->pInJournal = 0; pPager->nRec = 0; sqlite3PcacheCleanAll(pPager->pPCache); if( pagerUseWal(pPager) ){ rc2 = sqlite3WalEndWriteTransaction(pPager->pWal); pPager->state = PAGER_SHARED; /* If the connection was in locking_mode=exclusive mode but is no longer, ** drop the EXCLUSIVE lock held on the database file. */ if( rc2==SQLITE_OK && !pPager->exclusiveMode |
︙ | ︙ | |||
2358 2359 2360 2361 2362 2363 2364 | sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); } } return rc; } /* | > > > > | > | | | 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 | sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); } } return rc; } /* ** Begin a read transaction on the WAL. ** ** This routine used to be called "pagerOpenSnapshot()" because it essentially ** makes a snapshot of the database at the current point in time and preserves ** that snapshot for use by the reader in spite of concurrently changes by ** other writers or checkpointers. */ static int pagerBeginReadTransaction(Pager *pPager){ int rc; /* Return code */ int changed = 0; /* True if cache must be reset */ assert( pagerUseWal(pPager) ); rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed); if( rc==SQLITE_OK ){ int dummy; if( changed ){ pager_reset(pPager); assert( pPager->errCode || pPager->dbSizeValid==0 ); } rc = sqlite3PagerPagecount(pPager, &dummy); |
︙ | ︙ | |||
2424 2425 2426 2427 2428 2429 2430 | int isWal; /* True if WAL file exists */ rc = pagerHasWAL(pPager, &isWal); if( rc==SQLITE_OK ){ if( isWal ){ pager_reset(pPager); rc = sqlite3PagerOpenWal(pPager, 0); if( rc==SQLITE_OK ){ | | | 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 | int isWal; /* True if WAL file exists */ rc = pagerHasWAL(pPager, &isWal); if( rc==SQLITE_OK ){ if( isWal ){ pager_reset(pPager); rc = sqlite3PagerOpenWal(pPager, 0); if( rc==SQLITE_OK ){ rc = pagerBeginReadTransaction(pPager); } }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){ pPager->journalMode = PAGER_JOURNALMODE_DELETE; } } } return rc; |
︙ | ︙ | |||
3998 3999 4000 4001 4002 4003 4004 | isErrorReset = 1; } pPager->errCode = SQLITE_OK; pager_reset(pPager); } if( pagerUseWal(pPager) ){ | | | 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 | isErrorReset = 1; } pPager->errCode = SQLITE_OK; pager_reset(pPager); } if( pagerUseWal(pPager) ){ rc = pagerBeginReadTransaction(pPager); }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){ sqlite3_vfs * const pVfs = pPager->pVfs; int isHotJournal = 0; assert( !MEMDB ); assert( sqlite3PcacheRefCount(pPager->pPCache)==0 ); if( pPager->noReadlock ){ assert( pPager->readOnly ); |
︙ | ︙ | |||
4557 4558 4559 4560 4561 4562 4563 | ** ** WAL mode sets Pager.state to PAGER_RESERVED when it has an open ** transaction, but never to PAGER_EXCLUSIVE. This is because in ** PAGER_EXCLUSIVE state the code to roll back savepoint transactions ** may copy data from the sub-journal into the database file as well ** as into the page cache. Which would be incorrect in WAL mode. */ | | | 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 | ** ** WAL mode sets Pager.state to PAGER_RESERVED when it has an open ** transaction, but never to PAGER_EXCLUSIVE. This is because in ** PAGER_EXCLUSIVE state the code to roll back savepoint transactions ** may copy data from the sub-journal into the database file as well ** as into the page cache. Which would be incorrect in WAL mode. */ rc = sqlite3WalBeginWriteTransaction(pPager->pWal); if( rc==SQLITE_OK ){ pPager->dbOrigSize = pPager->dbSize; pPager->state = PAGER_RESERVED; pPager->journalOff = 0; } assert( rc!=SQLITE_OK || pPager->state==PAGER_RESERVED ); |
︙ | ︙ | |||
5888 5889 5890 5891 5892 5893 5894 | */ int sqlite3PagerCheckpoint(Pager *pPager){ int rc = SQLITE_OK; if( pPager->pWal ){ u8 *zBuf = (u8 *)pPager->pTmpSpace; rc = sqlite3WalCheckpoint(pPager->pWal, (pPager->noSync ? 0 : pPager->sync_flags), | | < | 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 | */ int sqlite3PagerCheckpoint(Pager *pPager){ int rc = SQLITE_OK; if( pPager->pWal ){ u8 *zBuf = (u8 *)pPager->pTmpSpace; rc = sqlite3WalCheckpoint(pPager->pWal, (pPager->noSync ? 0 : pPager->sync_flags), pPager->pageSize, zBuf ); } return rc; } int sqlite3PagerWalCallback(Pager *pPager){ return sqlite3WalCallback(pPager->pWal); |
︙ | ︙ |
Changes to src/sqlite.h.in.
︙ | ︙ | |||
440 441 442 443 444 445 446 | #define SQLITE_IOERR_BLOCKED (SQLITE_IOERR | (11<<8)) #define SQLITE_IOERR_NOMEM (SQLITE_IOERR | (12<<8)) #define SQLITE_IOERR_ACCESS (SQLITE_IOERR | (13<<8)) #define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8)) #define SQLITE_IOERR_LOCK (SQLITE_IOERR | (15<<8)) #define SQLITE_IOERR_CLOSE (SQLITE_IOERR | (16<<8)) #define SQLITE_IOERR_DIR_CLOSE (SQLITE_IOERR | (17<<8)) | | > | 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 | #define SQLITE_IOERR_BLOCKED (SQLITE_IOERR | (11<<8)) #define SQLITE_IOERR_NOMEM (SQLITE_IOERR | (12<<8)) #define SQLITE_IOERR_ACCESS (SQLITE_IOERR | (13<<8)) #define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8)) #define SQLITE_IOERR_LOCK (SQLITE_IOERR | (15<<8)) #define SQLITE_IOERR_CLOSE (SQLITE_IOERR | (16<<8)) #define SQLITE_IOERR_DIR_CLOSE (SQLITE_IOERR | (17<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) /* ** CAPI3REF: Flags For File Open Operations ** ** These bit values are intended for use in the ** 3rd parameter to the [sqlite3_open_v2()] interface and ** in the 4th parameter to the xOpen method of the |
︙ | ︙ | |||
654 655 656 657 658 659 660 | int (*xSectorSize)(sqlite3_file*); int (*xDeviceCharacteristics)(sqlite3_file*); /* Methods above are valid for version 1 */ int (*xShmOpen)(sqlite3_file*); int (*xShmSize)(sqlite3_file*, int reqSize, int *pNewSize); int (*xShmGet)(sqlite3_file*, int reqSize, int *pSize, void volatile**); int (*xShmRelease)(sqlite3_file*); | | | 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 | int (*xSectorSize)(sqlite3_file*); int (*xDeviceCharacteristics)(sqlite3_file*); /* Methods above are valid for version 1 */ int (*xShmOpen)(sqlite3_file*); int (*xShmSize)(sqlite3_file*, int reqSize, int *pNewSize); int (*xShmGet)(sqlite3_file*, int reqSize, int *pSize, void volatile**); int (*xShmRelease)(sqlite3_file*); int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); void (*xShmBarrier)(sqlite3_file*); int (*xShmClose)(sqlite3_file*, int deleteFlag); /* Methods above are valid for version 2 */ /* Additional methods may be added in future releases */ }; /* |
︙ | ︙ | |||
884 885 886 887 888 889 890 | #define SQLITE_ACCESS_EXISTS 0 #define SQLITE_ACCESS_READWRITE 1 #define SQLITE_ACCESS_READ 2 /* ** CAPI3REF: Flags for the xShmLock VFS method ** | | > > > > > > > > > > > > > > > > | | | | | | > > > > > > > > | | | 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 | #define SQLITE_ACCESS_EXISTS 0 #define SQLITE_ACCESS_READWRITE 1 #define SQLITE_ACCESS_READ 2 /* ** CAPI3REF: Flags for the xShmLock VFS method ** ** These integer constants define the various locking operations ** allowed by the xShmLock method of [sqlite3_io_methods]. The ** following are the only legal combinations of flags to the ** xShmLock method: ** ** <ul> ** <li> SQLITE_SHM_LOCK | SQLITE_SHM_SHARED ** <li> SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE ** <li> SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED ** <li> SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE ** </ul> ** ** When unlocking, the same SHARED or EXCLUSIVE flag must be supplied as ** was given no the corresponding lock. ** ** The xShmLock method can transition between unlocked and SHARED or ** between unlocked and EXCLUSIVE. It cannot transition between SHARED ** and EXCLUSIVE. */ #define SQLITE_SHM_UNLOCK 1 #define SQLITE_SHM_LOCK 2 #define SQLITE_SHM_SHARED 4 #define SQLITE_SHM_EXCLUSIVE 8 /* ** CAPI3REF: Maximum xShmLock index ** ** The xShmLock method on [sqlite3_io_methods] may use values ** between 0 and this upper bound as its "offset" argument. ** The SQLite core will never attempt to acquire or release a ** lock outside of this range */ #define SQLITE_SHM_NLOCK 8 /* ** CAPI3REF: Initialize The SQLite Library ** ** ^The sqlite3_initialize() routine initializes the ** SQLite library. ^The sqlite3_shutdown() routine ** deallocates any resources that were allocated by sqlite3_initialize(). |
︙ | ︙ |
Changes to src/test6.c.
︙ | ︙ | |||
536 537 538 539 540 541 542 | void volatile **pp ){ return sqlite3OsShmGet(((CrashFile*)pFile)->pRealFile, reqSize, pSize, pp); } static int cfShmRelease(sqlite3_file *pFile){ return sqlite3OsShmRelease(((CrashFile*)pFile)->pRealFile); } | | | | 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 | void volatile **pp ){ return sqlite3OsShmGet(((CrashFile*)pFile)->pRealFile, reqSize, pSize, pp); } static int cfShmRelease(sqlite3_file *pFile){ return sqlite3OsShmRelease(((CrashFile*)pFile)->pRealFile); } static int cfShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ return sqlite3OsShmLock(((CrashFile*)pFile)->pRealFile, ofst, n, flags); } static void cfShmBarrier(sqlite3_file *pFile){ sqlite3OsShmBarrier(((CrashFile*)pFile)->pRealFile); } static int cfShmClose(sqlite3_file *pFile, int delFlag){ return sqlite3OsShmClose(((CrashFile*)pFile)->pRealFile, delFlag); } |
︙ | ︙ |
Changes to src/test_devsym.c.
︙ | ︙ | |||
50 51 52 53 54 55 56 | static int devsymFileControl(sqlite3_file*, int op, void *pArg); static int devsymSectorSize(sqlite3_file*); static int devsymDeviceCharacteristics(sqlite3_file*); static int devsymShmOpen(sqlite3_file*); static int devsymShmSize(sqlite3_file*,int,int*); static int devsymShmGet(sqlite3_file*,int,int*,volatile void**); static int devsymShmRelease(sqlite3_file*); | | | 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | static int devsymFileControl(sqlite3_file*, int op, void *pArg); static int devsymSectorSize(sqlite3_file*); static int devsymDeviceCharacteristics(sqlite3_file*); static int devsymShmOpen(sqlite3_file*); static int devsymShmSize(sqlite3_file*,int,int*); static int devsymShmGet(sqlite3_file*,int,int*,volatile void**); static int devsymShmRelease(sqlite3_file*); static int devsymShmLock(sqlite3_file*,int,int,int); static void devsymShmBarrier(sqlite3_file*); static int devsymShmClose(sqlite3_file*,int); /* ** Method declarations for devsym_vfs. */ static int devsymOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *); |
︙ | ︙ | |||
259 260 261 262 263 264 265 | devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmGet(p->pReal, reqSz, pSize, pp); } static int devsymShmRelease(sqlite3_file *pFile){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmRelease(p->pReal); } | | | | 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 | devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmGet(p->pReal, reqSz, pSize, pp); } static int devsymShmRelease(sqlite3_file *pFile){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmRelease(p->pReal); } static int devsymShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmLock(p->pReal, ofst, n, flags); } static void devsymShmBarrier(sqlite3_file *pFile){ devsym_file *p = (devsym_file *)pFile; sqlite3OsShmBarrier(p->pReal); } static int devsymShmClose(sqlite3_file *pFile, int delFlag){ devsym_file *p = (devsym_file *)pFile; |
︙ | ︙ |
Changes to src/test_osinst.c.
︙ | ︙ | |||
151 152 153 154 155 156 157 | static int vfslogSectorSize(sqlite3_file*); static int vfslogDeviceCharacteristics(sqlite3_file*); static int vfslogShmOpen(sqlite3_file *pFile); static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize); static int vfslogShmGet(sqlite3_file *pFile, int,int*,volatile void **); static int vfslogShmRelease(sqlite3_file *pFile); | | | 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | static int vfslogSectorSize(sqlite3_file*); static int vfslogDeviceCharacteristics(sqlite3_file*); static int vfslogShmOpen(sqlite3_file *pFile); static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize); static int vfslogShmGet(sqlite3_file *pFile, int,int*,volatile void **); static int vfslogShmRelease(sqlite3_file *pFile); static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags); static void vfslogShmBarrier(sqlite3_file*); static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag); /* ** Method declarations for vfslog_vfs. */ static int vfslogOpen(sqlite3_vfs*, const char *, sqlite3_file*, int , int *); |
︙ | ︙ | |||
456 457 458 459 460 461 462 | VfslogFile *p = (VfslogFile *)pFile; t = vfslog_time(); rc = p->pReal->pMethods->xShmRelease(p->pReal); t = vfslog_time() - t; vfslog_call(p->pVfslog, OS_SHMRELEASE, p->iFileId, t, rc, 0, 0); return rc; } | | | | 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 | VfslogFile *p = (VfslogFile *)pFile; t = vfslog_time(); rc = p->pReal->pMethods->xShmRelease(p->pReal); t = vfslog_time() - t; vfslog_call(p->pVfslog, OS_SHMRELEASE, p->iFileId, t, rc, 0, 0); return rc; } static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ int rc; sqlite3_uint64 t; VfslogFile *p = (VfslogFile *)pFile; t = vfslog_time(); rc = p->pReal->pMethods->xShmLock(p->pReal, ofst, n, flags); t = vfslog_time() - t; vfslog_call(p->pVfslog, OS_SHMLOCK, p->iFileId, t, rc, 0, 0); return rc; } static void vfslogShmBarrier(sqlite3_file *pFile){ sqlite3_uint64 t; VfslogFile *p = (VfslogFile *)pFile; |
︙ | ︙ |
Changes to src/test_vfs.c.
︙ | ︙ | |||
98 99 100 101 102 103 104 | static int tvfsSleep(sqlite3_vfs*, int microseconds); static int tvfsCurrentTime(sqlite3_vfs*, double*); static int tvfsShmOpen(sqlite3_file*); static int tvfsShmSize(sqlite3_file*, int , int *); static int tvfsShmGet(sqlite3_file*, int , int *, volatile void **); static int tvfsShmRelease(sqlite3_file*); | | | 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | static int tvfsSleep(sqlite3_vfs*, int microseconds); static int tvfsCurrentTime(sqlite3_vfs*, double*); static int tvfsShmOpen(sqlite3_file*); static int tvfsShmSize(sqlite3_file*, int , int *); static int tvfsShmGet(sqlite3_file*, int , int *, volatile void **); static int tvfsShmRelease(sqlite3_file*); static int tvfsShmLock(sqlite3_file*, int , int, int); static void tvfsShmBarrier(sqlite3_file*); static int tvfsShmClose(sqlite3_file*, int); static sqlite3_io_methods tvfs_io_methods = { 2, /* iVersion */ tvfsClose, /* xClose */ tvfsRead, /* xRead */ |
︙ | ︙ | |||
540 541 542 543 544 545 546 | tvfsResultCode(p, &rc); return rc; } static int tvfsShmLock( sqlite3_file *pFile, | | | > | > > | | | > | > | | | > > < < < < | 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 | tvfsResultCode(p, &rc); return rc; } static int tvfsShmLock( sqlite3_file *pFile, int ofst, int n, int flags ){ int rc = SQLITE_OK; TestvfsFile *pFd = (TestvfsFile *)pFile; Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); int nLock; char zLock[80]; sqlite3_snprintf(sizeof(zLock), zLock, "%d %d", ofst, n); nLock = strlen(zLock); if( flags & SQLITE_SHM_LOCK ){ strcpy(&zLock[nLock], " lock"); }else{ strcpy(&zLock[nLock], " unlock"); } nLock += strlen(&zLock[nLock]); if( flags & SQLITE_SHM_SHARED ){ strcpy(&zLock[nLock], " shared"); }else{ strcpy(&zLock[nLock], " exclusive"); } tvfsExecTcl(p, "xShmLock", Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, Tcl_NewStringObj(zLock, -1) ); tvfsResultCode(p, &rc); return rc; } static void tvfsShmBarrier(sqlite3_file *pFile){ int rc = SQLITE_OK; TestvfsFile *pFd = (TestvfsFile *)pFile; Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); |
︙ | ︙ | |||
712 713 714 715 716 717 718 | ** VFSNAME shm FILENAME ?NEWVALUE? ** ** When the xShmLock method is invoked by SQLite, the following script is ** run: ** ** SCRIPT xShmLock FILENAME ID LOCK ** | | < < | 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 | ** VFSNAME shm FILENAME ?NEWVALUE? ** ** When the xShmLock method is invoked by SQLite, the following script is ** run: ** ** SCRIPT xShmLock FILENAME ID LOCK ** ** where LOCK is of the form "OFFSET NBYTE lock/unlock shared/exclusive" */ static int testvfs_cmd( ClientData cd, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ |
︙ | ︙ |
Changes to src/wal.c.
︙ | ︙ | |||
89 90 91 92 93 94 95 | ** being considered valid at the same time and being checkpointing together ** following a crash. ** ** READER ALGORITHM ** ** To read a page from the database (call it page number P), a reader ** first checks the WAL to see if it contains page P. If so, then the | | > | | | > > > > > > > > > | | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 | ** being considered valid at the same time and being checkpointing together ** following a crash. ** ** READER ALGORITHM ** ** To read a page from the database (call it page number P), a reader ** first checks the WAL to see if it contains page P. If so, then the ** last valid instance of page P that is a followed by a commit frame ** or is a commit frame itself becomes the value read. If the WAL ** contains no copies of page P that are valid and which are a commit ** frame or are followed by a commit frame, then page P is read from ** the database file. ** ** To start a read transaction, the reader records the index of the last ** valid frame in the WAL. The reader uses this recorded "mxFrame" value ** for all subsequent read operations. New transactions can be appended ** to the WAL, but as long as the reader uses its original mxFrame value ** and ignores the newly appended content, it will see a consistent snapshot ** of the database from a single point in time. This technique allows ** multiple concurrent readers to view different versions of the database ** content simultaneously. ** ** The reader algorithm in the previous paragraphs works correctly, but ** because frames for page P can appear anywhere within the WAL, the ** reader has to scan the entire WAL looking for page P frames. If the ** WAL is large (multiple megabytes is typical) that scan can be slow, ** and read performance suffers. To overcome this problem, a separate ** data structure called the wal-index is maintained to expedite the ** search for frames of a particular page. ** |
︙ | ︙ | |||
157 158 159 160 161 162 163 | ** table is never more than half full. The expected number of collisions ** prior to finding a match is 1. Each entry of the hash table is an ** 1-based index of an entry in the mapping section of the same ** index block. Let K be the 1-based index of the largest entry in ** the mapping section. (For index blocks other than the last, K will ** always be exactly HASHTABLE_NPAGE (4096) and for the last index block ** K will be (mxFrame%HASHTABLE_NPAGE).) Unused slots of the hash table | | < | 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | ** table is never more than half full. The expected number of collisions ** prior to finding a match is 1. Each entry of the hash table is an ** 1-based index of an entry in the mapping section of the same ** index block. Let K be the 1-based index of the largest entry in ** the mapping section. (For index blocks other than the last, K will ** always be exactly HASHTABLE_NPAGE (4096) and for the last index block ** K will be (mxFrame%HASHTABLE_NPAGE).) Unused slots of the hash table ** contain a value of 0. ** ** To look for page P in the hash table, first compute a hash iKey on ** P as follows: ** ** iKey = (P * 383) % HASHTABLE_NSLOT ** ** Then start scanning entries of the hash table, starting with iKey |
︙ | ︙ | |||
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | ** that correspond to frames greater than the new K value are removed ** from the hash table at this point. */ #ifndef SQLITE_OMIT_WAL #include "wal.h" /* Object declarations */ typedef struct WalIndexHdr WalIndexHdr; typedef struct WalIterator WalIterator; /* ** The following object holds a copy of the wal-index header content. ** ** The actual header in the wal-index consists of two copies of this ** object. */ struct WalIndexHdr { u32 iChange; /* Counter incremented each transaction */ u16 bigEndCksum; /* True if checksums in WAL are big-endian */ u16 szPage; /* Database page size in bytes */ u32 mxFrame; /* Index of last valid frame in the WAL */ u32 nPage; /* Size of database in pages */ u32 aFrameCksum[2]; /* Checksum of last frame in log */ u32 aSalt[2]; /* Two salt values copied from WAL header */ u32 aCksum[2]; /* Checksum over all prior fields */ }; /* A block of WALINDEX_LOCK_RESERVED bytes beginning at ** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems ** only support mandatory file-locks, we do not read or write data ** from the region of the file on which locks are applied. */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 | ** that correspond to frames greater than the new K value are removed ** from the hash table at this point. */ #ifndef SQLITE_OMIT_WAL #include "wal.h" /* ** Indices of various locking bytes. WAL_NREADER is the number ** of available reader locks and should be at least 3. */ #define WAL_WRITE_LOCK 0 #define WAL_ALL_BUT_WRITE 1 #define WAL_CKPT_LOCK 1 #define WAL_RECOVER_LOCK 2 #define WAL_READ_LOCK(I) (3+(I)) #define WAL_NREADER (SQLITE_SHM_NLOCK-3) /* Object declarations */ typedef struct WalIndexHdr WalIndexHdr; typedef struct WalIterator WalIterator; typedef struct WalCkptInfo WalCkptInfo; /* ** The following object holds a copy of the wal-index header content. ** ** The actual header in the wal-index consists of two copies of this ** object. */ struct WalIndexHdr { u32 iChange; /* Counter incremented each transaction */ u16 bigEndCksum; /* True if checksums in WAL are big-endian */ u16 szPage; /* Database page size in bytes */ u32 mxFrame; /* Index of last valid frame in the WAL */ u32 nPage; /* Size of database in pages */ u32 aFrameCksum[2]; /* Checksum of last frame in log */ u32 aSalt[2]; /* Two salt values copied from WAL header */ u32 aCksum[2]; /* Checksum over all prior fields */ }; /* ** A copy of the following object occurs in the wal-index immediately ** following the second copy of the WalIndexHdr. This object stores ** information used by checkpoint. ** ** nBackfill is the number of frames in the WAL that have been written ** back into the database. (We call the act of moving content from WAL to ** database "backfilling".) The nBackfill number is never greater than ** WalIndexHdr.mxFrame. nBackfill can only be increased by threads ** holding the WAL_CKPT_LOCK lock (which includes a recovery thread). ** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from ** mxFrame back to zero when the WAL is reset. ** ** There is one entry in aReadMark[] for each reader lock. If a reader ** holds read-lock K, then the value in aReadMark[K] is no greater than ** the mxFrame for that reader. aReadMark[0] is a special case. It ** always holds zero. Readers holding WAL_READ_LOCK(0) always ignore ** the entire WAL and read all content directly from the database. ** ** The value of aReadMark[K] may only be changed by a thread that ** is holding an exclusive lock on WAL_READ_LOCK(K). Thus, the value of ** aReadMark[K] cannot changed while there is a reader is using that mark ** since the reader will be holding a shared lock on WAL_READ_LOCK(K). ** ** The checkpointer may only transfer frames from WAL to database where ** the frame numbers are less than or equal to every aReadMark[] that is ** in use (that is, every aReadMark[j] for which there is a corresponding ** WAL_READ_LOCK(j)). New readers (usually) pick the aReadMark[] with the ** largest value and will increase an unused aReadMark[] to mxFrame if there ** is not already an aReadMark[] equal to mxFrame. The exception to the ** previous sentence is when nBackfill equals mxFrame (meaning that everything ** in the WAL has been backfilled into the database) then new readers ** will choose aReadMark[0] which has value 0 and hence such reader will ** get all their all content directly from the database file and ignore ** the WAL. ** ** Writers normally append new frames to the end of the WAL. However, ** if nBackfill equals mxFrame (meaning that all WAL content has been ** written back into the database) and if no readers are using the WAL ** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then ** the writer will first "reset" the WAL back to the beginning and start ** writing new content beginning at frame 1. ** ** We assume that 32-bit loads are atomic and so no locks are needed in ** order to read from any aReadMark[] entries. */ struct WalCkptInfo { u32 nBackfill; /* Number of WAL frames backfilled into DB */ u32 aReadMark[WAL_NREADER]; /* Reader marks */ }; /* A block of WALINDEX_LOCK_RESERVED bytes beginning at ** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems ** only support mandatory file-locks, we do not read or write data ** from the region of the file on which locks are applied. */ #define WALINDEX_LOCK_OFFSET (sizeof(WalIndexHdr)*2 + sizeof(WalCkptInfo)) #define WALINDEX_LOCK_RESERVED 16 #define WALINDEX_HDR_SIZE (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED) /* Size of header before each frame in wal */ #define WAL_FRAME_HDRSIZE 24 /* Size of write ahead log header */ #define WAL_HDRSIZE 24 |
︙ | ︙ | |||
273 274 275 276 277 278 279 | ) /* ** An open write-ahead log file is represented by an instance of the ** following object. */ struct Wal { | | | | | > > | < > > > > > > > > | 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 | ) /* ** An open write-ahead log file is represented by an instance of the ** following object. */ struct Wal { sqlite3_vfs *pVfs; /* The VFS used to create pDbFd */ sqlite3_file *pDbFd; /* File handle for the database file */ sqlite3_file *pWalFd; /* File handle for WAL file */ u32 iCallback; /* Value to pass to log callback (or 0) */ int szWIndex; /* Size of the wal-index that is mapped in mem */ volatile u32 *pWiData; /* Pointer to wal-index content in memory */ u16 szPage; /* Database page size */ i16 readLock; /* Which read lock is being held. -1 for none */ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 isWIndexOpen; /* True if ShmOpen() called on pDbFd */ u8 writeLock; /* True if in a write transaction */ u8 ckptLock; /* True if holding a checkpoint lock */ WalIndexHdr hdr; /* Wal-index header for current transaction */ char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ }; /* ** Return a pointer to the WalCkptInfo structure in the wal-index. */ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ assert( pWal->pWiData!=0 ); return (volatile WalCkptInfo*)&pWal->pWiData[sizeof(WalIndexHdr)/2]; } /* ** This structure is used to implement an iterator that loops through ** all frames in the WAL in database page order. Where two or more frames ** correspond to the same database page, the iterator visits only the ** frame most recently written to the WAL (in other words, the frame with |
︙ | ︙ | |||
375 376 377 378 379 380 381 | }while( aData<aEnd ); } aOut[0] = s1; aOut[1] = s2; } | < < < < < < < < < < < < < < < < < < < < < < < > > | < | | | 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 | }while( aData<aEnd ); } aOut[0] = s1; aOut[1] = s2; } /* ** Write the header information in pWal->hdr into the wal-index. ** ** The checksum on pWal->hdr is updated before it is written. */ static void walIndexWriteHdr(Wal *pWal){ WalIndexHdr *aHdr; assert( pWal->writeLock ); walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum), 0, pWal->hdr.aCksum); aHdr = (WalIndexHdr*)pWal->pWiData; memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr)); sqlite3OsShmBarrier(pWal->pDbFd); memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr)); } /* ** This function encodes a single frame header and writes it to a buffer ** supplied by the caller. A frame-header is made up of a series of ** 4-byte big-endian integers, as follows: ** |
︙ | ︙ | |||
515 516 517 518 519 520 521 522 523 524 525 526 527 528 | ** create incompatibilities. */ #define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */ #define HASHTABLE_DATATYPE u16 #define HASHTABLE_HASH_1 383 /* Should be prime */ #define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ #define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT) /* ** Return the index in the Wal.pWiData array that corresponds to ** frame iFrame. ** ** Wal.pWiData is an array of u32 elements that is the wal-index. ** The array begins with a header and is then followed by alternating | > > > > > > > > > > > > > > > > > > > > > > > > > > | 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 | ** create incompatibilities. */ #define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */ #define HASHTABLE_DATATYPE u16 #define HASHTABLE_HASH_1 383 /* Should be prime */ #define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ #define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT) /* ** Set or release locks. ** ** In locking_mode=EXCLUSIVE, all of these routines become no-ops. */ static int walLockShared(Wal *pWal, int lockIdx){ if( pWal->exclusiveMode ) return SQLITE_OK; return sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, SQLITE_SHM_LOCK | SQLITE_SHM_SHARED); } static void walUnlockShared(Wal *pWal, int lockIdx){ if( pWal->exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); } static int walLockExclusive(Wal *pWal, int lockIdx, int n){ if( pWal->exclusiveMode ) return SQLITE_OK; return sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE); } static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ if( pWal->exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE); } /* ** Return the index in the Wal.pWiData array that corresponds to ** frame iFrame. ** ** Wal.pWiData is an array of u32 elements that is the wal-index. ** The array begins with a header and is then followed by alternating |
︙ | ︙ | |||
596 597 598 599 600 601 602 | ** ** If enlargeTo is non-negative, then increase the size of the underlying ** storage to be at least as big as enlargeTo before remapping. */ static int walIndexRemap(Wal *pWal, int enlargeTo){ int rc; int sz; | | | 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 | ** ** If enlargeTo is non-negative, then increase the size of the underlying ** storage to be at least as big as enlargeTo before remapping. */ static int walIndexRemap(Wal *pWal, int enlargeTo){ int rc; int sz; assert( pWal->writeLock ); rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &sz); if( rc==SQLITE_OK && sz>pWal->szWIndex ){ walIndexUnmap(pWal); rc = walIndexMap(pWal, sz); } assert( pWal->szWIndex>=enlargeTo || rc!=SQLITE_OK ); return rc; |
︙ | ︙ | |||
681 682 683 684 685 686 687 | */ static void walCleanupHash(Wal *pWal){ volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table to clear */ volatile u32 *aPgno; /* Unused return from walHashFind() */ u32 iZero; /* frame == (aHash[x]+iZero) */ int iLimit; /* Zero values greater than this */ | | | 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 | */ static void walCleanupHash(Wal *pWal){ volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table to clear */ volatile u32 *aPgno; /* Unused return from walHashFind() */ u32 iZero; /* frame == (aHash[x]+iZero) */ int iLimit; /* Zero values greater than this */ assert( pWal->writeLock ); walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero); iLimit = pWal->hdr.mxFrame - iZero; if( iLimit>0 ){ int nByte; /* Number of bytes to zero in aPgno[] */ int i; /* Used to iterate through aHash[] */ for(i=0; i<HASHTABLE_NSLOT; i++){ if( aHash[i]>iLimit ){ |
︙ | ︙ | |||
806 807 808 809 810 811 812 | return rc; } /* ** Recover the wal-index by reading the write-ahead log file. | > > | > > > > > > > > | | | | 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 | return rc; } /* ** Recover the wal-index by reading the write-ahead log file. ** ** This routine first tries to establish an exclusive lock on the ** wal-index to prevent other threads/processes from doing anything ** with the WAL or wal-index while recovery is running. The ** WAL_RECOVER_LOCK is also held so that other threads will know ** that this thread is running recovery. If unable to establish ** the necessary locks, this routine returns SQLITE_BUSY. */ static int walIndexRecover(Wal *pWal){ int rc; /* Return Code */ i64 nSize; /* Size of log file */ u32 aFrameCksum[2] = {0, 0}; rc = walLockExclusive(pWal, WAL_ALL_BUT_WRITE, SQLITE_SHM_NLOCK-1); if( rc ){ return rc; } memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); rc = sqlite3OsFileSize(pWal->pWalFd, &nSize); if( rc!=SQLITE_OK ){ goto recovery_error; } if( nSize>WAL_HDRSIZE ){ u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ int szFrame; /* Number of bytes in buffer aFrame[] */ u8 *aData; /* Pointer to data part of aFrame buffer */ int iFrame; /* Index of last frame read */ i64 iOffset; /* Next offset to read from log file */ int szPage; /* Page size according to the log */ u32 magic; /* Magic value read from WAL header */ /* Read in the WAL header. */ rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ goto recovery_error; } /* If the database page size is not a power of two, or is greater than ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid ** data. Similarly, if the 'magic' value is invalid, ignore the whole ** WAL file. */ |
︙ | ︙ | |||
863 864 865 866 867 868 869 | aBuf, WAL_HDRSIZE, 0, pWal->hdr.aFrameCksum ); /* Malloc a buffer to read frames into. */ szFrame = szPage + WAL_FRAME_HDRSIZE; aFrame = (u8 *)sqlite3_malloc(szFrame); if( !aFrame ){ | | > | 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 | aBuf, WAL_HDRSIZE, 0, pWal->hdr.aFrameCksum ); /* Malloc a buffer to read frames into. */ szFrame = szPage + WAL_FRAME_HDRSIZE; aFrame = (u8 *)sqlite3_malloc(szFrame); if( !aFrame ){ rc = SQLITE_NOMEM; goto recovery_error; } aData = &aFrame[WAL_FRAME_HDRSIZE]; /* Read all frames from the log file. */ iFrame = 0; for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ u32 pgno; /* Database page number for frame */ |
︙ | ︙ | |||
904 905 906 907 908 909 910 911 912 913 914 915 916 917 | rc = walIndexRemap(pWal, walMappingSize(1)); } if( rc==SQLITE_OK ){ pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; walIndexWriteHdr(pWal); } return rc; } /* ** Close an open wal-index. */ static void walIndexClose(Wal *pWal, int isDelete){ | > > > | < < | | 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 | rc = walIndexRemap(pWal, walMappingSize(1)); } if( rc==SQLITE_OK ){ pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; walIndexWriteHdr(pWal); } recovery_error: walUnlockExclusive(pWal, WAL_ALL_BUT_WRITE, SQLITE_SHM_NLOCK-1); return rc; } /* ** Close an open wal-index. */ static void walIndexClose(Wal *pWal, int isDelete){ if( pWal->isWIndexOpen ){ sqlite3OsShmClose(pWal->pDbFd, isDelete); pWal->isWIndexOpen = 0; } } /* ** Open a connection to the log file associated with database zDb. The ** database file does not actually have to exist. zDb is used only to ** figure out the name of the log file to open. If the log file does not |
︙ | ︙ | |||
974 975 976 977 978 979 980 981 982 983 984 985 986 987 | return SQLITE_NOMEM; } pRet->pVfs = pVfs; pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pDbFd = pDbFd; pRet->szWIndex = -1; sqlite3_randomness(8, &pRet->hdr.aSalt); pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd; sqlite3_snprintf(nWal, zWal, "%s-wal", zDbName); rc = sqlite3OsShmOpen(pDbFd); /* Open file handle on the write-ahead log file. */ if( rc==SQLITE_OK ){ | > | | 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 | return SQLITE_NOMEM; } pRet->pVfs = pVfs; pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pDbFd = pDbFd; pRet->szWIndex = -1; pRet->readLock = -1; sqlite3_randomness(8, &pRet->hdr.aSalt); pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd; sqlite3_snprintf(nWal, zWal, "%s-wal", zDbName); rc = sqlite3OsShmOpen(pDbFd); /* Open file handle on the write-ahead log file. */ if( rc==SQLITE_OK ){ pRet->isWIndexOpen = 1; flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_JOURNAL); rc = sqlite3OsOpen(pVfs, zWal, pRet->pWalFd, flags, &flags); } if( rc!=SQLITE_OK ){ walIndexClose(pRet, 0); sqlite3OsClose(pRet->pWalFd); |
︙ | ︙ | |||
1129 1130 1131 1132 1133 1134 1135 | } /* This routine only runs while holding SQLITE_SHM_CHECKPOINT. No other ** thread is able to write to shared memory while this routine is ** running (or, indeed, while the WalIterator object exists). Hence, ** we can cast off the volatile qualifacation from shared memory */ | | | 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 | } /* This routine only runs while holding SQLITE_SHM_CHECKPOINT. No other ** thread is able to write to shared memory while this routine is ** running (or, indeed, while the WalIterator object exists). Hence, ** we can cast off the volatile qualifacation from shared memory */ assert( pWal->ckptLock ); aData = (u32*)pWal->pWiData; /* Allocate space for the WalIterator object */ iLast = pWal->hdr.mxFrame; nSegment = (iLast >> 8) + 1; nFinal = (iLast & 0x000000FF); nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256); |
︙ | ︙ | |||
1175 1176 1177 1178 1179 1180 1181 1182 1183 | /* ** Free an iterator allocated by walIteratorInit(). */ static void walIteratorFree(WalIterator *p){ sqlite3_free(p); } /* | > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > | > > > | < > > > > > > | > > > > | | | > > > > > | | > | | < < > | < < < > | > > > | | > > > | > > | > > | | > > > > > | | < | > > | < < < < > | | | | 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 | /* ** Free an iterator allocated by walIteratorInit(). */ static void walIteratorFree(WalIterator *p){ sqlite3_free(p); } /* ** Copy as much content as we can from the WAL back into the database file ** in response to an sqlite3_wal_checkpoint() request or the equivalent. ** ** The amount of information copies from WAL to database might be limited ** by active readers. This routine will never overwrite a database page ** that a concurrent reader might be using. ** ** All I/O barrier operations (a.k.a fsyncs) occur in this routine when ** SQLite is in WAL-mode in synchronous=NORMAL. That means that if ** checkpoints are always run by a background thread or background ** process, foreground threads will never block on a lengthy fsync call. ** ** Fsync is called on the WAL before writing content out of the WAL and ** into the database. This ensures that if the new content is persistent ** in the WAL and can be recovered following a power-loss or hard reset. ** ** Fsync is also called on the database file if (and only if) the entire ** WAL content is copied into the database file. This second fsync makes ** it safe to delete the WAL since the new content will persist in the ** database file. ** ** This routine uses and updates the nBackfill field of the wal-index header. ** This is the only routine tha will increase the value of nBackfill. ** (A WAL reset or recovery will revert nBackfill to zero, but not increase ** its value.) ** ** The caller must be holding sufficient locks to ensure that no other ** checkpoint is running (in any other thread or process) at the same ** time. */ static int walCheckpoint( Wal *pWal, /* Wal connection */ int sync_flags, /* Flags for OsSync() (or 0) */ int nBuf, /* Size of zBuf in bytes */ u8 *zBuf /* Temporary buffer to use */ ){ int rc; /* Return code */ int szPage = pWal->hdr.szPage; /* Database page-size */ WalIterator *pIter = 0; /* Wal iterator context */ u32 iDbpage = 0; /* Next database page to write */ u32 iFrame = 0; /* Wal frame containing data for iDbpage */ u32 mxSafeFrame; /* Max frame that can be backfilled */ int i; /* Loop counter */ volatile WalIndexHdr *pHdr; /* The actual wal-index header in SHM */ volatile WalCkptInfo *pInfo; /* The checkpoint status information */ /* Allocate the iterator */ rc = walIteratorInit(pWal, &pIter); if( rc!=SQLITE_OK || pWal->hdr.mxFrame==0 ){ walIteratorFree(pIter); return rc; } /*** TODO: Move this test out to the caller. Make it an assert() here ***/ if( pWal->hdr.szPage!=nBuf ){ walIteratorFree(pIter); return SQLITE_CORRUPT_BKPT; } /* Compute in mxSafeFrame the index of the last frame of the WAL that is ** safe to write into the database. Frames beyond mxSafeFrame might ** overwrite database pages that are in use by active readers and thus ** cannot be backfilled from the WAL. */ mxSafeFrame = 0; pHdr = (volatile WalIndexHdr*)pWal->pWiData; pInfo = (volatile WalCkptInfo*)&pHdr[2]; assert( pInfo==walCkptInfo(pWal) ); for(i=1; i<WAL_NREADER; i++){ u32 y = pInfo->aReadMark[i]; if( y>0 && (mxSafeFrame==0 || mxSafeFrame<y) ){ if( y<pWal->hdr.mxFrame && (rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1))==SQLITE_OK ){ pInfo->aReadMark[i] = 0; walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); }else{ mxSafeFrame = y; } } } if( pInfo->nBackfill<mxSafeFrame && (rc = walLockExclusive(pWal, WAL_READ_LOCK(0), 1))==SQLITE_OK ){ u32 nBackfill = pInfo->nBackfill; /* Sync the WAL to disk */ if( sync_flags ){ rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } /* Iterate through the contents of the WAL, copying data to the db file. */ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue; rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE ); if( rc!=SQLITE_OK ) break; rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, (iDbpage-1)*szPage); if( rc!=SQLITE_OK ) break; } /* If work was actually accomplished... */ if( rc==SQLITE_OK && pInfo->nBackfill<mxSafeFrame ){ pInfo->nBackfill = mxSafeFrame; if( mxSafeFrame==pHdr[0].mxFrame && sync_flags ){ rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage)); if( rc==SQLITE_OK && sync_flags ){ rc = sqlite3OsSync(pWal->pDbFd, sync_flags); } } } /* Release the reader lock held while backfilling */ walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); } walIteratorFree(pIter); return rc; } /* ** Close a connection to a log file. */ |
︙ | ︙ | |||
1262 1263 1264 1265 1266 1267 1268 | ** the database. In this case checkpoint the database and unlink both ** the wal and wal-index files. ** ** The EXCLUSIVE lock is not released before returning. */ rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE); if( rc==SQLITE_OK ){ | > | | 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 | ** the database. In this case checkpoint the database and unlink both ** the wal and wal-index files. ** ** The EXCLUSIVE lock is not released before returning. */ rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE); if( rc==SQLITE_OK ){ pWal->exclusiveMode = 1; rc = walCheckpoint(pWal, sync_flags, nBuf, zBuf); if( rc==SQLITE_OK ){ isDelete = 1; } walIndexUnmap(pWal); } walIndexClose(pWal, isDelete); |
︙ | ︙ | |||
1286 1287 1288 1289 1290 1291 1292 | /* ** Try to read the wal-index header. Return 0 on success and 1 if ** there is a problem. ** ** The wal-index is in shared memory. Another thread or process might ** be writing the header at the same time this procedure is trying to ** read it, which might result in inconsistency. A dirty read is detected | > | | | | | 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 | /* ** Try to read the wal-index header. Return 0 on success and 1 if ** there is a problem. ** ** The wal-index is in shared memory. Another thread or process might ** be writing the header at the same time this procedure is trying to ** read it, which might result in inconsistency. A dirty read is detected ** by verifying that both copies of the header are the same and also by ** a checksum on the header. ** ** If and only if the read is consistent and the header is different from ** pWal->hdr, then pWal->hdr is updated to the content of the new header ** and *pChanged is set to 1. ** ** If the checksum cannot be verified return non-zero. If the header ** is read successfully and the checksum verified, return zero. */ int walIndexTryHdr(Wal *pWal, int *pChanged){ u32 aCksum[2]; /* Checksum on the header content */ WalIndexHdr h1, h2; /* Two copies of the header content */ WalIndexHdr *aHdr; /* Header in shared memory */ if( pWal->szWIndex < WALINDEX_HDR_SIZE ){ /* The wal-index is not large enough to hold the header, then assume ** header is invalid. */ return 1; } assert( pWal->pWiData ); /* Read the header. This might happen currently with a write to the ** same area of shared memory on a different CPU in a SMP, ** meaning it is possible that an inconsistent snapshot is read ** from the file. If this happens, return non-zero. ** ** There are two copies of the header at the beginning of the wal-index. ** When reading, read [0] first then [1]. Writes are in the reverse order. ** Memory barriers are used to prevent the compiler or the hardware from ** reordering the reads and writes. */ |
︙ | ︙ | |||
1363 1364 1365 1366 1367 1368 1369 | ** after this routine returns. ** ** If the wal-index header is successfully read, return SQLITE_OK. ** Otherwise an SQLite error code. */ static int walIndexReadHdr(Wal *pWal, int *pChanged){ int rc; /* Return code */ | | < | < < < < | < < < < < | | | < < < < < < < < < | | | | | > | > > > > | > | | < > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | < < < < < < | < | | < > | < < | < | < < < < < | > | | | | < | > | > > > > > > > > | > > | < | 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 | ** after this routine returns. ** ** If the wal-index header is successfully read, return SQLITE_OK. ** Otherwise an SQLite error code. */ static int walIndexReadHdr(Wal *pWal, int *pChanged){ int rc; /* Return code */ int badHdr; /* True if a header read failed */ assert( pChanged ); rc = walIndexMap(pWal, walMappingSize(1)); if( rc!=SQLITE_OK ){ return rc; } /* Try once to read the header straight out. This works most of the ** time. */ badHdr = walIndexTryHdr(pWal, pChanged); /* If the first attempt failed, it might have been due to a race ** with a writer. So get a WRITE lock and try again. */ assert( pWal->writeLock==0 ); if( badHdr ){ rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); if( rc==SQLITE_OK ){ pWal->writeLock = 1; badHdr = walIndexTryHdr(pWal, pChanged); if( badHdr ){ /* If the wal-index header is still malformed even while holding ** a WRITE lock, it can only mean that the header is corrupted and ** needs to be reconstructed. So run recovery to do exactly that. */ rc = walIndexRecover(pWal); } walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); pWal->writeLock = 0; }else if( rc!=SQLITE_BUSY ){ return rc; } } /* Make sure the mapping is large enough to cover the entire wal-index */ if( rc==SQLITE_OK ){ int szWanted = walMappingSize(pWal->hdr.mxFrame); if( pWal->szWIndex<szWanted ){ rc = walIndexMap(pWal, szWanted); } } return rc; } /* ** This is the value that walTryBeginRead returns when it needs to ** be retried. */ #define WAL_RETRY (-1) /* ** Attempt to start a read transaction. This might fail due to a race or ** other transient condition. When that happens, it returns WAL_RETRY to ** indicate to the caller that it is safe to retry immediately. ** ** On success return SQLITE_OK. On a permantent failure (such an ** I/O error or an SQLITE_BUSY because another process is running ** recovery) return a positive error code. ** ** On success, this routine obtains a read lock on ** WAL_READ_LOCK(pWal->readLock). The pWal->readLock integer is ** in the range 0 <= pWal->readLock < WAL_NREADER. If pWal->readLock==(-1) ** that means the Wal does not hold any read lock. The reader must not ** access any database page that is modified by a WAL frame up to and ** including frame number aReadMark[pWal->readLock]. The reader will ** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0 ** Or if pWal->readLock==0, then the reader will ignore the WAL ** completely and get all content directly from the database file. ** When the read transaction is completed, the caller must release the ** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1. ** ** This routine uses the nBackfill and aReadMark[] fields of the header ** to select a particular WAL_READ_LOCK() that strives to let the ** checkpoint process do as much work as possible. This routine might ** update values of the aReadMark[] array in the header, but if it does ** so it takes care to hold an exclusive lock on the corresponding ** WAL_READ_LOCK() while changing values. */ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal){ volatile WalIndexHdr *pHdr; /* Header of the wal-index */ volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ u32 mxReadMark; /* Largest aReadMark[] value */ int mxI; /* Index of largest aReadMark[] value */ int i; /* Loop counter */ int rc; /* Return code */ assert( pWal->readLock<0 ); /* No read lock held on entry */ if( !useWal ){ rc = walIndexReadHdr(pWal, pChanged); if( rc==SQLITE_BUSY ){ /* If there is not a recovery running in another thread or process ** then convert BUSY errors to WAL_RETRY. If recovery is known to ** be running, convert BUSY to BUSY_RECOVERY. There is a race here ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY ** would be technically correct. But the race is benign since with ** WAL_RETRY this routine will be called again and will probably be ** right on the second iteration. */ rc = walLockShared(pWal, WAL_RECOVER_LOCK); if( rc==SQLITE_OK ){ walUnlockShared(pWal, WAL_RECOVER_LOCK); rc = WAL_RETRY; }else if( rc==SQLITE_BUSY ){ rc = SQLITE_BUSY_RECOVERY; } } }else{ rc = walIndexMap(pWal, pWal->hdr.mxFrame); } if( rc!=SQLITE_OK ){ return rc; } pHdr = (volatile WalIndexHdr*)pWal->pWiData; pInfo = (volatile WalCkptInfo*)&pHdr[2]; assert( pInfo==walCkptInfo(pWal) ); if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){ /* The WAL has been completely backfilled (or it is empty). ** and can be safely ignored. */ rc = walLockShared(pWal, WAL_READ_LOCK(0)); if( rc==SQLITE_OK ){ if( pHdr->mxFrame!=pWal->hdr.mxFrame ){ walUnlockShared(pWal, WAL_READ_LOCK(0)); return WAL_RETRY; } pWal->readLock = 0; return SQLITE_OK; }else if( rc!=SQLITE_BUSY ){ return rc; } } /* If we get this far, it means that the reader will want to use ** the WAL to get at content from recent commits. The job now is ** to select one of the aReadMark[] entries that is closest to ** but not exceeding pWal->hdr.mxFrame and lock that entry. */ mxReadMark = 0; mxI = 0; for(i=1; i<WAL_NREADER; i++){ u32 thisMark = pInfo->aReadMark[i]; if( mxReadMark<thisMark ){ mxReadMark = thisMark; mxI = i; } } if( mxI==0 ){ /* If we get here, it means that all of the aReadMark[] entries between ** 1 and WAL_NREADER-1 are zero. Try to initialize aReadMark[1] to ** be mxFrame, then retry. */ rc = walLockExclusive(pWal, WAL_READ_LOCK(1), 1); if( rc==SQLITE_OK ){ pInfo->aReadMark[1] = pWal->hdr.mxFrame; walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1); } return WAL_RETRY; }else{ if( mxReadMark < pWal->hdr.mxFrame ){ for(i=0; i<WAL_NREADER; i++){ rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); if( rc==SQLITE_OK ){ pInfo->aReadMark[i] = pWal->hdr.mxFrame; mxReadMark = pWal->hdr.mxFrame; mxI = i; walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); break; } } } rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); if( rc ){ return rc==SQLITE_BUSY ? WAL_RETRY : rc; } if( pInfo->aReadMark[mxI]!=mxReadMark || pHdr[0].mxFrame!=pWal->hdr.mxFrame || (sqlite3OsShmBarrier(pWal->pDbFd), pHdr[1].mxFrame!=pWal->hdr.mxFrame) ){ walUnlockShared(pWal, WAL_READ_LOCK(mxI)); return WAL_RETRY; }else{ pWal->readLock = mxI; } } return rc; } /* ** Begin a read transaction on the database. ** ** This routine used to be called sqlite3OpenSnapshot() and with good reason: ** it takes a snapshot of the state of the WAL and wal-index for the current ** instant in time. The current thread will continue to use this snapshot. ** Other threads might append new content to the WAL and wal-index but ** that extra content is ignored by the current thread. ** ** If the database contents have changes since the previous read ** transaction, then *pChanged is set to 1 before returning. The ** Pager layer will use this to know that is cache is stale and ** needs to be flushed. */ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ int rc; /* Return code */ do{ rc = walTryBeginRead(pWal, pChanged, 0); }while( rc==WAL_RETRY ); walIndexUnmap(pWal); return rc; } /* ** Finish with a read transaction. All this does is release the ** read-lock. */ void sqlite3WalEndReadTransaction(Wal *pWal){ if( pWal->readLock>=0 ){ walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); pWal->readLock = -1; } } /* ** Read a page from the WAL, if it is present in the WAL and if the ** current read transaction is configured to use the WAL. ** ** The *pInWal is set to 1 if the requested page is in the WAL and ** has been loaded. Or *pInWal is set to 0 if the page was not in ** the WAL and needs to be read out of the database. */ int sqlite3WalRead( Wal *pWal, /* WAL handle */ Pgno pgno, /* Database page number to read data for */ int *pInWal, /* OUT: True if data is read from WAL */ int nOut, /* Size of buffer pOut in bytes */ u8 *pOut /* Buffer to write page data to */ ){ int rc; /* Return code */ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ /* This routine is only called from within a read transaction */ assert( pWal->readLock>=0 ); /* If the "last page" field of the wal-index header snapshot is 0, then ** no data will be read from the wal under any circumstances. Return early ** in this case to avoid the walIndexMap/Unmap overhead. Likewise, if ** pWal->readLock==0, then the WAL is ignored by the reader so ** return early, as if the WAL were empty. */ if( iLast==0 || pWal->readLock==0 ){ *pInWal = 0; return SQLITE_OK; } /* Ensure the wal-index is mapped. */ rc = walIndexMap(pWal, walMappingSize(iLast)); if( rc!=SQLITE_OK ){ return rc; } /* Search the hash table or tables for an entry matching page number ** pgno. Each iteration of the following for() loop searches one |
︙ | ︙ | |||
1603 1604 1605 1606 1607 1608 1609 | } /* ** Set *pPgno to the size of the database file (or zero, if unknown). */ void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){ | | < > | | > > > > > > | > > | | > | > > | | > > > > > > | | | > | | > > > > > > > > > > > > > > | > | < | | > > > > > | > > | | < > > | > | > | < < | > > > > > > | | | | | | | 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 | } /* ** Set *pPgno to the size of the database file (or zero, if unknown). */ void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){ assert( pWal->readLock>=0 ); *pPgno = pWal->hdr.nPage; } /* ** This function starts a write transaction on the WAL. ** ** A read transaction must have already been started by a prior call ** to sqlite3WalBeginReadTransaction(). ** ** If another thread or process has written into the database since ** the read transaction was started, then it is not possible for this ** thread to write as doing so would cause a fork. So this routine ** returns SQLITE_BUSY in that case and no write transaction is started. ** ** There can only be a single writer active at a time. */ int sqlite3WalBeginWriteTransaction(Wal *pWal){ int rc; volatile WalCkptInfo *pInfo; /* Cannot start a write transaction without first holding a read ** transaction. */ assert( pWal->readLock>=0 ); /* Only one writer allowed at a time. Get the write lock. Return ** SQLITE_BUSY if unable. */ rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); if( rc ){ return rc; } /* If another connection has written to the database file since the ** time the read transaction on this connection was started, then ** the write is disallowed. */ rc = walIndexMap(pWal, pWal->hdr.mxFrame); if( rc ){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); return rc; } if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); walIndexUnmap(pWal); return SQLITE_BUSY; } pInfo = walCkptInfo(pWal); if( pWal->readLock==0 && pInfo->nBackfill==pWal->hdr.mxFrame ){ rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ /* If all readers are using WAL_READ_LOCK(0) (in other words if no ** readers are currently using the WAL) */ pWal->nCkpt++; pWal->hdr.mxFrame = 0; sqlite3Put4byte((u8*)pWal->hdr.aSalt, 1 + sqlite3Get4byte((u8*)pWal->hdr.aSalt)); sqlite3_randomness(4, &pWal->hdr.aSalt[1]); walIndexWriteHdr(pWal); pInfo->nBackfill = 0; memset(&pInfo->aReadMark[1], 0, sizeof(pInfo->aReadMark)-sizeof(u32)); rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)pWal->szPage)); walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); } walUnlockShared(pWal, WAL_READ_LOCK(0)); do{ int notUsed; rc = walTryBeginRead(pWal, ¬Used, 1); }while( rc==WAL_RETRY ); } return rc; } /* ** End a write transaction. The commit has already been done. This ** routine merely releases the lock. */ int sqlite3WalEndWriteTransaction(Wal *pWal){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); return SQLITE_OK; } /* ** If any data has been written (but not committed) to the log file, this ** function moves the write-pointer back to the start of the transaction. ** ** Additionally, the callback function is invoked for each frame written ** to the WAL since the start of the transaction. If the callback returns ** other than SQLITE_OK, it is not invoked again and the error code is ** returned to the caller. ** ** Otherwise, if the callback function does not return an error, this ** function returns SQLITE_OK. */ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ int rc = SQLITE_OK; if( pWal->writeLock ){ int unused; Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; assert( pWal->pWiData==0 ); rc = walIndexReadHdr(pWal, &unused); if( rc==SQLITE_OK ){ rc = walIndexMap(pWal, walMappingSize(iMax)); } if( rc==SQLITE_OK ){ for(iFrame=pWal->hdr.mxFrame+1; rc==SQLITE_OK && iFrame<=iMax; iFrame++){ assert( pWal->writeLock ); rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]); } walCleanupHash(pWal); } walIndexUnmap(pWal); } return rc; } /* ** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 ** values. This function populates the array with values required to ** "rollback" the write position of the WAL handle back to the current ** point in the event of a savepoint rollback (via WalSavepointUndo()). */ void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ assert( pWal->writeLock ); aWalData[0] = pWal->hdr.mxFrame; aWalData[1] = pWal->hdr.aFrameCksum[0]; aWalData[2] = pWal->hdr.aFrameCksum[1]; } /* ** Move the write position of the WAL back to the point identified by ** the values in the aWalData[] array. aWalData must point to an array ** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated ** by a call to WalSavepoint(). */ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ int rc = SQLITE_OK; assert( pWal->writeLock ); assert( aWalData[0]<=pWal->hdr.mxFrame ); if( aWalData[0]<pWal->hdr.mxFrame ){ rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); pWal->hdr.mxFrame = aWalData[0]; pWal->hdr.aFrameCksum[0] = aWalData[1]; pWal->hdr.aFrameCksum[1] = aWalData[2]; |
︙ | ︙ | |||
1735 1736 1737 1738 1739 1740 1741 | u32 iFrame; /* Next frame address */ u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ PgHdr *p; /* Iterator to run through pList with. */ PgHdr *pLast = 0; /* Last frame in list */ int nLast = 0; /* Number of extra copies of last page */ assert( pList ); | | | 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 | u32 iFrame; /* Next frame address */ u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ PgHdr *p; /* Iterator to run through pList with. */ PgHdr *pLast = 0; /* Last frame in list */ int nLast = 0; /* Number of extra copies of last page */ assert( pList ); assert( pWal->writeLock ); assert( pWal->pWiData==0 ); /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. See comments at the top of ** this source file for a description of the WAL header format. */ iFrame = pWal->hdr.mxFrame; |
︙ | ︙ | |||
1848 1849 1850 1851 1852 1853 1854 | } walIndexUnmap(pWal); return rc; } /* | | > | | < < | < < < < < < < < < < < < | < < < | > > | | | 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 | } walIndexUnmap(pWal); return rc; } /* ** This routine is called to implement sqlite3_wal_checkpoint() and ** related interfaces. ** ** Obtain a CHECKPOINT lock and then backfill as much information as ** we can from WAL into the database. */ int sqlite3WalCheckpoint( Wal *pWal, /* Wal connection */ int sync_flags, /* Flags to sync db file with (or 0) */ int nBuf, /* Size of temporary buffer */ u8 *zBuf /* Temporary buffer to use */ ){ int rc; /* Return code */ int isChanged = 0; /* True if a new wal-index header is loaded */ assert( pWal->pWiData==0 ); rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); if( rc ){ /* Usually this is SQLITE_BUSY meaning that another thread or process ** is already running a checkpoint, or maybe a recovery. But it might ** also be SQLITE_IOERR. */ return rc; } /* Copy data from the log to the database file. */ rc = walIndexReadHdr(pWal, &isChanged); if( rc==SQLITE_OK ){ rc = walCheckpoint(pWal, sync_flags, nBuf, zBuf); } if( isChanged ){ /* If a new wal-index header was loaded before the checkpoint was ** performed, then the pager-cache associated with pWal is now ** out of date. So zero the cached wal-index header to ensure that ** next time the pager opens a snapshot on this database it knows that ** the cache needs to be reset. */ memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); } /* Release the locks. */ walIndexUnmap(pWal); walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); return rc; } /* Return the value to pass to a sqlite3_wal_hook callback, the ** number of frames in the WAL at the point of the last commit since ** sqlite3WalCallback() was called. If no commits have occurred since ** the last call, then return 0. |
︙ | ︙ | |||
1930 1931 1932 1933 1934 1935 1936 | ** This function is called to set or query the exclusive-mode flag ** associated with the WAL connection passed as the first argument. The ** exclusive-mode flag should be set to indicate that the caller is ** holding an EXCLUSIVE lock on the database file (it does this in ** locking_mode=exclusive mode). If the EXCLUSIVE lock is to be dropped, ** the flag set by this function should be cleared before doing so. ** | < < < < | 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 | ** This function is called to set or query the exclusive-mode flag ** associated with the WAL connection passed as the first argument. The ** exclusive-mode flag should be set to indicate that the caller is ** holding an EXCLUSIVE lock on the database file (it does this in ** locking_mode=exclusive mode). If the EXCLUSIVE lock is to be dropped, ** the flag set by this function should be cleared before doing so. ** ** When the flag is set, this module does not call the VFS xShmLock() ** method to obtain any locks on the wal-index (as it assumes it ** has exclusive access to the wal and wal-index files anyhow). It ** continues to hold (and does not drop) the existing READ lock on ** the wal-index. ** ** To set or clear the flag, the "op" parameter is passed 1 or 0, ** respectively. To query the flag, pass -1. In all cases, the value ** returned is the value of the exclusive-mode flag (after its value ** has been modified, if applicable). */ int sqlite3WalExclusiveMode(Wal *pWal, int op){ if( op>=0 ){ pWal->exclusiveMode = (u8)op; } return pWal->exclusiveMode; } #endif /* #ifndef SQLITE_OMIT_WAL */ |
Changes to src/wal.h.
︙ | ︙ | |||
16 17 18 19 20 21 22 | #ifndef _WAL_H_ #define _WAL_H_ #include "sqliteInt.h" #ifdef SQLITE_OMIT_WAL | | | | | | | > | | | | | | | > | | < < | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | #ifndef _WAL_H_ #define _WAL_H_ #include "sqliteInt.h" #ifdef SQLITE_OMIT_WAL # define sqlite3WalOpen(x,y,z) 0 # define sqlite3WalClose(w,x,y,z) 0 # define sqlite3WalBeginReadTransaction(y,z) 0 # define sqlite3WalEndReadTransaction(z) # define sqlite3WalRead(v,w,x,y,z) 0 # define sqlite3WalDbsize(y,z) # define sqlite3WalBeginWriteTransaction(y) 0 # define sqlite3WalEndWRiteTransaction(x) 0 # define sqlite3WalUndo(x,y,z) 0 # define sqlite3WalSavepoint(y,z) # define sqlite3WalSavepointUndo(y,z) 0 # define sqlite3WalFrames(u,v,w,x,y,z) 0 # define sqlite3WalCheckpoint(u,v,w,x) 0 # define sqlite3WalCallback(z) 0 #else #define WAL_SAVEPOINT_NDATA 3 /* Connection to a write-ahead log (WAL) file. ** There is one object of this type for each pager. */ typedef struct Wal Wal; /* Open and close a connection to a write-ahead log. */ int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *zName, Wal**); int sqlite3WalClose(Wal *pWal, int sync_flags, int, u8 *); /* Used by readers to open (lock) and close (unlock) a snapshot. A ** snapshot is like a read-transaction. It is the state of the database ** at an instant in time. sqlite3WalOpenSnapshot gets a read lock and ** preserves the current state even if the other threads or processes ** write to or checkpoint the WAL. sqlite3WalCloseSnapshot() closes the ** transaction and releases the lock. */ int sqlite3WalBeginReadTransaction(Wal *pWal, int *); void sqlite3WalEndReadTransaction(Wal *pWal); /* Read a page from the write-ahead log, if it is present. */ int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut); /* Return the size of the database as it existed at the beginning ** of the snapshot */ void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno); /* Obtain or release the WRITER lock. */ int sqlite3WalBeginWriteTransaction(Wal *pWal); int sqlite3WalEndWriteTransaction(Wal *pWal); /* Undo any frames written (but not committed) to the log */ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx); /* Return an integer that records the current (uncommitted) write ** position in the WAL */ void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData); /* Move the write position of the WAL back to iFrame. Called in ** response to a ROLLBACK TO command. */ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData); /* Write a frame or frames to the log. */ int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int); /* Copy pages from the log to the database file */ int sqlite3WalCheckpoint( Wal *pWal, /* Write-ahead log connection */ int sync_flags, /* Flags to sync db file with (or 0) */ int nBuf, /* Size of buffer nBuf */ u8 *zBuf /* Temporary buffer to use */ ); /* Return the value to pass to a sqlite3_wal_hook callback, the ** number of frames in the WAL at the point of the last commit since ** sqlite3WalCallback() was called. If no commits have occurred since ** the last call, then return 0. */ |
︙ | ︙ |