Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Changes to make WAL more robust against SHM locking failures and OOM errors. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
ebf4041383c3cdddb5861960359abd20 |
User & Date: | drh 2010-06-02 14:45:51.000 |
Context
2010-06-02
| ||
15:43 | When walTryBeginRead() encounters an I/O error trying to set a lock (as opposed to SQLITE_BUSY) be sure to propagate that error back up the call stack. (check-in: aa2c2b67a7 user: drh tags: trunk) | |
14:45 | Changes to make WAL more robust against SHM locking failures and OOM errors. (check-in: ebf4041383 user: drh tags: trunk) | |
14:43 | In the rowhash.test, make sure global variables are cleared prior to use. (check-in: 28efe0a404 user: drh tags: trunk) | |
Changes
Changes to src/os_unix.c.
︙ | ︙ | |||
3358 3359 3360 3361 3362 3363 3364 | /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. */ rc = SQLITE_OK; if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ if( ftruncate(pShmNode->h, 0) ){ | | | 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 | /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. */ rc = SQLITE_OK; if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ if( ftruncate(pShmNode->h, 0) ){ rc = SQLITE_IOERR_SHMOPEN; } } if( rc==SQLITE_OK ){ rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1); } if( rc ) goto shm_open_err; } |
︙ | ︙ | |||
3465 3466 3467 3468 3469 3470 3471 | while( 1 ){ if( fstat(pShmNode->h, &sStat)==0 ){ *pNewSize = (int)sStat.st_size; if( reqSize<=(int)sStat.st_size ) break; }else{ *pNewSize = 0; | | | 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 | while( 1 ){ if( fstat(pShmNode->h, &sStat)==0 ){ *pNewSize = (int)sStat.st_size; if( reqSize<=(int)sStat.st_size ) break; }else{ *pNewSize = 0; rc = SQLITE_IOERR_SHMSIZE; break; } rc = ftruncate(pShmNode->h, reqSize); reqSize = -1; } return rc; } |
︙ | ︙ |
Changes to src/sqlite.h.in.
︙ | ︙ | |||
384 385 386 387 388 389 390 | #define SQLITE_READONLY 8 /* Attempt to write a readonly database */ #define SQLITE_INTERRUPT 9 /* Operation terminated by sqlite3_interrupt()*/ #define SQLITE_IOERR 10 /* Some kind of disk I/O error occurred */ #define SQLITE_CORRUPT 11 /* The database disk image is malformed */ #define SQLITE_NOTFOUND 12 /* NOT USED. Table or record not found */ #define SQLITE_FULL 13 /* Insertion failed because database is full */ #define SQLITE_CANTOPEN 14 /* Unable to open the database file */ | | | 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 | #define SQLITE_READONLY 8 /* Attempt to write a readonly database */ #define SQLITE_INTERRUPT 9 /* Operation terminated by sqlite3_interrupt()*/ #define SQLITE_IOERR 10 /* Some kind of disk I/O error occurred */ #define SQLITE_CORRUPT 11 /* The database disk image is malformed */ #define SQLITE_NOTFOUND 12 /* NOT USED. Table or record not found */ #define SQLITE_FULL 13 /* Insertion failed because database is full */ #define SQLITE_CANTOPEN 14 /* Unable to open the database file */ #define SQLITE_PROTOCOL 15 /* Database lock protocol error */ #define SQLITE_EMPTY 16 /* Database is empty */ #define SQLITE_SCHEMA 17 /* The database schema changed */ #define SQLITE_TOOBIG 18 /* String or BLOB exceeds size limit */ #define SQLITE_CONSTRAINT 19 /* Abort due to constraint violation */ #define SQLITE_MISMATCH 20 /* Data type mismatch */ #define SQLITE_MISUSE 21 /* Library used incorrectly */ #define SQLITE_NOLFS 22 /* Uses OS features not supported on host */ |
︙ | ︙ | |||
440 441 442 443 444 445 446 447 448 449 450 451 452 453 | #define SQLITE_IOERR_BLOCKED (SQLITE_IOERR | (11<<8)) #define SQLITE_IOERR_NOMEM (SQLITE_IOERR | (12<<8)) #define SQLITE_IOERR_ACCESS (SQLITE_IOERR | (13<<8)) #define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8)) #define SQLITE_IOERR_LOCK (SQLITE_IOERR | (15<<8)) #define SQLITE_IOERR_CLOSE (SQLITE_IOERR | (16<<8)) #define SQLITE_IOERR_DIR_CLOSE (SQLITE_IOERR | (17<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) /* ** CAPI3REF: Flags For File Open Operations ** | > > > | 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 | #define SQLITE_IOERR_BLOCKED (SQLITE_IOERR | (11<<8)) #define SQLITE_IOERR_NOMEM (SQLITE_IOERR | (12<<8)) #define SQLITE_IOERR_ACCESS (SQLITE_IOERR | (13<<8)) #define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8)) #define SQLITE_IOERR_LOCK (SQLITE_IOERR | (15<<8)) #define SQLITE_IOERR_CLOSE (SQLITE_IOERR | (16<<8)) #define SQLITE_IOERR_DIR_CLOSE (SQLITE_IOERR | (17<<8)) #define SQLITE_IOERR_SHMOPEN (SQLITE_IOERR | (18<<8)) #define SQLITE_IOERR_SHMSIZE (SQLITE_IOERR | (19<<8)) #define SQLITE_IOERR_SHMLOCK (SQLITE_IOERR | (20<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) /* ** CAPI3REF: Flags For File Open Operations ** |
︙ | ︙ |
Changes to src/wal.c.
︙ | ︙ | |||
373 374 375 376 377 378 379 380 381 382 383 384 385 386 | u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 isWIndexOpen; /* True if ShmOpen() called on pDbFd */ u8 writeLock; /* True if in a write transaction */ u8 ckptLock; /* True if holding a checkpoint lock */ WalIndexHdr hdr; /* Wal-index header for current transaction */ char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ }; /* ** Return a pointer to the WalCkptInfo structure in the wal-index. */ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ assert( pWal->pWiData!=0 ); | > > > | 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 | u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 isWIndexOpen; /* True if ShmOpen() called on pDbFd */ u8 writeLock; /* True if in a write transaction */ u8 ckptLock; /* True if holding a checkpoint lock */ WalIndexHdr hdr; /* Wal-index header for current transaction */ char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ #ifdef SQLITE_DEBUG u8 lockError; /* True if a locking error has occurred */ #endif }; /* ** Return a pointer to the WalCkptInfo structure in the wal-index. */ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ assert( pWal->pWiData!=0 ); |
︙ | ︙ | |||
625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 | static int walLockShared(Wal *pWal, int lockIdx){ int rc; if( pWal->exclusiveMode ) return SQLITE_OK; rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, SQLITE_SHM_LOCK | SQLITE_SHM_SHARED); WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal, walLockName(lockIdx), rc ? "failed" : "ok")); return rc; } static void walUnlockShared(Wal *pWal, int lockIdx){ if( pWal->exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx))); } static int walLockExclusive(Wal *pWal, int lockIdx, int n){ int rc; if( pWal->exclusiveMode ) return SQLITE_OK; rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE); WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal, walLockName(lockIdx), n, rc ? "failed" : "ok")); return rc; } static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ if( pWal->exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE); WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal, | > > | 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 | static int walLockShared(Wal *pWal, int lockIdx){ int rc; if( pWal->exclusiveMode ) return SQLITE_OK; rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, SQLITE_SHM_LOCK | SQLITE_SHM_SHARED); WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal, walLockName(lockIdx), rc ? "failed" : "ok")); VVA_ONLY( pWal->lockError = (rc!=SQLITE_OK && rc!=SQLITE_BUSY); ) return rc; } static void walUnlockShared(Wal *pWal, int lockIdx){ if( pWal->exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx))); } static int walLockExclusive(Wal *pWal, int lockIdx, int n){ int rc; if( pWal->exclusiveMode ) return SQLITE_OK; rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE); WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal, walLockName(lockIdx), n, rc ? "failed" : "ok")); VVA_ONLY( pWal->lockError = (rc!=SQLITE_OK && rc!=SQLITE_BUSY); ) return rc; } static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ if( pWal->exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE); WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal, |
︙ | ︙ | |||
1689 1690 1691 1692 1693 1694 1695 | ** This routine uses the nBackfill and aReadMark[] fields of the header ** to select a particular WAL_READ_LOCK() that strives to let the ** checkpoint process do as much work as possible. This routine might ** update values of the aReadMark[] array in the header, but if it does ** so it takes care to hold an exclusive lock on the corresponding ** WAL_READ_LOCK() while changing values. */ | | > > > > > > | 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 | ** This routine uses the nBackfill and aReadMark[] fields of the header ** to select a particular WAL_READ_LOCK() that strives to let the ** checkpoint process do as much work as possible. This routine might ** update values of the aReadMark[] array in the header, but if it does ** so it takes care to hold an exclusive lock on the corresponding ** WAL_READ_LOCK() while changing values. */ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ volatile WalIndexHdr *pHdr; /* Header of the wal-index */ volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ u32 mxReadMark; /* Largest aReadMark[] value */ int mxI; /* Index of largest aReadMark[] value */ int i; /* Loop counter */ int rc; /* Return code */ assert( pWal->readLock<0 ); /* Not currently locked */ /* Take steps to avoid spinning forever if there is a protocol error. */ if( cnt>5 ){ if( cnt>100 ) return SQLITE_PROTOCOL; sqlite3OsSleep(pWal->pVfs, 1); } if( !useWal ){ rc = walIndexReadHdr(pWal, pChanged); if( rc==SQLITE_BUSY ){ /* If there is not a recovery running in another thread or process ** then convert BUSY errors to WAL_RETRY. If recovery is known to ** be running, convert BUSY to BUSY_RECOVERY. There is a race here |
︙ | ︙ | |||
1816 1817 1818 1819 1820 1821 1822 1823 1824 | ** If the database contents have changes since the previous read ** transaction, then *pChanged is set to 1 before returning. The ** Pager layer will use this to know that is cache is stale and ** needs to be flushed. */ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ int rc; /* Return code */ do{ | > | | 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 | ** If the database contents have changes since the previous read ** transaction, then *pChanged is set to 1 before returning. The ** Pager layer will use this to know that is cache is stale and ** needs to be flushed. */ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ int rc; /* Return code */ int cnt = 0; /* Number of TryBeginRead attempts */ do{ rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); }while( rc==WAL_RETRY ); walIndexUnmap(pWal); return rc; } /* ** Finish with a read transaction. All this does is release the |
︙ | ︙ | |||
1855 1856 1857 1858 1859 1860 1861 | u8 *pOut /* Buffer to write page data to */ ){ int rc; /* Return code */ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ | | | | 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 | u8 *pOut /* Buffer to write page data to */ ){ int rc; /* Return code */ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ /* This routine is only be called from within a read transaction. */ assert( pWal->readLock>=0 || pWal->lockError ); /* If the "last page" field of the wal-index header snapshot is 0, then ** no data will be read from the wal under any circumstances. Return early ** in this case to avoid the walIndexMap/Unmap overhead. Likewise, if ** pWal->readLock==0, then the WAL is ignored by the reader so ** return early, as if the WAL were empty. */ |
︙ | ︙ | |||
1978 1979 1980 1981 1982 1983 1984 | } /* ** Set *pPgno to the size of the database file (or zero, if unknown). */ void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){ | | | 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 | } /* ** Set *pPgno to the size of the database file (or zero, if unknown). */ void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){ assert( pWal->readLock>=0 || pWal->lockError ); *pPgno = pWal->hdr.nPage; } /* ** This function starts a write transaction on the WAL. ** |
︙ | ︙ | |||
2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 | ** ** SQLITE_OK is returned if no error is encountered (regardless of whether ** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned ** if some error */ static int walRestartLog(Wal *pWal){ int rc = SQLITE_OK; if( pWal->readLock==0 && SQLITE_OK==(rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame))) ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); | > > | 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 | ** ** SQLITE_OK is returned if no error is encountered (regardless of whether ** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned ** if some error */ static int walRestartLog(Wal *pWal){ int rc = SQLITE_OK; int cnt; if( pWal->readLock==0 && SQLITE_OK==(rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame))) ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); |
︙ | ︙ | |||
2159 2160 2161 2162 2163 2164 2165 2166 2167 | walIndexWriteHdr(pWal); memset((void*)pInfo, 0, sizeof(*pInfo)); walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); } } walUnlockShared(pWal, WAL_READ_LOCK(0)); pWal->readLock = -1; do{ int notUsed; | > | | 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 | walIndexWriteHdr(pWal); memset((void*)pInfo, 0, sizeof(*pInfo)); walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); } } walUnlockShared(pWal, WAL_READ_LOCK(0)); pWal->readLock = -1; cnt = 0; do{ int notUsed; rc = walTryBeginRead(pWal, ¬Used, 1, ++cnt); }while( rc==WAL_RETRY ); /* Unmap the wal-index before returning. Otherwise the VFS layer may ** hold a mutex for the duration of the IO performed by WalFrames(). */ walIndexUnmap(pWal); } |
︙ | ︙ | |||
2414 2415 2416 2417 2418 2419 2420 | ** If op is negative, then do a dry-run of the op==1 case but do ** not actually change anything. The pager uses this to see if it ** should acquire the database exclusive lock prior to invoking ** the op==1 case. */ int sqlite3WalExclusiveMode(Wal *pWal, int op){ int rc; | | > > > | > | < < > | 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 | ** If op is negative, then do a dry-run of the op==1 case but do ** not actually change anything. The pager uses this to see if it ** should acquire the database exclusive lock prior to invoking ** the op==1 case. */ int sqlite3WalExclusiveMode(Wal *pWal, int op){ int rc; assert( pWal->writeLock==0 ); /* pWal->readLock is usually set, but might be -1 if there was a prior OOM */ assert( pWal->readLock>=0 || pWal->lockError ); if( op==0 ){ if( pWal->exclusiveMode ){ pWal->exclusiveMode = 0; if( pWal->readLock>=0 && walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){ pWal->exclusiveMode = 1; } rc = pWal->exclusiveMode==0; }else{ /* Already in locking_mode=NORMAL */ rc = 0; } }else if( op>0 ){ assert( pWal->exclusiveMode==0 ); assert( pWal->readLock>=0 ); walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); pWal->exclusiveMode = 1; rc = 1; }else{ rc = pWal->exclusiveMode==0; } return rc; } #endif /* #ifndef SQLITE_OMIT_WAL */ |