Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Shift the meaning of aReadMark[] back so that +1 offset from mxFrame is removed. Add the new READMARK_NOT_USED value (0xffffffff) instead of zero to signal an aReadMark[] that is not in use. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
c576827d55c156572b76cf7063e9f253 |
User & Date: | drh 2010-06-09 14:45:13.000 |
Context
2010-06-09
| ||
15:47 | Fix for ticket [f973c7ac31]. (check-in: 6eb058dda8 user: dan tags: trunk) | |
14:45 | Shift the meaning of aReadMark[] back so that +1 offset from mxFrame is removed. Add the new READMARK_NOT_USED value (0xffffffff) instead of zero to signal an aReadMark[] that is not in use. (check-in: c576827d55 user: drh tags: trunk) | |
11:28 | Simpler fix for the race condition also fixed by [7c102c7b5f] (check-in: 3c2de82003 user: dan tags: trunk) | |
Changes
Changes to src/wal.c.
︙ | ︙ | |||
281 282 283 284 285 286 287 | ** WalIndexHdr.mxFrame. nBackfill can only be increased by threads ** holding the WAL_CKPT_LOCK lock (which includes a recovery thread). ** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from ** mxFrame back to zero when the WAL is reset. ** ** There is one entry in aReadMark[] for each reader lock. If a reader ** holds read-lock K, then the value in aReadMark[K] is no greater than | | > > > | | | 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 | ** WalIndexHdr.mxFrame. nBackfill can only be increased by threads ** holding the WAL_CKPT_LOCK lock (which includes a recovery thread). ** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from ** mxFrame back to zero when the WAL is reset. ** ** There is one entry in aReadMark[] for each reader lock. If a reader ** holds read-lock K, then the value in aReadMark[K] is no greater than ** the mxFrame for that reader. The value READMARK_NOT_USED (0xffffffff) ** for any aReadMark[] means that entry is unused. aReadMark[0] is ** a special case; its value is never used and it exists as a place-holder ** to avoid having to offset aReadMark[] indexs by one. Readers holding ** WAL_READ_LOCK(0) always ignore the entire WAL and read all content ** directly from the database. ** ** The value of aReadMark[K] may only be changed by a thread that ** is holding an exclusive lock on WAL_READ_LOCK(K). Thus, the value of ** aReadMark[K] cannot changed while there is a reader is using that mark ** since the reader will be holding a shared lock on WAL_READ_LOCK(K). ** ** The checkpointer may only transfer frames from WAL to database where |
︙ | ︙ | |||
316 317 318 319 320 321 322 323 324 325 326 327 328 329 | ** We assume that 32-bit loads are atomic and so no locks are needed in ** order to read from any aReadMark[] entries. */ struct WalCkptInfo { u32 nBackfill; /* Number of WAL frames backfilled into DB */ u32 aReadMark[WAL_NREADER]; /* Reader marks */ }; /* A block of WALINDEX_LOCK_RESERVED bytes beginning at ** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems ** only support mandatory file-locks, we do not read or write data ** from the region of the file on which locks are applied. */ | > | 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 | ** We assume that 32-bit loads are atomic and so no locks are needed in ** order to read from any aReadMark[] entries. */ struct WalCkptInfo { u32 nBackfill; /* Number of WAL frames backfilled into DB */ u32 aReadMark[WAL_NREADER]; /* Reader marks */ }; #define READMARK_NOT_USED 0xffffffff /* A block of WALINDEX_LOCK_RESERVED bytes beginning at ** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems ** only support mandatory file-locks, we do not read or write data ** from the region of the file on which locks are applied. */ |
︙ | ︙ | |||
1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 | } finished: if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){ rc = walIndexRemap(pWal, walMappingSize(1)); } if( rc==SQLITE_OK ){ pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; walIndexWriteHdr(pWal); | > > | | > > > | 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 | } finished: if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){ rc = walIndexRemap(pWal, walMappingSize(1)); } if( rc==SQLITE_OK ){ volatile WalCkptInfo *pInfo; int i; pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; walIndexWriteHdr(pWal); /* Reset the checkpoint-header. This is safe because this thread is ** currently holding locks that exclude all other readers, writers and ** checkpointers. */ pInfo = walCkptInfo(pWal); pInfo->nBackfill = 0; pInfo->aReadMark[0] = 0; for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; } recovery_error: WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok")); walUnlockExclusive(pWal, iLock, nLock); return rc; } |
︙ | ︙ | |||
1422 1423 1424 1425 1426 1427 1428 | */ mxSafeFrame = pWal->hdr.mxFrame; pHdr = (volatile WalIndexHdr*)pWal->pWiData; pInfo = (volatile WalCkptInfo*)&pHdr[2]; assert( pInfo==walCkptInfo(pWal) ); for(i=1; i<WAL_NREADER; i++){ u32 y = pInfo->aReadMark[i]; | | | | | 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 | */ mxSafeFrame = pWal->hdr.mxFrame; pHdr = (volatile WalIndexHdr*)pWal->pWiData; pInfo = (volatile WalCkptInfo*)&pHdr[2]; assert( pInfo==walCkptInfo(pWal) ); for(i=1; i<WAL_NREADER; i++){ u32 y = pInfo->aReadMark[i]; if( mxSafeFrame>=y ){ assert( y<=pWal->hdr.mxFrame ); rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); if( rc==SQLITE_OK ){ pInfo->aReadMark[i] = READMARK_NOT_USED; walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); }else if( rc==SQLITE_BUSY ){ mxSafeFrame = y; }else{ goto walcheckpoint_out; } } } if( pInfo->nBackfill<mxSafeFrame |
︙ | ︙ | |||
1776 1777 1778 1779 1780 1781 1782 | ** to select one of the aReadMark[] entries that is closest to ** but not exceeding pWal->hdr.mxFrame and lock that entry. */ mxReadMark = 0; mxI = 0; for(i=1; i<WAL_NREADER; i++){ u32 thisMark = pInfo->aReadMark[i]; | | > | | | 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 | ** to select one of the aReadMark[] entries that is closest to ** but not exceeding pWal->hdr.mxFrame and lock that entry. */ mxReadMark = 0; mxI = 0; for(i=1; i<WAL_NREADER; i++){ u32 thisMark = pInfo->aReadMark[i]; if( mxReadMark<=thisMark && thisMark<=pWal->hdr.mxFrame ){ assert( thisMark!=READMARK_NOT_USED ); mxReadMark = thisMark; mxI = i; } } if( mxI==0 ){ /* If we get here, it means that all of the aReadMark[] entries between ** 1 and WAL_NREADER-1 are zero. Try to initialize aReadMark[1] to ** be mxFrame, then retry. */ rc = walLockExclusive(pWal, WAL_READ_LOCK(1), 1); if( rc==SQLITE_OK ){ pInfo->aReadMark[1] = pWal->hdr.mxFrame; walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1); rc = WAL_RETRY; }else if( rc==SQLITE_BUSY ){ rc = WAL_RETRY; } return rc; }else{ if( mxReadMark < pWal->hdr.mxFrame ){ for(i=1; i<WAL_NREADER; i++){ rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); if( rc==SQLITE_OK ){ mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame; mxI = i; walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); break; }else if( rc!=SQLITE_BUSY ){ return rc; } } |
︙ | ︙ | |||
1841 1842 1843 1844 1845 1846 1847 | sqlite3OsShmBarrier(pWal->pDbFd); if( pInfo->aReadMark[mxI]!=mxReadMark || memcmp((void *)pHdr, &pWal->hdr, sizeof(WalIndexHdr)) ){ walUnlockShared(pWal, WAL_READ_LOCK(mxI)); return WAL_RETRY; }else{ | | | 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 | sqlite3OsShmBarrier(pWal->pDbFd); if( pInfo->aReadMark[mxI]!=mxReadMark || memcmp((void *)pHdr, &pWal->hdr, sizeof(WalIndexHdr)) ){ walUnlockShared(pWal, WAL_READ_LOCK(mxI)); return WAL_RETRY; }else{ assert( mxReadMark<=pWal->hdr.mxFrame ); pWal->readLock = mxI; } } return rc; } /* |
︙ | ︙ |
Changes to test/wal2.test.
︙ | ︙ | |||
163 164 165 166 167 168 169 170 171 172 173 174 175 176 | # After this, the header is corrupted again and the reader is allowed # to run recovery. This time, it sees an up-to-date snapshot of the # database file. # set WRITER [list 0 1 lock exclusive] set LOCKS [list \ {0 1 lock exclusive} {0 1 unlock exclusive} \ {4 1 lock shared} {4 1 unlock shared} \ ] do_test wal2-2.0 { testvfs tvfs tvfs script tvfs_cb proc tvfs_cb {method args} { | > | 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 | # After this, the header is corrupted again and the reader is allowed # to run recovery. This time, it sees an up-to-date snapshot of the # database file. # set WRITER [list 0 1 lock exclusive] set LOCKS [list \ {0 1 lock exclusive} {0 1 unlock exclusive} \ {4 1 lock exclusive} {4 1 unlock exclusive} \ {4 1 lock shared} {4 1 unlock shared} \ ] do_test wal2-2.0 { testvfs tvfs tvfs script tvfs_cb proc tvfs_cb {method args} { |
︙ | ︙ |