Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | If a writer exits unexpectedly in the middle of a transaction, have the following writer remove any wal-index hash-table entries left by the interrupted transaction. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
ed77556adcdf7011b95b9969b360269f |
User & Date: | dan 2010-05-25 10:50:57.000 |
Context
2010-05-25
| ||
13:40 | Update header comments in wal.c to correctly describe the WAL file format. Update the locking region offsets in os_unix.c and os_win.c and add assert() statement to verify that the locking region offsets are correct. (check-in: 40030c0739 user: drh tags: trunk) | |
10:50 | If a writer exits unexpectedly in the middle of a transaction, have the following writer remove any wal-index hash-table entries left by the interrupted transaction. (check-in: ed77556adc user: dan tags: trunk) | |
02:24 | Remove unreachable code associated with WAL from the pager. (check-in: 54c1718e6d user: drh tags: trunk) | |
Changes
Changes to src/wal.c.
︙ | ︙ | |||
656 657 658 659 660 661 662 663 664 665 666 667 668 669 | assert( walIndexEntry(iZero+1)==(&aPgno[iZero+1] - pWal->pWiData) ); *paHash = aHash; *paPgno = aPgno; *piZero = iZero; } /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. */ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ int rc; /* Return code */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 | assert( walIndexEntry(iZero+1)==(&aPgno[iZero+1] - pWal->pWiData) ); *paHash = aHash; *paPgno = aPgno; *piZero = iZero; } /* ** Remove entries from the hash table that point to WAL slots greater ** than pWal->hdr.mxFrame. ** ** This function is called whenever pWal->hdr.mxFrame is decreased due ** to a rollback or savepoint. ** ** At most only the very last hash table needs to be updated. Any ** later hash tables will be automatically cleared when pWal->hdr.mxFrame ** advances to the point where those hash tables are actually needed. */ static void walCleanupHash(Wal *pWal){ volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table to clear */ volatile u32 *aPgno; /* Unused return from walHashFind() */ u32 iZero; /* frame == (aHash[x]+iZero) */ int iLimit; /* Zero values greater than this */ assert( pWal->lockState==SQLITE_SHM_WRITE ); walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero); iLimit = pWal->hdr.mxFrame - iZero; if( iLimit>0 ){ int nByte; /* Number of bytes to zero in aPgno[] */ int i; /* Used to iterate through aHash[] */ for(i=0; i<HASHTABLE_NSLOT; i++){ if( aHash[i]>iLimit ){ aHash[i] = 0; } } /* Zero the entries in the aPgno array that correspond to frames with ** frame numbers greater than pWal->hdr.mxFrame. */ nByte = sizeof(u32) * (HASHTABLE_NPAGE-iLimit); memset((void *)&aPgno[iZero+iLimit+1], 0, nByte); assert( &((u8 *)&aPgno[iZero+iLimit+1])[nByte]==(u8 *)aHash ); } #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* Verify that the every entry in the mapping region is still reachable ** via the hash table even after the cleanup. */ { int i; /* Loop counter */ int iKey; /* Hash key */ for(i=1; i<=iLimit; i++){ for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){ if( aHash[iKey]==i ) break; } assert( aHash[iKey]==i ); } } #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ } /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. */ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ int rc; /* Return code */ |
︙ | ︙ | |||
688 689 690 691 692 693 694 | volatile u32 *aPgno; /* Page number array */ volatile HASHTABLE_DATATYPE *aHash; /* Hash table */ int idx; /* Value to write to hash-table slot */ TESTONLY( int nCollide = 0; /* Number of hash collisions */ ) walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero); idx = iFrame - iZero; | > > | > > > > > > > > > > > > | 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 | volatile u32 *aPgno; /* Page number array */ volatile HASHTABLE_DATATYPE *aHash; /* Hash table */ int idx; /* Value to write to hash-table slot */ TESTONLY( int nCollide = 0; /* Number of hash collisions */ ) walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero); idx = iFrame - iZero; if( idx==1 ){ memset((void*)&aPgno[iZero+1], 0, HASHTABLE_NPAGE*sizeof(u32)); memset((void*)aHash, 0, HASHTABLE_NBYTE); } assert( idx <= HASHTABLE_NSLOT/2 + 1 ); if( aPgno[iFrame] ){ /* If the entry in aPgno[] is already set, then the previous writer ** must have exited unexpectedly in the middle of a transaction (after ** writing one or more dirty pages to the WAL to free up memory). ** Remove the remnants of that writers uncommitted transaction from ** the hash-table before writing any new entries. */ walCleanupHash(pWal); assert( !aPgno[iFrame] ); } aPgno[iFrame] = iPage; for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){ assert( nCollide++ < idx ); } aHash[iKey] = idx; #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT |
︙ | ︙ | |||
1548 1549 1550 1551 1552 1553 1554 | } }else if( pWal->lockState==SQLITE_SHM_WRITE ){ rc = walSetLock(pWal, SQLITE_SHM_READ); } return rc; } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 | } }else if( pWal->lockState==SQLITE_SHM_WRITE ){ rc = walSetLock(pWal, SQLITE_SHM_READ); } return rc; } /* ** If any data has been written (but not committed) to the log file, this ** function moves the write-pointer back to the start of the transaction. ** ** Additionally, the callback function is invoked for each frame written ** to the log since the start of the transaction. If the callback returns ** other than SQLITE_OK, it is not invoked again and the error code is |
︙ | ︙ | |||
1616 1617 1618 1619 1620 1621 1622 | int unused; Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; assert( pWal->pWiData==0 ); rc = walIndexReadHdr(pWal, &unused); if( rc==SQLITE_OK ){ | < > | 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 | int unused; Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; assert( pWal->pWiData==0 ); rc = walIndexReadHdr(pWal, &unused); if( rc==SQLITE_OK ){ for(iFrame=pWal->hdr.mxFrame+1; rc==SQLITE_OK && iFrame<=iMax; iFrame++){ assert( pWal->lockState==SQLITE_SHM_WRITE ); rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]); } walCleanupHash(pWal); } walIndexUnmap(pWal); } return rc; } /* |
︙ | ︙ |
Added test/walcrash2.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | # 2010 May 25 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # set testdir [file dirname $argv0] source $testdir/tester.tcl source $testdir/lock_common.tcl ifcapable !wal {finish_test ; return } proc wal_file_size {nFrame pgsz} { expr {24 + ($pgsz+24)*$nFrame} } #------------------------------------------------------------------------- # This test case demonstrates a flaw in the wal-index manipulation that # existed at one point: If a process crashes mid-transaction, it may have # already added some entries to one of the hash-tables in the wal-index. # If the transaction were to be explicitly rolled back at this point, the # hash-table entries would be removed as part of the rollback. However, # if the process crashes, the transaction is implicitly rolled back and # the rogue entries remain in the hash table. # # Normally, this causes no problem - readers can tell the difference # between committed and uncommitted entries in the hash table. However, # if it happens often enough that all slots in the hash-table become # non-zero, the next process that attempts to read or write the hash # table falls into an infinite loop. # # Even if run with an SQLite version affected by the bug, this test case # only goes into an infinite loop if SQLite is compiled without SQLITE_DEBUG # defined. If SQLITE_DEBUG is defined, the program is halted by a failing # assert() before entering the infinite loop. # # walcrash2-1.1: Create a database. Commit a transaction that adds 8 frames # to the WAL (and 8 entry to the first hash-table in the # wal-index). # # walcrash2-1.2: Have an external process open a transaction, add 8 entries # to the wal-index hash-table, then crash. Repeat this 1023 # times (so that the wal-index contains 8192 entries - all # slots are non-zero). # # walcrash2-1.3: Using a new database connection, attempt to query the # database. This should cause the process to go into the # infinite loop. # do_test walcrash2-1.1 { execsql { PRAGMA page_size = 1024; PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL; BEGIN; CREATE TABLE t1(x); CREATE TABLE t2(x); CREATE TABLE t3(x); CREATE TABLE t4(x); CREATE TABLE t5(x); CREATE TABLE t6(x); CREATE TABLE t7(x); COMMIT; } file size test.db-wal } [wal_file_size 8 1024] for {set nEntry 8} {$nEntry < 8192} {incr nEntry 8} { do_test walcrash2-1.2.[expr $nEntry/8] { set C [launch_testfixture] testfixture $C { sqlite3 db test.db db eval { PRAGMA cache_size = 15; BEGIN; INSERT INTO t1 VALUES(randomblob(900)); -- 1 row, 1 page INSERT INTO t1 SELECT * FROM t1; -- 2 rows, 3 pages INSERT INTO t1 SELECT * FROM t1; -- 4 rows, 5 pages INSERT INTO t1 SELECT * FROM t1; -- 8 rows, 9 pages INSERT INTO t1 SELECT * FROM t1; -- 16 rows, 17 pages INSERT INTO t1 SELECT * FROM t1 LIMIT 3; -- 20 rows, 20 pages } } close $C file size test.db-wal } [wal_file_size 16 1024] } do_test walcrash2-1.3 { sqlite3 db2 test.db execsql { SELECT count(*) FROM t1 } db2 } {0} catch { db2 close } finish_test |