/ Check-in [39420473]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Remove a condition from sqlite3WalRead() that is unreachable as of the changes to clear entries out of the wal-index hash tables on transaction or savepoint rollback.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 394204735a842b04b677cca20485b1578e475d4c
User & Date: dan 2010-06-05 18:12:24
Context
2010-06-05
18:34
Clarify the purpose of a test for a race-condition in sqlite3WalBeginReadTransaction(). check-in: c041c6a9 user: dan tags: trunk
18:12
Remove a condition from sqlite3WalRead() that is unreachable as of the changes to clear entries out of the wal-index hash tables on transaction or savepoint rollback. check-in: 39420473 user: dan tags: trunk
14:42
Mark a condition in wal.c as ALWAYS(). check-in: 3fe0cc78 user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/wal.c.

1742
1743
1744
1745
1746
1747
1748
1749













1750
1751
1752
1753
1754
1755
1756
....
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956

1957
1958
1959
1960
1961
1962
1963
  assert( pInfo==walCkptInfo(pWal) );
  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
    /* The WAL has been completely backfilled (or it is empty).
    ** and can be safely ignored.
    */
    rc = walLockShared(pWal, WAL_READ_LOCK(0));
    if( rc==SQLITE_OK ){
      if( pHdr->mxFrame!=pWal->hdr.mxFrame ){













        walUnlockShared(pWal, WAL_READ_LOCK(0));
        return WAL_RETRY;
      }
      pWal->readLock = 0;
      return SQLITE_OK;
    }else if( rc!=SQLITE_BUSY ){
      return rc;
................................................................................
  **
  **   (aPgno[iFrame]==pgno): 
  **     This condition filters out normal hash-table collisions.
  **
  **   (iFrame<=iLast): 
  **     This condition filters out entries that were added to the hash
  **     table after the current read-transaction had started.
  **
  **   (iFrame>iRead): 
  **     This filters out a dangerous class of garbage data. The 
  **     garbage hash slot may refer to a frame with the correct page 
  **     number, but not the most recent version of the frame. For
  **     example, if at the start of the read-transaction the WAL
  **     contains three copies of the desired page in frames 2, 3 and 4,
  **     the hash table may contain the following:
  **
  **       { ..., 2, 3, 4, 99, 99, ..... }
  **
  **     The correct answer is to read data from frame 4. But a 
  **     dirty-read may potentially cause the hash-table to appear as 
  **     follows to the reader:
  **
  **       { ..., 2, 3, 4, 3, 99, ..... }
  **
  **     Without this part of the if(...) clause, the reader might
  **     incorrectly read data from frame 3 instead of 4. This would be
  **     an error.
  **
  ** It is not actually clear to the developers that such a dirty-read
  ** can occur. But if it does, it should not cause any problems.
  */
  for(iHash=iLast; iHash>0 && iRead==0; iHash-=HASHTABLE_NPAGE){
    volatile HASHTABLE_DATATYPE *aHash;  /* Pointer to hash table */
    volatile u32 *aPgno;                 /* Pointer to array of page numbers */
    u32 iZero;                    /* Frame number corresponding to aPgno[0] */
    int iKey;                     /* Hash slot index */
    int mxHash;                   /* upper bound on aHash[] values */

    walHashFind(pWal, iHash, &aHash, &aPgno, &iZero);
    mxHash = iLast - iZero;
    if( mxHash > HASHTABLE_NPAGE )  mxHash = HASHTABLE_NPAGE;
    for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
      u32 iFrame = aHash[iKey] + iZero;
      if( iFrame<=iLast && aPgno[iFrame]==pgno && iFrame>iRead ){

        iRead = iFrame;
      }
    }
  }
  assert( iRead==0 || pWal->pWiData[walIndexEntry(iRead)]==pgno );

#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT







|
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<













|
>







1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
....
1926
1927
1928
1929
1930
1931
1932























1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
  assert( pInfo==walCkptInfo(pWal) );
  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
    /* The WAL has been completely backfilled (or it is empty).
    ** and can be safely ignored.
    */
    rc = walLockShared(pWal, WAL_READ_LOCK(0));
    if( rc==SQLITE_OK ){
      if( memcmp(pHdr, &pWal->hdr, sizeof(WalIndexHdr)) ){
        /* It is not safe to allow the reader to continue here if frames
        ** may have been appended to the log before READ_LOCK(0) was obtained.
        ** When holding READ_LOCK(0), the reader ignores the entire log file,
        ** which implies that the database file contains a trustworthy
        ** snapshoT. Since holding READ_LOCK(0) prevents a checkpoint from
        ** happening, this is usually correct.
        **
        ** However, if frames have been appended to the log (or if the log 
        ** is wrapped and written for that matter) before the READ_LOCK(0)
        ** is obtained, that is not necessarily true. A checkpointer may
        ** have started to backfill the appended frames but crashed before
        ** it finished. Leaving a corrupt image in the database file.
        */
        walUnlockShared(pWal, WAL_READ_LOCK(0));
        return WAL_RETRY;
      }
      pWal->readLock = 0;
      return SQLITE_OK;
    }else if( rc!=SQLITE_BUSY ){
      return rc;
................................................................................
  **
  **   (aPgno[iFrame]==pgno): 
  **     This condition filters out normal hash-table collisions.
  **
  **   (iFrame<=iLast): 
  **     This condition filters out entries that were added to the hash
  **     table after the current read-transaction had started.























  */
  for(iHash=iLast; iHash>0 && iRead==0; iHash-=HASHTABLE_NPAGE){
    volatile HASHTABLE_DATATYPE *aHash;  /* Pointer to hash table */
    volatile u32 *aPgno;                 /* Pointer to array of page numbers */
    u32 iZero;                    /* Frame number corresponding to aPgno[0] */
    int iKey;                     /* Hash slot index */
    int mxHash;                   /* upper bound on aHash[] values */

    walHashFind(pWal, iHash, &aHash, &aPgno, &iZero);
    mxHash = iLast - iZero;
    if( mxHash > HASHTABLE_NPAGE )  mxHash = HASHTABLE_NPAGE;
    for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
      u32 iFrame = aHash[iKey] + iZero;
      if( iFrame<=iLast && aPgno[iFrame]==pgno ){
        assert( iFrame>iRead );
        iRead = iFrame;
      }
    }
  }
  assert( iRead==0 || pWal->pWiData[walIndexEntry(iRead)]==pgno );

#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT

Changes to test/wal3.test.

414
415
416
417
418
419
420
421



422





423







































































































































424
425
  list $::testrc $::testmsg
} {1 {disk I/O error}}

db close
T delete

#-------------------------------------------------------------------------
# When opening a read-transaction on a database 



#













































































































































finish_test








|
>
>
>

>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
  list $::testrc $::testmsg
} {1 {disk I/O error}}

db close
T delete

#-------------------------------------------------------------------------
# When opening a read-transaction on a database, if the entire log has
# already been copied to the database file, the reader grabs a special
# kind of read lock (on aReadMark[0]). This test case tests the outcome
# of the following:
#
#   + The reader discovering that between the time when it determined 
#     that the log had been completely backfilled and the lock is obtained
#     that a writer has written to the log. In this case the reader should
#     acquire a different read-lock (not aReadMark[0]) and read the new
#     snapshot.
#
#   + The attempt to obtain the lock on aReadMark[0] fails with SQLITE_BUSY.
#     This can happen if a checkpoint is ongoing. In this case also simply
#     obtain a different read-lock.
#
catch {db close}
testvfs T -default 1
do_test wal3-6.1.1 {
  file delete -force test.db test.db-journal test.db wal
  sqlite3 db test.db
  execsql { PRAGMA journal_mode = WAL }
  execsql {
    CREATE TABLE t1(a, b);
    INSERT INTO t1 VALUES('o', 't');
    INSERT INTO t1 VALUES('t', 'f');
  }
} {}
do_test wal3-6.1.2 {
  sqlite3 db2 test.db
  sqlite3 db3 test.db
  execsql { BEGIN ; SELECT * FROM t1 } db3
} {o t t f}
do_test wal3-6.1.3 {
  execsql { PRAGMA wal_checkpoint } db2
} {}

# At this point the log file has been fully checkpointed. However, 
# connection [db3] holds a lock that prevents the log from being wrapped.
# Test case 3.6.1.4 has [db] attempt a read-lock on aReadMark[0]. But
# as it is obtaining the lock, [db2] appends to the log file.
#
T filter xShmLock
T script lock_callback
proc lock_callback {method file handle spec} {
  if {$spec == "3 1 lock shared"} {
    # This is the callback for [db] to obtain the read lock on aReadMark[0].
    # Disable future callbacks using [T filter {}] and write to the log
    # file using [db2]. [db3] is preventing [db2] from wrapping the log
    # here, so this is an append.
    T filter {}
    db2 eval { INSERT INTO t1 VALUES('f', 's') }
  }
  return SQLITE_OK
}
do_test wal3-6.1.4 {
  execsql {
    BEGIN;
    SELECT * FROM t1;
  }
} {o t t f f s}

# [db] should be left holding a read-lock on some slot other than 
# aReadMark[0]. Test this by demonstrating that the read-lock is preventing
# the log from being wrapped.
#
do_test wal3-6.1.5 {
  db3 eval COMMIT
  db2 eval { PRAGMA wal_checkpoint }
  set sz1 [file size test.db-wal]
  db2 eval { INSERT INTO t1 VALUES('s', 'e') }
  set sz2 [file size test.db-wal]
  expr {$sz2>$sz1}
} {1}

# Test that if [db2] had not interfered when [db] was trying to grab
# aReadMark[0], it would have been possible to wrap the log in 3.6.1.5.
#
do_test wal3-6.1.6 {
  execsql { COMMIT }
  execsql { PRAGMA wal_checkpoint } db2
  execsql {
    BEGIN;
    SELECT * FROM t1;
  }
} {o t t f f s s e}
do_test wal3-6.1.7 {
  db2 eval { PRAGMA wal_checkpoint }
  set sz1 [file size test.db-wal]
  db2 eval { INSERT INTO t1 VALUES('n', 't') }
  set sz2 [file size test.db-wal]
  expr {$sz2==$sz1}
} {1}

db3 close
db2 close
db close

do_test wal3-6.2.1 {
  file delete -force test.db test.db-journal test.db wal
  sqlite3 db test.db
  sqlite3 db2 test.db
  execsql { PRAGMA journal_mode = WAL }
  execsql {
    CREATE TABLE t1(a, b);
    INSERT INTO t1 VALUES('h', 'h');
    INSERT INTO t1 VALUES('l', 'b');
  }
} {}

T filter xShmLock
T script lock_callback
proc lock_callback {method file handle spec} {
  if {$spec == "3 1 unlock exclusive"} {
    T filter {}
    set ::R [db2 eval {
      BEGIN;
      SELECT * FROM t1;
    }]
  }
}
do_test wal3-6.2.2 {
  execsql { PRAGMA wal_checkpoint }
} {}
do_test wal3-6.2.3 {
  set ::R
} {h h l b}
do_test wal3-6.2.4 {
  set sz1 [file size test.db-wal]
  execsql { INSERT INTO t1 VALUES('b', 'c'); }
  set sz2 [file size test.db-wal]
  expr {$sz2 > $sz1}
} {1}
do_test wal3-6.2.5 {
  db2 eval { COMMIT }
  execsql { PRAGMA wal_checkpoint }
  set sz1 [file size test.db-wal]
  execsql { INSERT INTO t1 VALUES('n', 'o'); }
  set sz2 [file size test.db-wal]
  expr {$sz2 == $sz1}
} {1}
 
db2 close
db close
T delete


finish_test