/ Check-in [dbe569a0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a problem whereby following an IO error in CommitPhaseTwo() of a multi-file transaction the b-tree layer could be left in TRANS_WRITE state, causing problems later on.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: dbe569a099c2855480e35c0cc4d9332821ad80da
User & Date: dan 2011-03-29 15:40:55
Context
2011-03-29
18:28
Add tests to syscall.test and sysfault.test. check-in: 3d2de011 user: dan tags: trunk
15:40
Fix a problem whereby following an IO error in CommitPhaseTwo() of a multi-file transaction the b-tree layer could be left in TRANS_WRITE state, causing problems later on. check-in: dbe569a0 user: dan tags: trunk
10:04
Fix a problem in the unix VFS implementation of xNextSystemCall(). Also some typos that prevent compilation when HAVE_POSIX_FALLOCATE is defined. check-in: bc6cce81 user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

install-sh became executable.


Changes to src/backup.c.

   484    484           }
   485    485         }else{
   486    486           rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 0);
   487    487         }
   488    488     
   489    489         /* Finish committing the transaction to the destination database. */
   490    490         if( SQLITE_OK==rc
   491         -       && SQLITE_OK==(rc = sqlite3BtreeCommitPhaseTwo(p->pDest))
          491  +       && SQLITE_OK==(rc = sqlite3BtreeCommitPhaseTwo(p->pDest, 0))
   492    492         ){
   493    493           rc = SQLITE_DONE;
   494    494         }
   495    495       }
   496    496     
   497    497       /* If bCloseTrans is true, then this function opened a read transaction
   498    498       ** on the source database. Close the read transaction here. There is
   499    499       ** no need to check the return values of the btree methods here, as
   500    500       ** "committing" a read-only transaction cannot fail.
   501    501       */
   502    502       if( bCloseTrans ){
   503    503         TESTONLY( int rc2 );
   504    504         TESTONLY( rc2  = ) sqlite3BtreeCommitPhaseOne(p->pSrc, 0);
   505         -      TESTONLY( rc2 |= ) sqlite3BtreeCommitPhaseTwo(p->pSrc);
          505  +      TESTONLY( rc2 |= ) sqlite3BtreeCommitPhaseTwo(p->pSrc, 0);
   506    506         assert( rc2==SQLITE_OK );
   507    507       }
   508    508     
   509    509       if( rc==SQLITE_IOERR_NOMEM ){
   510    510         rc = SQLITE_NOMEM;
   511    511       }
   512    512       p->rc = rc;

Changes to src/btree.c.

  3155   3155   ** sqlite3BtreeCommitPhaseOne() routine does the first phase and should
  3156   3156   ** be invoked prior to calling this routine.  The sqlite3BtreeCommitPhaseOne()
  3157   3157   ** routine did all the work of writing information out to disk and flushing the
  3158   3158   ** contents so that they are written onto the disk platter.  All this
  3159   3159   ** routine has to do is delete or truncate or zero the header in the
  3160   3160   ** rollback journal (which causes the transaction to commit) and
  3161   3161   ** drop locks.
         3162  +**
         3163  +** Normally, if an error occurs while the pager layer is attempting to 
         3164  +** finalize the underlying journal file, this function returns an error and
         3165  +** the upper layer will attempt a rollback. However, if the second argument
         3166  +** is non-zero then this b-tree transaction is part of a multi-file 
         3167  +** transaction. In this case, the transaction has already been committed 
         3168  +** (by deleting a master journal file) and the caller will ignore this 
         3169  +** function's return code. So, even if an error occurs in the pager layer,
         3170  +** reset the b-tree object's internal state to indicate that the write
         3171  +** transaction has been closed. This is quite safe, as the pager will have
         3172  +** transitioned to the error state.
  3162   3173   **
  3163   3174   ** This will release the write lock on the database file.  If there
  3164   3175   ** are no active cursors, it also releases the read lock.
  3165   3176   */
  3166         -int sqlite3BtreeCommitPhaseTwo(Btree *p){
         3177  +int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){
  3167   3178   
  3168   3179     if( p->inTrans==TRANS_NONE ) return SQLITE_OK;
  3169   3180     sqlite3BtreeEnter(p);
  3170   3181     btreeIntegrity(p);
  3171   3182   
  3172   3183     /* If the handle has a write-transaction open, commit the shared-btrees 
  3173   3184     ** transaction and set the shared state to TRANS_READ.
................................................................................
  3174   3185     */
  3175   3186     if( p->inTrans==TRANS_WRITE ){
  3176   3187       int rc;
  3177   3188       BtShared *pBt = p->pBt;
  3178   3189       assert( pBt->inTransaction==TRANS_WRITE );
  3179   3190       assert( pBt->nTransaction>0 );
  3180   3191       rc = sqlite3PagerCommitPhaseTwo(pBt->pPager);
  3181         -    if( rc!=SQLITE_OK ){
         3192  +    if( rc!=SQLITE_OK && bCleanup==0 ){
  3182   3193         sqlite3BtreeLeave(p);
  3183   3194         return rc;
  3184   3195       }
  3185   3196       pBt->inTransaction = TRANS_READ;
  3186   3197     }
  3187   3198   
  3188   3199     btreeEndTransaction(p);
................................................................................
  3194   3205   ** Do both phases of a commit.
  3195   3206   */
  3196   3207   int sqlite3BtreeCommit(Btree *p){
  3197   3208     int rc;
  3198   3209     sqlite3BtreeEnter(p);
  3199   3210     rc = sqlite3BtreeCommitPhaseOne(p, 0);
  3200   3211     if( rc==SQLITE_OK ){
  3201         -    rc = sqlite3BtreeCommitPhaseTwo(p);
         3212  +    rc = sqlite3BtreeCommitPhaseTwo(p, 0);
  3202   3213     }
  3203   3214     sqlite3BtreeLeave(p);
  3204   3215     return rc;
  3205   3216   }
  3206   3217   
  3207   3218   #ifndef NDEBUG
  3208   3219   /*

Changes to src/btree.h.

    83     83   u32 sqlite3BtreeLastPage(Btree*);
    84     84   int sqlite3BtreeSecureDelete(Btree*,int);
    85     85   int sqlite3BtreeGetReserve(Btree*);
    86     86   int sqlite3BtreeSetAutoVacuum(Btree *, int);
    87     87   int sqlite3BtreeGetAutoVacuum(Btree *);
    88     88   int sqlite3BtreeBeginTrans(Btree*,int);
    89     89   int sqlite3BtreeCommitPhaseOne(Btree*, const char *zMaster);
    90         -int sqlite3BtreeCommitPhaseTwo(Btree*);
           90  +int sqlite3BtreeCommitPhaseTwo(Btree*, int);
    91     91   int sqlite3BtreeCommit(Btree*);
    92     92   int sqlite3BtreeRollback(Btree*);
    93     93   int sqlite3BtreeBeginStmt(Btree*,int);
    94     94   int sqlite3BtreeCreateTable(Btree*, int*, int flags);
    95     95   int sqlite3BtreeIsInTrans(Btree*);
    96     96   int sqlite3BtreeIsInReadTrans(Btree*);
    97     97   int sqlite3BtreeIsInBackup(Btree*);

Changes to src/test_syscall.c.

   116    116   } aSyscall[] = {
   117    117     /*  0 */ { "open",      (sqlite3_syscall_ptr)ts_open,      0, EACCES, 0 },
   118    118     /*  1 */ { "close",     (sqlite3_syscall_ptr)ts_close,     0, 0, 0 },
   119    119     /*  2 */ { "access",    (sqlite3_syscall_ptr)ts_access,    0, 0, 0 },
   120    120     /*  3 */ { "getcwd",    (sqlite3_syscall_ptr)ts_getcwd,    0, 0, 0 },
   121    121     /*  4 */ { "stat",      (sqlite3_syscall_ptr)ts_stat,      0, 0, 0 },
   122    122     /*  5 */ { "fstat",     (sqlite3_syscall_ptr)ts_fstat,     0, 0, 0 },
   123         -  /*  6 */ { "ftruncate", (sqlite3_syscall_ptr)ts_ftruncate, 0, 0, 0 },
          123  +  /*  6 */ { "ftruncate", (sqlite3_syscall_ptr)ts_ftruncate, 0, EIO, 0 },
   124    124     /*  7 */ { "fcntl",     (sqlite3_syscall_ptr)ts_fcntl,     0, 0, 0 },
   125    125     /*  8 */ { "read",      (sqlite3_syscall_ptr)ts_read,      0, 0, 0 },
   126    126     /*  9 */ { "pread",     (sqlite3_syscall_ptr)ts_pread,     0, 0, 0 },
   127    127     /* 10 */ { "pread64",   (sqlite3_syscall_ptr)ts_pread64,   0, 0, 0 },
   128    128     /* 11 */ { "write",     (sqlite3_syscall_ptr)ts_write,     0, 0, 0 },
   129    129     /* 12 */ { "pwrite",    (sqlite3_syscall_ptr)ts_pwrite,    0, 0, 0 },
   130    130     /* 13 */ { "pwrite64",  (sqlite3_syscall_ptr)ts_pwrite64,  0, 0, 0 },
................................................................................
   147    147   #define orig_write     ((ssize_t(*)(int,const void*,size_t))aSyscall[11].xOrig)
   148    148   #define orig_pwrite    ((ssize_t(*)(int,const void*,size_t,off_t))\
   149    149                          aSyscall[12].xOrig)
   150    150   #define orig_pwrite64  ((ssize_t(*)(int,const void*,size_t,off_t))\
   151    151                          aSyscall[13].xOrig)
   152    152   #define orig_fchmod    ((int(*)(int,mode_t))aSyscall[14].xOrig)
   153    153   #define orig_fallocate ((int(*)(int,off_t,off_t))aSyscall[15].xOrig)
   154         -
   155    154   
   156    155   /*
   157    156   ** This function is called exactly once from within each invocation of a
   158    157   ** system call wrapper in this file. It returns 1 if the function should
   159    158   ** fail, or 0 if it should succeed.
   160    159   */
   161    160   static int tsIsFail(void){
................................................................................
   260    259     return orig_fstat(fd, p);
   261    260   }
   262    261   
   263    262   /*
   264    263   ** A wrapper around ftruncate().
   265    264   */
   266    265   static int ts_ftruncate(int fd, off_t n){
   267         -  if( tsIsFail() ){
          266  +  if( tsIsFailErrno("ftruncate") ){
   268    267       return -1;
   269    268     }
   270    269     return orig_ftruncate(fd, n);
   271    270   }
   272    271   
   273    272   /*
   274    273   ** A wrapper around fcntl().
................................................................................
   529    528     int rc;
   530    529   
   531    530     struct Errno {
   532    531       const char *z;
   533    532       int i;
   534    533     } aErrno[] = {
   535    534       { "EACCES", EACCES },
          535  +    { "EINTR", EINTR },
          536  +    { "EIO", EIO },
   536    537       { 0, 0 }
   537    538     };
   538    539   
   539    540     if( objc!=4 ){
   540    541       Tcl_WrongNumArgs(interp, 2, objv, "SYSCALL ERRNO");
   541    542       return TCL_ERROR;
   542    543     }

Changes to src/vdbeaux.c.

  1699   1699       ** If one of the BtreeCommitPhaseTwo() calls fails, this indicates an
  1700   1700       ** IO error while deleting or truncating a journal file. It is unlikely,
  1701   1701       ** but could happen. In this case abandon processing and return the error.
  1702   1702       */
  1703   1703       for(i=0; rc==SQLITE_OK && i<db->nDb; i++){
  1704   1704         Btree *pBt = db->aDb[i].pBt;
  1705   1705         if( pBt ){
  1706         -        rc = sqlite3BtreeCommitPhaseTwo(pBt);
         1706  +        rc = sqlite3BtreeCommitPhaseTwo(pBt, 0);
  1707   1707         }
  1708   1708       }
  1709   1709       if( rc==SQLITE_OK ){
  1710   1710         sqlite3VtabCommit(db);
  1711   1711       }
  1712   1712     }
  1713   1713   
................................................................................
  1831   1831       ** may be lying around. Returning an error code won't help matters.
  1832   1832       */
  1833   1833       disable_simulated_io_errors();
  1834   1834       sqlite3BeginBenignMalloc();
  1835   1835       for(i=0; i<db->nDb; i++){ 
  1836   1836         Btree *pBt = db->aDb[i].pBt;
  1837   1837         if( pBt ){
  1838         -        sqlite3BtreeCommitPhaseTwo(pBt);
         1838  +        sqlite3BtreeCommitPhaseTwo(pBt, 1);
  1839   1839         }
  1840   1840       }
  1841   1841       sqlite3EndBenignMalloc();
  1842   1842       enable_simulated_io_errors();
  1843   1843   
  1844   1844       sqlite3VtabCommit(db);
  1845   1845     }

Changes to test/fts3fault2.test.

     9      9   #
    10     10   #***********************************************************************
    11     11   #
    12     12   
    13     13   set testdir [file dirname $argv0]
    14     14   source $testdir/tester.tcl
    15     15   set ::testprefix fts3fault2
           16  +
           17  +# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
           18  +ifcapable !fts3 { finish_test ; return }
    16     19   
    17     20   do_test 1.0 {
    18     21     execsql {
    19     22       CREATE VIRTUAL TABLE t1 USING fts4(x);
    20     23       INSERT INTO t1 VALUES('a b c');
    21     24       INSERT INTO t1 VALUES('c d e');
    22     25       CREATE VIRTUAL TABLE terms USING fts4aux(t1);

test/progress.test became a regular file.


Changes to test/syscall.test.

    54     54   #
    55     55   set syscall_list [list                                \
    56     56       open close access getcwd stat fstat ftruncate     \
    57     57       fcntl read pread write pwrite fchmod              \
    58     58   ]
    59     59   if {[test_syscall exists fallocate]} {lappend syscall_list fallocate}
    60     60   do_test 3.1 { test_syscall list } $syscall_list
           61  +
           62  +#-------------------------------------------------------------------------
           63  +# This test verifies that if a call to open() fails and errno is set to
           64  +# EINTR, the call is retried. If it succeeds, execution continues as if
           65  +# nothing happened. 
           66  +#
           67  +test_syscall reset
           68  +forcedelete test.db2
           69  +do_execsql_test 4.1 {
           70  +  CREATE TABLE t1(x, y);
           71  +  INSERT INTO t1 VALUES(1, 2);
           72  +  ATTACH 'test.db2' AS aux;
           73  +  CREATE TABLE aux.t2(x, y);
           74  +  INSERT INTO t2 VALUES(3, 4);
           75  +}
           76  +
           77  +db_save_and_close
           78  +test_syscall install open
           79  +foreach jrnl [list wal delete] {
           80  +  for {set i 1} {$i < 20} {incr i} {
           81  +    db_restore_and_reopen
           82  +    test_syscall fault $i 0
           83  +    test_syscall errno open EINTR
           84  +  
           85  +    do_test 4.2.$jrnl.$i {
           86  +      sqlite3 db test.db
           87  +      execsql { ATTACH 'test.db2' AS aux }
           88  +      execsql "PRAGMA main.journal_mode = $jrnl"
           89  +      execsql "PRAGMA aux.journal_mode = $jrnl"
           90  +      execsql {
           91  +        BEGIN;
           92  +          INSERT INTO t1 VALUES(5, 6);
           93  +          INSERT INTO t2 VALUES(7, 8);
           94  +        COMMIT;
           95  +      }
           96  +
           97  +      db close
           98  +      sqlite3 db test.db
           99  +      execsql { ATTACH 'test.db2' AS aux }
          100  +      execsql {
          101  +        SELECT * FROM t1;
          102  +        SELECT * FROM t2;
          103  +      }
          104  +    } {1 2 5 6 3 4 7 8}
          105  +  }
          106  +
          107  +}
          108  +
          109  +
    61    110   
    62    111   finish_test

Changes to test/sysfault.test.

    61     61       INSERT INTO t2 VALUES('y');
    62     62     }
    63     63   } -test {
    64     64     faultsim_test_result {0 {wal 1 2 3 4}}       \
    65     65       {1 {unable to open database file}}         \
    66     66       {1 {attempt to write a readonly database}}
    67     67   }
           68  +
           69  +#-------------------------------------------------------------------------
           70  +# Check that a single EINTR error does not affect processing.
           71  +#
           72  +proc vfsfault_install {} { 
           73  +  test_syscall reset
           74  +  test_syscall install {open ftruncate close}
           75  +}
           76  +
           77  +forcedelete test.db test.db2
           78  +sqlite3 db test.db
           79  +do_test 2.setup {
           80  +  execsql {
           81  +    CREATE TABLE t1(a, b, c, PRIMARY KEY(a));
           82  +    INSERT INTO t1 VALUES('abc', 'def', 'ghi');
           83  +    ATTACH 'test.db2' AS 'aux';
           84  +    CREATE TABLE aux.t2(x);
           85  +    INSERT INTO t2 VALUES(1);
           86  +  }
           87  +  faultsim_save_and_close
           88  +} {}
           89  +
           90  +do_faultsim_test 2.1 -faults vfsfault-transient -prep {
           91  +  catch { db close }
           92  +  faultsim_restore
           93  +} -body {
           94  +  test_syscall errno open      EINTR
           95  +  test_syscall errno ftruncate EINTR
           96  +  test_syscall errno close     EINTR
           97  +
           98  +  sqlite3 db test.db
           99  +  set res [db eval {
          100  +    ATTACH 'test.db2' AS 'aux';
          101  +    SELECT * FROM t1;
          102  +    PRAGMA journal_mode = truncate;
          103  +    BEGIN;
          104  +      INSERT INTO t1 VALUES('jkl', 'mno', 'pqr');
          105  +      UPDATE t2 SET x = 2;
          106  +    COMMIT;
          107  +    SELECT * FROM t1;
          108  +    SELECT * FROM t2;
          109  +  }]
          110  +  db close
          111  +  set res
          112  +} -test {
          113  +  faultsim_test_result {0 {abc def ghi truncate abc def ghi jkl mno pqr 2}}
          114  +}
          115  +
          116  +do_faultsim_test 2.2 -faults vfsfault-* -prep {
          117  +  catch { db close }
          118  +  faultsim_restore
          119  +} -body {
          120  +  sqlite3 db test.db
          121  +  set res [db eval {
          122  +    ATTACH 'test.db2' AS 'aux';
          123  +    SELECT * FROM t1;
          124  +    PRAGMA journal_mode = truncate;
          125  +    BEGIN;
          126  +      INSERT INTO t1 VALUES('jkl', 'mno', 'pqr');
          127  +      UPDATE t2 SET x = 2;
          128  +    COMMIT;
          129  +    SELECT * FROM t1;
          130  +    SELECT * FROM t2;
          131  +  }]
          132  +  db close
          133  +  set res
          134  +} -test {
          135  +  faultsim_test_result {0 {abc def ghi truncate abc def ghi jkl mno pqr 2}} \
          136  +    {1 {unable to open database file}}                                      \
          137  +    {1 {unable to open database: test.db2}}                                 \
          138  +    {1 {attempt to write a readonly database}}                              \
          139  +    {1 {disk I/O error}}                                                  
          140  +}
          141  +
    68    142   
    69    143   finish_test
    70    144