/ Check-in [1c41250f]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: If an SQLITE_IOERR error is encountered as part of an atomic commit on an F2FS file-system, retry the commit in legacy journal mode.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | exp-retry-atomic-commit
Files: files | file ages | folders
SHA3-256:1c41250f67ac5de423b0426ef2ab8fe3303278a270225920033933ca9609592a
User & Date: dan 2018-07-14 20:25:13
Context
2018-07-16
20:44
Add new file doc/F2FS.txt, containing notes on the way SQLite uses the F2FS atomic commit feature. check-in: 59efb1bf user: dan tags: exp-retry-atomic-commit
2018-07-14
20:25
If an SQLITE_IOERR error is encountered as part of an atomic commit on an F2FS file-system, retry the commit in legacy journal mode. check-in: 1c41250f user: dan tags: exp-retry-atomic-commit
2018-07-13
20:28
Remove an unused function declaration from fts5. check-in: 148d9b61 user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/pager.c.

6378
6379
6380
6381
6382
6383
6384

6385
6386
6387

6388
6389
6390
6391
6392
6393
6394
....
6403
6404
6405
6406
6407
6408
6409
6410
6411
6412
6413
6414
6415
6416
6417
....
6460
6461
6462
6463
6464
6465
6466
6467
6468
6469
6470
6471

6472
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482
....
6492
6493
6494
6495
6496
6497
6498

6499
6500
6501
6502
6503
6504
6505
6506

6507
6508
6509
6510
6511
6512
6513
6514
6515















6516
6517
6518
6519
6520
6521
6522
  assert( isOpen(pPager->fd) || pPager->tempFile );
  if( 0==pagerFlushOnCommit(pPager, 1) ){
    /* If this is an in-memory db, or no pages have been written to, or this
    ** function has already been called, it is mostly a no-op.  However, any
    ** backup in progress needs to be restarted.  */
    sqlite3BackupRestart(pPager->pBackup);
  }else{

    if( pagerUseWal(pPager) ){
      PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache);
      PgHdr *pPageOne = 0;

      if( pList==0 ){
        /* Must have at least one page for the WAL commit flag.
        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
        rc = sqlite3PagerGet(pPager, 1, &pPageOne, 0);
        pList = pPageOne;
        pList->pDirty = 0;
      }
................................................................................
    }else{
      /* The bBatch boolean is true if the batch-atomic-write commit method
      ** should be used.  No rollback journal is created if batch-atomic-write
      ** is enabled.
      */
      sqlite3_file *fd = pPager->fd;
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
      const int bBatch = zMaster==0    /* An SQLITE_IOCAP_BATCH_ATOMIC commit */
        && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC)
        && !pPager->noSync
        && sqlite3JournalIsInMemory(pPager->jfd);
#else
# define bBatch 0
#endif

................................................................................
        }else{
          rc = sqlite3JournalCreate(pPager->jfd);
          if( rc==SQLITE_OK ){
            rc = pager_incr_changecounter(pPager, 0);
          }
        }
      }
#else 
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
      if( zMaster ){
        rc = sqlite3JournalCreate(pPager->jfd);
        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;

      }
#endif
      rc = pager_incr_changecounter(pPager, 0);
#endif
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  
      /* Write the master journal name into the journal file. If a master 
      ** journal file name has already been written to the journal file, 
      ** or if zMaster is NULL (no master journal), then this call is a no-op.
      */
      rc = writeMasterJournal(pPager, zMaster);
................................................................................
      ** on a system under memory pressure it is just possible that this is 
      ** not the case. In this case it is likely enough that the redundant
      ** xSync() call will be changed to a no-op by the OS anyhow. 
      */
      rc = syncJournal(pPager, 0);
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;


      if( bBatch ){
        /* The pager is now in DBMOD state. But regardless of what happens
        ** next, attempting to play the journal back into the database would
        ** be unsafe. Close it now to make sure that does not happen.  */
        sqlite3OsClose(pPager->jfd);
        rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0);
        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
      }

      rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache));
      if( bBatch ){
        if( rc==SQLITE_OK ){
          rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0);
        }
        if( rc!=SQLITE_OK ){
          sqlite3OsFileControlHint(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0);
        }
      }
















      if( rc!=SQLITE_OK ){
        assert( rc!=SQLITE_IOERR_BLOCKED );
        goto commit_phase_one_exit;
      }
      sqlite3PcacheCleanAll(pPager->pPCache);








>

<

>







 







|







 







|




>



|







 







>

<
<
<
<

<
<
>
|
<
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







6378
6379
6380
6381
6382
6383
6384
6385
6386

6387
6388
6389
6390
6391
6392
6393
6394
6395
....
6404
6405
6406
6407
6408
6409
6410
6411
6412
6413
6414
6415
6416
6417
6418
....
6461
6462
6463
6464
6465
6466
6467
6468
6469
6470
6471
6472
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482
6483
6484
....
6494
6495
6496
6497
6498
6499
6500
6501
6502




6503


6504
6505

6506
6507
6508
6509
6510
6511
6512
6513
6514
6515
6516
6517
6518
6519
6520
6521
6522
6523
6524
6525
6526
6527
6528
6529
6530
6531
6532
6533
6534
  assert( isOpen(pPager->fd) || pPager->tempFile );
  if( 0==pagerFlushOnCommit(pPager, 1) ){
    /* If this is an in-memory db, or no pages have been written to, or this
    ** function has already been called, it is mostly a no-op.  However, any
    ** backup in progress needs to be restarted.  */
    sqlite3BackupRestart(pPager->pBackup);
  }else{
    PgHdr *pList;
    if( pagerUseWal(pPager) ){

      PgHdr *pPageOne = 0;
      pList = sqlite3PcacheDirtyList(pPager->pPCache);
      if( pList==0 ){
        /* Must have at least one page for the WAL commit flag.
        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
        rc = sqlite3PagerGet(pPager, 1, &pPageOne, 0);
        pList = pPageOne;
        pList->pDirty = 0;
      }
................................................................................
    }else{
      /* The bBatch boolean is true if the batch-atomic-write commit method
      ** should be used.  No rollback journal is created if batch-atomic-write
      ** is enabled.
      */
      sqlite3_file *fd = pPager->fd;
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
      int bBatch = zMaster==0    /* An SQLITE_IOCAP_BATCH_ATOMIC commit */
        && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC)
        && !pPager->noSync
        && sqlite3JournalIsInMemory(pPager->jfd);
#else
# define bBatch 0
#endif

................................................................................
        }else{
          rc = sqlite3JournalCreate(pPager->jfd);
          if( rc==SQLITE_OK ){
            rc = pager_incr_changecounter(pPager, 0);
          }
        }
      }
#else  /* SQLITE_ENABLE_ATOMIC_WRITE */
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
      if( zMaster ){
        rc = sqlite3JournalCreate(pPager->jfd);
        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
        assert( bBatch==0 );
      }
#endif
      rc = pager_incr_changecounter(pPager, 0);
#endif /* !SQLITE_ENABLE_ATOMIC_WRITE */
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  
      /* Write the master journal name into the journal file. If a master 
      ** journal file name has already been written to the journal file, 
      ** or if zMaster is NULL (no master journal), then this call is a no-op.
      */
      rc = writeMasterJournal(pPager, zMaster);
................................................................................
      ** on a system under memory pressure it is just possible that this is 
      ** not the case. In this case it is likely enough that the redundant
      ** xSync() call will be changed to a no-op by the OS anyhow. 
      */
      rc = syncJournal(pPager, 0);
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;

      pList = sqlite3PcacheDirtyList(pPager->pPCache);
      if( bBatch ){




        rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0);


        if( rc==SQLITE_OK ){
          rc = pager_write_pagelist(pPager, pList);

          if( rc==SQLITE_OK ){
            rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0);
          }
          if( rc!=SQLITE_OK ){
            sqlite3OsFileControlHint(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0);
          }
        }

        if( (rc&0xFF)==SQLITE_IOERR && rc!=SQLITE_IOERR_NOMEM ){
          rc = sqlite3JournalCreate(pPager->jfd);
          if( rc!=SQLITE_OK ){
            sqlite3OsClose(pPager->jfd);
          }
          bBatch = 0;
        }else{
          sqlite3OsClose(pPager->jfd);
        }
      }

      if( bBatch==0 && rc==SQLITE_OK ){
        rc = pager_write_pagelist(pPager, pList);
      }

      if( rc!=SQLITE_OK ){
        assert( rc!=SQLITE_IOERR_BLOCKED );
        goto commit_phase_one_exit;
      }
      sqlite3PcacheCleanAll(pPager->pPCache);

Changes to src/test_vfs.c.

129
130
131
132
133
134
135

136
137
138
139
140
141
142
143
144
...
513
514
515
516
517
518
519
520

521
522
523
524
525
526
527
...
531
532
533
534
535
536
537
538
539
540
541























542
543
544
545
546
547
548
549
....
1156
1157
1158
1159
1160
1161
1162

1163
1164
1165
1166
1167
1168
1169
#define TESTVFS_TRUNCATE_MASK     0x00002000
#define TESTVFS_ACCESS_MASK       0x00004000
#define TESTVFS_FULLPATHNAME_MASK 0x00008000
#define TESTVFS_READ_MASK         0x00010000
#define TESTVFS_UNLOCK_MASK       0x00020000
#define TESTVFS_LOCK_MASK         0x00040000
#define TESTVFS_CKLOCK_MASK       0x00080000


#define TESTVFS_ALL_MASK          0x000FFFFF


#define TESTVFS_MAX_PAGES 1024

/*
** A shared-memory buffer. There is one of these objects for each shared
** memory region opened by clients. If two clients open the same file,
................................................................................
  return sqlite3OsCheckReservedLock(pFd->pReal, pResOut);
}

/*
** File control method. For custom operations on an tvfs-file.
*/
static int tvfsFileControl(sqlite3_file *pFile, int op, void *pArg){
  TestvfsFd *p = tvfsGetFd(pFile);

  if( op==SQLITE_FCNTL_PRAGMA ){
    char **argv = (char**)pArg;
    if( sqlite3_stricmp(argv[1],"error")==0 ){
      int rc = SQLITE_ERROR;
      if( argv[2] ){
        const char *z = argv[2];
        int x = atoi(z);
................................................................................
          while( sqlite3Isspace(z[0]) ){ z++; }
        }
        if( z[0] ) argv[0] = sqlite3_mprintf("%s", z);
      }
      return rc;
    }
    if( sqlite3_stricmp(argv[1], "filename")==0 ){
      argv[0] = sqlite3_mprintf("%s", p->zFilename);
      return SQLITE_OK;
    }
  }























  return sqlite3OsFileControl(p->pReal, op, pArg);
}

/*
** Return the sector-size in bytes for an tvfs-file.
*/
static int tvfsSectorSize(sqlite3_file *pFile){
  TestvfsFd *pFd = tvfsGetFd(pFile);
................................................................................
        { "xOpen",              TESTVFS_OPEN_MASK },
        { "xClose",             TESTVFS_CLOSE_MASK },
        { "xAccess",            TESTVFS_ACCESS_MASK },
        { "xFullPathname",      TESTVFS_FULLPATHNAME_MASK },
        { "xUnlock",            TESTVFS_UNLOCK_MASK },
        { "xLock",              TESTVFS_LOCK_MASK },
        { "xCheckReservedLock", TESTVFS_CKLOCK_MASK },

      };
      Tcl_Obj **apElem = 0;
      int nElem = 0;
      int mask = 0;
      if( objc!=3 ){
        Tcl_WrongNumArgs(interp, 2, objv, "LIST");
        return TCL_ERROR;







>

|







 







|
>







 







|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|







 







>







129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
...
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
...
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
....
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
#define TESTVFS_TRUNCATE_MASK     0x00002000
#define TESTVFS_ACCESS_MASK       0x00004000
#define TESTVFS_FULLPATHNAME_MASK 0x00008000
#define TESTVFS_READ_MASK         0x00010000
#define TESTVFS_UNLOCK_MASK       0x00020000
#define TESTVFS_LOCK_MASK         0x00040000
#define TESTVFS_CKLOCK_MASK       0x00080000
#define TESTVFS_FCNTL_MASK        0x00100000

#define TESTVFS_ALL_MASK          0x001FFFFF


#define TESTVFS_MAX_PAGES 1024

/*
** A shared-memory buffer. There is one of these objects for each shared
** memory region opened by clients. If two clients open the same file,
................................................................................
  return sqlite3OsCheckReservedLock(pFd->pReal, pResOut);
}

/*
** File control method. For custom operations on an tvfs-file.
*/
static int tvfsFileControl(sqlite3_file *pFile, int op, void *pArg){
  TestvfsFd *pFd = tvfsGetFd(pFile);
  Testvfs *p = (Testvfs *)pFd->pVfs->pAppData;
  if( op==SQLITE_FCNTL_PRAGMA ){
    char **argv = (char**)pArg;
    if( sqlite3_stricmp(argv[1],"error")==0 ){
      int rc = SQLITE_ERROR;
      if( argv[2] ){
        const char *z = argv[2];
        int x = atoi(z);
................................................................................
          while( sqlite3Isspace(z[0]) ){ z++; }
        }
        if( z[0] ) argv[0] = sqlite3_mprintf("%s", z);
      }
      return rc;
    }
    if( sqlite3_stricmp(argv[1], "filename")==0 ){
      argv[0] = sqlite3_mprintf("%s", pFd->zFilename);
      return SQLITE_OK;
    }
  }
  if( p->pScript && (p->mask&TESTVFS_FCNTL_MASK) ){
    struct Fcntl {
      int iFnctl;
      const char *zFnctl;
    } aF[] = {
      { SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, "BEGIN_ATOMIC_WRITE" },
      { SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, "COMMIT_ATOMIC_WRITE" },
    };
    int i;
    for(i=0; i<sizeof(aF)/sizeof(aF[0]); i++){
      if( op==aF[i].iFnctl ) break;
    }
    if( i<sizeof(aF)/sizeof(aF[0]) ){
      int rc = 0;
      tvfsExecTcl(p, "xFileControl", 
          Tcl_NewStringObj(pFd->zFilename, -1), 
          Tcl_NewStringObj(aF[i].zFnctl, -1),
          0, 0
      );
      tvfsResultCode(p, &rc);
      if( rc ) return rc;
    }
  }
  return sqlite3OsFileControl(pFd->pReal, op, pArg);
}

/*
** Return the sector-size in bytes for an tvfs-file.
*/
static int tvfsSectorSize(sqlite3_file *pFile){
  TestvfsFd *pFd = tvfsGetFd(pFile);
................................................................................
        { "xOpen",              TESTVFS_OPEN_MASK },
        { "xClose",             TESTVFS_CLOSE_MASK },
        { "xAccess",            TESTVFS_ACCESS_MASK },
        { "xFullPathname",      TESTVFS_FULLPATHNAME_MASK },
        { "xUnlock",            TESTVFS_UNLOCK_MASK },
        { "xLock",              TESTVFS_LOCK_MASK },
        { "xCheckReservedLock", TESTVFS_CKLOCK_MASK },
        { "xFileControl",       TESTVFS_FCNTL_MASK },
      };
      Tcl_Obj **apElem = 0;
      int nElem = 0;
      int mask = 0;
      if( objc!=3 ){
        Tcl_WrongNumArgs(interp, 2, objv, "LIST");
        return TCL_ERROR;

Added test/atomic2.test.































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# 2018-07-15
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing that if an IO error is encountered
# as part of an atomic F2FS commit, an attempt is made to commit the
# transaction using a legacy journal commit.
#

# Load the shared test harness from the directory that contains this script.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/malloc_common.tcl
set ::testprefix atomic2

# The whole file is skipped unless [atomic_batch_write] reports support for
# test.db; the default connection is closed before the probe.
db close
if {[atomic_batch_write test.db]==0} {
  puts "No f2fs atomic-batch-write support. Skipping tests..."
  finish_test
  return
}

reset_db

# Test 1.0: build the starting database - table t1 with an index on each
# of its two columns, populated with 100 rows of two 400-byte random blobs.
do_execsql_test 1.0 {
  CREATE TABLE t1(x, y);
  CREATE INDEX i1x ON t1(x);
  CREATE INDEX i2x ON t1(y);

  WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 )
  INSERT INTO t1 SELECT randomblob(400), randomblob(400) FROM s;
}

# Options handed to do_one_faultsim_test below: they name the procs that
# arm (at_injectstart) and disarm (at_injectstop) the custom fault injector.
set setup [list \
  -injectstart at_injectstart \
  -injectstop  at_injectstop  \
]

# Fault-injector state shared by the procs below:
#   ::at_fail   countdown of VFS callbacks remaining until an error is
#               injected; 0 means the injector is disarmed.
#   ::at_nfail  number of errors injected since the injector was last armed.
set ::at_fail  0
set ::at_nfail 0

# Arm the fault injector.
#
# iFail is the number of filtered VFS callbacks to count down before an
# error is injected. The injected-error counter is cleared at the same
# time. Returns 0 (the value just stored in the counter).
proc at_injectstart {iFail} {
  global at_fail at_nfail
  set at_fail $iFail
  set at_nfail 0
}
# Disarm the fault injector.
#
# Resets the countdown to 0 so no further errors are injected, and returns
# the number of errors injected since the injector was last armed.
proc at_injectstop {} {
  global at_fail at_nfail
  set at_fail 0
  return $at_nfail
}

# Script callback invoked by the test VFS for each filtered method.
#
#   method  name of the VFS method being invoked (e.g. "xFileControl")
#   file    second argument supplied by the VFS (unused here)
#   z       third argument; compared against the file-control name
#   args    any further arguments (unused here)
#
# While the injector is armed (::at_fail > 0) every call decrements the
# countdown. The call that brings it to zero is failed with SQLITE_IOERR
# and counted in ::at_nfail. If a COMMIT_ATOMIC_WRITE file-control goes by
# before the countdown expires, the injector is disarmed so that no error
# is injected after that point. All other calls return SQLITE_OK.
proc at_vfs_callback {method file z args} {
  # Injector disarmed: nothing to do.
  if {$::at_fail <= 0} {
    return SQLITE_OK
  }

  incr ::at_fail -1
  if {$::at_fail == 0} {
    # Countdown expired on this call: inject the error.
    incr ::at_nfail
    return SQLITE_IOERR
  }

  # The atomic commit is being issued; stop counting.
  if {$method eq "xFileControl" && $z eq "COMMIT_ATOMIC_WRITE"} {
    set ::at_fail 0
  }
  return SQLITE_OK
}

# Install a test VFS named "tvfs" as the default VFS and route its
# xFileControl and xWrite calls through at_vfs_callback, which is where
# the IO errors are injected.
testvfs tvfs -default 1
tvfs script at_vfs_callback
tvfs filter {xFileControl xWrite}

# NOTE(review): presumably snapshots the database built by test 1.0 so that
# each faultsim iteration can start from it - confirm in malloc_common.tcl.
faultsim_save_and_close

# Test 2.0: each iteration restores the saved database, then inserts a
# further 100 rows while the injector is armed. The statement is expected
# to succeed (result {0 {}}) even though an IO error was injected, and the
# database must then hold 200 rows and pass integrity_check.
do_one_faultsim_test 2.0 {*}$setup -prep {
  faultsim_restore_and_reopen
} -body {
  execsql {
    WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 )
    INSERT INTO t1 SELECT randomblob(400), randomblob(400) FROM s;
  }
} -test {
  faultsim_test_result {0 {}}

  set res [execsql {SELECT count(*) FROM t1; PRAGMA integrity_check}]
  if {$res!="200 ok"} {
    error "expected {200 ok}, got $res"
  }
}

# Tear down: close the connection before removing the test VFS.
db close
tvfs delete

finish_test