Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Changes In Branch exp-retry-atomic-commit Excluding Merge-Ins
This is equivalent to a diff from 148d9b61 to b10ec14e
2018-07-17
| ||
14:01 | If an SQLITE_IOERR error is encountered as part of an atomic commit on an F2FS file-system, retry the commit in legacy journal mode. (check-in: 2e0357c2 user: dan tags: trunk) | |
13:55 | Fix for builds without SQLITE_ENABLE_BATCH_ATOMIC_WRITE. (Closed-Leaf check-in: b10ec14e user: dan tags: exp-retry-atomic-commit) | |
2018-07-16
| ||
20:44 | Add new file doc/F2FS.txt, containing notes on the way SQLite uses the F2FS atomic commit feature. (check-in: 59efb1bf user: dan tags: exp-retry-atomic-commit) | |
11:32 | Minor simplification to sqlite3RollbackAll(). (check-in: 432fdc22 user: drh tags: trunk) | |
2018-07-14
| ||
20:25 | If an SQLITE_IOERR error is encountered as part of an atomic commit on an F2FS file-system, retry the commit in legacy journal mode. (check-in: 1c41250f user: dan tags: exp-retry-atomic-commit) | |
2018-07-13
| ||
20:28 | Remove an unused function declaration from fts5. (check-in: 148d9b61 user: dan tags: trunk) | |
19:52 | Add the "categories" option to the unicode61 tokenizer in fts5. (check-in: 80d2b9e6 user: dan tags: trunk) | |
Added doc/F2FS.txt.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | SQLite's OS layer contains the following definitions used in F2FS related calls: #define F2FS_IOCTL_MAGIC 0xf5 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) #define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32) #define F2FS_FEATURE_ATOMIC_WRITE 0x0004 After opening a database file on Linux (including Android), SQLite determines whether or not a file supports F2FS atomic commits as follows: u32 flags = 0; rc = ioctl(fd, F2FS_IOC_GET_FEATURES, &flags); if( rc==0 && (flags & F2FS_FEATURE_ATOMIC_WRITE) ){ /* File supports F2FS atomic commits */ }else{ /* File does NOT support F2FS atomic commits */ } where "fd" is the file-descriptor open on the database file. Usually, when writing to a database file that supports atomic commits, SQLite accumulates the entire transaction in heap memory, deferring all writes to the db file until the transaction is committed. When it is time to commit a transaction on a file that supports atomic commits, SQLite does: /* Take an F_WRLCK lock on the database file. This prevents any other ** SQLite clients from reading or writing the file until the lock ** is released. 
*/ rc = fcntl(fd, F_SETLK, ...); if( rc!=0 ) goto failed; rc = ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE); if( rc!=0 ) goto fallback_to_legacy_journal_commit; foreach (dirty page){ rc = write(fd, ...dirty page...); if( rc!=0 ){ ioctl(fd, F2FS_IOC_ABORT_VOLATILE_WRITE); goto fallback_to_legacy_journal_commit; } } rc = ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE); if( rc!=0 ){ ioctl(fd, F2FS_IOC_ABORT_VOLATILE_WRITE); goto fallback_to_legacy_journal_commit; } /* If we get here, the transaction has been successfully ** committed to persistent storage. The following call ** relinquishes the F_WRLCK lock. */ fcntl(fd, F_SETLK, ...); Assumptions: 1. After either of the F2FS_IOC_ABORT_VOLATILE_WRITE calls returns, the database file is in the state that it was in before F2FS_IOC_START_ATOMIC_WRITE was invoked. This holds even if the ioctl() fails - we're ignoring the return code. This is true regardless of the type of error that occurred in ioctl() or write(). 2. If the system fails before the F2FS_IOC_COMMIT_ATOMIC_WRITE is completed, then following a reboot the database file is in the state that it was in before F2FS_IOC_START_ATOMIC_WRITE was invoked. Or, if the write was committed right before the system failed, in a state indicating that all write() calls were successfully committed to persistent storage before the failure occurred. 3. If the process crashes before the F2FS_IOC_COMMIT_ATOMIC_WRITE is completed then the file is automatically restored to the state that it was in before F2FS_IOC_START_ATOMIC_WRITE was called. This occurs before the POSIX advisory lock is automatically dropped - there is no chance that another client will be able to read the file in a half-committed state before the rollback operation occurs. |
Changes to src/pager.c.
︙ | ︙ | |||
6378 6379 6380 6381 6382 6383 6384 6385 | assert( isOpen(pPager->fd) || pPager->tempFile ); if( 0==pagerFlushOnCommit(pPager, 1) ){ /* If this is an in-memory db, or no pages have been written to, or this ** function has already been called, it is mostly a no-op. However, any ** backup in progress needs to be restarted. */ sqlite3BackupRestart(pPager->pBackup); }else{ if( pagerUseWal(pPager) ){ | > < > | 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 | assert( isOpen(pPager->fd) || pPager->tempFile ); if( 0==pagerFlushOnCommit(pPager, 1) ){ /* If this is an in-memory db, or no pages have been written to, or this ** function has already been called, it is mostly a no-op. However, any ** backup in progress needs to be restarted. */ sqlite3BackupRestart(pPager->pBackup); }else{ PgHdr *pList; if( pagerUseWal(pPager) ){ PgHdr *pPageOne = 0; pList = sqlite3PcacheDirtyList(pPager->pPCache); if( pList==0 ){ /* Must have at least one page for the WAL commit flag. ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */ rc = sqlite3PagerGet(pPager, 1, &pPageOne, 0); pList = pPageOne; pList->pDirty = 0; } |
︙ | ︙ | |||
6403 6404 6405 6406 6407 6408 6409 | }else{ /* The bBatch boolean is true if the batch-atomic-write commit method ** should be used. No rollback journal is created if batch-atomic-write ** is enabled. */ sqlite3_file *fd = pPager->fd; #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE | | | 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 | }else{ /* The bBatch boolean is true if the batch-atomic-write commit method ** should be used. No rollback journal is created if batch-atomic-write ** is enabled. */ sqlite3_file *fd = pPager->fd; #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE int bBatch = zMaster==0 /* An SQLITE_IOCAP_BATCH_ATOMIC commit */ && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC) && !pPager->noSync && sqlite3JournalIsInMemory(pPager->jfd); #else # define bBatch 0 #endif |
︙ | ︙ | |||
6460 6461 6462 6463 6464 6465 6466 | }else{ rc = sqlite3JournalCreate(pPager->jfd); if( rc==SQLITE_OK ){ rc = pager_incr_changecounter(pPager, 0); } } } | | > | | 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 | }else{ rc = sqlite3JournalCreate(pPager->jfd); if( rc==SQLITE_OK ){ rc = pager_incr_changecounter(pPager, 0); } } } #else /* SQLITE_ENABLE_ATOMIC_WRITE */ #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE if( zMaster ){ rc = sqlite3JournalCreate(pPager->jfd); if( rc!=SQLITE_OK ) goto commit_phase_one_exit; assert( bBatch==0 ); } #endif rc = pager_incr_changecounter(pPager, 0); #endif /* !SQLITE_ENABLE_ATOMIC_WRITE */ if( rc!=SQLITE_OK ) goto commit_phase_one_exit; /* Write the master journal name into the journal file. If a master ** journal file name has already been written to the journal file, ** or if zMaster is NULL (no master journal), then this call is a no-op. */ rc = writeMasterJournal(pPager, zMaster); |
︙ | ︙ | |||
6492 6493 6494 6495 6496 6497 6498 6499 | ** on a system under memory pressure it is just possible that this is ** not the case. In this case it is likely enough that the redundant ** xSync() call will be changed to a no-op by the OS anyhow. */ rc = syncJournal(pPager, 0); if( rc!=SQLITE_OK ) goto commit_phase_one_exit; if( bBatch ){ | > > < < < < | < | < | | | | | | | > > > > > > > > > > > > > > > > | 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 | ** on a system under memory pressure it is just possible that this is ** not the case. In this case it is likely enough that the redundant ** xSync() call will be changed to a no-op by the OS anyhow. */ rc = syncJournal(pPager, 0); if( rc!=SQLITE_OK ) goto commit_phase_one_exit; pList = sqlite3PcacheDirtyList(pPager->pPCache); #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE if( bBatch ){ rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0); if( rc==SQLITE_OK ){ rc = pager_write_pagelist(pPager, pList); if( rc==SQLITE_OK ){ rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0); } if( rc!=SQLITE_OK ){ sqlite3OsFileControlHint(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0); } } if( (rc&0xFF)==SQLITE_IOERR && rc!=SQLITE_IOERR_NOMEM ){ rc = sqlite3JournalCreate(pPager->jfd); if( rc!=SQLITE_OK ){ sqlite3OsClose(pPager->jfd); } bBatch = 0; }else{ sqlite3OsClose(pPager->jfd); } } #endif /* SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ if( bBatch==0 && rc==SQLITE_OK ){ rc = pager_write_pagelist(pPager, pList); } if( rc!=SQLITE_OK ){ assert( rc!=SQLITE_IOERR_BLOCKED ); goto commit_phase_one_exit; } sqlite3PcacheCleanAll(pPager->pPCache); |
︙ | ︙ |
Changes to src/test_vfs.c.
︙ | ︙ | |||
129 130 131 132 133 134 135 136 | #define TESTVFS_TRUNCATE_MASK 0x00002000 #define TESTVFS_ACCESS_MASK 0x00004000 #define TESTVFS_FULLPATHNAME_MASK 0x00008000 #define TESTVFS_READ_MASK 0x00010000 #define TESTVFS_UNLOCK_MASK 0x00020000 #define TESTVFS_LOCK_MASK 0x00040000 #define TESTVFS_CKLOCK_MASK 0x00080000 | > | | 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | #define TESTVFS_TRUNCATE_MASK 0x00002000 #define TESTVFS_ACCESS_MASK 0x00004000 #define TESTVFS_FULLPATHNAME_MASK 0x00008000 #define TESTVFS_READ_MASK 0x00010000 #define TESTVFS_UNLOCK_MASK 0x00020000 #define TESTVFS_LOCK_MASK 0x00040000 #define TESTVFS_CKLOCK_MASK 0x00080000 #define TESTVFS_FCNTL_MASK 0x00100000 #define TESTVFS_ALL_MASK 0x001FFFFF #define TESTVFS_MAX_PAGES 1024 /* ** A shared-memory buffer. There is one of these objects for each shared ** memory region opened by clients. If two clients open the same file, |
︙ | ︙ | |||
513 514 515 516 517 518 519 | return sqlite3OsCheckReservedLock(pFd->pReal, pResOut); } /* ** File control method. For custom operations on an tvfs-file. */ static int tvfsFileControl(sqlite3_file *pFile, int op, void *pArg){ | | > | > > > > > > > > > > > > > > > > > > > > > > > | | 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 | return sqlite3OsCheckReservedLock(pFd->pReal, pResOut); } /* ** File control method. For custom operations on an tvfs-file. */ static int tvfsFileControl(sqlite3_file *pFile, int op, void *pArg){ TestvfsFd *pFd = tvfsGetFd(pFile); Testvfs *p = (Testvfs *)pFd->pVfs->pAppData; if( op==SQLITE_FCNTL_PRAGMA ){ char **argv = (char**)pArg; if( sqlite3_stricmp(argv[1],"error")==0 ){ int rc = SQLITE_ERROR; if( argv[2] ){ const char *z = argv[2]; int x = atoi(z); if( x ){ rc = x; while( sqlite3Isdigit(z[0]) ){ z++; } while( sqlite3Isspace(z[0]) ){ z++; } } if( z[0] ) argv[0] = sqlite3_mprintf("%s", z); } return rc; } if( sqlite3_stricmp(argv[1], "filename")==0 ){ argv[0] = sqlite3_mprintf("%s", pFd->zFilename); return SQLITE_OK; } } if( p->pScript && (p->mask&TESTVFS_FCNTL_MASK) ){ struct Fcntl { int iFnctl; const char *zFnctl; } aF[] = { { SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, "BEGIN_ATOMIC_WRITE" }, { SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, "COMMIT_ATOMIC_WRITE" }, }; int i; for(i=0; i<sizeof(aF)/sizeof(aF[0]); i++){ if( op==aF[i].iFnctl ) break; } if( i<sizeof(aF)/sizeof(aF[0]) ){ int rc = 0; tvfsExecTcl(p, "xFileControl", Tcl_NewStringObj(pFd->zFilename, -1), Tcl_NewStringObj(aF[i].zFnctl, -1), 0, 0 ); tvfsResultCode(p, &rc); if( rc ) return rc; } } return sqlite3OsFileControl(pFd->pReal, op, pArg); } /* ** Return the sector-size in bytes for an tvfs-file. */ static int tvfsSectorSize(sqlite3_file *pFile){ TestvfsFd *pFd = tvfsGetFd(pFile); |
︙ | ︙ | |||
1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 | { "xOpen", TESTVFS_OPEN_MASK }, { "xClose", TESTVFS_CLOSE_MASK }, { "xAccess", TESTVFS_ACCESS_MASK }, { "xFullPathname", TESTVFS_FULLPATHNAME_MASK }, { "xUnlock", TESTVFS_UNLOCK_MASK }, { "xLock", TESTVFS_LOCK_MASK }, { "xCheckReservedLock", TESTVFS_CKLOCK_MASK }, }; Tcl_Obj **apElem = 0; int nElem = 0; int mask = 0; if( objc!=3 ){ Tcl_WrongNumArgs(interp, 2, objv, "LIST"); return TCL_ERROR; | > | 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 | { "xOpen", TESTVFS_OPEN_MASK }, { "xClose", TESTVFS_CLOSE_MASK }, { "xAccess", TESTVFS_ACCESS_MASK }, { "xFullPathname", TESTVFS_FULLPATHNAME_MASK }, { "xUnlock", TESTVFS_UNLOCK_MASK }, { "xLock", TESTVFS_LOCK_MASK }, { "xCheckReservedLock", TESTVFS_CKLOCK_MASK }, { "xFileControl", TESTVFS_FCNTL_MASK }, }; Tcl_Obj **apElem = 0; int nElem = 0; int mask = 0; if( objc!=3 ){ Tcl_WrongNumArgs(interp, 2, objv, "LIST"); return TCL_ERROR; |
︙ | ︙ |
Added test/atomic2.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | # 2018-07-15 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # This file implements regression tests for SQLite library. The # focus of this file is testing that if an IO error is encountered # as part of an atomic F2FS commit, an attempt is made to commit the # transaction using a legacy journal commit. # set testdir [file dirname $argv0] source $testdir/tester.tcl source $testdir/malloc_common.tcl set ::testprefix atomic2 db close if {[atomic_batch_write test.db]==0} { puts "No f2fs atomic-batch-write support. Skipping tests..." 
finish_test return } reset_db do_execsql_test 1.0 { CREATE TABLE t1(x, y); CREATE INDEX i1x ON t1(x); CREATE INDEX i2x ON t1(y); WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 ) INSERT INTO t1 SELECT randomblob(400), randomblob(400) FROM s; } set setup [list \ -injectstart at_injectstart \ -injectstop at_injectstop \ ] set ::at_fail 0 set ::at_nfail 0 proc at_injectstart {iFail} { set ::at_fail $iFail set ::at_nfail 0 } proc at_injectstop {} { set ::at_fail 0 return $::at_nfail } proc at_vfs_callback {method file z args} { if {$::at_fail>0} { incr ::at_fail -1 if {$::at_fail==0} { incr ::at_nfail return SQLITE_IOERR } elseif {$method=="xFileControl" && $z=="COMMIT_ATOMIC_WRITE"} { set ::at_fail 0 } } return SQLITE_OK } testvfs tvfs -default 1 tvfs script at_vfs_callback tvfs filter {xFileControl xWrite} faultsim_save_and_close do_one_faultsim_test 2.0 {*}$setup -prep { faultsim_restore_and_reopen } -body { execsql { WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 ) INSERT INTO t1 SELECT randomblob(400), randomblob(400) FROM s; } } -test { faultsim_test_result {0 {}} set res [execsql {SELECT count(*) FROM t1; PRAGMA integrity_check}] if {$res!="200 ok"} { error "expected {200 ok}, got $res" } } db close tvfs delete finish_test |