/ Check-in [8084eae0]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Have sqlite3_snapshot_open() avoid a race condition by taking a shared CHECKPOINTER lock while checking pInfo->nBackfillAttempted.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | snapshot-get
Files: files | file ages | folders
SHA1: 8084eae0bc4f6513b1147fb890a6b2813f1c0a09
User & Date: dan 2015-12-10 15:45:15
Context
2015-12-10
18:06
Add tests to ensure that an sqlite3_snapshot_open() client cannot be tricked into reading a corrupt snapshot even if another process fails mid-checkpoint. check-in: b908048b user: dan tags: snapshot-get
15:45
Have sqlite3_snapshot_open() avoid a race condition by taking a shared CHECKPOINTER lock while checking pInfo->nBackfillAttempted. check-in: 8084eae0 user: dan tags: snapshot-get
03:16
Fix spacing typo in comment. No changes to code. check-in: 3a18526f user: mistachkin tags: snapshot-get
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/wal.c.

  1213   1213   
  1214   1214       /* Reset the checkpoint-header. This is safe because this thread is 
  1215   1215       ** currently holding locks that exclude all other readers, writers and
  1216   1216       ** checkpointers.
  1217   1217       */
  1218   1218       pInfo = walCkptInfo(pWal);
  1219   1219       pInfo->nBackfill = 0;
  1220         -    pInfo->nBackfillAttempted = 0;
         1220  +    pInfo->nBackfillAttempted = pWal->hdr.mxFrame;
  1221   1221       pInfo->aReadMark[0] = 0;
  1222   1222       for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
  1223   1223       if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame;
  1224   1224   
  1225   1225       /* If more than one frame was recovered from the log file, report an
  1226   1226       ** event via sqlite3_log(). This is to help with identifying performance
  1227   1227       ** problems caused by applications routinely shutting down without
................................................................................
  1753   1753   
  1754   1754       /* Compute in mxSafeFrame the index of the last frame of the WAL that is
  1755   1755       ** safe to write into the database.  Frames beyond mxSafeFrame might
  1756   1756       ** overwrite database pages that are in use by active readers and thus
  1757   1757       ** cannot be backfilled from the WAL.
  1758   1758       */
  1759   1759       mxSafeFrame = pWal->hdr.mxFrame;
  1760         -    pInfo->nBackfillAttempted = mxSafeFrame;
  1761   1760       mxPage = pWal->hdr.nPage;
  1762   1761       for(i=1; i<WAL_NREADER; i++){
  1763   1762         /* Thread-sanitizer reports that the following is an unsafe read,
  1764   1763         ** as some other thread may be in the process of updating the value
  1765   1764         ** of the aReadMark[] slot. The assumption here is that if that is
  1766   1765         ** happening, the other client may only be increasing the value,
  1767   1766         ** not decreasing it. So assuming either that either the "old" or
................................................................................
  1785   1784       }
  1786   1785   
  1787   1786       if( pInfo->nBackfill<mxSafeFrame
  1788   1787        && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK
  1789   1788       ){
  1790   1789         i64 nSize;                    /* Current size of database file */
  1791   1790         u32 nBackfill = pInfo->nBackfill;
         1791  +
         1792  +      pInfo->nBackfillAttempted = mxSafeFrame;
  1792   1793   
  1793   1794         /* Sync the WAL to disk */
  1794   1795         if( sync_flags ){
  1795   1796           rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
  1796   1797         }
  1797   1798   
  1798   1799         /* If the database may grow as a result of this checkpoint, hint
................................................................................
  2292   2293         assert( thisMark!=READMARK_NOT_USED );
  2293   2294         mxReadMark = thisMark;
  2294   2295         mxI = i;
  2295   2296       }
  2296   2297     }
  2297   2298     if( (pWal->readOnly & WAL_SHM_RDONLY)==0
  2298   2299      && (mxReadMark<mxFrame || mxI==0)
  2299         -#ifdef SQLITE_ENABLE_SNAPSHOT
  2300         -   && pWal->pSnapshot==0
  2301         -#endif
  2302   2300     ){
  2303   2301       for(i=1; i<WAL_NREADER; i++){
  2304   2302         rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
  2305   2303         if( rc==SQLITE_OK ){
  2306   2304           mxReadMark = pInfo->aReadMark[i] = mxFrame;
  2307   2305           mxI = i;
  2308   2306           walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
................................................................................
  2309   2307           break;
  2310   2308         }else if( rc!=SQLITE_BUSY ){
  2311   2309           return rc;
  2312   2310         }
  2313   2311       }
  2314   2312     }
  2315   2313     if( mxI==0 ){
  2316         -#ifdef SQLITE_ENABLE_SNAPSHOT
  2317         -    if( pWal->pSnapshot ) return SQLITE_BUSY_SNAPSHOT;
  2318         -#endif
  2319   2314       assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
  2320   2315       return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
  2321   2316     }
  2322   2317   
  2323   2318     rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
  2324   2319     if( rc ){
  2325   2320       return rc==SQLITE_BUSY ? WAL_RETRY : rc;
................................................................................
  2406   2401     testcase( rc==SQLITE_PROTOCOL );
  2407   2402     testcase( rc==SQLITE_OK );
  2408   2403   
  2409   2404   #ifdef SQLITE_ENABLE_SNAPSHOT
  2410   2405     if( rc==SQLITE_OK ){
  2411   2406       if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){
  2412   2407         /* At this point the client has a lock on an aReadMark[] slot holding
  2413         -      ** a value equal to or smaller than pSnapshot->mxFrame.  Verify that
  2414         -      ** pSnapshot is still valid before continuing.  Reasons why pSnapshot
  2415         -      ** might no longer be valid:
         2408  +      ** a value equal to or smaller than pSnapshot->mxFrame, but pWal->hdr
         2409  +      ** is populated with the wal-index header corresponding to the head
         2410  +      ** of the wal file. Verify that pSnapshot is still valid before
         2411  +      ** continuing.  Reasons why pSnapshot might no longer be valid:
  2416   2412         **
  2417   2413         **    (1)  The WAL file has been reset since the snapshot was taken.
  2418   2414         **         In this case, the salt will have changed.
  2419   2415         **
  2420   2416         **    (2)  A checkpoint as been attempted that wrote frames past
  2421   2417         **         pSnapshot->mxFrame into the database file.  Note that the
  2422   2418         **         checkpoint need not have completed for this to cause problems.
  2423   2419         */
  2424   2420         volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
  2425   2421   
  2426   2422         assert( pWal->readLock>0 );
  2427   2423         assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );
  2428   2424   
  2429         -      /* Check that the wal file has not been wrapped. Assuming it has not,
  2430         -      ** overwrite pWal->hdr with *pSnapshot and set *pChanged as appropriate
  2431         -      ** for opening the snapshot. Or, if the wal file has been wrapped
  2432         -      ** since pSnapshot was written, return SQLITE_BUSY_SNAPSHOT. */
         2425  +      /* It is possible that there is a checkpointer thread running 
         2426  +      ** concurrent with this code. If this is the case, it may be that the
         2427  +      ** checkpointer has already determined that it will checkpoint 
         2428  +      ** snapshot X, where X is later in the wal file than pSnapshot, but 
         2429  +      ** has not yet set the pInfo->nBackfillAttempted variable to indicate 
         2430  +      ** its intent. To avoid the race condition this leads to, ensure that
         2431  +      ** there is no checkpointer process by taking a shared CKPT lock 
         2432  +      ** before checking pInfo->nBackfillAttempted.  */
         2433  +      rc = walLockShared(pWal, WAL_CKPT_LOCK);
         2434  +
         2435  +      /* Check that the wal file has not been wrapped. Assuming that it has
         2436  +      ** not, also check that no checkpointer has attempted to checkpoint
         2437  +      ** any frames beyond pSnapshot->mxFrame. If either of these conditions
         2438  +      ** are true, return SQLTIE_BUSY_SNAPSHOT. Otherwise, overwrite pWal->hdr
         2439  +      ** with *pSnapshot and set *pChanged as appropriate for opening the
         2440  +      ** snapshot.  */
  2433   2441         if( memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))==0
  2434   2442          && pSnapshot->mxFrame>=pInfo->nBackfillAttempted
  2435   2443         ){
  2436   2444           memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
  2437   2445           *pChanged = bChanged;
  2438   2446         }else{
  2439   2447           rc = SQLITE_BUSY_SNAPSHOT;
  2440   2448         }
         2449  +
         2450  +      /* Release the shared CKPT lock obtained above. */
         2451  +      walUnlockShared(pWal, WAL_CKPT_LOCK);
  2441   2452   
  2442   2453         if( rc!=SQLITE_OK ){
  2443   2454           sqlite3WalEndReadTransaction(pWal);
  2444   2455         }
  2445   2456       }
  2446   2457     }
  2447   2458   #endif

Changes to test/snapshot.test.

    53     53   } {1 SQLITE_ERROR}
    54     54   do_execsql_test 1.3.2 COMMIT
    55     55   
    56     56   #-------------------------------------------------------------------------
    57     57   # Check that a simple case works. Reuse the database created by the
    58     58   # block of tests above.
    59     59   #
    60         -# UPDATE: This case (2.1) no longer works. 2.2 does.
    61         -#
    62     60   do_execsql_test 2.1.0 {
    63     61     BEGIN;
    64     62       SELECT * FROM t1;
    65     63   } {1 2 3 4 5 6 7 8}
    66     64   
    67     65   do_test 2.1.1 {
    68     66     set snapshot [sqlite3_snapshot_get db main]
................................................................................
    71     69       INSERT INTO t1 VALUES(9, 10);
    72     70       SELECT * FROM t1;
    73     71     }
    74     72   } {1 2 3 4 5 6 7 8 9 10}
    75     73   
    76     74   do_test 2.1.2 {
    77     75     execsql BEGIN
    78         -  list [catch { sqlite3_snapshot_open db main $snapshot } msg] $msg
    79         -} {1 SQLITE_BUSY_SNAPSHOT}
           76  +  sqlite3_snapshot_open db main $snapshot
           77  +  execsql {
           78  +    SELECT * FROM t1;
           79  +  }
           80  +} {1 2 3 4 5 6 7 8}
    80     81   
    81     82   do_test 2.1.3 {
    82     83     sqlite3_snapshot_free $snapshot
    83     84     execsql COMMIT
    84     85   } {}
    85     86   
    86     87   do_test 2.2.0 {
................................................................................
    95     96     set snapshot [sqlite3_snapshot_get db2 main]
    96     97     execsql {
    97     98       INSERT INTO t1 VALUES(11, 12);
    98     99       SELECT * FROM t1;
    99    100     }
   100    101   } {1 2 3 4 5 6 7 8 9 10 11 12}
   101    102   
   102         -do_test 2.1.2 {
          103  +do_test 2.2.2 {
   103    104     execsql BEGIN
   104    105     sqlite3_snapshot_open db main $snapshot
   105    106     execsql {
   106    107       SELECT * FROM t1;
   107    108     }
   108    109   } {1 2 3 4 5 6 7 8 9 10}
   109    110   
   110         -do_test 2.1.3 {
          111  +do_test 2.2.3 {
   111    112     sqlite3_snapshot_free $snapshot
   112    113     execsql COMMIT
   113    114     execsql COMMIT db2
   114    115     db2 close
   115    116   } {}
   116    117   
   117    118   #-------------------------------------------------------------------------