SQLite

Check-in [8084eae0bc]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Have sqlite3_snapshot_open() avoid a race condition by taking a shared CHECKPOINTER lock while checking pInfo->nBackfillAttempted.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | snapshot-get
Files: files | file ages | folders
SHA1: 8084eae0bc4f6513b1147fb890a6b2813f1c0a09
User & Date: dan 2015-12-10 15:45:15.186
Context
2015-12-10
18:06
Add tests to ensure that an sqlite3_snapshot_open() client cannot be tricked into reading a corrupt snapshot even if another process fails mid-checkpoint. (check-in: b908048b6c user: dan tags: snapshot-get)
15:45
Have sqlite3_snapshot_open() avoid a race condition by taking a shared CHECKPOINTER lock while checking pInfo->nBackfillAttempted. (check-in: 8084eae0bc user: dan tags: snapshot-get)
03:16
Fix spacing typo in comment. No changes to code. (check-in: 3a18526fc2 user: mistachkin tags: snapshot-get)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/wal.c.
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227

    /* Reset the checkpoint-header. This is safe because this thread is 
    ** currently holding locks that exclude all other readers, writers and
    ** checkpointers.
    */
    pInfo = walCkptInfo(pWal);
    pInfo->nBackfill = 0;
    pInfo->nBackfillAttempted = 0;
    pInfo->aReadMark[0] = 0;
    for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
    if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame;

    /* If more than one frame was recovered from the log file, report an
    ** event via sqlite3_log(). This is to help with identifying performance
    ** problems caused by applications routinely shutting down without







|







1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227

    /* Reset the checkpoint-header. This is safe because this thread is 
    ** currently holding locks that exclude all other readers, writers and
    ** checkpointers.
    */
    pInfo = walCkptInfo(pWal);
    pInfo->nBackfill = 0;
    pInfo->nBackfillAttempted = pWal->hdr.mxFrame;
    pInfo->aReadMark[0] = 0;
    for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
    if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame;

    /* If more than one frame was recovered from the log file, report an
    ** event via sqlite3_log(). This is to help with identifying performance
    ** problems caused by applications routinely shutting down without
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767

    /* Compute in mxSafeFrame the index of the last frame of the WAL that is
    ** safe to write into the database.  Frames beyond mxSafeFrame might
    ** overwrite database pages that are in use by active readers and thus
    ** cannot be backfilled from the WAL.
    */
    mxSafeFrame = pWal->hdr.mxFrame;
    pInfo->nBackfillAttempted = mxSafeFrame;
    mxPage = pWal->hdr.nPage;
    for(i=1; i<WAL_NREADER; i++){
      /* Thread-sanitizer reports that the following is an unsafe read,
      ** as some other thread may be in the process of updating the value
      ** of the aReadMark[] slot. The assumption here is that if that is
      ** happening, the other client may only be increasing the value,
      ** not decreasing it. So assuming either that either the "old" or







<







1753
1754
1755
1756
1757
1758
1759

1760
1761
1762
1763
1764
1765
1766

    /* Compute in mxSafeFrame the index of the last frame of the WAL that is
    ** safe to write into the database.  Frames beyond mxSafeFrame might
    ** overwrite database pages that are in use by active readers and thus
    ** cannot be backfilled from the WAL.
    */
    mxSafeFrame = pWal->hdr.mxFrame;

    mxPage = pWal->hdr.nPage;
    for(i=1; i<WAL_NREADER; i++){
      /* Thread-sanitizer reports that the following is an unsafe read,
      ** as some other thread may be in the process of updating the value
      ** of the aReadMark[] slot. The assumption here is that if that is
      ** happening, the other client may only be increasing the value,
      ** not decreasing it. So assuming either that either the "old" or
1785
1786
1787
1788
1789
1790
1791


1792
1793
1794
1795
1796
1797
1798
    }

    if( pInfo->nBackfill<mxSafeFrame
     && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK
    ){
      i64 nSize;                    /* Current size of database file */
      u32 nBackfill = pInfo->nBackfill;



      /* Sync the WAL to disk */
      if( sync_flags ){
        rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
      }

      /* If the database may grow as a result of this checkpoint, hint







>
>







1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
    }

    if( pInfo->nBackfill<mxSafeFrame
     && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK
    ){
      i64 nSize;                    /* Current size of database file */
      u32 nBackfill = pInfo->nBackfill;

      pInfo->nBackfillAttempted = mxSafeFrame;

      /* Sync the WAL to disk */
      if( sync_flags ){
        rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
      }

      /* If the database may grow as a result of this checkpoint, hint
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
      assert( thisMark!=READMARK_NOT_USED );
      mxReadMark = thisMark;
      mxI = i;
    }
  }
  if( (pWal->readOnly & WAL_SHM_RDONLY)==0
   && (mxReadMark<mxFrame || mxI==0)
#ifdef SQLITE_ENABLE_SNAPSHOT
   && pWal->pSnapshot==0
#endif
  ){
    for(i=1; i<WAL_NREADER; i++){
      rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
      if( rc==SQLITE_OK ){
        mxReadMark = pInfo->aReadMark[i] = mxFrame;
        mxI = i;
        walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
        break;
      }else if( rc!=SQLITE_BUSY ){
        return rc;
      }
    }
  }
  if( mxI==0 ){
#ifdef SQLITE_ENABLE_SNAPSHOT
    if( pWal->pSnapshot ) return SQLITE_BUSY_SNAPSHOT;
#endif
    assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
    return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
  }

  rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
  if( rc ){
    return rc==SQLITE_BUSY ? WAL_RETRY : rc;







<
<
<














<
<
<







2293
2294
2295
2296
2297
2298
2299



2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313



2314
2315
2316
2317
2318
2319
2320
      assert( thisMark!=READMARK_NOT_USED );
      mxReadMark = thisMark;
      mxI = i;
    }
  }
  if( (pWal->readOnly & WAL_SHM_RDONLY)==0
   && (mxReadMark<mxFrame || mxI==0)



  ){
    for(i=1; i<WAL_NREADER; i++){
      rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
      if( rc==SQLITE_OK ){
        mxReadMark = pInfo->aReadMark[i] = mxFrame;
        mxI = i;
        walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
        break;
      }else if( rc!=SQLITE_BUSY ){
        return rc;
      }
    }
  }
  if( mxI==0 ){



    assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
    return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
  }

  rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
  if( rc ){
    return rc==SQLITE_BUSY ? WAL_RETRY : rc;
2406
2407
2408
2409
2410
2411
2412
2413

2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428










2429



2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440



2441
2442
2443
2444
2445
2446
2447
  testcase( rc==SQLITE_PROTOCOL );
  testcase( rc==SQLITE_OK );

#ifdef SQLITE_ENABLE_SNAPSHOT
  if( rc==SQLITE_OK ){
    if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){
      /* At this point the client has a lock on an aReadMark[] slot holding
      ** a value equal to or smaller than pSnapshot->mxFrame.  Verify that

      ** pSnapshot is still valid before continuing.  Reasons why pSnapshot
      ** might no longer be valid:
      **
      **    (1)  The WAL file has been reset since the snapshot was taken.
      **         In this case, the salt will have changed.
      **
      **    (2)  A checkpoint as been attempted that wrote frames past
      **         pSnapshot->mxFrame into the database file.  Note that the
      **         checkpoint need not have completed for this to cause problems.
      */
      volatile WalCkptInfo *pInfo = walCkptInfo(pWal);

      assert( pWal->readLock>0 );
      assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );











      /* Check that the wal file has not been wrapped. Assuming it has not,



      ** overwrite pWal->hdr with *pSnapshot and set *pChanged as appropriate
      ** for opening the snapshot. Or, if the wal file has been wrapped
      ** since pSnapshot was written, return SQLITE_BUSY_SNAPSHOT. */
      if( memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))==0
       && pSnapshot->mxFrame>=pInfo->nBackfillAttempted
      ){
        memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
        *pChanged = bChanged;
      }else{
        rc = SQLITE_BUSY_SNAPSHOT;
      }




      if( rc!=SQLITE_OK ){
        sqlite3WalEndReadTransaction(pWal);
      }
    }
  }
#endif







|
>
|
|













>
>
>
>
>
>
>
>
>
>
|
>
>
>
|
|
<








>
>
>







2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440

2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
  testcase( rc==SQLITE_PROTOCOL );
  testcase( rc==SQLITE_OK );

#ifdef SQLITE_ENABLE_SNAPSHOT
  if( rc==SQLITE_OK ){
    if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){
      /* At this point the client has a lock on an aReadMark[] slot holding
      ** a value equal to or smaller than pSnapshot->mxFrame, but pWal->hdr
      ** is populated with the wal-index header corresponding to the head
      ** of the wal file. Verify that pSnapshot is still valid before
      ** continuing.  Reasons why pSnapshot might no longer be valid:
      **
      **    (1)  The WAL file has been reset since the snapshot was taken.
      **         In this case, the salt will have changed.
      **
      **    (2)  A checkpoint as been attempted that wrote frames past
      **         pSnapshot->mxFrame into the database file.  Note that the
      **         checkpoint need not have completed for this to cause problems.
      */
      volatile WalCkptInfo *pInfo = walCkptInfo(pWal);

      assert( pWal->readLock>0 );
      assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );

      /* It is possible that there is a checkpointer thread running 
      ** concurrent with this code. If this is the case, it may be that the
      ** checkpointer has already determined that it will checkpoint 
      ** snapshot X, where X is later in the wal file than pSnapshot, but 
      ** has not yet set the pInfo->nBackfillAttempted variable to indicate 
      ** its intent. To avoid the race condition this leads to, ensure that
      ** there is no checkpointer process by taking a shared CKPT lock 
      ** before checking pInfo->nBackfillAttempted.  */
      rc = walLockShared(pWal, WAL_CKPT_LOCK);

      /* Check that the wal file has not been wrapped. Assuming that it has
      ** not, also check that no checkpointer has attempted to checkpoint
      ** any frames beyond pSnapshot->mxFrame. If either of these conditions
      ** are true, return SQLTIE_BUSY_SNAPSHOT. Otherwise, overwrite pWal->hdr
      ** with *pSnapshot and set *pChanged as appropriate for opening the
      ** snapshot.  */

      if( memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))==0
       && pSnapshot->mxFrame>=pInfo->nBackfillAttempted
      ){
        memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
        *pChanged = bChanged;
      }else{
        rc = SQLITE_BUSY_SNAPSHOT;
      }

      /* Release the shared CKPT lock obtained above. */
      walUnlockShared(pWal, WAL_CKPT_LOCK);

      if( rc!=SQLITE_OK ){
        sqlite3WalEndReadTransaction(pWal);
      }
    }
  }
#endif
Changes to test/snapshot.test.
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78



79
80
81
82
83
84
85
86
} {1 SQLITE_ERROR}
do_execsql_test 1.3.2 COMMIT

#-------------------------------------------------------------------------
# Check that a simple case works. Reuse the database created by the
# block of tests above.
#
# UPDATE: This case (2.1) no longer works. 2.2 does.
#
do_execsql_test 2.1.0 {
  BEGIN;
    SELECT * FROM t1;
} {1 2 3 4 5 6 7 8}

do_test 2.1.1 {
  set snapshot [sqlite3_snapshot_get db main]
  execsql {
    COMMIT;
    INSERT INTO t1 VALUES(9, 10);
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8 9 10}

do_test 2.1.2 {
  execsql BEGIN
  list [catch { sqlite3_snapshot_open db main $snapshot } msg] $msg



} {1 SQLITE_BUSY_SNAPSHOT}

do_test 2.1.3 {
  sqlite3_snapshot_free $snapshot
  execsql COMMIT
} {}

do_test 2.2.0 {







<
<
















|
>
>
>
|







53
54
55
56
57
58
59


60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
} {1 SQLITE_ERROR}
do_execsql_test 1.3.2 COMMIT

#-------------------------------------------------------------------------
# Check that a simple case works. Reuse the database created by the
# block of tests above.
#


do_execsql_test 2.1.0 {
  BEGIN;
    SELECT * FROM t1;
} {1 2 3 4 5 6 7 8}

do_test 2.1.1 {
  set snapshot [sqlite3_snapshot_get db main]
  execsql {
    COMMIT;
    INSERT INTO t1 VALUES(9, 10);
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8 9 10}

do_test 2.1.2 {
  execsql BEGIN
  sqlite3_snapshot_open db main $snapshot
  execsql {
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8}

do_test 2.1.3 {
  sqlite3_snapshot_free $snapshot
  execsql COMMIT
} {}

do_test 2.2.0 {
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  set snapshot [sqlite3_snapshot_get db2 main]
  execsql {
    INSERT INTO t1 VALUES(11, 12);
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8 9 10 11 12}

do_test 2.1.2 {
  execsql BEGIN
  sqlite3_snapshot_open db main $snapshot
  execsql {
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8 9 10}

do_test 2.1.3 {
  sqlite3_snapshot_free $snapshot
  execsql COMMIT
  execsql COMMIT db2
  db2 close
} {}

#-------------------------------------------------------------------------







|







|







96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
  set snapshot [sqlite3_snapshot_get db2 main]
  execsql {
    INSERT INTO t1 VALUES(11, 12);
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8 9 10 11 12}

do_test 2.2.2 {
  execsql BEGIN
  sqlite3_snapshot_open db main $snapshot
  execsql {
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8 9 10}

do_test 2.2.3 {
  sqlite3_snapshot_free $snapshot
  execsql COMMIT
  execsql COMMIT db2
  db2 close
} {}

#-------------------------------------------------------------------------