/ Check-in [7315f7cb]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Update sqlite3_snapshot_open() to reduce the chances of reading a corrupt snapshot created by a checkpointer process exiting unexpectedly.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | snapshot-get
Files: files | file ages | folders
SHA1: 7315f7cbf4179aadda0f1a0baa1526a9b9f9729f
User & Date: dan 2015-12-09 20:05:27
Context
2015-12-10
02:15
Add the nBackfillAttempted field in formerly unused space in WalCkptInfo and use that field to close the race condition on opening a snapshot. check-in: cb68e9d0 user: drh tags: snapshot-get
2015-12-09
20:05
Update sqlite3_snapshot_open() to reduce the chances of reading a corrupt snapshot created by a checkpointer process exiting unexpectedly. check-in: 7315f7cb user: dan tags: snapshot-get
16:04
Merge unrelated fixes from trunk. check-in: 362615b4 user: drh tags: snapshot-get
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/sqlite.h.in.

7891
7892
7893
7894
7895
7896
7897




























7898
7899
7900
7901
7902

7903
7904
7905
7906
7907
7908
7909
7910
7911
7912
7913
7914
7915
** SQLITE_OK.
**
** If the specified database does not exist, or is not a wal mode database, 
** or the database handle does not have an open read transaction on it,
** SQLITE_ERROR is returned. If any other error occurs, for example an IO 
** error or an OOM condition, the corresponding SQLite error code is 
** returned.




























*/
typedef struct sqlite3_snapshot sqlite3_snapshot;
int sqlite3_snapshot_get(sqlite3*, const char*, sqlite3_snapshot **ppSnapshot);
int sqlite3_snapshot_open(sqlite3*, const char*, sqlite3_snapshot*);
void sqlite3_snapshot_free(sqlite3_snapshot*);


/*
** Undo the hack that converts floating point types to integer for
** builds on processors without floating point support.
*/
#ifdef SQLITE_OMIT_FLOATING_POINT
# undef double
#endif

#ifdef __cplusplus
}  /* End of the 'extern "C"' block */
#endif
#endif /* _SQLITE3_H_ */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



<

>













7891
7892
7893
7894
7895
7896
7897
7898
7899
7900
7901
7902
7903
7904
7905
7906
7907
7908
7909
7910
7911
7912
7913
7914
7915
7916
7917
7918
7919
7920
7921
7922
7923
7924
7925
7926
7927
7928

7929
7930
7931
7932
7933
7934
7935
7936
7937
7938
7939
7940
7941
7942
7943
** SQLITE_OK.
**
** If the specified database does not exist, or is not a wal mode database, 
** or the database handle does not have an open read transaction on it,
** SQLITE_ERROR is returned. If any other error occurs, for example an IO 
** error or an OOM condition, the corresponding SQLite error code is 
** returned.
**
** Each successful call to sqlite3_snapshot_get() must be matched by a call
** to sqlite3_snapshot_free() to delete the snapshot handle. Not doing so
** is a memory leak. The results of using a snapshot handle after it has 
** been deleted by sqlite3_snapshot_free() are undefined.
**
** Given a snapshot handle, the sqlite3_snapshot_open() API function may be
** used to open a read transaction on the same database snapshot that was
** being read when sqlite3_snapshot_get() was called to obtain it. The
** combination of the first two arguments to sqlite3_snapshot_open() - a
** database handle and the name (e.g. "main") of one of its attached 
** databases - must refer to the same database file as that identified by 
** the arguments passed to the sqlite3_snapshot_get() call. The database
** handle must not have an open read or write transaction on this database
** file, and must not be in auto-commit mode.
**
** An old database snapshot may only be opened if SQLite is able to 
** determine that it is still valid. The only way for an application to 
** guarantee that a snapshot remains valid is by holding an open 
** read-transaction on it or on an older snapshot of the same database 
** file. If SQLite cannot determine that the snapshot identified by the
** snapshot handle is still valid, SQLITE_BUSY_SNAPSHOT is returned.
**
** Otherwise, if the read transaction is successfully opened, SQLITE_OK is
** returned. If the named database is not in wal mode or if the database
** handle already has an open read or write transaction on it, or if the 
** database handle is in auto-commit mode, SQLITE_ERROR is returned. If
** an OOM or IO error occurs, the associated SQLite error code is returned.
*/
typedef struct sqlite3_snapshot sqlite3_snapshot;
int sqlite3_snapshot_get(sqlite3*, const char*, sqlite3_snapshot **ppSnapshot);

void sqlite3_snapshot_free(sqlite3_snapshot*);
int sqlite3_snapshot_open(sqlite3*, const char*, sqlite3_snapshot*);

/*
** Undo the hack that converts floating point types to integer for
** builds on processors without floating point support.
*/
#ifdef SQLITE_OMIT_FLOATING_POINT
# undef double
#endif

#ifdef __cplusplus
}  /* End of the 'extern "C"' block */
#endif
#endif /* _SQLITE3_H_ */

Changes to src/wal.c.

2271
2272
2273
2274
2275
2276
2277



2278
2279
2280
2281
2282
2283
2284
....
2285
2286
2287
2288
2289
2290
2291



2292
2293
2294
2295
2296
2297
2298
....
2379
2380
2381
2382
2383
2384
2385






























2386
2387
2388


2389





2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
      mxI = i;
    }
  }
  /* There was once an "if" here. The extra "{" is to preserve indentation. */
  {
    if( (pWal->readOnly & WAL_SHM_RDONLY)==0
     && (mxReadMark<mxFrame || mxI==0)



    ){
      for(i=1; i<WAL_NREADER; i++){
        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
        if( rc==SQLITE_OK ){
          mxReadMark = pInfo->aReadMark[i] = mxFrame;
          mxI = i;
          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
................................................................................
          break;
        }else if( rc!=SQLITE_BUSY ){
          return rc;
        }
      }
    }
    if( mxI==0 ){



      assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
      return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
    }

    rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
    if( rc ){
      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
................................................................................
  testcase( (rc&0xff)==SQLITE_IOERR );
  testcase( rc==SQLITE_PROTOCOL );
  testcase( rc==SQLITE_OK );

#ifdef SQLITE_ENABLE_SNAPSHOT
  if( rc==SQLITE_OK ){
    if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr)) ){






























      volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
      rc = walLockShared(pWal, WAL_READ_LOCK(0));
      if( rc==SQLITE_OK ){


        if( pInfo->nBackfill<=pSnapshot->mxFrame 





         && pSnapshot->aSalt[0]==pWal->hdr.aSalt[0]
         && pSnapshot->aSalt[1]==pWal->hdr.aSalt[1]
        ){
          assert( pWal->readLock>0 );
          assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );
          memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
          *pChanged = bChanged;
        }else{
          rc = SQLITE_BUSY_SNAPSHOT;
        }
        walUnlockShared(pWal, WAL_READ_LOCK(0));
      }
      if( rc!=SQLITE_OK ){
        sqlite3WalEndReadTransaction(pWal);
      }
    }
  }
#endif
  return rc;







>
>
>







 







>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

<
<
>
>
|
>
>
>
>
>
|
|
|
<
<
|
|
|
|
|
<
|







2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
....
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
....
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422


2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433


2434
2435
2436
2437
2438

2439
2440
2441
2442
2443
2444
2445
2446
      mxI = i;
    }
  }
  /* There was once an "if" here. The extra "{" is to preserve indentation. */
  {
    if( (pWal->readOnly & WAL_SHM_RDONLY)==0
     && (mxReadMark<mxFrame || mxI==0)
#ifdef SQLITE_ENABLE_SNAPSHOT
     && pWal->pSnapshot==0
#endif
    ){
      for(i=1; i<WAL_NREADER; i++){
        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
        if( rc==SQLITE_OK ){
          mxReadMark = pInfo->aReadMark[i] = mxFrame;
          mxI = i;
          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
................................................................................
          break;
        }else if( rc!=SQLITE_BUSY ){
          return rc;
        }
      }
    }
    if( mxI==0 ){
#ifdef SQLITE_ENABLE_SNAPSHOT
      if( pWal->pSnapshot ) return SQLITE_BUSY_SNAPSHOT;
#endif
      assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
      return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
    }

    rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
    if( rc ){
      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
................................................................................
  testcase( (rc&0xff)==SQLITE_IOERR );
  testcase( rc==SQLITE_PROTOCOL );
  testcase( rc==SQLITE_OK );

#ifdef SQLITE_ENABLE_SNAPSHOT
  if( rc==SQLITE_OK ){
    if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr)) ){
      /* At this point the client has a lock on an aReadMark[] slot holding
      ** a value equal to or smaller than pSnapshot->mxFrame. This client
      ** did not populate the aReadMark[] slot. pWal->hdr is populated with
      ** the wal-index header for the snapshot currently at the head of the
      ** wal file, which is different from pSnapshot.
      **
      ** The presence of the aReadMark[] slot entry makes it very likely 
      ** that either there is currently another read-transaction open on
      ** pSnapshot, or that there has been one more recently than the last
      ** checkpoint of any frames greater than pSnapshot->mxFrame was 
      ** started. There is an exception though: client 1 may have called
      ** walTryBeginRead and started to open snapshot pSnapshot, setting
      ** the aReadMark[] slot to do so. At the same time, client 2 may 
      ** have committed a new snapshot to disk and started a checkpoint.
      ** In this circumstance client 1 does not end up reading pSnapshot,
      ** but may leave the aReadMark[] slot populated.
      **
      ** The race condition above is difficult to detect. One approach would
      ** be to check the aReadMark[] slot for another client. But this is
      ** prone to false-positives from other snapshot clients. And there
      ** is no equivalent to xCheckReservedLock() for wal locks. Another
      ** approach would be to take the checkpointer lock and check that
      ** fewer than pSnapshot->mxFrame frames have been checkpointed. But
      ** that does not account for checkpointer processes that failed after
      ** checkpointing frames but before updating WalCkptInfo.nBackfill.
      ** And it would mean that this function would block on checkpointers
      ** and vice versa.
      **
      ** TODO: For now, this race condition is ignored.
      */
      volatile WalCkptInfo *pInfo = walCkptInfo(pWal);



      assert( pWal->readLock>0 );
      assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );

      /* Check that the wal file has not been wrapped. Assuming it has not,
      ** overwrite pWal->hdr with *pSnapshot and set *pChanged as appropriate
      ** for opening the snapshot. Or, if the wal file has been wrapped
      ** since pSnapshot was written, return SQLITE_BUSY_SNAPSHOT. */
      if( pSnapshot->aSalt[0]==pWal->hdr.aSalt[0]
       && pSnapshot->aSalt[1]==pWal->hdr.aSalt[1]
      ){


        memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
        *pChanged = bChanged;
      }else{
        rc = SQLITE_BUSY_SNAPSHOT;
      }


      if( rc!=SQLITE_OK ){
        sqlite3WalEndReadTransaction(pWal);
      }
    }
  }
#endif
  return rc;

Changes to test/snapshot.test.

53
54
55
56
57
58
59


60
61
62
63
64
65
66
67
68
69
70
71
72
73










74
















75
76

77

78
79
80
81
82


83
84
85
86
87
88
89
} {1 SQLITE_ERROR}
do_execsql_test 1.3.2 COMMIT

#-------------------------------------------------------------------------
# Check that a simple case works. Reuse the database created by the
# block of tests above.
#


do_execsql_test 2.0 {
  BEGIN;
    SELECT * FROM t1;
} {1 2 3 4 5 6 7 8}

do_test 2.1 {
  set snapshot [sqlite3_snapshot_get db main]
  execsql {
    COMMIT;
    INSERT INTO t1 VALUES(9, 10);
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8 9 10}











do_test 2.2 {
















  execsql BEGIN
  sqlite3_snapshot_open db main $snapshot

  execsql { SELECT * FROM t1 }

} {1 2 3 4 5 6 7 8}

do_test 2.3 {
  sqlite3_snapshot_free $snapshot
  execsql COMMIT


} {}

#-------------------------------------------------------------------------
# Check some errors in sqlite3_snapshot_open(). It is an error if:
#
#   1) the db is in auto-commit mode,
#   2) the db has an open (read or write) transaction,







>
>
|




|








>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


>
|
>
|

|


>
>







53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
} {1 SQLITE_ERROR}
do_execsql_test 1.3.2 COMMIT

#-------------------------------------------------------------------------
# Check that a simple case works. Reuse the database created by the
# block of tests above.
#
# UPDATE: This case (2.1) no longer works. 2.2 does.
#
do_execsql_test 2.1.0 {
  BEGIN;
    SELECT * FROM t1;
} {1 2 3 4 5 6 7 8}

do_test 2.1.1 {
  set snapshot [sqlite3_snapshot_get db main]
  execsql {
    COMMIT;
    INSERT INTO t1 VALUES(9, 10);
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8 9 10}

do_test 2.1.2 {
  execsql BEGIN
  list [catch { sqlite3_snapshot_open db main $snapshot } msg] $msg
} {1 SQLITE_BUSY_SNAPSHOT}

do_test 2.1.3 {
  sqlite3_snapshot_free $snapshot
  execsql COMMIT
} {}

do_test 2.2.0 {
  sqlite3 db2 test.db
  execsql {
    BEGIN;
      SELECT * FROM t1;
  } db2
} {1 2 3 4 5 6 7 8 9 10}

do_test 2.2.1 {
  set snapshot [sqlite3_snapshot_get db2 main]
  execsql {
    INSERT INTO t1 VALUES(11, 12);
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8 9 10 11 12}

# While db2 still holds its read transaction on the older snapshot, opening
# that snapshot through db must succeed and must not show the (11, 12) row
# inserted afterwards.
do_test 2.2.2 {
  execsql BEGIN
  sqlite3_snapshot_open db main $snapshot
  execsql {
    SELECT * FROM t1;
  }
} {1 2 3 4 5 6 7 8 9 10}

# Clean up after the 2.2.x group: free the snapshot handle, end the
# transactions open on db and db2, then close the second connection.
do_test 2.2.3 {
  sqlite3_snapshot_free $snapshot
  execsql COMMIT
  execsql COMMIT db2
  db2 close
} {}

#-------------------------------------------------------------------------
# Check some errors in sqlite3_snapshot_open(). It is an error if:
#
#   1) the db is in auto-commit mode,
#   2) the db has an open (read or write) transaction,