/ Check-in [a84cf4f5]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:When searching the wal file for a frame, do not search that part that was already checkpointed when the transaction was opened.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: a84cf4f5d326270a61faf4ff867260f2dd1e68a6
User & Date: dan 2015-08-28 16:18:45
Context
2015-08-28
16:41
Fix compiler warnings in rbu code. check-in: 0fdc36fe user: dan tags: trunk
16:18
When searching the wal file for a frame, do not search that part that was already checkpointed when the transaction was opened. check-in: a84cf4f5 user: dan tags: trunk
15:50
Merge latest trunk into this branch. Closed-Leaf check-in: ab93024d user: dan tags: wal-read-change
03:48
Add the json_check() function, which returns its argument if the argument is well-formed JSON or which throws an error otherwise. check-in: 64abb65d user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/wal.c.

   424    424     u8 writeLock;              /* True if in a write transaction */
   425    425     u8 ckptLock;               /* True if holding a checkpoint lock */
   426    426     u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
   427    427     u8 truncateOnCommit;       /* True to truncate WAL file on commit */
   428    428     u8 syncHeader;             /* Fsync the WAL header if true */
   429    429     u8 padToSectorBoundary;    /* Pad transactions out to the next sector */
   430    430     WalIndexHdr hdr;           /* Wal-index header for current transaction */
          431  +  u32 minFrame;              /* Ignore wal frames before this one */
   431    432     const char *zWalName;      /* Name of WAL file */
   432    433     u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
   433    434   #ifdef SQLITE_DEBUG
   434    435     u8 lockError;              /* True if a locking error has occurred */
   435    436   #endif
   436    437   };
   437    438   
................................................................................
  2292   2293       ** that the log file may have been wrapped by a writer, or that frames
  2293   2294       ** that occur later in the log than pWal->hdr.mxFrame may have been
  2294   2295       ** copied into the database by a checkpointer. If either of these things
  2295   2296       ** happened, then reading the database with the current value of
  2296   2297       ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry
  2297   2298       ** instead.
  2298   2299       **
  2299         -    ** This does not guarantee that the copy of the wal-index header is up to
  2300         -    ** date before proceeding. That would not be possible without somehow
  2301         -    ** blocking writers. It only guarantees that a dangerous checkpoint or 
  2302         -    ** log-wrap (either of which would require an exclusive lock on
  2303         -    ** WAL_READ_LOCK(mxI)) has not occurred since the snapshot was valid.
         2300  +    ** Before checking that the live wal-index header has not changed
         2301  +    ** since it was read, set Wal.minFrame to the first frame in the wal
         2302  +    ** file that has not yet been checkpointed. This client will not need
         2303  +    ** to read any frames earlier than minFrame from the wal file - they
         2304  +    ** can be safely read directly from the database file.
         2305  +    **
         2306  +    ** Because a ShmBarrier() call is made between taking the copy of 
         2307  +    ** nBackfill and checking that the wal-header in shared-memory still
         2308  +    ** matches the one cached in pWal->hdr, it is guaranteed that the 
         2309  +    ** checkpointer that set nBackfill was not working with a wal-index
         2310  +    ** header newer than that cached in pWal->hdr. If it were, that could
         2311  +    ** cause a problem. The checkpointer could omit to checkpoint
         2312  +    ** a version of page X that lies before pWal->minFrame (call that version
         2313  +    ** A) on the basis that there is a newer version (version B) of the same
         2314  +    ** page later in the wal file. But if version B happens to lie past
         2315  +    ** frame pWal->hdr.mxFrame - then the client would incorrectly assume
         2316  +    ** that it can read version A from the database file. However, since
         2317  +    ** we can guarantee that the checkpointer that set nBackfill could not
         2318  +    ** see any pages past pWal->hdr.mxFrame, this problem does not come up.
  2304   2319       */
         2320  +    pWal->minFrame = pInfo->nBackfill+1;
  2305   2321       walShmBarrier(pWal);
  2306   2322       if( pInfo->aReadMark[mxI]!=mxReadMark
  2307   2323        || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
  2308   2324       ){
  2309   2325         walUnlockShared(pWal, WAL_READ_LOCK(mxI));
  2310   2326         return WAL_RETRY;
  2311   2327       }else{
................................................................................
  2368   2384     Wal *pWal,                      /* WAL handle */
  2369   2385     Pgno pgno,                      /* Database page number to read data for */
  2370   2386     u32 *piRead                     /* OUT: Frame number (or zero) */
  2371   2387   ){
  2372   2388     u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
  2373   2389     u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
  2374   2390     int iHash;                      /* Used to loop through N hash tables */
         2391  +  int iMinHash;
  2375   2392   
  2376   2393     /* This routine is only called from within a read transaction. */
  2377   2394     assert( pWal->readLock>=0 || pWal->lockError );
  2378   2395   
  2379   2396     /* If the "last page" field of the wal-index header snapshot is 0, then
  2380   2397     ** no data will be read from the wal under any circumstances. Return early
  2381   2398     ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
................................................................................
  2408   2425     **   (aPgno[iFrame]==pgno): 
  2409   2426     **     This condition filters out normal hash-table collisions.
  2410   2427     **
  2411   2428     **   (iFrame<=iLast): 
  2412   2429     **     This condition filters out entries that were added to the hash
  2413   2430     **     table after the current read-transaction had started.
  2414   2431     */
  2415         -  for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){
         2432  +  iMinHash = walFramePage(pWal->minFrame);
         2433  +  for(iHash=walFramePage(iLast); iHash>=iMinHash && iRead==0; iHash--){
  2416   2434       volatile ht_slot *aHash;      /* Pointer to hash table */
  2417   2435       volatile u32 *aPgno;          /* Pointer to array of page numbers */
  2418   2436       u32 iZero;                    /* Frame number corresponding to aPgno[0] */
  2419   2437       int iKey;                     /* Hash slot index */
  2420   2438       int nCollide;                 /* Number of hash collisions remaining */
  2421   2439       int rc;                       /* Error code */
  2422   2440   
................................................................................
  2423   2441       rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);
  2424   2442       if( rc!=SQLITE_OK ){
  2425   2443         return rc;
  2426   2444       }
  2427   2445       nCollide = HASHTABLE_NSLOT;
  2428   2446       for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
  2429   2447         u32 iFrame = aHash[iKey] + iZero;
  2430         -      if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){
         2448  +      if( iFrame<=iLast && iFrame>=pWal->minFrame && aPgno[aHash[iKey]]==pgno ){
  2431   2449           assert( iFrame>iRead || CORRUPT_DB );
  2432   2450           iRead = iFrame;
  2433   2451         }
  2434   2452         if( (nCollide--)==0 ){
  2435   2453           return SQLITE_CORRUPT_BKPT;
  2436   2454         }
  2437   2455       }

Changes to test/wal6.test.

   188    188   } {}
   189    189   
   190    190   db eval {SELECT test4('3.3.2')}
   191    191   
   192    192   do_test 3.x {
   193    193     db2 close
   194    194   } {}
          195  +
          196  +#-------------------------------------------------------------------------
          197  +# Check that if a wal file has been partially checkpointed, no frames are
          198  +# read from the checkpointed part.
          199  +#
          200  +reset_db
          201  +do_execsql_test 4.1 {
          202  +  PRAGMA page_size = 1024;
          203  +  PRAGMA journal_mode = wal;
          204  +  CREATE TABLE t1(a, b);
          205  +  CREATE TABLE t2(a, b);
          206  +  PRAGMA wal_checkpoint = truncate;
          207  +} {wal 0 0 0}
          208  +
          209  +do_test 4.2 {
          210  +  execsql { INSERT INTO t1 VALUES(1, 2) }
          211  +  file size test.db-wal
          212  +} [wal_file_size 1 1024]
          213  +
          214  +do_test 4.3 {
          215  +  sqlite3 db2 test.db
          216  +  execsql { 
          217  +    BEGIN;
          218  +    INSERT INTO t2 VALUES(3, 4);
          219  +  }
          220  +  execsql { PRAGMA wal_checkpoint = passive } db2
          221  +} {0 1 1}
          222  +
          223  +do_test 4.3 {
          224  +  execsql { COMMIT }
          225  +  db2 close
          226  +  hexio_write test.db-wal 0 [string repeat 00 2000]
          227  +  sqlite3 db2 test.db
          228  +} {}
          229  +
          230  +do_test 4.4.1 { 
          231  +  catchsql { SELECT * FROM t1 } db2 
          232  +} {0 {1 2}}
          233  +do_test 4.4.2 { 
          234  +  catchsql { SELECT * FROM t2 } db2 
          235  +} {1 {database disk image is malformed}}
          236  +
   195    237   
   196    238   finish_test
   197    239