/ Check-in [d3c25740]
Login
Overview
Comment:Improved comments and variable names in the read-only WAL logic.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | readonly-wal-recovery
Files: files | file ages | folders
SHA3-256:d3c25740eec9a2a41c29e6e488fcf6587c1fb821147a442c29439b25a92154a5
User & Date: drh 2017-11-10 20:00:50
Context
2017-11-11
13:30
Further comment improvements in wal.c. No code changes. check-in: 34638800 user: drh tags: readonly-wal-recovery
2017-11-10
20:00
Improved comments and variable names in the read-only WAL logic. check-in: d3c25740 user: drh tags: readonly-wal-recovery
2017-11-09
23:24
Avoid superfluous SHM unlock call in the Win32 VFS. check-in: 5a384be6 user: mistachkin tags: readonly-wal-recovery
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/wal.c.

451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
....
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
....
2095
2096
2097
2098
2099
2100
2101


2102






2103
2104

2105
2106
2107
2108
2109


2110
2111
2112
2113
2114
2115
2116
....
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
....
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171









2172
2173


2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212

2213
2214
2215
2216
2217
2218

2219
2220
2221

2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235




2236









2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
....
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
....
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
....
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
....
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
....
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
  u8 truncateOnCommit;       /* True to truncate WAL file on commit */
  u8 syncHeader;             /* Fsync the WAL header if true */
  u8 padToSectorBoundary;    /* Pad transactions out to the next sector */
  u8 bUnlocked;
  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  u32 minFrame;              /* Ignore wal frames before this one */
  u32 iReCksum;              /* On commit, recalculate checksums from here */
  const char *zWalName;      /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
................................................................................
  return rc;
}

/*
** Close an open wal-index.
*/
static void walIndexClose(Wal *pWal, int isDelete){
  if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE || pWal->bUnlocked ){
    int i;
    for(i=0; i<pWal->nWiData; i++){
      sqlite3_free((void *)pWal->apWiData[i]);
      pWal->apWiData[i] = 0;
    }
  }
  if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){
................................................................................
  volatile u32 *page0;            /* Chunk of wal-index containing header */

  /* Ensure that page 0 of the wal-index (the page that contains the 
  ** wal-index header) is mapped. Return early if an error occurs here.
  */
  assert( pChanged );
  rc = walIndexPage(pWal, 0, &page0);


  if( rc==SQLITE_READONLY_CANTINIT ){






    assert( page0==0 && pWal->writeLock==0 );
    pWal->bUnlocked = 1;

    pWal->exclusiveMode = WAL_HEAPMEMORY_MODE;
    *pChanged = 1;
  }else
  if( rc!=SQLITE_OK ){
    return rc;


  };
  assert( page0 || pWal->writeLock==0 );

  /* If the first page of the wal-index has been mapped, try to read the
  ** wal-index header immediately, without holding any lock. This usually
  ** works, but may fail if the wal-index header is corrupt or currently 
  ** being modified by another thread or process.
................................................................................
  badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1);

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  */
  assert( badHdr==0 || pWal->writeLock==0 );
  if( badHdr ){
    if( pWal->bUnlocked==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){
      if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
        walUnlockShared(pWal, WAL_WRITE_LOCK);
        rc = SQLITE_READONLY_RECOVERY;
      }
    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
      pWal->writeLock = 1;
      if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
................................................................................
  /* If the header is read successfully, check the version number to make
  ** sure the wal-index was not constructed with some future format that
  ** this version of SQLite cannot understand.
  */
  if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
    rc = SQLITE_CANTOPEN_BKPT;
  }
  if( pWal->bUnlocked ){
    if( rc!=SQLITE_OK ){
      walIndexClose(pWal, 0);
      pWal->bUnlocked = 0;
      assert( pWal->nWiData>0 && pWal->apWiData[0]==0 );
      if( rc==SQLITE_IOERR_SHORT_READ ) rc = WAL_RETRY;
    }
    pWal->exclusiveMode = WAL_NORMAL_MODE;
  }

  return rc;
}

/*
** Open an "unlocked" transaction. An unlocked transaction is a read 
** transaction used by a read-only client in cases where the *-shm
** file cannot be mapped and its contents cannot be trusted. It is









** assumed that the *-wal file has been read and that a wal-index 
** constructed in heap memory is currently available in Wal.apWiData[].


**
** If this function returns SQLITE_OK, then the read transaction has
** been successfully opened. In this case output variable (*pChanged) 
** is set to true before returning if the caller should discard the
** contents of the page cache before proceeding. Or, if it returns 
** WAL_RETRY, then the heap memory wal-index has been discarded and 
** the caller should retry opening the read transaction from the 
** beginning (including attempting to map the *-shm file). 
**
** If an error occurs, an SQLite error code is returned.
*/
static int walBeginUnlocked(Wal *pWal, int *pChanged){
  i64 szWal;                      /* Size of wal file on disk in bytes */
  i64 iOffset;                    /* Current offset when reading wal file */
  u8 aBuf[WAL_HDRSIZE];           /* Buffer to load WAL header into */
  u8 *aFrame = 0;                 /* Malloc'd buffer to load entire frame */
  int szFrame;                    /* Number of bytes in buffer aFrame[] */
  u8 *aData;                      /* Pointer to data part of aFrame buffer */
  volatile void *pDummy;          /* Dummy argument for xShmMap */
  int rc;                         /* Return code */
  u32 aSaveCksum[2];              /* Saved copy of pWal->hdr.aFrameCksum */

  assert( pWal->bUnlocked );
  assert( pWal->readOnly & WAL_SHM_RDONLY );
  assert( pWal->nWiData>0 && pWal->apWiData[0] );

  /* Take WAL_READ_LOCK(0). This has the effect of preventing any
  ** live clients from running a checkpoint, but does not stop them
  ** from running recovery.  */
  rc = walLockShared(pWal, WAL_READ_LOCK(0));
  if( rc!=SQLITE_OK ){
    if( rc==SQLITE_BUSY ) rc = WAL_RETRY;
    goto begin_unlocked_out;
  }
  pWal->readLock = 0;

  /* Try to map the *-shm file again. If it succeeds this time, then 
  ** a non-readonly_shm connection has already connected to the database.
  ** In this case, start over with opening the transaction.

  **
  ** The *-shm file was opened read-only, so sqlite3OsShmMap() can never
  ** return SQLITE_OK here, as that would imply that it had established
  ** a read/write mapping.  A return of SQLITE_READONLY means success - that
  ** a mapping has been established to a shared-memory segment that is actively
  ** maintained by a writer.  SQLITE_READONLY_CANTINIT means that all

  ** all connections to the -shm file are read-only and hence the content
  ** of the -shm file might be out-of-date.
  ** 

  ** The WAL_READ_LOCK(0) lock held by this client prevents a checkpoint
  ** from taking place. But it does not prevent the wal from being wrapped
  ** if a checkpoint has already taken place. This means that if another
  ** client is connected at this point, it may have already checkpointed 
  ** the entire wal. In that case it would not be safe to continue with
  ** the unlocked transaction, as the other client may overwrite wal 
  ** frames that this client is still using.  */
  rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy);
  assert( rc!=SQLITE_OK ); /* SQLITE_OK not possible for read-only connection */
  if( rc!=SQLITE_READONLY_CANTINIT ){
    rc = (rc==SQLITE_READONLY ? WAL_RETRY : rc);
    goto begin_unlocked_out;
  }





  memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr));









  rc = sqlite3OsFileSize(pWal->pWalFd, &szWal);
  if( rc!=SQLITE_OK ){
    goto begin_unlocked_out;
  }
  if( szWal<WAL_HDRSIZE ){
    /* If the wal file is too small to contain a wal-header and the
    ** wal-index header has mxFrame==0, then it must be safe to proceed
    ** reading the database file only. However, the page cache cannot
    ** be trusted, as a read/write connection may have connected, written
    ** the db, run a checkpoint, truncated the wal file and disconnected
    ** since this client's last read transaction.  */
    *pChanged = 1;
    rc = (pWal->hdr.mxFrame==0 ? SQLITE_OK : WAL_RETRY);
    goto begin_unlocked_out;
  }

  /* Check the salt keys at the start of the wal file still match. */
  rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
  if( rc!=SQLITE_OK ){
    goto begin_unlocked_out;
  }
  if( memcmp(&pWal->hdr.aSalt, &aBuf[16], 8) ){
    rc = WAL_RETRY;
    goto begin_unlocked_out;
  }

  /* Allocate a buffer to read frames into */
  szFrame = pWal->hdr.szPage + WAL_FRAME_HDRSIZE;
  aFrame = (u8 *)sqlite3_malloc64(szFrame);
  if( aFrame==0 ){
    rc = SQLITE_NOMEM_BKPT;
    goto begin_unlocked_out;
  }
  aData = &aFrame[WAL_FRAME_HDRSIZE];

  /* Check to see if a complete transaction has been appended to the
  ** wal file since the heap-memory wal-index was created. If so, the
  ** heap-memory wal-index is discarded and WAL_RETRY returned to
  ** the caller.  */
................................................................................
      rc = WAL_RETRY;
      break;
    }
  }
  pWal->hdr.aFrameCksum[0] = aSaveCksum[0];
  pWal->hdr.aFrameCksum[1] = aSaveCksum[1];

 begin_unlocked_out:
  sqlite3_free(aFrame);
  if( rc!=SQLITE_OK ){
    int i;
    for(i=0; i<pWal->nWiData; i++){
      sqlite3_free((void*)pWal->apWiData[i]);
      pWal->apWiData[i] = 0;
    }
    pWal->bUnlocked = 0;
    sqlite3WalEndReadTransaction(pWal);
    *pChanged = 1;
  }
  return rc;
}

/*
................................................................................
    }
    if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39;
    sqlite3OsSleep(pWal->pVfs, nDelay);
  }

  if( !useWal ){
    assert( rc==SQLITE_OK );
    if( pWal->bUnlocked==0 ){
      rc = walIndexReadHdr(pWal, pChanged);
    }
    if( rc==SQLITE_BUSY ){
      /* If there is not a recovery running in another thread or process
      ** then convert BUSY errors to WAL_RETRY.  If recovery is known to
      ** be running, convert BUSY to BUSY_RECOVERY.  There is a race here
      ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY
................................................................................
      }else if( rc==SQLITE_BUSY ){
        rc = SQLITE_BUSY_RECOVERY;
      }
    }
    if( rc!=SQLITE_OK ){
      return rc;
    }
    else if( pWal->bUnlocked ){
      return walBeginUnlocked(pWal, pChanged);
    }
  }

  assert( pWal->nWiData>0 );
  assert( pWal->apWiData[0] || (pWal->readOnly & WAL_SHM_RDONLY) );
  pInfo = pWal->apWiData[0] ? walCkptInfo(pWal) : 0;
  if( !useWal && (pInfo==0 || pInfo->nBackfill==pWal->hdr.mxFrame)
................................................................................

  /* If the "last page" field of the wal-index header snapshot is 0, then
  ** no data will be read from the wal under any circumstances. Return early
  ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
  ** then the WAL is ignored by the reader so return early, as if the 
  ** WAL were empty.
  */
  if( iLast==0 || (pWal->readLock==0 && pWal->bUnlocked==0) ){
    *piRead = 0;
    return SQLITE_OK;
  }

  /* Search the hash table or tables for an entry matching page number
  ** pgno. Each iteration of the following for() loop searches one
  ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).
................................................................................
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
  /* If expensive assert() statements are available, do a linear search
  ** of the wal-index file content. Make sure the results agree with the
  ** result obtained using the hash indexes above.  */
  {
    u32 iRead2 = 0;
    u32 iTest;
    assert( pWal->bUnlocked || pWal->minFrame>0 );
    for(iTest=iLast; iTest>=pWal->minFrame && iTest>0; iTest--){
      if( walFramePgno(pWal, iTest)==pgno ){
        iRead2 = iTest;
        break;
      }
    }
    assert( iRead==iRead2 );







|







 







|







 







>
>
|
>
>
>
>
>
>
|
<
>
|
|
|
<
<
>
>







 







|







 







|


|










|
|
|
>
>
>
>
>
>
>
>
>
|
<
>
>











|










|




|




|



|
|
|
>

<
|
|
<
|
>
|
<
|
>
|
|
|
<
<
<
<




|


>
>
>
>

>
>
>
>
>
>
>
>
>


|










|





|



|







|







 







|







|







 







|







 







|
|







 







|







 







|







451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
....
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
....
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111

2112
2113
2114
2115


2116
2117
2118
2119
2120
2121
2122
2123
2124
....
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
....
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189

2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232

2233
2234

2235
2236
2237

2238
2239
2240
2241
2242




2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
....
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
....
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
....
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
....
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
....
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
  u8 truncateOnCommit;       /* True to truncate WAL file on commit */
  u8 syncHeader;             /* Fsync the WAL header if true */
  u8 padToSectorBoundary;    /* Pad transactions out to the next sector */
  u8 bShmUnreliable;         /* SHM content is read-only and unreliable */
  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  u32 minFrame;              /* Ignore wal frames before this one */
  u32 iReCksum;              /* On commit, recalculate checksums from here */
  const char *zWalName;      /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
................................................................................
  return rc;
}

/*
** Close an open wal-index.
*/
static void walIndexClose(Wal *pWal, int isDelete){
  if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE || pWal->bShmUnreliable ){
    int i;
    for(i=0; i<pWal->nWiData; i++){
      sqlite3_free((void *)pWal->apWiData[i]);
      pWal->apWiData[i] = 0;
    }
  }
  if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){
................................................................................
  volatile u32 *page0;            /* Chunk of wal-index containing header */

  /* Ensure that page 0 of the wal-index (the page that contains the 
  ** wal-index header) is mapped. Return early if an error occurs here.
  */
  assert( pChanged );
  rc = walIndexPage(pWal, 0, &page0);
  if( rc!=SQLITE_OK ){
    assert( rc!=SQLITE_READONLY ); /* READONLY changed to OK in walIndexPage */
    if( rc==SQLITE_READONLY_CANTINIT ){
      /* The SQLITE_READONLY_CANTINIT return means that the shared-memory
      ** was openable but is not writable, and this thread is unable to
      ** confirm that another write-capable connection has the shared-memory
      ** open, and hence the content of the shared-memory is unreliable,
      ** since the shared-memory might be inconsistent with the WAL file
      ** and there is no writer on hand to fix it. */
      assert( page0==0 && pWal->writeLock==0 );

      pWal->bShmUnreliable = 1;
      pWal->exclusiveMode = WAL_HEAPMEMORY_MODE;
      *pChanged = 1;
    }else{


      return rc; /* Any other non-OK return is just an error */
    }
  };
  assert( page0 || pWal->writeLock==0 );

  /* If the first page of the wal-index has been mapped, try to read the
  ** wal-index header immediately, without holding any lock. This usually
  ** works, but may fail if the wal-index header is corrupt or currently 
  ** being modified by another thread or process.
................................................................................
  badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1);

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  */
  assert( badHdr==0 || pWal->writeLock==0 );
  if( badHdr ){
    if( pWal->bShmUnreliable==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){
      if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
        walUnlockShared(pWal, WAL_WRITE_LOCK);
        rc = SQLITE_READONLY_RECOVERY;
      }
    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
      pWal->writeLock = 1;
      if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
................................................................................
  /* If the header is read successfully, check the version number to make
  ** sure the wal-index was not constructed with some future format that
  ** this version of SQLite cannot understand.
  */
  if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
    rc = SQLITE_CANTOPEN_BKPT;
  }
  if( pWal->bShmUnreliable ){
    if( rc!=SQLITE_OK ){
      walIndexClose(pWal, 0);
      pWal->bShmUnreliable = 0;
      assert( pWal->nWiData>0 && pWal->apWiData[0]==0 );
      if( rc==SQLITE_IOERR_SHORT_READ ) rc = WAL_RETRY;
    }
    pWal->exclusiveMode = WAL_NORMAL_MODE;
  }

  return rc;
}

/*
** Open a transaction in a connection where the shared-memory is read-only
** and where we cannot verify that there is a separate write-capable connection
** on hand to keep the shared-memory up-to-date with the WAL file.
**
** This can happen, for example, when the shared-memory is implemented by
** memory-mapping a *-shm file, where a prior writer has shut down and
** left the *-shm file on disk, and now the present connection is trying
** to use that database but lacks write permission on the *-shm file.
** Other scenarios are also possible, depending on the VFS implementation.
**
** Precondition:
**
**    The *-wal file has been read and an appropriate wal-index has been

**    constructed in pWal->apWiData[] using heap memory instead of shared
**    memory. 
**
** If this function returns SQLITE_OK, then the read transaction has
** been successfully opened. In this case output variable (*pChanged) 
** is set to true before returning if the caller should discard the
** contents of the page cache before proceeding. Or, if it returns 
** WAL_RETRY, then the heap memory wal-index has been discarded and 
** the caller should retry opening the read transaction from the 
** beginning (including attempting to map the *-shm file). 
**
** If an error occurs, an SQLite error code is returned.
*/
static int walBeginShmUnreliable(Wal *pWal, int *pChanged){
  i64 szWal;                      /* Size of wal file on disk in bytes */
  i64 iOffset;                    /* Current offset when reading wal file */
  u8 aBuf[WAL_HDRSIZE];           /* Buffer to load WAL header into */
  u8 *aFrame = 0;                 /* Malloc'd buffer to load entire frame */
  int szFrame;                    /* Number of bytes in buffer aFrame[] */
  u8 *aData;                      /* Pointer to data part of aFrame buffer */
  volatile void *pDummy;          /* Dummy argument for xShmMap */
  int rc;                         /* Return code */
  u32 aSaveCksum[2];              /* Saved copy of pWal->hdr.aFrameCksum */

  assert( pWal->bShmUnreliable );
  assert( pWal->readOnly & WAL_SHM_RDONLY );
  assert( pWal->nWiData>0 && pWal->apWiData[0] );

  /* Take WAL_READ_LOCK(0). This has the effect of preventing any
  ** writers from running a checkpoint, but does not stop them
  ** from running recovery.  */
  rc = walLockShared(pWal, WAL_READ_LOCK(0));
  if( rc!=SQLITE_OK ){
    if( rc==SQLITE_BUSY ) rc = WAL_RETRY;
    goto begin_unreliable_shm_out;
  }
  pWal->readLock = 0;

  /* Check to see if a separate writer has attached to the shared-memory area,
  ** thus making the shared-memory "reliable" again.  Do this by invoking
  ** the xShmMap() routine of the VFS and looking to see if the return
  ** is SQLITE_READONLY instead of SQLITE_READONLY_CANTINIT.
  **

  ** Once sqlite3OsShmMap() has been called for a file and has returned
  ** any SQLITE_READONLY value, it must SQLITE_READONLY or

  ** SQLITE_READONLY_CANTINIT or some error for all subsequent invocations,
  ** until sqlite3OsShmUnmap() has been called.  This is a requirement
  ** on the VFS implementation.

  **
  ** If the shared-memory is now "reliable" return WAL_RETRY, which will
  ** cause the heap-memory WAL-index to be discarded and the actual
  ** shared memory to be used in its place.
  */




  rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy);
  assert( rc!=SQLITE_OK ); /* SQLITE_OK not possible for read-only connection */
  if( rc!=SQLITE_READONLY_CANTINIT ){
    rc = (rc==SQLITE_READONLY ? WAL_RETRY : rc);
    goto begin_unreliable_shm_out;
  }

  /* Reach this point only if the real shared-memory is still unreliable.
  ** Assume the in-memory WAL-index substitute is correct and load it
  ** into pWal->hdr.
  */
  memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr));

  /* The WAL_READ_LOCK(0) lock held by this client prevents a checkpoint
  ** from taking place. But it does not prevent the wal from being wrapped
  ** if a checkpoint has already taken place. This means that if another
  ** client is connected at this point, it may have already checkpointed 
  ** the entire wal. In that case it would not be safe to continue with
  ** the this transaction, as the other client may overwrite wal 
  ** frames that this client is still using.
  */
  rc = sqlite3OsFileSize(pWal->pWalFd, &szWal);
  if( rc!=SQLITE_OK ){
    goto begin_unreliable_shm_out;
  }
  if( szWal<WAL_HDRSIZE ){
    /* If the wal file is too small to contain a wal-header and the
    ** wal-index header has mxFrame==0, then it must be safe to proceed
    ** reading the database file only. However, the page cache cannot
    ** be trusted, as a read/write connection may have connected, written
    ** the db, run a checkpoint, truncated the wal file and disconnected
    ** since this client's last read transaction.  */
    *pChanged = 1;
    rc = (pWal->hdr.mxFrame==0 ? SQLITE_OK : WAL_RETRY);
    goto begin_unreliable_shm_out;
  }

  /* Check the salt keys at the start of the wal file still match. */
  rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
  if( rc!=SQLITE_OK ){
    goto begin_unreliable_shm_out;
  }
  if( memcmp(&pWal->hdr.aSalt, &aBuf[16], 8) ){
    rc = WAL_RETRY;
    goto begin_unreliable_shm_out;
  }

  /* Allocate a buffer to read frames into */
  szFrame = pWal->hdr.szPage + WAL_FRAME_HDRSIZE;
  aFrame = (u8 *)sqlite3_malloc64(szFrame);
  if( aFrame==0 ){
    rc = SQLITE_NOMEM_BKPT;
    goto begin_unreliable_shm_out;
  }
  aData = &aFrame[WAL_FRAME_HDRSIZE];

  /* Check to see if a complete transaction has been appended to the
  ** wal file since the heap-memory wal-index was created. If so, the
  ** heap-memory wal-index is discarded and WAL_RETRY returned to
  ** the caller.  */
................................................................................
      rc = WAL_RETRY;
      break;
    }
  }
  pWal->hdr.aFrameCksum[0] = aSaveCksum[0];
  pWal->hdr.aFrameCksum[1] = aSaveCksum[1];

 begin_unreliable_shm_out:
  sqlite3_free(aFrame);
  if( rc!=SQLITE_OK ){
    int i;
    for(i=0; i<pWal->nWiData; i++){
      sqlite3_free((void*)pWal->apWiData[i]);
      pWal->apWiData[i] = 0;
    }
    pWal->bShmUnreliable = 0;
    sqlite3WalEndReadTransaction(pWal);
    *pChanged = 1;
  }
  return rc;
}

/*
................................................................................
    }
    if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39;
    sqlite3OsSleep(pWal->pVfs, nDelay);
  }

  if( !useWal ){
    assert( rc==SQLITE_OK );
    if( pWal->bShmUnreliable==0 ){
      rc = walIndexReadHdr(pWal, pChanged);
    }
    if( rc==SQLITE_BUSY ){
      /* If there is not a recovery running in another thread or process
      ** then convert BUSY errors to WAL_RETRY.  If recovery is known to
      ** be running, convert BUSY to BUSY_RECOVERY.  There is a race here
      ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY
................................................................................
      }else if( rc==SQLITE_BUSY ){
        rc = SQLITE_BUSY_RECOVERY;
      }
    }
    if( rc!=SQLITE_OK ){
      return rc;
    }
    else if( pWal->bShmUnreliable ){
      return walBeginShmUnreliable(pWal, pChanged);
    }
  }

  assert( pWal->nWiData>0 );
  assert( pWal->apWiData[0] || (pWal->readOnly & WAL_SHM_RDONLY) );
  pInfo = pWal->apWiData[0] ? walCkptInfo(pWal) : 0;
  if( !useWal && (pInfo==0 || pInfo->nBackfill==pWal->hdr.mxFrame)
................................................................................

  /* If the "last page" field of the wal-index header snapshot is 0, then
  ** no data will be read from the wal under any circumstances. Return early
  ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
  ** then the WAL is ignored by the reader so return early, as if the 
  ** WAL were empty.
  */
  if( iLast==0 || (pWal->readLock==0 && pWal->bShmUnreliable==0) ){
    *piRead = 0;
    return SQLITE_OK;
  }

  /* Search the hash table or tables for an entry matching page number
  ** pgno. Each iteration of the following for() loop searches one
  ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).
................................................................................
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
  /* If expensive assert() statements are available, do a linear search
  ** of the wal-index file content. Make sure the results agree with the
  ** result obtained using the hash indexes above.  */
  {
    u32 iRead2 = 0;
    u32 iTest;
    assert( pWal->bShmUnreliable || pWal->minFrame>0 );
    for(iTest=iLast; iTest>=pWal->minFrame && iTest>0; iTest--){
      if( walFramePgno(pWal, iTest)==pgno ){
        iRead2 = iTest;
        break;
      }
    }
    assert( iRead==iRead2 );