SQLite

Check-in [9b0d5c4ff7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allow readonly_shm connections to access the *-shm file using read() even if it is unable to take a DMS lock.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fcntl_shm_read
Files: files | file ages | folders
SHA3-256: 9b0d5c4ff707f58b07ae28cfe0a7de62fbd22fa20678e446350d1472391bc0b9
User & Date: dan 2017-11-03 17:17:44.636
Context
2017-11-03
17:51
Merge latest trunk changes with this branch. (check-in: 585e0a09c5 user: dan tags: fcntl_shm_read)
17:17
Allow readonly_shm connections to access the *-shm file using read() even if it is unable to take a DMS lock. (check-in: 9b0d5c4ff7 user: dan tags: fcntl_shm_read)
2017-11-02
18:57
Fix test cases in wal2.test broken by the locking change in the previous commit. (check-in: f569c35172 user: dan tags: readonly-wal-recovery)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/os_unix.c.
3791
3792
3793
3794
3795
3796
3797

3798
3799
3800
3801
3802
3803
3804
  }else{
    pFile->ctrlFlags |= mask;
  }
}

/* Forward declaration */
static int unixGetTempname(int nBuf, char *zBuf);


/*
** Information and control of an open file handle.
*/
static int unixFileControl(sqlite3_file *id, int op, void *pArg){
  unixFile *pFile = (unixFile*)id;
  switch( op ){







>







3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
  }else{
    pFile->ctrlFlags |= mask;
  }
}

/* Forward declaration */
static int unixGetTempname(int nBuf, char *zBuf);
static int fcntlReadShm(unixFile*,void*);

/*
** Information and control of an open file handle.
*/
static int unixFileControl(sqlite3_file *id, int op, void *pArg){
  unixFile *pFile = (unixFile*)id;
  switch( op ){
3812
3813
3814
3815
3816
3817
3818




3819
3820
3821
3822
3823
3824
3825
      return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK;
    }
    case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: {
      int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE);
      return rc ? SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK;
    }
#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */





    case SQLITE_FCNTL_LOCKSTATE: {
      *(int*)pArg = pFile->eFileLock;
      return SQLITE_OK;
    }
    case SQLITE_FCNTL_LAST_ERRNO: {
      *(int*)pArg = pFile->lastErrno;







>
>
>
>







3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
      return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK;
    }
    case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: {
      int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE);
      return rc ? SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK;
    }
#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */

    case SQLITE_FCNTL_READ_SHM: {
      return fcntlReadShm(pFile, pArg);
    }

    case SQLITE_FCNTL_LOCKSTATE: {
      *(int*)pArg = pFile->eFileLock;
      return SQLITE_OK;
    }
    case SQLITE_FCNTL_LAST_ERRNO: {
      *(int*)pArg = pFile->lastErrno;
4142
4143
4144
4145
4146
4147
4148













































4149
4150
4151
4152
4153
4154
4155
};

/*
** Constants used for locking
*/
#define UNIX_SHM_BASE   ((22+SQLITE_SHM_NLOCK)*4)         /* first lock byte */
#define UNIX_SHM_DMS    (UNIX_SHM_BASE+SQLITE_SHM_NLOCK)  /* deadman switch */














































/*
** Apply posix advisory locks for all bytes from ofst through ofst+n-1.
**
** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking
** otherwise.
*/







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
};

/*
** Constants used for locking
*/
#define UNIX_SHM_BASE   ((22+SQLITE_SHM_NLOCK)*4)         /* first lock byte */
#define UNIX_SHM_DMS    (UNIX_SHM_BASE+SQLITE_SHM_NLOCK)  /* deadman switch */

/*
** Implementation of file-control SQLITE_FCNTL_READ_SHM.
*/
static int fcntlReadShm(unixFile *pFile, void *pArg){
  struct ReadShmParam {
    void *pBuf;
    int nBuf;
  } *pParam = (struct ReadShmParam*)pArg;
  unixShmNode *pShmNode = pFile->pShm->pShmNode;
  int rc = SQLITE_OK;
  sqlite3_mutex_enter( pShmNode->mutex );
  if( pShmNode->isUnlocked==1 ){
    struct stat buf;          /* Used to hold return values of fstat() */
    if( osFstat(pShmNode->h, &buf) ){
      rc = SQLITE_IOERR_FSTAT;
    }else if( 0!=lseek(pShmNode->h, 0, SEEK_SET) ){
      rc = SQLITE_IOERR_READ;
    }else{
      int nRead = buf.st_size;
      if( nRead==0 ){
        pParam->nBuf = 0;
        pParam->pBuf = 0;
      }else{    
        pParam->pBuf = sqlite3_malloc(nRead);
        if( pParam->pBuf ){
          int res = osRead(pShmNode->h, pParam->pBuf, nRead);
          if( res!=nRead ){
            sqlite3_free(pParam->pBuf);
            pParam->pBuf = 0;
            rc = SQLITE_IOERR_READ;
          }else{
            pParam->nBuf = nRead;
          }
        }else{
          rc = SQLITE_NOMEM_BKPT;
        }
      }
    }
  }else{
    rc = SQLITE_BUSY;
  }
  sqlite3_mutex_leave( pShmNode->mutex );
  return rc;
}

/*
** Apply posix advisory locks for all bytes from ofst through ofst+n-1.
**
** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking
** otherwise.
*/
Changes to src/sqlite.h.in.
1091
1092
1093
1094
1095
1096
1097

1098
1099
1100
1101
1102
1103
1104
#define SQLITE_FCNTL_VFS_POINTER            27
#define SQLITE_FCNTL_JOURNAL_POINTER        28
#define SQLITE_FCNTL_WIN32_GET_HANDLE       29
#define SQLITE_FCNTL_PDB                    30
#define SQLITE_FCNTL_BEGIN_ATOMIC_WRITE     31
#define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE    32
#define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE  33


/* deprecated names */
#define SQLITE_GET_LOCKPROXYFILE      SQLITE_FCNTL_GET_LOCKPROXYFILE
#define SQLITE_SET_LOCKPROXYFILE      SQLITE_FCNTL_SET_LOCKPROXYFILE
#define SQLITE_LAST_ERRNO             SQLITE_FCNTL_LAST_ERRNO









>







1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
#define SQLITE_FCNTL_VFS_POINTER            27
#define SQLITE_FCNTL_JOURNAL_POINTER        28
#define SQLITE_FCNTL_WIN32_GET_HANDLE       29
#define SQLITE_FCNTL_PDB                    30
#define SQLITE_FCNTL_BEGIN_ATOMIC_WRITE     31
#define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE    32
#define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE  33
#define SQLITE_FCNTL_READ_SHM               34

/* deprecated names */
#define SQLITE_GET_LOCKPROXYFILE      SQLITE_FCNTL_GET_LOCKPROXYFILE
#define SQLITE_SET_LOCKPROXYFILE      SQLITE_FCNTL_SET_LOCKPROXYFILE
#define SQLITE_LAST_ERRNO             SQLITE_FCNTL_LAST_ERRNO


Changes to src/wal.c.
451
452
453
454
455
456
457

458
459
460
461
462
463
464
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
  u8 truncateOnCommit;       /* True to truncate WAL file on commit */
  u8 syncHeader;             /* Fsync the WAL header if true */
  u8 padToSectorBoundary;    /* Pad transactions out to the next sector */

  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  u32 minFrame;              /* Ignore wal frames before this one */
  u32 iReCksum;              /* On commit, recalculate checksums from here */
  const char *zWalName;      /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */







>







451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
  u8 truncateOnCommit;       /* True to truncate WAL file on commit */
  u8 syncHeader;             /* Fsync the WAL header if true */
  u8 padToSectorBoundary;    /* Pad transactions out to the next sector */
  u8 bUnlocked;              /* A readonly_shm connection without DMS lock */
  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  u32 minFrame;              /* Ignore wal frames before this one */
  u32 iReCksum;              /* On commit, recalculate checksums from here */
  const char *zWalName;      /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
534
535
536
537
538
539
540





















541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
*/
#define HASHTABLE_NPAGE_ONE  (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32)))

/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */
#define WALINDEX_PGSZ   (                                         \
    sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \
)






















/*
** Obtain a pointer to the iPage'th page of the wal-index. The wal-index
** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are
** numbered from zero.
**
** If this call is successful, *ppPage is set to point to the wal-index
** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs,
** then an SQLite error code is returned and *ppPage is set to 0.
*/
static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){
  int rc = SQLITE_OK;

  /* Enlarge the pWal->apWiData[] array if required */
  if( pWal->nWiData<=iPage ){
    int nByte = sizeof(u32*)*(iPage+1);
    volatile u32 **apNew;
    apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte);
    if( !apNew ){
      *ppPage = 0;
      return SQLITE_NOMEM_BKPT;
    }
    memset((void*)&apNew[pWal->nWiData], 0,
           sizeof(u32*)*(iPage+1-pWal->nWiData));
    pWal->apWiData = apNew;
    pWal->nWiData = iPage+1;
  }

  /* Request a pointer to the required page from the VFS */
  if( pWal->apWiData[iPage]==0 ){
    if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
      pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ);
      if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM_BKPT;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>














<
|
<
<
<
|
|
<
<
<
<
<







535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576

577



578
579





580
581
582
583
584
585
586
*/
#define HASHTABLE_NPAGE_ONE  (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32)))

/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */
#define WALINDEX_PGSZ   (                                         \
    sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \
)

/*
** Grow the pWal->apWiData[] array so that it is at least nPage entries
** in size. Return SQLITE_NOMEM if an OOM error occurs, or SQLITE_OK 
** otherwise.
*/
static int walIndexGrowArray(Wal *pWal, int nPage){
  if( nPage>pWal->nWiData ){
    int nByte = sizeof(u32*)*nPage;
    volatile u32 **apNew;
    apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte);
    if( !apNew ){
      return SQLITE_NOMEM_BKPT;
    }
    memset((void*)&apNew[pWal->nWiData], 0,
           sizeof(u32*)*(nPage-pWal->nWiData));
    pWal->apWiData = apNew;
    pWal->nWiData = nPage;
  }
  return SQLITE_OK;
}

/*
** Obtain a pointer to the iPage'th page of the wal-index. The wal-index
** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are
** numbered from zero.
**
** If this call is successful, *ppPage is set to point to the wal-index
** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs,
** then an SQLite error code is returned and *ppPage is set to 0.
*/
static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){
  int rc = SQLITE_OK;

  /* Enlarge the pWal->apWiData[] array if required */

  if( walIndexGrowArray(pWal, iPage+1) ){



    *ppPage = 0;
    return SQLITE_NOMEM;





  }

  /* Request a pointer to the required page from the VFS */
  if( pWal->apWiData[iPage]==0 ){
    if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
      pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ);
      if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM_BKPT;
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
*/
static int walIndexRecover(Wal *pWal){
  int rc;                         /* Return Code */
  i64 nSize;                      /* Size of log file */
  u32 aFrameCksum[2] = {0, 0};
  int iLock;                      /* Lock offset to lock for checkpoint */

  /* Obtain an exclusive lock on all byte in the locking range not already
  ** locked by the caller. The caller is guaranteed to have locked the
  ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte.
  ** If successful, the same bytes that are locked here are unlocked before
  ** this function returns.
  */
  assert( pWal->ckptLock==1 || pWal->ckptLock==0 );
  assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 );
  assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE );
  assert( pWal->writeLock );
  iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock;
  rc = walLockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock);
  if( rc==SQLITE_OK ){







|
|
|
|
|
<







1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122

1123
1124
1125
1126
1127
1128
1129
*/
static int walIndexRecover(Wal *pWal){
  int rc;                         /* Return Code */
  i64 nSize;                      /* Size of log file */
  u32 aFrameCksum[2] = {0, 0};
  int iLock;                      /* Lock offset to lock for checkpoint */

  /* Obtain an exclusive lock on all bytes in the locking range except
  ** WAL_READ_LOCK(0) not already locked by the caller. The caller is
  ** guaranteed to have locked the WAL_WRITE_LOCK byte, and may have also
  ** locked the WAL_CKPT_LOCK byte. If successful, the same bytes that are
  ** locked here are unlocked before this function returns.  */

  assert( pWal->ckptLock==1 || pWal->ckptLock==0 );
  assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 );
  assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE );
  assert( pWal->writeLock );
  iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock;
  rc = walLockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock);
  if( rc==SQLITE_OK ){
2086
2087
2088
2089
2090
2091
2092

2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119



2120
2121
2122
2123
2124
2125
2126
2127
  int badHdr;                     /* True if a header read failed */
  volatile u32 *page0;            /* Chunk of wal-index containing header */

  /* Ensure that page 0 of the wal-index (the page that contains the 
  ** wal-index header) is mapped. Return early if an error occurs here.
  */
  assert( pChanged );

  rc = walIndexPage(pWal, 0, &page0);
  if( rc!=SQLITE_OK ){
    if( rc==SQLITE_READONLY_CANTLOCK 
#ifdef SQLITE_ENABLE_SNAPSHOT
        && pWal->pSnapshot==0 
#endif
    ){
      memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
      rc = SQLITE_OK;
    }
    return rc;
  };
  assert( page0 || pWal->writeLock==0 );

  /* If the first page of the wal-index has been mapped, try to read the
  ** wal-index header immediately, without holding any lock. This usually
  ** works, but may fail if the wal-index header is corrupt or currently 
  ** being modified by another thread or process.
  */
  badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1);

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  */
  assert( badHdr==0 || pWal->writeLock==0 );
  if( badHdr ){
    if( pWal->readOnly & WAL_SHM_RDONLY ){



      if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
        walUnlockShared(pWal, WAL_WRITE_LOCK);
        rc = SQLITE_READONLY_RECOVERY;
      }
    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
      pWal->writeLock = 1;
      if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
        badHdr = walIndexTryHdr(pWal, pChanged);







>


<
<
<
<
<
<
<
<

















>
>
>
|







2098
2099
2100
2101
2102
2103
2104
2105
2106
2107








2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
  int badHdr;                     /* True if a header read failed */
  volatile u32 *page0;            /* Chunk of wal-index containing header */

  /* Ensure that page 0 of the wal-index (the page that contains the 
  ** wal-index header) is mapped. Return early if an error occurs here.
  */
  assert( pChanged );
  assert( pWal->bUnlocked==0 );
  rc = walIndexPage(pWal, 0, &page0);
  if( rc!=SQLITE_OK ){








    return rc;
  };
  assert( page0 || pWal->writeLock==0 );

  /* If the first page of the wal-index has been mapped, try to read the
  ** wal-index header immediately, without holding any lock. This usually
  ** works, but may fail if the wal-index header is corrupt or currently 
  ** being modified by another thread or process.
  */
  badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1);

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  */
  assert( badHdr==0 || pWal->writeLock==0 );
  if( badHdr ){
    if( pWal->readOnly & WAL_SHM_RDONLY ){
      if( pWal->bUnlocked ){
        rc = SQLITE_READONLY_RECOVERY;
      }
      else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
        walUnlockShared(pWal, WAL_WRITE_LOCK);
        rc = SQLITE_READONLY_RECOVERY;
      }
    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
      pWal->writeLock = 1;
      if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
        badHdr = walIndexTryHdr(pWal, pChanged);
2151
2152
2153
2154
2155
2156
2157

























































































2158
2159
2160
2161
2162
2163
2164
}

/*
** This is the value that walTryBeginRead returns when it needs to
** be retried.
*/
#define WAL_RETRY  (-1)


























































































/*
** Attempt to start a read transaction.  This might fail due to a race or
** other transient condition.  When that happens, it returns WAL_RETRY to
** indicate to the caller that it is safe to retry immediately.
**
** On success return SQLITE_OK.  On a permanent failure (such an







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
}

/*
** This is the value that walTryBeginRead returns when it needs to
** be retried.
*/
#define WAL_RETRY  (-1)

/*
** When this function is called, a call to xShmMap has just returned
** SQLITE_READONLY_CANTLOCK, indicating that the *-shm file was opened
** read-only and that there were no other connections to the database 
** at that point.
*/
static int walBeginUnlocked(Wal *pWal, int *pChanged){
  struct ReadShmParam {
    void *pBuf;
    int nBuf;
  } buf = {0, 0};
  i64 nSize;
  int rc;
  volatile void *pDummy;

  assert( pWal->nWiData>0 && pWal->apWiData[0]==0 );
  assert( pWal->readOnly & WAL_SHM_RDONLY );
  
  /* Take WAL_READ_LOCK(0). This has the effect of preventing any
  ** live clients from running a checkpoint, but does not stop them
  ** from running recovery.  */
  rc = walLockShared(pWal, WAL_READ_LOCK(0));
  if( rc!=SQLITE_OK ){
    return (rc==SQLITE_BUSY ? WAL_RETRY : rc);
  }

  /* Try to map the *-shm file again. If it succeeds this time, then 
  ** a non-readonly_shm connection has already connected to the database.
  ** In this case, start over with opening the transaction.
  **
  ** The WAL_READ_LOCK(0) lock held by this client prevents a checkpoint
  ** from taking place. But it does not prevent the wal from being wrapped
  ** if a checkpoint has already taken place. This means that if another
  ** client is connected at this point, it may have already checkpointed 
  ** the entire wal. In that case it would not be safe to continue with
  ** the unlocked transaction, as the other client may overwrite wal 
  ** frames that this client is still using.  */
  rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy);
  if( rc!=SQLITE_READONLY_CANTLOCK ){
    rc = (rc==SQLITE_OK ? WAL_RETRY : rc);
    goto begin_unlocked_out;
  }
  pWal->readLock = 0;

  /* Figure out how large the wal file is. If it is zero bytes in size,
  ** do not bother loading the *-shm file. Just read from the db file
  ** exclusively like any other Wal.readLock==0 client. Otherwise,
  ** use SQLITE_FCNTL_READ_SHM to load the contents of the *-shm file 
  ** into heap memory. */
  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
  if( rc!=SQLITE_OK ) return rc;

  memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  if( nSize>0 ){
    rc = sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_READ_SHM, (void*)&buf);
    if( rc!=SQLITE_OK ){
      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
    }
    if( buf.nBuf<WALINDEX_PGSZ ){
      rc = SQLITE_READONLY_RECOVERY;
    }else{
      int i;
      int nChunk = buf.nBuf/WALINDEX_PGSZ;
      if( walIndexGrowArray(pWal, nChunk) ){
        rc = SQLITE_NOMEM;
      }else{
        u8 *aBuf = (u8*)buf.pBuf;
        for(i=0; i<nChunk; i++){
          pWal->apWiData[i] = (volatile u32*)&aBuf[i*WALINDEX_PGSZ];
        }
        if( walIndexTryHdr(pWal, pChanged) ){
          rc = SQLITE_READONLY_RECOVERY;
        }
      }
    }
  }

 begin_unlocked_out:
  if( rc!=SQLITE_OK ){
    sqlite3_free(buf.pBuf);
    memset(pWal->apWiData, 0, pWal->nWiData*sizeof(u32*));
    walUnlockShared(pWal, WAL_READ_LOCK(0));
  }else{
    pWal->bUnlocked = 1;
    *pChanged = 1;
  }
  return rc;
}

/*
** Attempt to start a read transaction.  This might fail due to a race or
** other transient condition.  When that happens, it returns WAL_RETRY to
** indicate to the caller that it is safe to retry immediately.
**
** On success return SQLITE_OK.  On a permanent failure (such an
2241
2242
2243
2244
2245
2246
2247








2248
2249
2250
2251
2252
2253
2254
    }
    if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39;
    sqlite3OsSleep(pWal->pVfs, nDelay);
  }

  if( !useWal ){
    rc = walIndexReadHdr(pWal, pChanged);








    if( rc==SQLITE_BUSY ){
      /* If there is not a recovery running in another thread or process
      ** then convert BUSY errors to WAL_RETRY.  If recovery is known to
      ** be running, convert BUSY to BUSY_RECOVERY.  There is a race here
      ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY
      ** would be technically correct.  But the race is benign since with
      ** WAL_RETRY this routine will be called again and will probably be







>
>
>
>
>
>
>
>







2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
    }
    if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39;
    sqlite3OsSleep(pWal->pVfs, nDelay);
  }

  if( !useWal ){
    rc = walIndexReadHdr(pWal, pChanged);
    if( rc==SQLITE_READONLY_CANTLOCK ){
      /* This is a readonly_shm connection and there are no other connections
      ** to the database. So the *-shm file may not be accessed using mmap.
      ** Take WAL_READ_LOCK(0) before proceding.  */
      assert( pWal->nWiData>0 && pWal->apWiData[0]==0 );
      assert( pWal->readOnly & WAL_SHM_RDONLY );
      return walBeginUnlocked(pWal, pChanged);
    }
    if( rc==SQLITE_BUSY ){
      /* If there is not a recovery running in another thread or process
      ** then convert BUSY errors to WAL_RETRY.  If recovery is known to
      ** be running, convert BUSY to BUSY_RECOVERY.  There is a race here
      ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY
      ** would be technically correct.  But the race is benign since with
      ** WAL_RETRY this routine will be called again and will probably be
2592
2593
2594
2595
2596
2597
2598





2599
2600
2601
2602
2603
2604
2605
** read-lock.
*/
void sqlite3WalEndReadTransaction(Wal *pWal){
  sqlite3WalEndWriteTransaction(pWal);
  if( pWal->readLock>=0 ){
    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
    pWal->readLock = -1;





  }
}

/*
** Search the wal file for page pgno. If found, set *piRead to the frame that
** contains the page. Otherwise, if pgno is not in the wal file, set *piRead
** to zero.







>
>
>
>
>







2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
** read-lock.
*/
void sqlite3WalEndReadTransaction(Wal *pWal){
  sqlite3WalEndWriteTransaction(pWal);
  if( pWal->readLock>=0 ){
    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
    pWal->readLock = -1;
    if( pWal->bUnlocked ){
      sqlite3_free((void*)pWal->apWiData[0]);
      memset(pWal->apWiData, 0, sizeof(u32*)*pWal->nWiData);
      pWal->bUnlocked = 0;
    }
  }
}

/*
** Search the wal file for page pgno. If found, set *piRead to the frame that
** contains the page. Otherwise, if pgno is not in the wal file, set *piRead
** to zero.
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636

  /* If the "last page" field of the wal-index header snapshot is 0, then
  ** no data will be read from the wal under any circumstances. Return early
  ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
  ** then the WAL is ignored by the reader so return early, as if the 
  ** WAL were empty.
  */
  if( iLast==0 || pWal->readLock==0 ){
    *piRead = 0;
    return SQLITE_OK;
  }

  /* Search the hash table or tables for an entry matching page number
  ** pgno. Each iteration of the following for() loop searches one
  ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).







|







2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746

  /* If the "last page" field of the wal-index header snapshot is 0, then
  ** no data will be read from the wal under any circumstances. Return early
  ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
  ** then the WAL is ignored by the reader so return early, as if the 
  ** WAL were empty.
  */
  if( iLast==0 || (pWal->readLock==0 && !pWal->bUnlocked) ){
    *piRead = 0;
    return SQLITE_OK;
  }

  /* Search the hash table or tables for an entry matching page number
  ** pgno. Each iteration of the following for() loop searches one
  ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).
Changes to test/walro.test.
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
  do_test 1.1.13  { sql2 "INSERT INTO t1 VALUES('i', 'j')" } {}

  do_test 1.2.1 {
    code2 { db2 close }
    code1 { db close }
    list [file exists test.db-wal] [file exists test.db-shm]
  } {1 1}

  do_test 1.2.2 {
    code1 { sqlite3 db file:test.db?readonly_shm=1 }
    list [catch { sql1 { SELECT * FROM t1 } } msg] $msg
  } {1 {unable to open database file}}

  do_test 1.2.3 {
    code1 { db close }
    file attributes test.db-shm -permissions rw-r--r--
    hexio_write test.db-shm 0 01020304 
    file attributes test.db-shm -permissions r--r--r--
    code1 { sqlite3 db file:test.db?readonly_shm=1 }
    csql1 { SELECT * FROM t1 }
  } {1 {unable to open database file}}
  do_test 1.2.4 {
    code1 { sqlite3_extended_errcode db } 
  } {SQLITE_CANTOPEN}

  do_test 1.2.5 {
    file attributes test.db-shm -permissions rw-r--r--
    code2 { sqlite3 db2 test.db }
    sql2 "SELECT * FROM t1" 
  } {a b c d e f g h i j}
  file attributes test.db-shm -permissions r--r--r--







<


|
|








|


|







97
98
99
100
101
102
103

104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
  do_test 1.1.13  { sql2 "INSERT INTO t1 VALUES('i', 'j')" } {}

  do_test 1.2.1 {
    code2 { db2 close }
    code1 { db close }
    list [file exists test.db-wal] [file exists test.db-shm]
  } {1 1}

  do_test 1.2.2 {
    code1 { sqlite3 db file:test.db?readonly_shm=1 }
    sql1 { SELECT * FROM t1 }
  } {a b c d e f g h i j}

  do_test 1.2.3 {
    code1 { db close }
    file attributes test.db-shm -permissions rw-r--r--
    hexio_write test.db-shm 0 01020304 
    file attributes test.db-shm -permissions r--r--r--
    code1 { sqlite3 db file:test.db?readonly_shm=1 }
    csql1 { SELECT * FROM t1 }
  } {1 {attempt to write a readonly database}}
  do_test 1.2.4 {
    code1 { sqlite3_extended_errcode db } 
  } {SQLITE_READONLY_RECOVERY}

  do_test 1.2.5 {
    file attributes test.db-shm -permissions rw-r--r--
    code2 { sqlite3 db2 test.db }
    sql2 "SELECT * FROM t1" 
  } {a b c d e f g h i j}
  file attributes test.db-shm -permissions r--r--r--
152
153
154
155
156
157
158





159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
    code2 { db2 close }
    file exists test.db-shm
  } {0}
  do_test 1.3.2.2 {
    code1 { sqlite3 db file:test.db?readonly_shm=1 }
    csql1 { SELECT * FROM sqlite_master }
  } {1 {unable to open database file}}





  do_test 1.3.2.3 {
    code1 { db close }
    close [open test.db-shm w]
    file attributes test.db-shm -permissions r--r--r--
    code1 { sqlite3 db file:test.db?readonly_shm=1 }
    csql1 { SELECT * FROM t1 }
  } {1 {unable to open database file}}
  do_test 1.3.2.4 {
    code1 { sqlite3_extended_errcode db } 
  } {SQLITE_CANTOPEN}

  #-----------------------------------------------------------------------
  # Test cases 1.4.* check that checkpoints and log wraps don't prevent
  # read-only connections from reading the database.
  do_test 1.4.1 {
    code1 { db close }
    forcedelete test.db-shm







>
>
>
>
>






|


|







151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
    code2 { db2 close }
    file exists test.db-shm
  } {0}
  do_test 1.3.2.2 {
    code1 { sqlite3 db file:test.db?readonly_shm=1 }
    csql1 { SELECT * FROM sqlite_master }
  } {1 {unable to open database file}}

  # Update: This test used to fail (because a 0 byte *-shm file does not
  # contain a valid shm-header) but now succeeds (because a readonly_shm
  # connection ignores the *-shm file completely if the wal file is also
  # zero bytes in size).
  do_test 1.3.2.3 {
    code1 { db close }
    close [open test.db-shm w]
    file attributes test.db-shm -permissions r--r--r--
    code1 { sqlite3 db file:test.db?readonly_shm=1 }
    csql1 { SELECT * FROM t1 }
  } {0 {a b c d e f g h i j k l}}
  do_test 1.3.2.4 {
    code1 { sqlite3_extended_errcode db } 
  } {SQLITE_OK}

  #-----------------------------------------------------------------------
  # Test cases 1.4.* check that checkpoints and log wraps don't prevent
  # read-only connections from reading the database.
  do_test 1.4.1 {
    code1 { db close }
    forcedelete test.db-shm
Changes to test/walro2.test.
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72

73
74
75
76
77
78
79
80
81
      PRAGMA journal_mode = WAL;
      INSERT INTO t1 VALUES('a', 'b');
      INSERT INTO t1 VALUES('c', 'd');
    }
    file exists test.db-shm
  } {1}

  do_test 1.2 {
    forcecopy test.db test.db2
    forcecopy test.db-wal test.db2-wal
    forcecopy test.db-shm test.db2-shm
    code1 {
      sqlite3 db file:test.db2?readonly_shm=1
    }


    sql1 { SELECT * FROM t1 }
  } {}

  do_test 1.3.1 {
    code3 { sqlite3 db3 test.db2 }
    sql3 { SELECT * FROM t1 }
  } {a b c d}

  do_test 1.3.2 {







|






|
>

|







58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
      PRAGMA journal_mode = WAL;
      INSERT INTO t1 VALUES('a', 'b');
      INSERT INTO t1 VALUES('c', 'd');
    }
    file exists test.db-shm
  } {1}

  do_test 1.2.1 {
    forcecopy test.db test.db2
    forcecopy test.db-wal test.db2-wal
    forcecopy test.db-shm test.db2-shm
    code1 {
      sqlite3 db file:test.db2?readonly_shm=1
    }
  } {}
  do_test 1.2.2 {
    sql1 { SELECT * FROM t1 }
  } {a b c d}

  do_test 1.3.1 {
    code3 { sqlite3 db3 test.db2 }
    sql3 { SELECT * FROM t1 }
  } {a b c d}

  do_test 1.3.2 {
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
    code1 {
      sqlite3 db file:test.db2?readonly_shm=1
    }
    sql1 { 
      BEGIN;
      SELECT * FROM t1;
    }
  } {a b c d}

  do_test 2.3.1 {
    code3 { sqlite3 db3 test.db2 }
    sql3 { SELECT * FROM t1 }
  } {a b c d e f g h}

}

finish_test







|









103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
    code1 {
      sqlite3 db file:test.db2?readonly_shm=1
    }
    sql1 { 
      BEGIN;
      SELECT * FROM t1;
    }
  } {a b c d e f g h}

  do_test 2.3.1 {
    code3 { sqlite3 db3 test.db2 }
    sql3 { SELECT * FROM t1 }
  } {a b c d e f g h}

}

finish_test