SQLite

Check-in [b1abfaaf53]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Delay the decision to restart the log file until data is actually ready to be written to the log file (instead of at the start of a write transaction).
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: b1abfaaf5309cc0d0dda4fb2c237862c8cf83261
User & Date: dan 2010-06-01 15:44:57.000
Context
2010-06-01
17:46
Change the OOM and IO error test cases in walfault.test so that each test case runs both types of error simulation. (check-in: b627e15368 user: dan tags: trunk)
15:44
Delay the decision to restart the log file until data is actually ready to be written to the log file (instead of at the start of a write transaction). (check-in: b1abfaaf53 user: dan tags: trunk)
15:24
The incremental checkpoint feature is not perfect yet, but it is working well enough to merge it into the trunk. (check-in: 1d3e569e59 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/wal.c.
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
** thread to write as doing so would cause a fork.  So this routine
** returns SQLITE_BUSY in that case and no write transaction is started.
**
** There can only be a single writer active at a time.
*/
int sqlite3WalBeginWriteTransaction(Wal *pWal){
  int rc;
  volatile WalCkptInfo *pInfo;

  /* Cannot start a write transaction without first holding a read
  ** transaction. */
  assert( pWal->readLock>=0 );

  /* Only one writer allowed at a time.  Get the write lock.  Return
  ** SQLITE_BUSY if unable.







<







1998
1999
2000
2001
2002
2003
2004

2005
2006
2007
2008
2009
2010
2011
** thread to write as doing so would cause a fork.  So this routine
** returns SQLITE_BUSY in that case and no write transaction is started.
**
** There can only be a single writer active at a time.
*/
int sqlite3WalBeginWriteTransaction(Wal *pWal){
  int rc;


  /* Cannot start a write transaction without first holding a read
  ** transaction. */
  assert( pWal->readLock>=0 );

  /* Only one writer allowed at a time.  Get the write lock.  Return
  ** SQLITE_BUSY if unable.
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
    pWal->writeLock = 0;
    return rc;
  }
  if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
    pWal->writeLock = 0;
    walIndexUnmap(pWal);
    return SQLITE_BUSY;
  }

  pInfo = walCkptInfo(pWal);
  if( pWal->readLock==0 ){
    assert( pInfo->nBackfill==pWal->hdr.mxFrame );
    if( pInfo->nBackfill>0 ){
      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      if( rc==SQLITE_OK ){
        /* If all readers are using WAL_READ_LOCK(0) (in other words if no
        ** readers are currently using the WAL) */
        pWal->nCkpt++;
        pWal->hdr.mxFrame = 0;
        sqlite3Put4byte((u8*)pWal->hdr.aSalt,
                         1 + sqlite3Get4byte((u8*)pWal->hdr.aSalt));
        sqlite3_randomness(4, &pWal->hdr.aSalt[1]);
        walIndexWriteHdr(pWal);
        pInfo->nBackfill = 0;
        memset((void*)&pInfo->aReadMark[1], 0,
               sizeof(pInfo->aReadMark)-sizeof(u32));
        rc = sqlite3OsTruncate(pWal->pDbFd, 
                               ((i64)pWal->hdr.nPage*(i64)pWal->szPage));
        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      }
    }
    walUnlockShared(pWal, WAL_READ_LOCK(0));
    pWal->readLock = -1;
    do{
      int notUsed;
      rc = walTryBeginRead(pWal, &notUsed, 1);
    }while( rc==WAL_RETRY );
  }
  walIndexUnmap(pWal);
  return rc;
}

/*
** End a write transaction.  The commit has already been done.  This
** routine merely releases the lock.







<
|


<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2025
2026
2027
2028
2029
2030
2031

2032
2033
2034





























2035
2036
2037
2038
2039
2040
2041
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
    pWal->writeLock = 0;
    return rc;
  }
  if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
    pWal->writeLock = 0;

    rc = SQLITE_BUSY;
  }






























  walIndexUnmap(pWal);
  return rc;
}

/*
** End a write transaction.  The commit has already been done.  This
** routine merely releases the lock.
2145
2146
2147
2148
2149
2150
2151
2152
2153

























































2154
2155
2156
2157
2158
2159
2160
2161
2162
    if( rc==SQLITE_OK ){
      walCleanupHash(pWal);
      walIndexUnmap(pWal);
    }
  }
  return rc;
}

/* 

























































** Write a set of frames to the log. The caller must hold the write-lock
** on the log file (obtained using sqlite3WalWriteLock()).
*/
int sqlite3WalFrames(
  Wal *pWal,                      /* Wal handle to write to */
  int szPage,                     /* Database page-size in bytes */
  PgHdr *pList,                   /* List of dirty pages to write */
  Pgno nTruncate,                 /* Database size after this commit */
  int isCommit,                   /* True if this is a commit */








|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|







2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
    if( rc==SQLITE_OK ){
      walCleanupHash(pWal);
      walIndexUnmap(pWal);
    }
  }
  return rc;
}

/*
** This function is called just before writing a set of frames to the log
** file (see sqlite3WalFrames()). It checks to see if, instead of appending
** to the current log file, it is possible to overwrite the start of the
** existing log file with the new frames (i.e. "reset" the log). If so,
** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left
** unchanged.
**
** The check is only made if this connection holds WAL_READ_LOCK(0)
** (pWal->readLock==0). In that case WAL_READ_LOCK(0) is always released
** and a new read lock is obtained via walTryBeginRead() before returning
** successfully.
**
** SQLITE_OK is returned if no error is encountered (regardless of whether
** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned
** if an error occurs while mapping the wal-index, while taking the
** exclusive lock on the reader slots, or while re-acquiring a read lock.
** The wal-index is always unmapped before this function returns after a
** successful call to walIndexMap().
*/
static int walRestartLog(Wal *pWal){
  int rc = SQLITE_OK;
  if( pWal->readLock==0 
   && SQLITE_OK==(rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)))
  ){
    volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
    assert( pInfo->nBackfill==pWal->hdr.mxFrame );
    if( pInfo->nBackfill>0 ){
      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      if( rc==SQLITE_OK ){
        /* If all readers are using WAL_READ_LOCK(0) (in other words if no
        ** readers are currently using the WAL), then this transaction's
        ** frames will overwrite the start of the existing log. Update the
        ** wal-index header to reflect this.
        **
        ** In theory it would be Ok to update the cache of the header only
        ** at this point. But updating the actual wal-index header is also
        ** safe and means there is no special case for sqlite3WalUndo()
        ** to handle if this transaction is rolled back.
        */
        u32 *aSalt = pWal->hdr.aSalt;       /* Big-endian salt values */
        pWal->nCkpt++;
        pWal->hdr.mxFrame = 0;
        sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0]));
        sqlite3_randomness(4, &aSalt[1]);
        walIndexWriteHdr(pWal);
        memset((void*)pInfo, 0, sizeof(*pInfo));
        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      }else if( rc!=SQLITE_BUSY ){
        /* SQLITE_BUSY just means the log cannot be restarted right now;
        ** the frames are appended instead. Any other error is a real
        ** failure and must not be overwritten by the walTryBeginRead()
        ** result below. WAL_READ_LOCK(0) is left held and pWal->readLock
        ** is left at 0. Unmap the wal-index first, as the caller expects
        ** it to be unmapped on the error path.
        */
        walIndexUnmap(pWal);
        return rc;
      }
    }

    /* Swap WAL_READ_LOCK(0) for a fresh read lock. Keep trying for as
    ** long as walTryBeginRead() asks for a retry.
    */
    walUnlockShared(pWal, WAL_READ_LOCK(0));
    pWal->readLock = -1;
    do{
      int notUsed;
      rc = walTryBeginRead(pWal, &notUsed, 1);
    }while( rc==WAL_RETRY );

    /* Unmap the wal-index before returning. Otherwise the VFS layer may
    ** hold a mutex for the duration of the IO performed by WalFrames().
    */
    walIndexUnmap(pWal);
  }
  return rc;
}

/* 
** Write a set of frames to the log. The caller must hold the write-lock
** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
*/
int sqlite3WalFrames(
  Wal *pWal,                      /* Wal handle to write to */
  int szPage,                     /* Database page-size in bytes */
  PgHdr *pList,                   /* List of dirty pages to write */
  Pgno nTruncate,                 /* Database size after this commit */
  int isCommit,                   /* True if this is a commit */
2175
2176
2177
2178
2179
2180
2181









2182
2183
2184
2185
2186
2187
2188

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
    WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n",
              pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill"));
  }
#endif










  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */
  iFrame = pWal->hdr.mxFrame;
  if( iFrame==0 ){







>
>
>
>
>
>
>
>
>







2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
    WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n",
              pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill"));
  }
#endif

  /* See if it is possible to write these frames into the start of the
  ** log file, instead of appending to it at pWal->hdr.mxFrame.
  */
  if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
    assert( pWal->pWiData==0 );
    return rc;
  }
  assert( pWal->pWiData==0 && pWal->readLock>0 );

  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */
  iFrame = pWal->hdr.mxFrame;
  if( iFrame==0 ){
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
    if( rc!=SQLITE_OK ){
      return rc;
    }
    walChecksumBytes(1, aWalHdr, sizeof(aWalHdr), 0, pWal->hdr.aFrameCksum);
  }
  assert( pWal->szPage==szPage );

    /* Write the log file. */
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */

    iOffset = walFrameOffset(++iFrame, szPage);
    
    /* Populate and write the frame header */







|







2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
    if( rc!=SQLITE_OK ){
      return rc;
    }
    walChecksumBytes(1, aWalHdr, sizeof(aWalHdr), 0, pWal->hdr.aFrameCksum);
  }
  assert( pWal->szPage==szPage );

  /* Write the log file. */
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */

    iOffset = walFrameOffset(++iFrame, szPage);
    
    /* Populate and write the frame header */