SQLite

Check-in [2c2afdd0ad]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add detailed error logging to WAL in an effort to track down an obscure SQLITE_PROTOCOL problem. This code is intended for debugging and not for release.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | wal-trace-375
Files: files | file ages | folders
SHA1: 2c2afdd0adad7d364915ac200c71603deddf148e
User & Date: drh 2011-01-05 12:50:18.000
Context
2011-01-18
17:34
Pull over all the latest trunk changes. (check-in: ca86d04be1 user: drh tags: wal-trace-375)
2011-01-05
12:50
Add detailed error logging to WAL in an effort to track down an obscure SQLITE_PROTOCOL problem. This code is intended for debugging and not for release. (check-in: 2c2afdd0ad user: drh tags: wal-trace-375)
12:20
Add the sqlite3_vsnprintf() interface. (check-in: fc67adea41 user: drh tags: trunk)
Changes
Unified Diff Show Whitespace Changes Patch
Changes to src/wal.c.
248
249
250
251
252
253
254















255
256
257
258
259
260
261
*/
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
int sqlite3WalTrace = 0;
# define WALTRACE(X)  if(sqlite3WalTrace) sqlite3DebugPrintf X
#else
# define WALTRACE(X)
#endif
















/*
** The maximum (and only) versions of the wal and wal-index formats
** that may be interpreted by this version of SQLite.
**
** If a client begins recovering a WAL file and finds that (a) the checksum
** values in the wal-header are correct and (b) the version field is not







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
*/
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
int sqlite3WalTrace = 0;
# define WALTRACE(X)  if(sqlite3WalTrace) sqlite3DebugPrintf X
#else
# define WALTRACE(X)
#endif

/*
** WAL tracing logic added to search for an SQLITE_PROTOCOL error.
*/
static void walTrace(const char *zFormat, ...){
  va_list ap;
  char zMsg[100];
  va_start(ap, zFormat);
  sqlite3_vsnprintf(sizeof(zMsg), zMsg, zFormat, ap);
  va_end(ap);
#ifdef SQLITE_WAL_TRACE
  fprintf(stdout, "WALTRACE: [%s]\n", zMsg); fflush(stdout);
#endif
  sqlite3_log(99, "%s", zMsg);
}

/*
** The maximum (and only) versions of the wal and wal-index formats
** that may be interpreted by this version of SQLite.
**
** If a client begins recovering a WAL file and finds that (a) the checksum
** values in the wal-header are correct and (b) the version field is not
507
508
509
510
511
512
513

514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531

532
533
534
535
536
537
538
  /* Enlarge the pWal->apWiData[] array if required */
  if( pWal->nWiData<=iPage ){
    int nByte = sizeof(u32*)*(iPage+1);
    volatile u32 **apNew;
    apNew = (volatile u32 **)sqlite3_realloc((void *)pWal->apWiData, nByte);
    if( !apNew ){
      *ppPage = 0;

      return SQLITE_NOMEM;
    }
    memset((void*)&apNew[pWal->nWiData], 0,
           sizeof(u32*)*(iPage+1-pWal->nWiData));
    pWal->apWiData = apNew;
    pWal->nWiData = iPage+1;
  }

  /* Request a pointer to the required page from the VFS */
  if( pWal->apWiData[iPage]==0 ){
    if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
      pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ);
      if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, 
          pWal->writeLock, (void volatile **)&pWal->apWiData[iPage]
      );
    }

  }

  *ppPage = pWal->apWiData[iPage];
  assert( iPage==0 || *ppPage || rc!=SQLITE_OK );
  return rc;
}








>


















>







522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
  /* Enlarge the pWal->apWiData[] array if required */
  if( pWal->nWiData<=iPage ){
    int nByte = sizeof(u32*)*(iPage+1);
    volatile u32 **apNew;
    apNew = (volatile u32 **)sqlite3_realloc((void *)pWal->apWiData, nByte);
    if( !apNew ){
      *ppPage = 0;
      walTrace("realloc(%d) in walIndexPage()", nByte);
      return SQLITE_NOMEM;
    }
    memset((void*)&apNew[pWal->nWiData], 0,
           sizeof(u32*)*(iPage+1-pWal->nWiData));
    pWal->apWiData = apNew;
    pWal->nWiData = iPage+1;
  }

  /* Request a pointer to the required page from the VFS */
  if( pWal->apWiData[iPage]==0 ){
    if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
      pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ);
      if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, 
          pWal->writeLock, (void volatile **)&pWal->apWiData[iPage]
      );
    }
    if( rc ) walTrace("xShmMap():%d in walIndexPage(iPage=%d)",rc,iPage);
  }

  *ppPage = pWal->apWiData[iPage];
  assert( iPage==0 || *ppPage || rc!=SQLITE_OK );
  return rc;
}

1856
1857
1858
1859
1860
1861
1862

1863
1864
1865
1866
1867
1868
1869

1870

1871
1872
1873
1874
1875
1876

1877
1878
1879
1880
1881
1882
1883

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  */
  assert( badHdr==0 || pWal->writeLock==0 );
  if( badHdr && SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
    pWal->writeLock = 1;

    if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
      badHdr = walIndexTryHdr(pWal, pChanged);
      if( badHdr ){
        /* If the wal-index header is still malformed even while holding
        ** a WRITE lock, it can only mean that the header is corrupted and
        ** needs to be reconstructed.  So run recovery to do exactly that.
        */

        rc = walIndexRecover(pWal);

        *pChanged = 1;
      }
    }
    pWal->writeLock = 0;
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
  }


  /* If the header is read successfully, check the version number to make
  ** sure the wal-index was not constructed with some future format that
  ** this version of SQLite cannot understand.
  */
  if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
    rc = SQLITE_CANTOPEN_BKPT;







>







>

>






>







1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  */
  assert( badHdr==0 || pWal->writeLock==0 );
  if( badHdr && SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
    pWal->writeLock = 1;
    walTrace("trying walIndexTryHdr w/lock");
    if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
      badHdr = walIndexTryHdr(pWal, pChanged);
      if( badHdr ){
        /* If the wal-index header is still malformed even while holding
        ** a WRITE lock, it can only mean that the header is corrupted and
        ** needs to be reconstructed.  So run recovery to do exactly that.
        */
        walTrace("walIndexTryHdr() failed w/lock");
        rc = walIndexRecover(pWal);
        if( rc ) walTrace("walIndexRecover():%d", rc);
        *pChanged = 1;
      }
    }
    pWal->writeLock = 0;
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
  }
  else if(badHdr) walTrace("walLockExcl():%d in walIndexReadHdr()", rc);

  /* If the header is read successfully, check the version number to make
  ** sure the wal-index was not constructed with some future format that
  ** this version of SQLite cannot understand.
  */
  if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
    rc = SQLITE_CANTOPEN_BKPT;
1949
1950
1951
1952
1953
1954
1955

1956
1957
1958
1959
1960
1961
1962
  int i;                          /* Loop counter */
  int rc = SQLITE_OK;             /* Return code  */

  assert( pWal->readLock<0 );     /* Not currently locked */

  /* Take steps to avoid spinning forever if there is a protocol error. */
  if( cnt>5 ){

    if( cnt>100 ) return SQLITE_PROTOCOL;
    sqlite3OsSleep(pWal->pVfs, 1);
  }

  if( !useWal ){
    rc = walIndexReadHdr(pWal, pChanged);
    if( rc==SQLITE_BUSY ){







>







1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
  int i;                          /* Loop counter */
  int rc = SQLITE_OK;             /* Return code  */

  assert( pWal->readLock<0 );     /* Not currently locked */

  /* Take steps to avoid spinning forever if there is a protocol error. */
  if( cnt>5 ){
    walTrace("cnt=%d",cnt);
    if( cnt>100 ) return SQLITE_PROTOCOL;
    sqlite3OsSleep(pWal->pVfs, 1);
  }

  if( !useWal ){
    rc = walIndexReadHdr(pWal, pChanged);
    if( rc==SQLITE_BUSY ){
2007
2008
2009
2010
2011
2012
2013

2014
2015
2016
2017
2018
2019

2020
2021
2022
2023
2024
2025
2026
        **
        ** However, if frames have been appended to the log (or if the log 
        ** is wrapped and written for that matter) before the READ_LOCK(0)
        ** is obtained, that is not necessarily true. A checkpointer may
        ** have started to backfill the appended frames but crashed before
        ** it finished. Leaving a corrupt image in the database file.
        */

        walUnlockShared(pWal, WAL_READ_LOCK(0));
        return WAL_RETRY;
      }
      pWal->readLock = 0;
      return SQLITE_OK;
    }else if( rc!=SQLITE_BUSY ){

      return rc;
    }
  }

  /* If we get this far, it means that the reader will want to use
  ** the WAL to get at content from recent commits.  The job now is
  ** to select one of the aReadMark[] entries that is closest to







>






>







2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
        **
        ** However, if frames have been appended to the log (or if the log 
        ** is wrapped and written for that matter) before the READ_LOCK(0)
        ** is obtained, that is not necessarily true. A checkpointer may
        ** have started to backfill the appended frames but crashed before
        ** it finished. Leaving a corrupt image in the database file.
        */
        walTrace("wal read/write race - writer won"); 
        walUnlockShared(pWal, WAL_READ_LOCK(0));
        return WAL_RETRY;
      }
      pWal->readLock = 0;
      return SQLITE_OK;
    }else if( rc!=SQLITE_BUSY ){
      walTrace("walLockShared(0):%d in walTryBeginRead", rc);
      return rc;
    }
  }

  /* If we get this far, it means that the reader will want to use
  ** the WAL to get at content from recent commits.  The job now is
  ** to select one of the aReadMark[] entries that is closest to
2042
2043
2044
2045
2046
2047
2048

2049
2050

2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062

2063
2064
2065
2066
2067
2068
2069

2070
2071
2072
2073
2074
2075
2076
    ** be mxFrame, then retry.
    */
    rc = walLockExclusive(pWal, WAL_READ_LOCK(1), 1);
    if( rc==SQLITE_OK ){
      pInfo->aReadMark[1] = pWal->hdr.mxFrame;
      walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1);
      rc = WAL_RETRY;

    }else if( rc==SQLITE_BUSY ){
      rc = WAL_RETRY;

    }
    return rc;
  }else{
    if( mxReadMark < pWal->hdr.mxFrame ){
      for(i=1; i<WAL_NREADER; i++){
        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
        if( rc==SQLITE_OK ){
          mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame;
          mxI = i;
          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
          break;
        }else if( rc!=SQLITE_BUSY ){

          return rc;
        }
      }
    }

    rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
    if( rc ){

      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
    }
    /* Now that the read-lock has been obtained, check that neither the
    ** value in the aReadMark[] array or the contents of the wal-index
    ** header have changed.
    **
    ** It is necessary to check that the wal-index header did not change







>


>












>







>







2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
    ** be mxFrame, then retry.
    */
    rc = walLockExclusive(pWal, WAL_READ_LOCK(1), 1);
    if( rc==SQLITE_OK ){
      pInfo->aReadMark[1] = pWal->hdr.mxFrame;
      walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1);
      rc = WAL_RETRY;
      walTrace("aReadMark[1] <- %d", pWal->hdr.mxFrame);
    }else if( rc==SQLITE_BUSY ){
      rc = WAL_RETRY;
      walTrace("aReadMark[1] is busy");
    }
    return rc;
  }else{
    if( mxReadMark < pWal->hdr.mxFrame ){
      for(i=1; i<WAL_NREADER; i++){
        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
        if( rc==SQLITE_OK ){
          mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame;
          mxI = i;
          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
          break;
        }else if( rc!=SQLITE_BUSY ){
          walTrace("walLockExclusive(%d):%d", i, rc);
          return rc;
        }
      }
    }

    rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
    if( rc ){
      walTrace("walLockShared(mxI=%d):%d", mxI, rc);
      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
    }
    /* Now that the read-lock has been obtained, check that neither the
    ** value in the aReadMark[] array or the contents of the wal-index
    ** header have changed.
    **
    ** It is necessary to check that the wal-index header did not change