SQLite

Check-in [15b9dac455]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Pager changes attempting to verify that ticket #2565 cannot recur. (CVS 6126)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 15b9dac455b3f457bb177fc4985b45957647cbec
User & Date: drh 2009-01-07 02:03:35.000
Context
2009-01-07
03:59
Add a HIGHSTRESS parameter to the sqlite3_config_alt_pcache debugging command in the test harness - to force calling pagerStress() more frequently. (CVS 6127) (check-in: e426860b94 user: drh tags: trunk)
02:03
Pager changes attempting to verify that ticket #2565 cannot recur. (CVS 6126) (check-in: 15b9dac455 user: drh tags: trunk)
2009-01-06
18:43
Now that we have permutations.test, it is really only necessary to run all.test for a single cycle. So make that the default. (CVS 6125) (check-in: 3c2f292fb7 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.534 2009/01/06 15:58:57 drh Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"

/*
** Macros for troubleshooting.  Normally turned off
*/







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.535 2009/01/07 02:03:35 drh Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"

/*
** Macros for troubleshooting.  Normally turned off
*/
730
731
732
733
734
735
736

737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754

  return rc;
}

/*
** The journal file must be open when this is called. A journal header file
** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal

** file. See comments above function writeJournalHdr() for a description of
** the journal header format.
**
** If the header is read successfully, *nRec is set to the number of
** page records following this header and *dbSize is set to the size of the
** database before the transaction began, in pages. Also, pPager->cksumInit
** is set to the value read from the journal header. SQLITE_OK is returned
** in this case.
**
** If the journal header file appears to be corrupted, SQLITE_DONE is
** returned and *nRec and *dbSize are not set.  If JOURNAL_HDR_SZ bytes
** cannot be read from the journal file an error code is returned.
*/
static int readJournalHdr(
  Pager *pPager, 
  i64 journalSize,
  u32 *pNRec, 
  u32 *pDbSize







>
|
|








|







730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755

  return rc;
}

/*
** The journal file must be open when this is called. A journal header file
** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
** file. The current location in the journal file is given by
** pPager->journalOff.  See comments above function writeJournalHdr() for
** a description of the journal header format.
**
** If the header is read successfully, *nRec is set to the number of
** page records following this header and *dbSize is set to the size of the
** database before the transaction began, in pages. Also, pPager->cksumInit
** is set to the value read from the journal header. SQLITE_OK is returned
** in this case.
**
** If the journal header file appears to be corrupted, SQLITE_DONE is
** returned and *nRec and *dbSize are undefined.  If JOURNAL_HDR_SZ bytes
** cannot be read from the journal file an error code is returned.
*/
static int readJournalHdr(
  Pager *pPager, 
  i64 journalSize,
  u32 *pNRec, 
  u32 *pDbSize
1120
1121
1122
1123
1124
1125
1126
1127
1128


1129
1130
1131
1132






1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152





1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
    cksum += aData[i];
    i -= 200;
  }
  return cksum;
}

/*
** Read a single page from the journal file opened on file descriptor
** jfd.  Playback this one page.


**
** The isMainJrnl flag is true if this is the main rollback journal and
** false for the statement journal.  The main rollback journal uses
** checksums - the statement journal does not.






*/
static int pager_playback_one_page(
  Pager *pPager,                /* The pager being played back */
  int isMainJrnl,               /* 1 -> main journal. 0 -> sub-journal. */
  i64 offset,                   /* Offset of record to playback */
  int isSavepnt,                /* True for a savepoint rollback */
  Bitvec *pDone                 /* Bitvec of pages already played back */
){
  int rc;
  PgHdr *pPg;                   /* An existing page in the cache */
  Pgno pgno;                    /* The page number of a page in journal */
  u32 cksum;                    /* Checksum used for sanity checking */
  u8 *aData = (u8 *)pPager->pTmpSpace;   /* Temp storage for a page */
  sqlite3_file *jfd = (isMainJrnl ? pPager->jfd : pPager->sjfd);

  /* The temp storage must be allocated at this point */
  assert( aData );
  assert( isMainJrnl || pDone );
  assert( isSavepnt || pDone==0 );






  rc = read32bits(jfd, offset, &pgno);
  if( rc!=SQLITE_OK ) return rc;
  rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4);
  if( rc!=SQLITE_OK ) return rc;
  pPager->journalOff += pPager->pageSize + 4 + (isMainJrnl?4:0);

  /* Sanity checking on the page.  This is more important that I originally
  ** thought.  If a power failure occurs while the journal is being written,
  ** it could cause invalid data to be written into the journal.  We need to
  ** detect this invalid data (with high probability) and ignore it.
  */
  if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
    return SQLITE_DONE;
  }
  if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){
    return SQLITE_OK;
  }
  if( isMainJrnl ){
    rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum);
    if( rc ) return rc;
    if( !isSavepnt && pager_cksum(pPager, aData)!=cksum ){
      return SQLITE_DONE;
    }
  }
  if( pDone && (rc = sqlite3BitvecSet(pDone, pgno)) ){
    return rc;







|
|
>
>




>
>
>
>
>
>




|







|
|

|
|
|
|

>
>
>
>
>
|

|

|













|







1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
    cksum += aData[i];
    i -= 200;
  }
  return cksum;
}

/*
** Read a single page from either the journal file (if isMainJrnl==1) or
** from the sub-journal (if isMainJrnl==0) and playback that page.
** The page begins at offset *pOffset into the file.  The  *pOffset
** value is increased to the start of the next page in the journal.
**
** The isMainJrnl flag is true if this is the main rollback journal and
** false for the statement journal.  The main rollback journal uses
** checksums - the statement journal does not.
**
** If pDone is not NULL, then it is a record of pages that have already
** been played back.  If the page at *pOffset has already been played back
** (if the corresponding pDone bit is set) then skip the playback.
** Make sure the pDone bit corresponding to the *pOffset page is set
** prior to returning.
*/
static int pager_playback_one_page(
  Pager *pPager,                /* The pager being played back */
  int isMainJrnl,               /* 1 -> main journal. 0 -> sub-journal. */
  i64 *pOffset,                 /* Offset of record to playback */
  int isSavepnt,                /* True for a savepoint rollback */
  Bitvec *pDone                 /* Bitvec of pages already played back */
){
  int rc;
  PgHdr *pPg;                   /* An existing page in the cache */
  Pgno pgno;                    /* The page number of a page in journal */
  u32 cksum;                    /* Checksum used for sanity checking */
  u8 *aData;                    /* Temporary storage for the page */
  sqlite3_file *jfd;            /* The file descriptor for the journal file */

  assert( (isMainJrnl&~1)==0 );      /* isMainJrnl is 0 or 1 */
  assert( (isSavepnt&~1)==0 );       /* isSavepnt is 0 or 1 */
  assert( isMainJrnl || pDone );     /* pDone always used on sub-journals */
  assert( isSavepnt || pDone==0 );   /* pDone never used on non-savepoint */

  aData = (u8*)pPager->pTmpSpace;
  assert( aData );         /* Temp storage must have already been allocated */

  jfd = isMainJrnl ? pPager->jfd : pPager->sjfd;

  rc = read32bits(jfd, *pOffset, &pgno);
  if( rc!=SQLITE_OK ) return rc;
  rc = sqlite3OsRead(jfd, aData, pPager->pageSize, (*pOffset)+4);
  if( rc!=SQLITE_OK ) return rc;
  *pOffset += pPager->pageSize + 4 + isMainJrnl*4;

  /* Sanity checking on the page.  This is more important that I originally
  ** thought.  If a power failure occurs while the journal is being written,
  ** it could cause invalid data to be written into the journal.  We need to
  ** detect this invalid data (with high probability) and ignore it.
  */
  if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
    return SQLITE_DONE;
  }
  if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){
    return SQLITE_OK;
  }
  if( isMainJrnl ){
    rc = read32bits(jfd, (*pOffset)-4, &cksum);
    if( rc ) return rc;
    if( !isSavepnt && pager_cksum(pPager, aData)!=cksum ){
      return SQLITE_DONE;
    }
  }
  if( pDone && (rc = sqlite3BitvecSet(pDone, pgno)) ){
    return rc;
1296
1297
1298
1299
1300
1301
1302








































1303
1304
1305
1306
1307
1308
1309
    /* Decode the page just read from disk */
    CODEC1(pPager, pData, pPg->pgno, 3);
    sqlite3PcacheRelease(pPg);
  }
  return rc;
}









































/*
** Parameter zMaster is the name of a master journal file. A single journal
** file that referred to the master journal file has just been rolled back.
** This routine checks if it is possible to delete the master journal file,
** and does so if it is.
**
** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
    /* Decode the page just read from disk */
    CODEC1(pPager, pData, pPg->pgno, 3);
    sqlite3PcacheRelease(pPg);
  }
  return rc;
}

#if /* !defined(NDEBUG) || */ defined(SQLITE_COVERAGE_TEST)
/*
** This routine looks ahead into the main journal file and determines
** whether or not the next record (the record that begins at file
** offset pPager->journalOff) is a well-formed page record consisting
** of a valid page number, pPage->pageSize bytes of content, followed
** by a valid checksum.
**
** The pager never needs to know this in order to do its job.   This
** routine is only used from with assert() and testcase() macros.
*/
static int pagerNextJournalPageIsValid(Pager *pPager){
  Pgno pgno;           /* The page number of the page */
  u32 cksum;           /* The page checksum */
  int rc;              /* Return code from read operations */
  sqlite3_file *fd;    /* The file descriptor from which we are reading */
  u8 *aData;           /* Content of the page */

  /* Read the page number header */
  fd = pPager->jfd;
  rc = read32bits(fd, pPager->journalOff, &pgno);
  if( rc!=SQLITE_OK ){ return 0; }                                  /*NO_TEST*/
  if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ return 0; }         /*NO_TEST*/
  if( pgno>(Pgno)pPager->dbSize ){ return 0; }                      /*NO_TEST*/

  /* Read the checksum */
  rc = read32bits(fd, pPager->journalOff+pPager->pageSize+4, &cksum);
  if( rc!=SQLITE_OK ){ return 0; }                                  /*NO_TEST*/

  /* Read the data and verify the checksum */
  aData = (u8*)pPager->pTmpSpace;
  rc = sqlite3OsRead(fd, aData, pPager->pageSize, pPager->journalOff+4);
  if( rc!=SQLITE_OK ){ return 0; }                                  /*NO_TEST*/
  if( pager_cksum(pPager, aData)!=cksum ){ return 0; }              /*NO_TEST*/

  /* Reach this point only if the page is valid */
  return 1;
}
#endif /* !defined(NDEBUG) || defined(SQLITE_COVERAGE_TEST) */

/*
** Parameter zMaster is the name of a master journal file. A single journal
** file that referred to the master journal file has just been rolled back.
** This routine checks if it is possible to delete the master journal file,
** and does so if it is.
**
** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 
1585
1586
1587
1588
1589
1590
1591






1592





1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
    /* If nRec is 0 and this rollback is of a transaction created by this
    ** process and if this is the final header in the journal, then it means
    ** that this part of the journal was being filled but has not yet been
    ** synced to disk.  Compute the number of pages based on the remaining
    ** size of the file.
    **
    ** The third term of the test was added to fix ticket #2565.






    */





    if( nRec==0 && !isHot &&
        pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
      nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager));
    }

    /* If this is the first header read from the journal, truncate the
    ** database file back to its original size.
    */
    if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
      rc = pager_truncate(pPager, mxPg);
      if( rc!=SQLITE_OK ){
        goto end_playback;
      }
    }

    /* Copy original pages out of the journal and back into the database file.
    */
    for(u=0; u<nRec; u++){
      rc = pager_playback_one_page(pPager, 1, pPager->journalOff, 0, 0);
      if( rc!=SQLITE_OK ){
        if( rc==SQLITE_DONE ){
          rc = SQLITE_OK;
          pPager->journalOff = szJ;
          break;
        }else{
          /* If we are unable to rollback, then the database is probably







>
>
>
>
>
>

>
>
>
>
>


















|







1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
    /* If nRec is 0 and this rollback is of a transaction created by this
    ** process and if this is the final header in the journal, then it means
    ** that this part of the journal was being filled but has not yet been
    ** synced to disk.  Compute the number of pages based on the remaining
    ** size of the file.
    **
    ** The third term of the test was added to fix ticket #2565.
    ** When rolling back a hot journal, nRec==0 always means that the next
    ** chunk of the journal contains zero pages to be rolled back.  But
    ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in
    ** the journal, it means that the journal might contain additional
    ** pages that need to be rolled back and that the number of pages 
    ** should be computed based on the journal file size.  
    */
    testcase( nRec==0 && !isHot
         && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)!=pPager->journalOff
         && ((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager))>0
         && pagerNextJournalPageIsValid(pPager)
    );
    if( nRec==0 && !isHot &&
        pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
      nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager));
    }

    /* If this is the first header read from the journal, truncate the
    ** database file back to its original size.
    */
    if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
      rc = pager_truncate(pPager, mxPg);
      if( rc!=SQLITE_OK ){
        goto end_playback;
      }
    }

    /* Copy original pages out of the journal and back into the database file.
    */
    for(u=0; u<nRec; u++){
      rc = pager_playback_one_page(pPager, 1, &pPager->journalOff, 0, 0);
      if( rc!=SQLITE_OK ){
        if( rc==SQLITE_DONE ){
          rc = SQLITE_OK;
          pPager->journalOff = szJ;
          break;
        }else{
          /* If we are unable to rollback, then the database is probably
1648
1649
1650
1651
1652
1653
1654
1655




1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684








1685
1686
1687
1688
1689
1690
1691
1692
1693
1694






1695
1696
1697
1698
1699











1700


1701
1702
1703
1704
1705
1706
1707
1708
1709
1710



1711

1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
  ** value. Reset it to the correct value for this process.
  */
  setSectorSize(pPager);
  return rc;
}

/*
** Playback a savepoint.




*/
static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
  i64 szJ;                 /* Size of the full journal */
  i64 iHdrOff;             /* End of first segment of main-journal records */
  Pgno ii;                 /* Loop counter */
  int rc = SQLITE_OK;      /* Return code */
  Bitvec *pDone = 0;       /* Bitvec to ensure pages played back only once */

  /* Allocate a bitvec to use to store the set of pages rolled back */
  if( pSavepoint ){
    pDone = sqlite3BitvecCreate(pSavepoint->nOrig);
    if( !pDone ){
      return SQLITE_NOMEM;
    }
  }

  /* Truncate the database back to the size it was before the 
  ** savepoint being reverted was opened.
  */
  pPager->dbSize = pSavepoint?pSavepoint->nOrig:pPager->dbOrigSize;
  assert( pPager->state>=PAGER_SHARED );

  /* Now roll back all main journal file records that occur after byte
  ** byte offset PagerSavepoint.iOffset that have a page number less than
  ** or equal to PagerSavepoint.nOrig. As each record is played back,
  ** the corresponding bit in bitvec PagerSavepoint.pInSavepoint is 
  ** cleared.
  */
  szJ = pPager->journalOff;








  if( pSavepoint ){
    iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
    pPager->journalOff = pSavepoint->iOffset;
    while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
      rc = pager_playback_one_page(pPager, 1, pPager->journalOff, 1, pDone);
      assert( rc!=SQLITE_DONE );
    }
  }else{
    pPager->journalOff = 0;
  }






  while( rc==SQLITE_OK && pPager->journalOff<szJ ){
    u32 nJRec = 0;     /* Number of Journal Records */
    u32 dummy;
    rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
    assert( rc!=SQLITE_DONE );











    if( nJRec==0 ){


      nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
    }
    for(ii=0; rc==SQLITE_OK && ii<nJRec && pPager->journalOff<szJ; ii++){
      rc = pager_playback_one_page(pPager, 1, pPager->journalOff, 1, pDone);
      assert( rc!=SQLITE_DONE );
    }
  }
  assert( rc!=SQLITE_OK || pPager->journalOff==szJ );

  /* Now roll back pages from the sub-journal. */



  if( pSavepoint ){

    for(ii=pSavepoint->iSubRec; rc==SQLITE_OK&&ii<(u32)pPager->stmtNRec; ii++){
      i64 offset = ii*(4+pPager->pageSize);
      rc = pager_playback_one_page(pPager, 0, offset, 1, pDone);
      assert( rc!=SQLITE_DONE );
    }
  }

  sqlite3BitvecDestroy(pDone);
  if( rc==SQLITE_OK ){
    pPager->journalOff = szJ;







|
>
>
>
>


|
















|


|
|
|
<
|


>
>
>
>
>
>
>
>




|





>
>
>
>
>
>





>
>
>
>
>
>
>
>
>
>
>
|
>
>
|


|





|
>
>
>

>

|
|







1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749

1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
  ** value. Reset it to the correct value for this process.
  */
  setSectorSize(pPager);
  return rc;
}

/*
** Playback savepoint pSavepoint.  Or, if pSavepoint==NULL, then playback
** the entire master journal file.
**
** The case pSavepoint==NULL occurs when a ROLLBACK TO command is invoked
** on a SAVEPOINT that is a transaction savepoint.
*/
static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
  i64 szJ;                 /* Effective size of the main journal */
  i64 iHdrOff;             /* End of first segment of main-journal records */
  Pgno ii;                 /* Loop counter */
  int rc = SQLITE_OK;      /* Return code */
  Bitvec *pDone = 0;       /* Bitvec to ensure pages played back only once */

  /* Allocate a bitvec to use to store the set of pages rolled back */
  if( pSavepoint ){
    pDone = sqlite3BitvecCreate(pSavepoint->nOrig);
    if( !pDone ){
      return SQLITE_NOMEM;
    }
  }

  /* Truncate the database back to the size it was before the 
  ** savepoint being reverted was opened.
  */
  pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize;
  assert( pPager->state>=PAGER_SHARED );

  /* Use pPager->journalOff as the effective size of the main rollback
  ** journal.  The actual file might be larger than this in
  ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST.  But anything

  ** past pPager->journalOff is off-limits to us.
  */
  szJ = pPager->journalOff;

  /* Begin by rolling back records from the main journal starting at
  ** PagerSavepoint.iOffset and continuing to the next journal header.
  ** There might be records in the main journal that have a page number
  ** greater than the current database size (pPager->dbSize) but those
  ** will be skipped automatically.  Pages are added to pDone as they
  ** are played back.
  */
  if( pSavepoint ){
    iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
    pPager->journalOff = pSavepoint->iOffset;
    while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
      rc = pager_playback_one_page(pPager, 1, &pPager->journalOff, 1, pDone);
      assert( rc!=SQLITE_DONE );
    }
  }else{
    pPager->journalOff = 0;
  }

  /* Continue rolling back records out of the main journal starting at
  ** the first journal header seen and continuing until the effective end
  ** of the main journal file.  Continue to skip out-of-range pages and
  ** continue adding pages rolled back to pDone.
  */
  while( rc==SQLITE_OK && pPager->journalOff<szJ ){
    u32 nJRec = 0;     /* Number of Journal Records */
    u32 dummy;
    rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
    assert( rc!=SQLITE_DONE );

    /*
    ** The "pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff"
    ** test is related to ticket #2565.  See the discussion in the
    ** pager_playback() function for additional information.
    */
    testcase( nJRec==0
         && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)!=pPager->journalOff
         && ((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager))>0
         && pagerNextJournalPageIsValid(pPager)
    );
    if( nJRec==0 
     && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff
    ){
      nJRec = (szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager);
    }
    for(ii=0; rc==SQLITE_OK && ii<nJRec && pPager->journalOff<szJ; ii++){
      rc = pager_playback_one_page(pPager, 1, &pPager->journalOff, 1, pDone);
      assert( rc!=SQLITE_DONE );
    }
  }
  assert( rc!=SQLITE_OK || pPager->journalOff==szJ );

  /* Finally,  rollback pages from the sub-journal.  Page that were
  ** previously rolled back out of the main journal (and are hence in pDone)
  ** will be skipped.  Out-of-range pages are also skipped.
  */
  if( pSavepoint ){
    i64 offset = pSavepoint->iSubRec*(4+pPager->pageSize);
    for(ii=pSavepoint->iSubRec; rc==SQLITE_OK&&ii<(u32)pPager->stmtNRec; ii++){
      assert( offset == ii*(4+pPager->pageSize) );
      rc = pager_playback_one_page(pPager, 0, &offset, 1, pDone);
      assert( rc!=SQLITE_DONE );
    }
  }

  sqlite3BitvecDestroy(pDone);
  if( rc==SQLITE_OK ){
    pPager->journalOff = szJ;