/ Check-in [8ec56325]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modify the journal format to be more robust against garbage that might appear in the file after a power failure. The changes are mostly working but more testing is still required. This check-in is to checkpoint the changes so far. (CVS 861)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 8ec5632536eea31197a3b1fd6abc57881a0cf1d7
User & Date: drh 2003-02-11 14:55:41
Context
2003-02-12
02:10
Fix a bug in the legacy journal format writing logic. (CVS 862) check-in: 6c927dd3 user: drh tags: trunk
2003-02-11
14:55
Modify the journal format to be more robust against garbage that might appear in the file after a power failure. The changes are mostly working but more testing is still required. This check-in is to checkpoint the changes so far. (CVS 861) check-in: 8ec56325 user: drh tags: trunk
2003-02-05
14:06
Make the shell run much faster for inputs where a single SQL statement spans thousands of lines by avoiding the call to sqlite_complete() unless the input ends in a semicolon. (CVS 860) check-in: e21afb82 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to VERSION.

1
2.7.6
|
1
2.8.0

Changes to src/pager.c.

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
..
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
...
134
135
136
137
138
139
140


141
142
143
144
145
146
147
...
148
149
150
151
152
153
154

155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
...
177
178
179
180
181
182
183




184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199



















200
201
202
203
204
205
206



207
208

209
210
211
212
213







214
215
216
217
218

219
220










221
222
223
224
225
226
227
...
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
...
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280

281
282
283
284
285
286
287
...
418
419
420
421
422
423
424












425
426
427
428



429
430
431
432
433

434
435
436
437
438
439
440







441









442
443
444
445
446
447
448
...
486
487
488
489
490
491
492

493
494
495

496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524


525
526
527
528











529
530
531
532
533
534
535
536
537
538
539
540
541
542
543






544
545
546
547
548
549
550
...
594
595
596
597
598
599
600

601
602
603
604
605
606
607
608
...
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642


643
644
645
646
647
648
649
650
651
652
653
...
927
928
929
930
931
932
933

934
935
936
937
938
939




940
941


942













943
944
945
946
947
948
949
....
1103
1104
1105
1106
1107
1108
1109
1110

1111
1112
1113
1114
1115
1116
1117
....
1366
1367
1368
1369
1370
1371
1372

1373
1374
1375

1376
1377









1378

1379
1380
1381
1382
1383
1384
1385
1386
....
1500
1501
1502
1503
1504
1505
1506
1507










1508




1509
1510
1511
1512
1513
1514
1515
....
1526
1527
1528
1529
1530
1531
1532
1533


1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
....
1725
1726
1727
1728
1729
1730
1731

1732
1733
1734
1735
1736
1737
1738

1739
1740
1741
1742
1743
1744
1745
....
1799
1800
1801
1802
1803
1804
1805

1806
1807





1808
1809
1810
1811
1812
1813
1814
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.72 2003/01/29 22:58:26 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

................................................................................
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 needSync;                   /* Sync journal before writing this page */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
  /* SQLITE_PAGE_SIZE bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};

/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
................................................................................
  int dbSize;                 /* Number of pages in the file */
  int origDbSize;             /* dbSize before the current change */
  int ckptSize;               /* Size of database (in pages) at ckpt_begin() */
  off_t ckptJSize;            /* Size of journal at ckpt_begin() */
#ifndef NDEBUG
  off_t syncJSize;            /* Size of journal at last fsync() call */
#endif


  int ckptNRec;               /* Number of records in the checkpoint journal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  void (*xDestructor)(void*); /* Call this routine when freeing pages */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
................................................................................
  u8 journalOpen;             /* True if journal file descriptors is valid */
  u8 journalStarted;          /* True if initial magic of journal is synced */
  u8 useJournal;              /* Do not use a rollback journal on this file */
  u8 ckptOpen;                /* True if the checkpoint journal is open */
  u8 ckptInUse;               /* True we are in a checkpoint */
  u8 ckptAutoopen;            /* Open ckpt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */

  u8 state;                   /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
  u8 errMask;                 /* One of several kinds of errors */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyFile;               /* True if database file has changed in any way */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 journalFormat;           /* Version number of the journal file */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInCkpt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pCkpt;               /* List of pages in the checkpoint journal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
................................................................................
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */

/*
** The journal file contains page records in the following
** format.




*/
typedef struct PageRecord PageRecord;
struct PageRecord {
  Pgno pgno;                     /* The page number */
  char aData[SQLITE_PAGE_SIZE];  /* Original data for page pgno */
};

/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are two journal formats.  The older journal format writes
** 32-bit integers in the byte-order of the host machine.  The new
** format writes integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to roll it back.



















*/
static const unsigned char aOldJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,
};



#define SQLITE_NEW_JOURNAL_FORMAT 1
#define SQLITE_OLD_JOURNAL_FORMAT 0


/*
** The following integer, if set, causes journals to be written in the
** old format.  This is used for testing purposes only - to make sure
** the code is able to rollback an old journal.







*/
#ifdef SQLITE_TEST
int pager_old_format = 0;
#else
# define pager_old_format 0

#endif











/*
** Enable reference count tracking here:
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
................................................................................
#else
# define REFINFO(X)
#endif

/*
** Read a 32-bit integer from the given file descriptor
*/
static int read32bits(Pager *pPager, OsFile *fd, u32 *pRes){
  u32 res;
  int rc;
  rc = sqliteOsRead(fd, &res, sizeof(res));
  if( rc==SQLITE_OK && pPager->journalFormat==SQLITE_NEW_JOURNAL_FORMAT ){
    unsigned char ac[4];
    memcpy(ac, &res, 4);
    res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
  }
  *pRes = res;
  return rc;
}
................................................................................

/*
** Write a 32-bit integer into the given file descriptor.  Writing
** is always done using the new journal format.
*/
static int write32bits(OsFile *fd, u32 val){
  unsigned char ac[4];
  if( pager_old_format ){
    return sqliteOsWrite(fd, &val, 4);
  }
  ac[0] = (val>>24) & 0xff;
  ac[1] = (val>>16) & 0xff;
  ac[2] = (val>>8) & 0xff;
  ac[3] = val & 0xff;
  return sqliteOsWrite(fd, ac, 4);
}

/*
** Write a 32-bit integer into a page header right before the
** page data.  This will overwrite the PgHdr.pDirty pointer.
*/
static void storePageNumber(PgHdr *p){
  u32 val = p->pgno;
  unsigned char *ac;
  ac = &((char*)PGHDR_TO_DATA(p))[-4];
  if( pager_old_format ){

    memcpy(ac, &val, 4);
  }else{
    ac[0] = (val>>24) & 0xff;
    ac[1] = (val>>16) & 0xff;
    ac[2] = (val>>8) & 0xff;
    ac[3] = val & 0xff;
  }
................................................................................
    ** child process does the COMMIT.  Because of the semantics of unix
    ** file locking, the unlock will fail.
    */
    pPager->state = SQLITE_UNLOCK;
  }
  return rc;
}













/*
** Read a single page from the journal file opened on file descriptor
** jfd.  Playback this one page.



*/
static int pager_playback_one_page(Pager *pPager, OsFile *jfd){
  int rc;
  PgHdr *pPg;              /* An existing page in the cache */
  PageRecord pgRec;


  rc = read32bits(pPager, jfd, &pgRec.pgno);
  if( rc!=SQLITE_OK ) return rc;
  rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
  if( rc!=SQLITE_OK ) return rc;

  /* Sanity checking on the page */







  if( pgRec.pgno>pPager->dbSize || pgRec.pgno==0 ) return SQLITE_CORRUPT;










  /* Playback the page.  Update the in-memory copy of the page
  ** at the same time, if there is one.
  */
  pPg = pager_lookup(pPager, pgRec.pgno);
  if( pPg==0 || pPg->needSync==0 ){
    TRACE2("PLAYBACK %d\n", pgRec.pgno);
................................................................................
** at the beginning) then this routine returns SQLITE_PROTOCOL.
** If any other errors occur during playback, the database will
** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
** pPager->errMask and SQLITE_CORRUPT is returned.  If it all
** works, then this routine returns SQLITE_OK.
*/
static int pager_playback(Pager *pPager){

  off_t nRec;              /* Number of Records */
  int i;                   /* Loop counter */
  Pgno mxPg = 0;           /* Size of the original file in pages */

  unsigned char aMagic[sizeof(aJournalMagic)];
  int rc;

  /* Figure out how many records are in the journal.  Abort early if
  ** the journal is empty.
  */
  assert( pPager->journalOpen );
  sqliteOsSeek(&pPager->jfd, 0);
  rc = sqliteOsFileSize(&pPager->jfd, &nRec);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  if( nRec < sizeof(aMagic)+sizeof(Pgno) ){
    goto end_playback;
  }
  nRec = (nRec - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);

  /* Read the beginning of the journal and truncate the
  ** database file back to its original size.
  */
  rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
  if( rc!=SQLITE_OK ){
    rc = SQLITE_PROTOCOL;
    goto end_playback;
  }
  if( memcmp(aMagic, aOldJournalMagic, sizeof(aMagic))==0 ){
    pPager->journalFormat = SQLITE_OLD_JOURNAL_FORMAT;
  }else if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))==0 ){
    pPager->journalFormat = SQLITE_NEW_JOURNAL_FORMAT;


  }else{
    rc = SQLITE_PROTOCOL;
    goto end_playback;
  }











  rc = read32bits(pPager, &pPager->jfd, &mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  pPager->dbSize = mxPg;
  
  /* Copy original pages out of the journal and back into the database file.
  */
  for(i=nRec-1; i>=0; i--){
    rc = pager_playback_one_page(pPager, &pPager->jfd);
    if( rc!=SQLITE_OK ) break;






  }


end_playback:
#if !defined(NDEBUG) && defined(SQLITE_TEST)
  /* For pages that were never written into the journal, restore the
  ** memory copy from the original database file.
................................................................................
**         journal file itself.
**
**    (2)  In addition to playing back the checkpoint journal, also
**         playback all pages of the transaction journal beginning
**         at offset pPager->ckptJSize.
*/
static int pager_ckpt_playback(Pager *pPager){

  off_t nRec;              /* Number of Records */
  int i;                   /* Loop counter */
  int rc;

  /* Truncate the database back to its original size.
  */
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->ckptSize);
  pPager->dbSize = pPager->ckptSize;
................................................................................
  /* Figure out how many records are in the checkpoint journal.
  */
  assert( pPager->ckptInUse && pPager->journalOpen );
  sqliteOsSeek(&pPager->cpfd, 0);
  nRec = pPager->ckptNRec;
  
  /* Copy original pages out of the checkpoint journal and back into the
  ** database file.
  */
  if( pager_old_format ){
    pPager->journalFormat = SQLITE_OLD_JOURNAL_FORMAT;
  }else{
    pPager->journalFormat = SQLITE_NEW_JOURNAL_FORMAT;
  }
  for(i=nRec-1; i>=0; i--){
    rc = pager_playback_one_page(pPager, &pPager->cpfd);
    if( rc!=SQLITE_OK ) goto end_ckpt_playback;
  }

  /* Figure out how many pages need to be copied out of the transaction
  ** journal.
  */
  rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
  if( rc!=SQLITE_OK ){
    goto end_ckpt_playback;
  }
  rc = sqliteOsFileSize(&pPager->jfd, &nRec);
  if( rc!=SQLITE_OK ){
    goto end_ckpt_playback;
  }
  nRec = (nRec - pPager->ckptJSize)/sizeof(PageRecord);
  for(i=nRec-1; i>=0; i--){
    rc = pager_playback_one_page(pPager, &pPager->jfd);


    if( rc!=SQLITE_OK ) goto end_ckpt_playback;
  }
  

end_ckpt_playback:
  if( rc!=SQLITE_OK ){
    pPager->errMask |= PAGER_ERR_CORRUPT;
    rc = SQLITE_CORRUPT;
  }
  return rc;
}
................................................................................
  int rc = SQLITE_OK;

  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync ){
    if( !pPager->tempFile ){

      assert( pPager->journalOpen );
      assert( !pPager->noSync );
      TRACE1("SYNC\n");
      rc = sqliteOsSync(&pPager->jfd);
      if( rc!=0 ) return rc;
#ifndef NDEBUG




      rc = sqliteOsFileSize(&pPager->jfd, &pPager->syncJSize);
      if( rc!=0 ) return rc;


#endif













      pPager->journalStarted = 1;
    }
    pPager->needSync = 0;

    /* Erase the needSync flag from every page.
    */
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
................................................................................
  }
  if( pPg==0 ){
    /* The requested page is not in the page cache. */
    int h;
    pPager->nMiss++;
    if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
      /* Create a new page */
      pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );

      if( pPg==0 ){
        *ppPage = 0;
        pager_unwritelock(pPager);
        pPager->errMask |= PAGER_ERR_MEM;
        return SQLITE_NOMEM;
      }
      memset(pPg, 0, sizeof(*pPg));
................................................................................
    pPager->state = SQLITE_READLOCK;
    return SQLITE_CANTOPEN;
  }
  pPager->journalOpen = 1;
  pPager->journalStarted = 0;
  pPager->needSync = 0;
  pPager->alwaysRollback = 0;

  sqlitepager_pagecount(pPager);
  pPager->origDbSize = pPager->dbSize;
  if( pager_old_format ){

    rc = sqliteOsWrite(&pPager->jfd, aOldJournalMagic,
                       sizeof(aOldJournalMagic));









  }else{

    rc = sqliteOsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
  }
  if( rc==SQLITE_OK ){
    rc = write32bits(&pPager->jfd, pPager->dbSize);
  }
  if( pPager->ckptAutoopen && rc==SQLITE_OK ){
    rc = sqlitepager_ckpt_begin(pPager);
  }
................................................................................

  /* The transaction journal now exists and we have a write lock on the
  ** main database file.  Write the current page to the transaction 
  ** journal if it is not there already.
  */
  if( !pPg->inJournal && pPager->useJournal ){
    if( (int)pPg->pgno <= pPager->origDbSize ){
      storePageNumber(pPg);










      rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);




      if( rc!=SQLITE_OK ){
        sqlitepager_rollback(pPager);
        pPager->errMask |= PAGER_ERR_FULL;
        return rc;
      }
      assert( pPager->aInJournal!=0 );
      pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
................................................................................
    }
    if( pPg->needSync ){
      pPager->needSync = 1;
    }
  }

  /* If the checkpoint journal is open and the page is not in it,
  ** then write the current page to the checkpoint journal.


  */
  if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
    assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
    storePageNumber(pPg);
    rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
    if( rc!=SQLITE_OK ){
      sqlitepager_rollback(pPager);
      pPager->errMask |= PAGER_ERR_FULL;
      return rc;
    }
    pPager->ckptNRec++;
................................................................................
  /* Truncate the journal to the size it was at the conclusion of the
  ** last sqliteOsSync() call.  This is really an error check.  If the
  ** rollback still works, it means that the rollback would have also
  ** worked if it had occurred after an OS crash or unexpected power
  ** loss.
  */
  if( !pPager->noSync ){

    assert( !pPager->tempFile );
    if( pPager->syncJSize<sizeof(aJournalMagic)+sizeof(Pgno) ){
      pPager->syncJSize = sizeof(aJournalMagic)+sizeof(Pgno);
    }
    TRACE2("TRUNCATE JOURNAL %lld\n", pPager->syncJSize);
    rc =  sqliteOsTruncate(&pPager->jfd, pPager->syncJSize);
    if( rc ) return rc;

  }
#endif

  if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
    if( pPager->state>=SQLITE_WRITELOCK ){
      pager_playback(pPager);
    }
................................................................................
  assert( pPager->journalOpen );
  assert( !pPager->ckptInUse );
  pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
  if( pPager->aInCkpt==0 ){
    sqliteOsReadLock(&pPager->fd);
    return SQLITE_NOMEM;
  }

  rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
  if( rc ) goto ckpt_begin_failed;





  pPager->ckptSize = pPager->dbSize;
  if( !pPager->ckptOpen ){
    rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
    if( rc ) goto ckpt_begin_failed;
    pPager->ckptOpen = 1;
    pPager->ckptNRec = 0;
  }







|







 







|







 







>
>







 







>







<







 







>
>
>
>











|
|
|

|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|


|


>
>
>
|
|
>


<
<
<
>
>
>
>
>
>
>


|

<
>


>
>
>
>
>
>
>
>
>
>







 







|



|







 







|













|
<

|
<
>







 







>
>
>
>
>
>
>
>
>
>
>
>




>
>
>

|



>

|
|

|

|
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>







 







>
|


>
|







|



|


<









|
|
|
|
>
>




>
>
>
>
>
>
>
>
>
>
>
|











|
|
|
>
>
>
>
>
>







 







>
|







 







|
|
|
|
|
|
|
<
<










|



|

|
>
>
|
|
|
|







 







>


<
<
<

>
>
>
>
|
|
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>







 







|
>







 







>


<
>
|
<
>
>
>
>
>
>
>
>
>

>
|







 







|
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>







 







|
>
>



|







 







>

|
|




>







 







>


>
>
>
>
>







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
..
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
...
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
...
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164

165
166
167
168
169
170
171
...
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239



240
241
242
243
244
245
246
247
248
249
250

251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
...
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
...
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319

320
321

322
323
324
325
326
327
328
329
...
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
...
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586

587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
...
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
...
705
706
707
708
709
710
711
712
713
714
715
716
717
718


719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
....
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031



1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
....
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
....
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488

1489
1490

1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
....
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
....
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
....
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
....
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.73 2003/02/11 14:55:41 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

................................................................................
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 needSync;                   /* Sync journal before writing this page */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
  /* SQLITE_PAGE_SIZE bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data and checksum */
};

/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
................................................................................
  int dbSize;                 /* Number of pages in the file */
  int origDbSize;             /* dbSize before the current change */
  int ckptSize;               /* Size of database (in pages) at ckpt_begin() */
  off_t ckptJSize;            /* Size of journal at ckpt_begin() */
#ifndef NDEBUG
  off_t syncJSize;            /* Size of journal at last fsync() call */
#endif
  int nRec;                   /* Number of pages written to the journal */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  int ckptNRec;               /* Number of records in the checkpoint journal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  void (*xDestructor)(void*); /* Call this routine when freeing pages */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
................................................................................
  u8 journalOpen;             /* True if journal file descriptors is valid */
  u8 journalStarted;          /* True if initial magic of journal is synced */
  u8 useJournal;              /* Do not use a rollback journal on this file */
  u8 ckptOpen;                /* True if the checkpoint journal is open */
  u8 ckptInUse;               /* True we are in a checkpoint */
  u8 ckptAutoopen;            /* Open ckpt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 state;                   /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
  u8 errMask;                 /* One of several kinds of errors */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyFile;               /* True if database file has changed in any way */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */

  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInCkpt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pCkpt;               /* List of pages in the checkpoint journal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
................................................................................
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */

/*
** The journal file contains page records in the following
** format.
**
** Actually, this structure is the complete page record for pager
** formats less than 3.  Beginning with format 3, this record is surrounded
** by two checksums.
*/
typedef struct PageRecord PageRecord;
struct PageRecord {
  Pgno pgno;                     /* The page number */
  char aData[SQLITE_PAGE_SIZE];  /* Original data for page pgno */
};

/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are three journal formats (so far). The 1st journal format writes
** 32-bit integers in the byte-order of the host machine.  New
** formats writes integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to rollback journals created by older versions of the library.
**
** The 3rd journal format (added for 2.8.0) adds additional sanity
** checking information to the journal.  If the power fails while the
** journal is being written, semi-random garbage data might appear in
** the journal file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the 3rd journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the SQLITE_PAGE_SIZE bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to random value which
** is different for every journal, we minimize that risk.
*/
static const unsigned char aJournalMagic1[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic2[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,
};
static const unsigned char aJournalMagic3[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6,
};
#define JOURNAL_FORMAT_1 1
#define JOURNAL_FORMAT_2 2
#define JOURNAL_FORMAT_3 3

/*



** The following integer determines what format to use when creating
** new primary journal files.  By default we always use format 3.
** When testing, we can set this value to older journal formats in order to
** make sure that newer versions of the library are able to rollback older
** journal files.
**
** Note that checkpoint journals always use format 2 and omit the header.
*/
#ifdef SQLITE_TEST
int journal_format = 3;
#else

# define journal_format 3
#endif

/*
** The size of the header and of each page in the journal varies according
** to which journal format is being used.  The following macros figure out
** the sizes based on format numbers.
*/
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))
#define JOURNAL_PG_SZ(X) \
   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))

/*
** Enable reference count tracking here:
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
................................................................................
#else
# define REFINFO(X)
#endif

/*
** Read a 32-bit integer from the given file descriptor
*/
static int read32bits(int format, OsFile *fd, u32 *pRes){
  u32 res;
  int rc;
  rc = sqliteOsRead(fd, &res, sizeof(res));
  if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
    unsigned char ac[4];
    memcpy(ac, &res, 4);
    res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
  }
  *pRes = res;
  return rc;
}
................................................................................

/*
** Write a 32-bit integer into the given file descriptor.  Writing
** is always done using the new journal format.
*/
static int write32bits(OsFile *fd, u32 val){
  unsigned char ac[4];
  if( journal_format<=1 ){
    return sqliteOsWrite(fd, &val, 4);
  }
  ac[0] = (val>>24) & 0xff;
  ac[1] = (val>>16) & 0xff;
  ac[2] = (val>>8) & 0xff;
  ac[3] = val & 0xff;
  return sqliteOsWrite(fd, ac, 4);
}

/*
** Write a 32-bit integer into a page header right before the
** page data.  This will overwrite the PgHdr.pDirty pointer.
*/
static void store32bits(u32 val, PgHdr *p, int offset){

  unsigned char *ac;
  ac = &((char*)PGHDR_TO_DATA(p))[offset];

  if( journal_format<=1 ){
    memcpy(ac, &val, 4);
  }else{
    ac[0] = (val>>24) & 0xff;
    ac[1] = (val>>16) & 0xff;
    ac[2] = (val>>8) & 0xff;
    ac[3] = val & 0xff;
  }
................................................................................
    ** child process does the COMMIT.  Because of the semantics of unix
    ** file locking, the unlock will fail.
    */
    pPager->state = SQLITE_UNLOCK;
  }
  return rc;
}

/*
** Compute and return a checksum for the page of data.
*/
static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
  u32 cksum = pPager->cksumInit + pgno;
  /* const u8 *a = (const u8*)aData;
  int i;
  for(i=0; i<SQLITE_PAGE_SIZE; i++){ cksum += a[i]; } */
  /* fprintf(stderr,"CKSUM for %p(%08x) page %d: %08x\n", pPager, pPager->cksumInit, pgno, cksum); */
  return cksum;
}

/*
** Read a single page from the journal file opened on file descriptor
** jfd.  Playback this one page.
**
** There are three different journal formats.  The format parameter determines
** which format is used by the journal that is played back.
*/
static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){
  int rc;
  PgHdr *pPg;              /* An existing page in the cache */
  PageRecord pgRec;
  u32 cksum;

  rc = read32bits(format, jfd, &pgRec.pgno);
  if( rc!=SQLITE_OK ) return SQLITE_DONE;
  rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
  if( rc!=SQLITE_OK ) return SQLITE_DONE;

  /* Sanity checking on the page.  This is more important that I originally
  ** thought.  If a power failure occurs while the journal is being written,
  ** it could cause invalid data to be written into the journal.  We need to
  ** detect this invalid data (with high probability) and ignore it.
  */
  if( pgRec.pgno==0 ){
    return SQLITE_DONE;
  }
  if( pgRec.pgno>pPager->dbSize ){
    return SQLITE_OK;
  }
  if( format>=JOURNAL_FORMAT_3 ){
    rc = read32bits(format, jfd, &cksum);
    if( rc ) return SQLITE_DONE;
    if( pager_cksum(pPager, pgRec.pgno, pgRec.aData)!=cksum ){
      return SQLITE_DONE;
    }
  }

  /* Playback the page.  Update the in-memory copy of the page
  ** at the same time, if there is one.
  */
  pPg = pager_lookup(pPager, pgRec.pgno);
  if( pPg==0 || pPg->needSync==0 ){
    TRACE2("PLAYBACK %d\n", pgRec.pgno);
................................................................................
** at the beginning) then this routine returns SQLITE_PROTOCOL.
** If any other errors occur during playback, the database will
** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
** pPager->errMask and SQLITE_CORRUPT is returned.  If it all
** works, then this routine returns SQLITE_OK.
*/
static int pager_playback(Pager *pPager){
  off_t szJ;               /* Size of the journal file in bytes */
  int nRec;                /* Number of Records in the journal */
  int i;                   /* Loop counter */
  Pgno mxPg = 0;           /* Size of the original file in pages */
  int format;              /* Format of the journal file. */
  unsigned char aMagic[sizeof(aJournalMagic1)];
  int rc;

  /* Figure out how many records are in the journal.  Abort early if
  ** the journal is empty.
  */
  assert( pPager->journalOpen );
  sqliteOsSeek(&pPager->jfd, 0);
  rc = sqliteOsFileSize(&pPager->jfd, &szJ);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
    goto end_playback;
  }


  /* Read the beginning of the journal and truncate the
  ** database file back to its original size.
  */
  rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
  if( rc!=SQLITE_OK ){
    rc = SQLITE_PROTOCOL;
    goto end_playback;
  }
  if( memcmp(aMagic, aJournalMagic3, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_3;
  }else if( memcmp(aMagic, aJournalMagic2, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_2;
  }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_1;
  }else{
    rc = SQLITE_PROTOCOL;
    goto end_playback;
  }
  if( format>=JOURNAL_FORMAT_3 ){
    rc = read32bits(format, &pPager->jfd, &nRec);
    if( rc ) goto end_playback;
    rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
    if( rc ) goto end_playback;
    if( nRec==0xffffffff ){
      nRec = (szJ - JOURNAL_HDR_SZ(3))/JOURNAL_PG_SZ(3);
    }
  }else{
    nRec = (szJ - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
  }
  rc = read32bits(format, &pPager->jfd, &mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  pPager->dbSize = mxPg;
  
  /* Copy original pages out of the journal and back into the database file.
  */
  for(i=0; i<nRec; i++){
    rc = pager_playback_one_page(pPager, &pPager->jfd, format);
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_DONE ){
fprintf(stderr,"Playback complete after %d of %d records\n", i, nRec);
        rc = SQLITE_OK;
      }
      break;
    }
  }


end_playback:
#if !defined(NDEBUG) && defined(SQLITE_TEST)
  /* For pages that were never written into the journal, restore the
  ** memory copy from the original database file.
................................................................................
**         journal file itself.
**
**    (2)  In addition to playing back the checkpoint journal, also
**         playback all pages of the transaction journal beginning
**         at offset pPager->ckptJSize.
*/
static int pager_ckpt_playback(Pager *pPager){
  off_t szJ;               /* Size of the full journal */
  int nRec;                /* Number of Records */
  int i;                   /* Loop counter */
  int rc;

  /* Truncate the database back to its original size.
  */
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->ckptSize);
  pPager->dbSize = pPager->ckptSize;
................................................................................
  /* Figure out how many records are in the checkpoint journal.
  */
  assert( pPager->ckptInUse && pPager->journalOpen );
  sqliteOsSeek(&pPager->cpfd, 0);
  nRec = pPager->ckptNRec;
  
  /* Copy original pages out of the checkpoint journal and back into the
  ** database file.  Note that the checkpoint journal always uses format
  ** 2 instead of format 3 since it does not need to be concerned with
  ** power failures corrupting the journal and can thus omit the checksums.
  */
  for(i=nRec-1; i>=0; i--){
    rc = pager_playback_one_page(pPager, &pPager->cpfd, 2);
    assert( rc!=SQLITE_DONE );


    if( rc!=SQLITE_OK ) goto end_ckpt_playback;
  }

  /* Figure out how many pages need to be copied out of the transaction
  ** journal.
  */
  rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
  if( rc!=SQLITE_OK ){
    goto end_ckpt_playback;
  }
  rc = sqliteOsFileSize(&pPager->jfd, &szJ);
  if( rc!=SQLITE_OK ){
    goto end_ckpt_playback;
  }
  nRec = (szJ - pPager->ckptJSize)/JOURNAL_PG_SZ(journal_format);
  for(i=nRec-1; i>=0; i--){
    rc = pager_playback_one_page(pPager, &pPager->jfd, journal_format);
    if( rc!=SQLITE_OK ){
      assert( rc!=SQLITE_DONE );
      goto end_ckpt_playback;
    }
  }
  
end_ckpt_playback:
  if( rc!=SQLITE_OK ){
    pPager->errMask |= PAGER_ERR_CORRUPT;
    rc = SQLITE_CORRUPT;
  }
  return rc;
}
................................................................................
  int rc = SQLITE_OK;

  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync ){
    if( !pPager->tempFile ){
      off_t szJ;
      assert( pPager->journalOpen );
      assert( !pPager->noSync );



#ifndef NDEBUG
      {
        off_t hdrSz, pgSz;
        hdrSz = JOURNAL_HDR_SZ(journal_format);
        pgSz = JOURNAL_PG_SZ(journal_format);
        rc = sqliteOsFileSize(&pPager->jfd, &pPager->syncJSize);
        if( rc!=0 ) return rc;
        assert( pPager->nRec*pgSz+hdrSz==pPager->syncJSize );
      }
#endif
      if( pPager->fullSync ){
        TRACE1("SYNC\n");
        rc = sqliteOsSync(&pPager->jfd);
        if( rc!=0 ) return rc;
      }
      sqliteOsSeek(&pPager->jfd, sizeof(aJournalMagic1));
      write32bits(&pPager->jfd, pPager->nRec);
      szJ = JOURNAL_HDR_SZ(journal_format) +
               pPager->nRec*JOURNAL_PG_SZ(journal_format);
      sqliteOsSeek(&pPager->jfd, szJ);
      TRACE1("SYNC\n");
      rc = sqliteOsSync(&pPager->jfd);
      if( rc!=0 ) return rc;
      pPager->journalStarted = 1;
    }
    pPager->needSync = 0;

    /* Erase the needSync flag from every page.
    */
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
................................................................................
  }
  if( pPg==0 ){
    /* The requested page is not in the page cache. */
    int h;
    pPager->nMiss++;
    if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
      /* Create a new page */
      pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE 
                              + sizeof(u32) + pPager->nExtra );
      if( pPg==0 ){
        *ppPage = 0;
        pager_unwritelock(pPager);
        pPager->errMask |= PAGER_ERR_MEM;
        return SQLITE_NOMEM;
      }
      memset(pPg, 0, sizeof(*pPg));
................................................................................
    pPager->state = SQLITE_READLOCK;
    return SQLITE_CANTOPEN;
  }
  pPager->journalOpen = 1;
  pPager->journalStarted = 0;
  pPager->needSync = 0;
  pPager->alwaysRollback = 0;
  pPager->nRec = 0;
  sqlitepager_pagecount(pPager);
  pPager->origDbSize = pPager->dbSize;

  if( journal_format==JOURNAL_FORMAT_3 ){
    rc = sqliteOsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));

    if( rc==SQLITE_OK ){
      rc = write32bits(&pPager->jfd, pPager->tempFile ? 0xffffffff : 0);
    }
    if( rc==SQLITE_OK ){
      pPager->cksumInit = (u32)sqliteRandomInteger();
      rc = write32bits(&pPager->jfd, pPager->cksumInit);
    }
  }else if( journal_format==JOURNAL_FORMAT_2 ){
    rc = sqliteOsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
  }else{
    assert( journal_format==JOURNAL_FORMAT_1 );
    rc = sqliteOsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
  }
  if( rc==SQLITE_OK ){
    rc = write32bits(&pPager->jfd, pPager->dbSize);
  }
  if( pPager->ckptAutoopen && rc==SQLITE_OK ){
    rc = sqlitepager_ckpt_begin(pPager);
  }
................................................................................

  /* The transaction journal now exists and we have a write lock on the
  ** main database file.  Write the current page to the transaction 
  ** journal if it is not there already.
  */
  if( !pPg->inJournal && pPager->useJournal ){
    if( (int)pPg->pgno <= pPager->origDbSize ){
      int szPg;
      u32 saved;
      if( journal_format>=JOURNAL_FORMAT_3 ){
        u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
        saved = *(u32*)PGHDR_TO_EXTRA(pPg);
        store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
        szPg = SQLITE_PAGE_SIZE+8;
      }else{
        szPg = SQLITE_PAGE_SIZE+4;
      }
      store32bits(pPg->pgno, pPg, -4);
      rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
      if( journal_format>=JOURNAL_FORMAT_3 ){
        *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
      }
      pPager->nRec++;
      if( rc!=SQLITE_OK ){
        sqlitepager_rollback(pPager);
        pPager->errMask |= PAGER_ERR_FULL;
        return rc;
      }
      assert( pPager->aInJournal!=0 );
      pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
................................................................................
    }
    if( pPg->needSync ){
      pPager->needSync = 1;
    }
  }

  /* If the checkpoint journal is open and the page is not in it,
  ** then write the current page to the checkpoint journal.  Note that
  ** the checkpoint journal always uses the simplier format 2 that lacks
  ** checksums.  The header is also omitted from the checkpoint journal.
  */
  if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
    assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
    store32bits(pPg->pgno, pPg, -4);
    rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
    if( rc!=SQLITE_OK ){
      sqlitepager_rollback(pPager);
      pPager->errMask |= PAGER_ERR_FULL;
      return rc;
    }
    pPager->ckptNRec++;
................................................................................
  /* Truncate the journal to the size it was at the conclusion of the
  ** last sqliteOsSync() call.  This is really an error check.  If the
  ** rollback still works, it means that the rollback would have also
  ** worked if it had occurred after an OS crash or unexpected power
  ** loss.
  */
  if( !pPager->noSync ){
    int m = JOURNAL_HDR_SZ(journal_format);
    assert( !pPager->tempFile );
    if( pPager->syncJSize<m ){
      pPager->syncJSize = m;
    }
    TRACE2("TRUNCATE JOURNAL %lld\n", pPager->syncJSize);
    rc =  sqliteOsTruncate(&pPager->jfd, pPager->syncJSize);
    if( rc ) return rc;
    pPager->nRec = 0;
  }
#endif

  if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
    if( pPager->state>=SQLITE_WRITELOCK ){
      pager_playback(pPager);
    }
................................................................................
  assert( pPager->journalOpen );
  assert( !pPager->ckptInUse );
  pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
  if( pPager->aInCkpt==0 ){
    sqliteOsReadLock(&pPager->fd);
    return SQLITE_NOMEM;
  }
#ifndef NDEBUG
  rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
  if( rc ) goto ckpt_begin_failed;
  assert( pPager->ckptJSize == 
    pPager->nRec*JOURNAL_PG_SZ(journal_format)+JOURNAL_HDR_SZ(journal_format) );
#endif
  pPager->ckptJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
                         + JOURNAL_HDR_SZ(journal_format);
  pPager->ckptSize = pPager->dbSize;
  if( !pPager->ckptOpen ){
    rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
    if( rc ) goto ckpt_begin_failed;
    pPager->ckptOpen = 1;
    pPager->ckptNRec = 0;
  }

Changes to src/pager.h.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
70
71
72
73
74
75
76
77
78
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.18 2002/12/02 04:25:21 drh Exp $
*/

/*
** The size of one page
**
** You can change this value to another (reasonable) power of two
** such as 512, 2048, 4096, or 8192 and things will still work.  But
................................................................................
void sqlitepager_dont_rollback(void*);
void sqlitepager_dont_write(Pager*, Pgno);
int *sqlitepager_stats(Pager*);

#ifdef SQLITE_TEST
void sqlitepager_refdump(Pager*);
int pager_refinfo_enable;
int pager_old_format;
#endif







|







 







|

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
70
71
72
73
74
75
76
77
78
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.19 2003/02/11 14:55:41 drh Exp $
*/

/*
** The size of one page
**
** You can change this value to another (reasonable) power of two
** such as 512, 2048, 4096, or 8192 and things will still work.  But
................................................................................
void sqlitepager_dont_rollback(void*);
void sqlitepager_dont_write(Pager*, Pgno);
int *sqlitepager_stats(Pager*);

#ifdef SQLITE_TEST
void sqlitepager_refdump(Pager*);
int pager_refinfo_enable;
int journal_format;
#endif

Changes to src/sqliteInt.h.

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.159 2003/01/29 18:46:53 drh Exp $
*/
#include "config.h"
#include "sqlite.h"
#include "hash.h"
#include "vdbe.h"
#include "parse.h"
#include "btree.h"
................................................................................
** RESTRICT, SETNULL, and CASCADE actions apply only to foreign keys.
** RESTRICT is the same as ABORT for IMMEDIATE foreign keys and the
** same as ROLLBACK for DEFERRED keys.  SETNULL means that the foreign
** key is set to NULL.  CASCADE means that a DELETE or UPDATE of the
** referenced table row is propagated into the row that holds the
** foreign key.
** 
** The following there symbolic values are used to record which type
** of action to take.
*/
#define OE_None     0   /* There is no constraint to check */
#define OE_Rollback 1   /* Fail the operation and rollback the transaction */
#define OE_Abort    2   /* Back out changes but do no rollback transaction */
#define OE_Fail     3   /* Stop the operation but leave all prior changes */
#define OE_Ignore   4   /* Ignore the error. Do not do the INSERT or UPDATE */







|







 







|







7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.160 2003/02/11 14:55:41 drh Exp $
*/
#include "config.h"
#include "sqlite.h"
#include "hash.h"
#include "vdbe.h"
#include "parse.h"
#include "btree.h"
................................................................................
** RESTRICT, SETNULL, and CASCADE actions apply only to foreign keys.
** RESTRICT is the same as ABORT for IMMEDIATE foreign keys and the
** same as ROLLBACK for DEFERRED keys.  SETNULL means that the foreign
** key is set to NULL.  CASCADE means that a DELETE or UPDATE of the
** referenced table row is propagated into the row that holds the
** foreign key.
** 
** The following symbolic values are used to record which type
** of action to take.
*/
#define OE_None     0   /* There is no constraint to check */
#define OE_Rollback 1   /* Fail the operation and rollback the transaction */
#define OE_Abort    2   /* Back out changes but do no rollback transaction */
#define OE_Fail     3   /* Stop the operation but leave all prior changes */
#define OE_Ignore   4   /* Ignore the error. Do not do the INSERT or UPDATE */

Changes to src/test2.c.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
549
550
551
552
553
554
555
556
557
558
559
560
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the pager.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test2.c,v 1.14 2002/12/17 14:19:49 drh Exp $
*/
#include "os.h"
#include "sqliteInt.h"
#include "pager.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
................................................................................
  int i;
  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    Tcl_CreateCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  }
  Tcl_LinkVar(interp, "sqlite_io_error_pending",
     (char*)&sqlite_io_error_pending, TCL_LINK_INT);
#ifdef SQLITE_TEST
  Tcl_LinkVar(interp, "pager_old_format",
     (char*)&pager_old_format, TCL_LINK_INT);
#endif
  return TCL_OK;
}







|







 







|
|



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
549
550
551
552
553
554
555
556
557
558
559
560
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the pager.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test2.c,v 1.15 2003/02/11 14:55:41 drh Exp $
*/
#include "os.h"
#include "sqliteInt.h"
#include "pager.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
................................................................................
  int i;
  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    Tcl_CreateCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  }
  Tcl_LinkVar(interp, "sqlite_io_error_pending",
     (char*)&sqlite_io_error_pending, TCL_LINK_INT);
#ifdef SQLITE_TEST
  Tcl_LinkVar(interp, "journal_format",
     (char*)&journal_format, TCL_LINK_INT);
#endif
  return TCL_OK;
}

Changes to test/trans.test.

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is database locks.
#
# $Id: trans.test,v 1.16 2002/08/18 20:28:07 drh Exp $


set testdir [file dirname $argv0]
source $testdir/tester.tcl


# Create several tables to work with.
................................................................................
}

# Do rollbacks.  Make sure the signature does not change.
#
for {set i 2} {$i<=$limit} {incr i} {
  set ::sig [signature]
  set cnt [lindex $::sig 0]
  set ::pager_old_format [expr {($i%4)==0}]
  do_test trans-9.$i.1-$cnt {
     execsql {
       BEGIN;
       DELETE FROM t3 WHERE random()%10!=0;
       INSERT INTO t3 SELECT randstr(10,10)||x FROM t3;
       INSERT INTO t3 SELECT randstr(10,10)||x FROM t3;
       ROLLBACK;







|







 







|







7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is database locks.
#
# $Id: trans.test,v 1.17 2003/02/11 14:55:42 drh Exp $


set testdir [file dirname $argv0]
source $testdir/tester.tcl


# Create several tables to work with.
................................................................................
}

# Do rollbacks.  Make sure the signature does not change.
#
for {set i 2} {$i<=$limit} {incr i} {
  set ::sig [signature]
  set cnt [lindex $::sig 0]
  set ::journal_format [expr {($i%3)+1}]
  do_test trans-9.$i.1-$cnt {
     execsql {
       BEGIN;
       DELETE FROM t3 WHERE random()%10!=0;
       INSERT INTO t3 SELECT randstr(10,10)||x FROM t3;
       INSERT INTO t3 SELECT randstr(10,10)||x FROM t3;
       ROLLBACK;