SQLite

Check-in [48832d35ed]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix inaccuracies and add details to comments in the pager. Change the name of one function to make its purpose clearer. Ticket #599. (CVS 1209)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 48832d35ed0d5ba02908822c749591e76b790c48
User & Date: drh 2004-02-08 06:05:46.000
Context
2004-02-08
06:06
Add the crashtest1.c program used to test the ability of the database to survive a program crash or power failure. Ticket #599. (CVS 1210) (check-in: 597a59a72d user: drh tags: trunk)
06:05
Fix inaccuracies and add details to comments in the pager. Change the name of one function to make its purpose clearer. Ticket #599. (CVS 1209) (check-in: 48832d35ed user: drh tags: trunk)
00:40
Preliminary fix for ticket #599. More testing and analysis needed. (CVS 1208) (check-in: dc5be2c82b user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.93 2004/02/08 00:40:52 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>








|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.94 2004/02/08 06:05:46 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
  int nExtra;                 /* Add this many bytes to each in-memory page */
  void (*xDestructor)(void*); /* Call this routine when freeing pages */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  u8 journalOpen;             /* True if journal file descriptors is valid */
  u8 journalStarted;          /* True if initial magic of journal is synced */
  u8 useJournal;              /* Do not use a rollback journal on this file */
  u8 ckptOpen;                /* True if the checkpoint journal is open */
  u8 ckptInUse;               /* True we are in a checkpoint */
  u8 ckptAutoopen;            /* Open ckpt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 state;                   /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
  u8 errMask;                 /* One of several kinds of errors */







|
|







142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
  int nExtra;                 /* Add this many bytes to each in-memory page */
  void (*xDestructor)(void*); /* Call this routine when freeing pages */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  u8 journalOpen;             /* True if journal file descriptors is valid */
  u8 journalStarted;          /* True if header of journal is synced */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 ckptOpen;                /* True if the checkpoint journal is open */
  u8 ckptInUse;               /* True we are in a checkpoint */
  u8 ckptAutoopen;            /* Open ckpt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 state;                   /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
  u8 errMask;                 /* One of several kinds of errors */
275
276
277
278
279
280
281
282






283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298





299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315



316
317
318
319
320
321
322
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif

/*
** Read a 32-bit integer from the given file descriptor and store it
** in *pRes.  The return value is the result code from sqliteOsRead().
**
** When format is greater than JOURNAL_FORMAT_1 the four bytes read are
** interpreted as a big-endian integer.  Otherwise the bytes are used
** as-is, in the native byte order of the host machine.
**
** The local result is zero-initialized so that *pRes never receives an
** indeterminate value when the read fails.
*/
static int read32bits(int format, OsFile *fd, u32 *pRes){
  u32 res = 0;
  int rc;
  rc = sqliteOsRead(fd, &res, sizeof(res));
  if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
    unsigned char ac[4];
    memcpy(ac, &res, 4);
    /* Cast each byte to u32 before shifting.  Without the cast, ac[0] is
    ** promoted to a signed int and ac[0]<<24 overflows when the high bit
    ** of the byte is set.
    */
    res = ((u32)ac[0]<<24) | ((u32)ac[1]<<16) | ((u32)ac[2]<<8) | (u32)ac[3];
  }
  *pRes = res;
  return rc;
}

/*
** Write the 32-bit integer val to the given file descriptor.  The
** return value is the result code from sqliteOsWrite().
**
** If journal_format is 1 or less, the integer is written in the native
** byte order of the host machine.  Otherwise it is written as four
** bytes, most significant byte first.
*/
static int write32bits(OsFile *fd, u32 val){
  unsigned char aBuf[4];
  int i;
  if( journal_format<=1 ){
    return sqliteOsWrite(fd, &val, 4);
  }
  /* Fill the buffer from the least significant byte backwards so that
  ** aBuf[0] ends up holding the most significant byte.
  */
  for(i=3; i>=0; i--){
    aBuf[i] = val & 0xff;
    val >>= 8;
  }
  return sqliteOsWrite(fd, aBuf, 4);
}

/*
** Write a 32-bit integer into a page header right before the
** page data.  This will overwrite the PgHdr.pDirty pointer.



*/
static void store32bits(u32 val, PgHdr *p, int offset){
  unsigned char *ac;
  ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
  if( journal_format<=1 ){
    memcpy(ac, &val, 4);
  }else{







|
>
>
>
>
>
>















|
>
>
>
>
>
|
















>
>
>







275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif

/*
** Read a 32-bit integer from the given file descriptor.  Store the integer
** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
** error code if something goes wrong.
**
** If the journal format is 2 or 3, read a big-endian integer.  If the
** journal format is 1, read an integer in the native byte-order of the
** host machine.
**
** The local result is zero-initialized so that *pRes never receives an
** indeterminate value when the read fails.
*/
static int read32bits(int format, OsFile *fd, u32 *pRes){
  u32 res = 0;
  int rc;
  rc = sqliteOsRead(fd, &res, sizeof(res));
  if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
    unsigned char ac[4];
    memcpy(ac, &res, 4);
    /* Cast each byte to u32 before shifting.  Without the cast, ac[0] is
    ** promoted to a signed int and ac[0]<<24 overflows when the high bit
    ** of the byte is set.
    */
    res = ((u32)ac[0]<<24) | ((u32)ac[1]<<16) | ((u32)ac[2]<<8) | (u32)ac[3];
  }
  *pRes = res;
  return rc;
}

/*
** Write the 32-bit integer val to the given file descriptor.  Return
** SQLITE_OK on success or an error code if something goes wrong.
**
** For journal formats 2 and 3 the integer is written as 4 big-endian
** bytes.  For journal format 1 it is written in the native byte order
** of the host machine.  In normal operation, only formats 2 and 3 are
** used.  Journal format 1 is only used for testing.
*/
static int write32bits(OsFile *fd, u32 val){
  unsigned char aBuf[4];
  int i;
  if( journal_format<=1 ){
    return sqliteOsWrite(fd, &val, 4);
  }
  /* Fill the buffer from the least significant byte backwards so that
  ** aBuf[0] ends up holding the most significant byte.
  */
  for(i=3; i>=0; i--){
    aBuf[i] = val & 0xff;
    val >>= 8;
  }
  return sqliteOsWrite(fd, aBuf, 4);
}

/*
** Write a 32-bit integer into a page header right before the
** page data.  This will overwrite the PgHdr.pDirty pointer.
**
** The integer is big-endian for formats 2 and 3 and native byte order
** for journal format 1.
*/
static void store32bits(u32 val, PgHdr *p, int offset){
  unsigned char *ac;
  ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
  if( journal_format<=1 ){
    memcpy(ac, &val, 4);
  }else{
465
466
467
468
469
470
471




472
473
474
475
476
477
478
    pPager->state = SQLITE_UNLOCK;
  }
  return rc;
}

/*
** Return the checksum value associated with page number pgno.
**
** None of the page content is examined.  The value returned is simply
** pPager->cksumInit plus the page number; the aData argument is unused.
*/
static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
  return pPager->cksumInit + pgno;
}

/*







>
>
>
>







479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
    pPager->state = SQLITE_UNLOCK;
  }
  return rc;
}

/*
** Return the checksum value associated with page number pgno.
**
** This is not a real checksum.  None of the page content is examined:
** the value is just the sum of the random initial value
** (pPager->cksumInit) and the page number, and the aData argument is
** unused.  Doing a checksum of the database was considered, but that
** was found to be too slow.
*/
static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
  return pPager->cksumInit + pgno;
}

/*
533
534
535
536
537
538
539
540









541
542
543
544

545
























546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
  return rc;
}

/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.  
**
** The journal file format is as follows:  There is an initial









** file-type string for sanity checking.  Then there is a single
** Pgno number which is the number of pages in the database before
** changes were made.  The database is truncated to this size.
** Next come zero or more page records where each page record

** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data.  See
























** the PageRecord structure for details.
**
** If the file opened as the journal file is not a well-formed
** journal file (as determined by looking at the magic number
** at the beginning) then this routine returns SQLITE_PROTOCOL.
** If any other errors occur during playback, the database will
** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
** pPager->errMask and SQLITE_CORRUPT is returned.  If it all
** works, then this routine returns SQLITE_OK.
*/
static int pager_playback(Pager *pPager, int useJournalSize){
  off_t szJ;               /* Size of the journal file in bytes */
  int nRec;                /* Number of Records in the journal */
  int i;                   /* Loop counter */
  Pgno mxPg = 0;           /* Size of the original file in pages */
  int format;              /* Format of the journal file. */







|
>
>
>
>
>
>
>
>
>
|
|
|
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|


<
<
|
|
|
|







551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600


601
602
603
604
605
606
607
608
609
610
611
  return rc;
}

/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.  
**
** The journal file format is as follows: 
**
**    *  8 byte prefix.  One of the aJournalMagic123 vectors defined
**       above.  The format of the journal file is determined by which
**       of the three prefix vectors is seen.
**    *  4 byte big-endian integer which is the number of valid page records
**       in the journal.  If this value is 0xffffffff, then compute the
**       number of page records from the journal size.  This field appears
**       in format 3 only.
**    *  4 byte big-endian integer which is the initial value for the 
**       sanity checksum.  This field appears in format 3 only.
**    *  4 byte integer which is the number of pages to truncate the
**       database to during a rollback.
**    *  Zero or more page instances, each as follows:
**        +  4 byte page number.
**        +  SQLITE_PAGE_SIZE bytes of data.
**        +  4 byte checksum (format 3 only)
**
** When we speak of the journal header, we mean the first 4 bullets above.
** Each entry in the journal is an instance of the 5th bullet.  Note that
** bullets 2 and 3 only appear in format-3 journals.
**
** Call the value from the second bullet "nRec".  nRec is the number of
** valid page entries in the journal.  In most cases, you can compute the
** value of nRec from the size of the journal file.  But if a power
** failure occurred while the journal was being written, it could be the
** case that the size of the journal file had already been increased but
** the extra entries had not yet made it safely to disk.  In such a case,
** the value of nRec computed from the file size would be too large.  For
** that reason, we always use the nRec value in the header.
**
** If the nRec value is 0xffffffff it means that nRec should be computed
** from the file size.  This value is used when the user selects the
** no-sync option for the journal.  A power failure could lead to corruption
** in this case.  But for things like temporary tables (which will be
** deleted when the power is restored) we don't care.  
**
** Journal formats 1 and 2 do not have an nRec value in the header so we
** have to compute nRec from the file size.  This has risks (as described
** above) which is why all persistent tables have been changed to use
** format 3.
**
** If the file opened as the journal file is not a well-formed


** journal file then the database will likely already be
** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
** and SQLITE_CORRUPT is returned.  If it all works, then this routine
** returns SQLITE_OK.
*/
static int pager_playback(Pager *pPager, int useJournalSize){
  off_t szJ;               /* Size of the journal file in bytes */
  int nRec;                /* Number of Records in the journal */
  int i;                   /* Loop counter */
  Pgno mxPg = 0;           /* Size of the original file in pages */
  int format;              /* Format of the journal file. */
569
570
571
572
573
574
575

576


577
578
579
580
581
582
583
  sqliteOsSeek(&pPager->jfd, 0);
  rc = sqliteOsFileSize(&pPager->jfd, &szJ);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }

  /* If the journal file is too small to contain a complete header,

  ** then ignore the journal completely.


  */
  if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
    goto end_playback;
  }

  /* Read the beginning of the journal and truncate the
  ** database file back to its original size.







>
|
>
>







619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
  sqliteOsSeek(&pPager->jfd, 0);
  rc = sqliteOsFileSize(&pPager->jfd, &szJ);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }

  /* If the journal file is too small to contain a complete header,
  ** it must mean that the process that created the journal was just
  ** beginning to write the journal file when it died.  In that case,
  ** the database file should have still been completely unchanged.
  ** Nothing needs to be rolled back.  We can safely ignore this journal.
  */
  if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
    goto end_playback;
  }

  /* Read the beginning of the journal and truncate the
  ** database file back to its original size.
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
  }
  if( format>=JOURNAL_FORMAT_3 ){
    if( szJ < sizeof(aMagic) + 3*sizeof(u32) ){
      /* Ignore the journal if it is too small to contain a complete
      ** header.  We already did this test once above, but at the prior
      ** test, we did not know the journal format and so we had to assume
      ** the smallest possible header.  Now we know the header is bigger
      ** than that so we test again.
      */
      goto end_playback;
    }
    rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
    if( rc ) goto end_playback;
    rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
    if( rc ) goto end_playback;







|







652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
  }
  if( format>=JOURNAL_FORMAT_3 ){
    if( szJ < sizeof(aMagic) + 3*sizeof(u32) ){
      /* Ignore the journal if it is too small to contain a complete
      ** header.  We already did this test once above, but at the prior
      ** test, we did not know the journal format and so we had to assume
      ** the smallest possible header.  Now we know the header is bigger
      ** than the minimum so we test again.
      */
      goto end_playback;
    }
    rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
    if( rc ) goto end_playback;
    rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
    if( rc ) goto end_playback;
781
782
783
784
785
786
787
788

789
790
791
792
793
794
795
796
**              database.  This is normally adequate protection, but
**              it is theoretically possible, though very unlikely,
**              that an inopportune power failure could leave the journal
**              in a state which would cause damage to the database
**              when it is rolled back.
**
**    FULL      The journal is synced twice before writes begin on the
**              database (with some additional information being written

**              in between the two syncs).  If we assume that writing a
**              single disk sector is atomic, then this mode provides
**              assurance that the journal will not be corrupted to the
**              point of causing damage to the database during rollback.
**
** Numeric values associated with these states are OFF==1, NORMAL=2,
** and FULL=3.
*/







|
>
|







834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
**              database.  This is normally adequate protection, but
**              it is theoretically possible, though very unlikely,
**              that an inopportune power failure could leave the journal
**              in a state which would cause damage to the database
**              when it is rolled back.
**
**    FULL      The journal is synced twice before writes begin on the
**              database (with some additional information - the nRec field
**              of the journal header - being written in between the two
**              syncs).  If we assume that writing a
**              single disk sector is atomic, then this mode provides
**              assurance that the journal will not be corrupted to the
**              point of causing damage to the database during rollback.
**
** Numeric values associated with these states are OFF==1, NORMAL=2,
** and FULL=3.
*/
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
  }
  return n;
}

/*
** Forward declaration
*/
static int syncAllPages(Pager*);

/*
** Truncate the database file to nPage pages.
**
** If the pager already has an error recorded in errMask, the matching
** error code is returned and nothing else is done.  If nPage is not
** smaller than the current database size, nothing needs to be done and
** SQLITE_OK is returned.
**
** syncAllPages() is called before the file is truncated.  If it fails,
** its error code is returned and the database file is not truncated.
** Otherwise the result of sqliteOsTruncate() is returned, and on
** success pPager->dbSize is updated to nPage.
*/
int sqlitepager_truncate(Pager *pPager, Pgno nPage){
  int rc;
  if( pPager->dbSize<0 ){
    /* NOTE(review): a negative dbSize presumably means the size is not
    ** yet known and sqlitepager_pagecount() fills it in - confirm. */
    sqlitepager_pagecount(pPager);
  }
  if( pPager->errMask!=0 ){
    rc = pager_errcode(pPager);
    return rc;
  }
  if( nPage>=(unsigned)pPager->dbSize ){
    return SQLITE_OK;
  }
  /* Do not shrink the database file unless the preceding sync worked. */
  rc = syncAllPages(pPager);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
  if( rc==SQLITE_OK ){
    pPager->dbSize = nPage;
  }
  return rc;
}








|
















|







996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
  }
  return n;
}

/*
** Forward declaration
*/
static int syncJournal(Pager*);

/*
** Truncate the database file to nPage pages.
**
** If the pager already has an error recorded in errMask, the matching
** error code is returned and nothing else is done.  If nPage is not
** smaller than the current database size, nothing needs to be done and
** SQLITE_OK is returned.
**
** The journal is synced before the file is truncated.  If the sync
** fails, its error code is returned and the database file is not
** truncated.  Otherwise the result of sqliteOsTruncate() is returned,
** and on success pPager->dbSize is updated to nPage.
*/
int sqlitepager_truncate(Pager *pPager, Pgno nPage){
  int rc;
  if( pPager->dbSize<0 ){
    /* NOTE(review): a negative dbSize presumably means the size is not
    ** yet known and sqlitepager_pagecount() fills it in - confirm. */
    sqlitepager_pagecount(pPager);
  }
  if( pPager->errMask!=0 ){
    rc = pager_errcode(pPager);
    return rc;
  }
  if( nPage>=(unsigned)pPager->dbSize ){
    return SQLITE_OK;
  }
  /* Do not shrink the database file unless the journal sync worked. */
  rc = syncJournal(pPager);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
  if( rc==SQLITE_OK ){
    pPager->dbSize = nPage;
  }
  return rc;
}

1065
1066
1067
1068
1069
1070
1071
1072






1073




1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085

1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100



1101
1102
1103
1104
1105
1106
1107
1108
1109

1110
1111
1112
1113
1114
1115
1116
int sqlitepager_ref(void *pData){
  PgHdr *pHdr;
  /* Convert the page-data pointer into its PgHdr and hand that header
  ** to page_ref().  This routine always returns SQLITE_OK.
  */
  pHdr = DATA_TO_PGHDR(pData);
  page_ref(pHdr);
  return SQLITE_OK;
}

/*
** Sync the journal and then write all free dirty pages to the database






** file.




**
** Writing all free dirty pages to the database after the sync is a
** non-obvious optimization.  fsync() is an expensive operation so we
** want to minimize the number of times it is called. After an fsync() call,
** we are free to write dirty pages back to the database.  It is best
** to go ahead and write as many dirty pages as possible to minimize 
** the risk of having to do another fsync() later on.  Writing dirty
** free pages in this way was observed to make database operations go
** up to 10 times faster.
**
** If we are writing to a temporary database, there is no need to preserve
** the integrity of the journal file, so we can save time and skip the

** fsync().
*/
static int syncAllPages(Pager *pPager){
  PgHdr *pPg;
  int rc = SQLITE_OK;

  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync ){
    if( !pPager->tempFile ){
      assert( pPager->journalOpen );
      assert( !pPager->noSync );
#ifndef NDEBUG
      {



        off_t hdrSz, pgSz, jSz;
        hdrSz = JOURNAL_HDR_SZ(journal_format);
        pgSz = JOURNAL_PG_SZ(journal_format);
        rc = sqliteOsFileSize(&pPager->jfd, &jSz);
        if( rc!=0 ) return rc;
        assert( pPager->nRec*pgSz+hdrSz==jSz );
      }
#endif
      if( journal_format>=3 ){

        off_t szJ;
        if( pPager->fullSync ){
          TRACE1("SYNC\n");
          rc = sqliteOsSync(&pPager->jfd);
          if( rc!=0 ) return rc;
        }
        sqliteOsSeek(&pPager->jfd, sizeof(aJournalMagic1));







|
>
>
>
>
>
>
|
>
>
>
>

|
<
|
<
<
<
<
<

<
<
>
|

|












>
>
>









>







1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139

1140





1141


1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
int sqlitepager_ref(void *pData){
  PgHdr *pHdr;
  /* Convert the page-data pointer into its PgHdr and hand that header
  ** to page_ref().  This routine always returns SQLITE_OK.
  */
  pHdr = DATA_TO_PGHDR(pData);
  page_ref(pHdr);
  return SQLITE_OK;
}

/*
** Sync the journal.  In other words, make sure all the pages that have
** been written to the journal have actually reached the surface of the
** disk.  It is not safe to modify the original database file until after
** the journal has been synced.  If the original database is modified before
** the journal is synced and a power failure occurs, the unsynced journal
** data would be lost and we would be unable to completely rollback the
** database changes.  Database corruption would occur.
** 
** This routine also updates the nRec field in the header of the journal.
** (See comments on the pager_playback() routine for additional information.)
** If the sync mode is FULL, two syncs will occur.  First the whole journal
** is synced, then the nRec field is updated, then a second sync occurs.
**
** For temporary databases, we do not care if we are able to rollback

** after a power failure, so no sync occurs.





**


** This routine clears the needSync field of every page currently held in
** memory.
*/
static int syncJournal(Pager *pPager){
  PgHdr *pPg;
  int rc = SQLITE_OK;

  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync ){
    if( !pPager->tempFile ){
      assert( pPager->journalOpen );
      assert( !pPager->noSync );
#ifndef NDEBUG
      {
        /* Make sure the pPager->nRec counter we are keeping agrees
        ** with the nRec computed from the size of the journal file.
        */
        off_t hdrSz, pgSz, jSz;
        hdrSz = JOURNAL_HDR_SZ(journal_format);
        pgSz = JOURNAL_PG_SZ(journal_format);
        rc = sqliteOsFileSize(&pPager->jfd, &jSz);
        if( rc!=0 ) return rc;
        assert( pPager->nRec*pgSz+hdrSz==jSz );
      }
#endif
      if( journal_format>=3 ){
        /* Write the nRec value into the journal file header */
        off_t szJ;
        if( pPager->fullSync ){
          TRACE1("SYNC\n");
          rc = sqliteOsSync(&pPager->jfd);
          if( rc!=0 ) return rc;
        }
        sqliteOsSeek(&pPager->jfd, sizeof(aJournalMagic1));
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327

      /* If we could not find a page that does not require an fsync()
      ** on the journal file then fsync the journal file.  This is a
      ** very slow operation, so we work hard to avoid it.  But sometimes
      ** it can't be helped.
      */
      if( pPg==0 ){
        int rc = syncAllPages(pPager);
        if( rc!=0 ){
          sqlitepager_rollback(pPager);
          return SQLITE_IOERR;
        }
        pPg = pPager->pFirst;
      }
      assert( pPg->nRef==0 );







|







1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388

      /* If we could not find a page that does not require an fsync()
      ** on the journal file then fsync the journal file.  This is a
      ** very slow operation, so we work hard to avoid it.  But sometimes
      ** it can't be helped.
      */
      if( pPg==0 ){
        int rc = syncJournal(pPager);
        if( rc!=0 ){
          sqlitepager_rollback(pPager);
          return SQLITE_IOERR;
        }
        pPg = pPager->pFirst;
      }
      assert( pPg->nRef==0 );
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
    ** if there have been no changes to the database file. */
    assert( pPager->needSync==0 );
    rc = pager_unwritelock(pPager);
    pPager->dbSize = -1;
    return rc;
  }
  assert( pPager->journalOpen );
  rc = syncAllPages(pPager);
  if( rc!=SQLITE_OK ){
    goto commit_abort;
  }
  pPg = pager_get_all_dirty_pages(pPager);
  if( pPg ){
    rc = pager_write_pagelist(pPg);
    if( rc || (!pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK) ){







|







1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
    ** if there have been no changes to the database file. */
    assert( pPager->needSync==0 );
    rc = pager_unwritelock(pPager);
    pPager->dbSize = -1;
    return rc;
  }
  assert( pPager->journalOpen );
  rc = syncJournal(pPager);
  if( rc!=SQLITE_OK ){
    goto commit_abort;
  }
  pPg = pager_get_all_dirty_pages(pPager);
  if( pPg ){
    rc = pager_write_pagelist(pPg);
    if( rc || (!pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK) ){