SQLite

Check-in [0e420f72cd]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Comment changes in pager.c. (CVS 1567)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0e420f72cd5885e32914b4d958bad811fdd9fb77
User & Date: drh 2004-06-10 23:35:50.000
Context
2004-06-11
10:51
Fix various collation sequence issues. (CVS 1568) (check-in: 66835ee670 user: danielk1977 tags: trunk)
2004-06-10
23:35
Comment changes in pager.c. (CVS 1567) (check-in: 0e420f72cd user: drh tags: trunk)
22:51
Add new contributed logo TIFF. (CVS 1566) (check-in: 86744c9aca user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.122 2004/06/10 05:59:25 danielk1977 Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>








|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.123 2004/06/10 23:35:50 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

55
56
57
58
59
60
61
62


63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

82
83
84
85
86
87
88
89
**                       state.
**
**   PAGER_SHARED        The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   PAGER_RESERVED      Writing is permitted to the page cache only.


**                       The original database file has not been modified.
**                       Other processes may still be reading the on-disk
**                       database file.
**
**   PAGER_EXCLUSIVE     The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
** The page cache comes up in PAGER_UNLOCK.  The first time a
** sqlite_page_get() occurs, the state transitions to PAGER_SHARED.
** After all pages have been released using sqlite_page_unref(),
** the state transitions back to PAGER_UNLOCK.  The first time
** that sqlite_page_write() is called, the state transitions to
** PAGER_RESERVED.  (Note that sqlite_page_write() can only be
** called on an outstanding page which means that the pager must
** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
** The sqlite_page_rollback() and sqlite_page_commit() functions 
** transition the state from PAGER_RESERVED to PAGER_EXCLUSIVE to

** PAGER_SHARED.
*/
#define PAGER_UNLOCK      0  /* State in which the page cache comes up */
#define PAGER_SHARED      1  /* Reading the database; writing not permitted */
#define PAGER_RESERVED    2  /* Writing permitted to the page cache only */
#define PAGER_EXCLUSIVE   3  /* Writing the database; access is exclusive */









|
>
>
|
|








|


|



<
|
>
|







55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

82
83
84
85
86
87
88
89
90
91
**                       state.
**
**   PAGER_SHARED        The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   PAGER_RESERVED      This process has reserved the database for writing
**                       but has not yet made any changes.  Only one process
**                       at a time can reserve the database.  The original
**                       database file has not been modified so other
**                       processes may still be reading the on-disk
**                       database file.
**
**   PAGER_EXCLUSIVE     The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
** The page cache comes up in PAGER_UNLOCK.  The first time a
** sqlite3pager_get() occurs, the state transitions to PAGER_SHARED.
** After all pages have been released using sqlite3pager_unref(),
** the state transitions back to PAGER_UNLOCK.  The first time
** that sqlite3pager_write() is called, the state transitions to
** PAGER_RESERVED.  (Note that sqlite3pager_write() can only be
** called on an outstanding page which means that the pager must
** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)

** The transition to PAGER_EXCLUSIVE occurs before any changes
** are made to the database file.  After an sqlite3pager_rollback()
** or sqlite3pager_commit(), the state goes back to PAGER_SHARED.
*/
#define PAGER_UNLOCK      0  /* State in which the page cache comes up */
#define PAGER_SHARED      1  /* Reading the database; writing not permitted */
#define PAGER_RESERVED    2  /* Reserved for writing; no changes made yet */
#define PAGER_EXCLUSIVE   3  /* Writing the database; access is exclusive */


211
212
213
214
215
216
217


218
219
220
221
222
223
224
225
226
227
228
229
230
231
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyCache;              /* True if cached pages have changed */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 memDb;                   /* True to inhibit all file I/O */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInStmt;                /* One bit for each page in the database */


  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pStmt;               /* List of pages in the statement subjournal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
  int nMaster;                /* Number of bytes to reserve for master j.p */
  BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
};

/*
** These are bits that can be set in Pager.errMask.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */







>
>




|
<
<







213
214
215
216
217
218
219
220
221
222
223
224
225
226


227
228
229
230
231
232
233
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyCache;              /* True if cached pages have changed */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 memDb;                   /* True to inhibit all file I/O */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInStmt;                /* One bit for each page in the database */
  int nMaster;                /* Number of bytes to reserve for master j.p */
  BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pStmt;               /* List of pages in the statement subjournal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number to PgHdr */


};

/*
** These are bits that can be set in Pager.errMask.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
** is different for every journal, we minimize that risk.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd7,   /* eight bytes in total */
};

/*
** The size of the header and of each page in the journal varies according
** to which journal format is being used.  The following macros figure out
** the sizes based on format numbers.
*/
/* Both macros parenthesize their argument so that any pointer-valued
** expression, not just a plain identifier, may be passed safely. */
#define JOURNAL_HDR_SZ(pPager) (24 + (pPager)->nMaster)
#define JOURNAL_PG_SZ(pPager)  (((pPager)->pageSize) + 8)


/*
** Enable reference count tracking here:
*/
#ifdef SQLITE_TEST
  int pager3_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager3_refinfo_enable ) return;
    printf(







|
|
<






|







259
260
261
262
263
264
265
266
267

268
269
270
271
272
273
274
275
276
277
278
279
280
281
** is different for every journal, we minimize that risk.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd7,   /* eight bytes in total */
};

/*
** The size of the header and of each page in the journal is determined
** by the following macros.

*/
/* Both macros parenthesize their argument so that any pointer-valued
** expression, not just a plain identifier, may be passed safely. */
#define JOURNAL_HDR_SZ(pPager) (24 + (pPager)->nMaster)
#define JOURNAL_PG_SZ(pPager)  (((pPager)->pageSize) + 8)


/*
** Enable reference count tracking (for debugging) here:
*/
#ifdef SQLITE_TEST
  int pager3_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager3_refinfo_enable ) return;
    printf(
288
289
290
291
292
293
294


295
296
297
298
299
300
301
# define REFINFO(X)
#endif

/*
** Read a 32-bit integer from the given file descriptor.  Store the integer
** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
** error code if something goes wrong.


*/
static int read32bits(OsFile *fd, u32 *pRes){
  u32 res;
  int rc;
  rc = sqlite3OsRead(fd, &res, sizeof(res));
  if( rc==SQLITE_OK ){
    unsigned char ac[4];







>
>







289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
# define REFINFO(X)
#endif

/*
** Read a 32-bit integer from the given file descriptor.  Store the integer
** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
** error code if something goes wrong.
**
** All values are stored on disk as big-endian.
*/
static int read32bits(OsFile *fd, u32 *pRes){
  u32 res;
  int rc;
  rc = sqlite3OsRead(fd, &res, sizeof(res));
  if( rc==SQLITE_OK ){
    unsigned char ac[4];
465
466
467
468
469
470
471
472
473













474
475
476
477
478
479
480
481
482
483
484
485
486
487

488
489
490
491
492
493
494
  return SQLITE_OK;
}

/*
** Compute and return a checksum for the page of data.
**
** This is not a real checksum.  It is really just the sum of the 
** random initial value and the page number.  We considered doing a checksum
** of the database, but that was found to be too slow.













*/
static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
  /* Only pPager->cksumInit and the page number are summed; the page
  ** content in aData plays no part in the result. */
  return pPager->cksumInit + pgno;
}

/*
** Read a single page from the journal file opened on file descriptor
** jfd.  Playback this one page.
**
** 
** 
** There are three different journal formats.  The format parameter determines
** which format is used by the journal that is played back.

*/
static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
  int rc;
  PgHdr *pPg;                   /* An existing page in the cache */
  Pgno pgno;                    /* The page number of a page in journal */
  u32 cksum;                    /* Checksum used for sanity checking */
  u8 aData[SQLITE_PAGE_SIZE];   /* Store data here */







|
|
>
>
>
>
>
>
>
>
>
>
>
>
>










<
<
|
|
>







468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499


500
501
502
503
504
505
506
507
508
509
  return SQLITE_OK;
}

/*
** Compute and return a checksum for the page of data.
**
** This is not a real checksum.  It is really just the sum of the 
** random initial value and the page number.  We experimented with
** a checksum of the entire data, but that was found to be too slow.
**
** Note that the page number is stored at the beginning of data and
** the checksum is stored at the end.  This is important.  If journal
** corruption occurs due to a power failure, the most likely scenario
** is that one end or the other of the record will be changed.  It is
** much less likely that the two ends of the journal record will be
** correct and the middle be corrupt.  Thus, this "checksum" scheme,
** though fast and simple, catches the most likely kind of corruption.
**
** FIX ME:  Consider adding every 200th (or so) byte of the data to the
** checksum.  That way if a single page spans 3 or more disk sectors and
** only the middle sector is corrupt, we will still have a reasonable
** chance of failing the checksum and thus detecting the problem.
*/
static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
  /* Only pPager->cksumInit and the page number are summed; the page
  ** content in aData plays no part in the result. */
  return pPager->cksumInit + pgno;
}

/*
** Read a single page from the journal file opened on file descriptor
** jfd.  Playback this one page.
**


** If useCksum==0 it means this journal does not use checksums.  Checksums
** are not used in statement journals because statement journals do not
** need to survive power failures.
*/
static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
  int rc;
  PgHdr *pPg;                   /* An existing page in the cache */
  Pgno pgno;                    /* The page number of a page in journal */
  u32 cksum;                    /* Checksum used for sanity checking */
  u8 aData[SQLITE_PAGE_SIZE];   /* Store data here */
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566





567
568
569
570
571
572
573
    ** 1 which is held in use in order to keep the lock on the database
    ** active.
    */
    void *pData;
    assert( pPg->nRef==0 || pPg->pgno==1 );
    pData = PGHDR_TO_DATA(pPg);
    memcpy(pData, aData, pPager->pageSize);
    if( pPager->xDestructor ){
      pPager->xDestructor(pData, pPager->pageSize);
    }
    if( pPager->state==PAGER_EXCLUSIVE ){
      pPg->dirty = 0;
      pPg->needSync = 0;
    }

    CODEC(pPager, pData, pPg->pgno, 3);
  }
  return rc;
}

/*
** Parameter zMaster is the name of a master journal file. A single journal
** file that referred to the master journal file has just been rolled back.
** This routine checks if it is possible to delete the master journal file,
** and does so if it is.





*/
static int pager_delmaster(const char *zMaster){
  int rc;
  int master_open = 0;
  OsFile master;
  char *zMasterJournal = 0; /* Contents of master journal file */
  off_t nMasterJournal;     /* Size of master journal file */







|






<










>
>
>
>
>







557
558
559
560
561
562
563
564
565
566
567
568
569
570

571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
    ** 1 which is held in use in order to keep the lock on the database
    ** active.
    */
    void *pData;
    assert( pPg->nRef==0 || pPg->pgno==1 );
    pData = PGHDR_TO_DATA(pPg);
    memcpy(pData, aData, pPager->pageSize);
    if( pPager->xDestructor ){  /*** FIX ME:  Should this be xReinit? ***/
      pPager->xDestructor(pData, pPager->pageSize);
    }
    if( pPager->state==PAGER_EXCLUSIVE ){
      pPg->dirty = 0;
      pPg->needSync = 0;
    }

    CODEC(pPager, pData, pPg->pgno, 3);
  }
  return rc;
}

/*
** Parameter zMaster is the name of a master journal file. A single journal
** file that referred to the master journal file has just been rolled back.
** This routine checks if it is possible to delete the master journal file,
** and does so if it is.
**
** The master journal file contains the names of all child journals.
** To tell if a master journal can be deleted, check each of the
** children.  If all children are either missing or refer to
** a different master journal, then this master journal can be deleted.
*/
static int pager_delmaster(const char *zMaster){
  int rc;
  int master_open = 0;
  OsFile master;
  char *zMasterJournal = 0; /* Contents of master journal file */
  off_t nMasterJournal;     /* Size of master journal file */
591
592
593
594
595
596
597


598
599
600
601
602
603
604
    }
    rc = sqlite3OsRead(&master, zMasterJournal, nMasterJournal);
    if( rc!=SQLITE_OK ) goto delmaster_out;

    zDb = zMasterJournal;
    while( (zDb-zMasterJournal)<nMasterJournal ){
      char *zJournal = 0;


      sqlite3SetString(&zJournal, zDb, "-journal", 0);
      if( !zJournal ){
        rc = SQLITE_NOMEM;
        goto delmaster_out;
      }
      if( sqlite3OsFileExists(zJournal) ){
        /* One of the journals pointed to by the master journal exists.







>
>







610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
    }
    rc = sqlite3OsRead(&master, zMasterJournal, nMasterJournal);
    if( rc!=SQLITE_OK ) goto delmaster_out;

    zDb = zMasterJournal;
    while( (zDb-zMasterJournal)<nMasterJournal ){
      char *zJournal = 0;
      /*** FIX ME:  Store the full journal name in the master journal,
      **** not just the base database name. ***/
      sqlite3SetString(&zJournal, zDb, "-journal", 0);
      if( !zJournal ){
        rc = SQLITE_NOMEM;
        goto delmaster_out;
      }
      if( sqlite3OsFileExists(zJournal) ){
        /* One of the journals pointed to by the master journal exists.
634
635
636
637
638
639
640


641
642
643
644
645
646
647
        ** from malloc.
        */
        rc = sqlite3OsSeek(&journal, 20);
        if( rc!=SQLITE_OK ) goto delmaster_out;
        rc = read32bits(&journal, (u32*)&nMaster);
        if( rc!=SQLITE_OK ) goto delmaster_out;
        if( nMaster>0 && nMaster>=strlen(zMaster)+1 ){


          char *zMasterPtr = (char *)sqliteMalloc(nMaster);
          if( !zMasterPtr ){
            rc = SQLITE_NOMEM;
          }
          rc = sqlite3OsRead(&journal, zMasterPtr, nMaster);
          if( rc!=SQLITE_OK ){
            sqliteFree(zMasterPtr);







>
>







655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
        ** from malloc.
        */
        rc = sqlite3OsSeek(&journal, 20);
        if( rc!=SQLITE_OK ) goto delmaster_out;
        rc = read32bits(&journal, (u32*)&nMaster);
        if( rc!=SQLITE_OK ) goto delmaster_out;
        if( nMaster>0 && nMaster>=strlen(zMaster)+1 ){
          /*** FIX ME: Consider allocating this space at the same time
          **** space is allocated for holding the text of the master journal */
          char *zMasterPtr = (char *)sqliteMalloc(nMaster);
          if( !zMasterPtr ){
            rc = SQLITE_NOMEM;
          }
          rc = sqlite3OsRead(&journal, zMasterPtr, nMaster);
          if( rc!=SQLITE_OK ){
            sqliteFree(zMasterPtr);