SQLite

Check-in [3a3e8eb25d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Changes to support medium sector sizes larger than the database page size. (CVS 3701)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 3a3e8eb25d8e04cfccc5c0513ed04efcc25d3dbf
User & Date: danielk1977 2007-03-19 11:25:20.000
Context
2007-03-19
11:54
Fix a comment in btree.c (CVS 3702) (check-in: 05700c11a9 user: drh tags: trunk)
11:25
Changes to support medium sector sizes larger than the database page size. (CVS 3701) (check-in: 3a3e8eb25d user: danielk1977 tags: trunk)
05:54
Add new OS file method to return the sector-size of the underlying storage: sqlite3OsSectorSize() (CVS 3700) (check-in: 5752d84d37 user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btree.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.338 2007/03/06 15:53:44 drh Exp $
**
** This file implements an external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.











|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.339 2007/03/19 11:25:20 danielk1977 Exp $
**
** This file implements an external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
4027
4028
4029
4030
4031
4032
4033
4034
4035

4036
4037
4038
4039
4040
4041
4042

  assert( pNewParent!=0 );
  if( pgno==0 ) return SQLITE_OK;
  assert( pBt->pPager!=0 );
  aData = sqlite3pager_lookup(pBt->pPager, pgno);
  if( aData ){
    pThis = (MemPage*)&aData[pBt->pageSize];
    assert( pThis->aData==aData );
    if( pThis->isInit ){

      if( pThis->pParent!=pNewParent ){
        if( pThis->pParent ) sqlite3pager_unref(pThis->pParent->aData);
        pThis->pParent = pNewParent;
        sqlite3pager_ref(pNewParent->aData);
      }
      pThis->idxParent = idx;
    }







<

>







4027
4028
4029
4030
4031
4032
4033

4034
4035
4036
4037
4038
4039
4040
4041
4042

  assert( pNewParent!=0 );
  if( pgno==0 ) return SQLITE_OK;
  assert( pBt->pPager!=0 );
  aData = sqlite3pager_lookup(pBt->pPager, pgno);
  if( aData ){
    pThis = (MemPage*)&aData[pBt->pageSize];

    if( pThis->isInit ){
      assert( pThis->aData==aData );
      if( pThis->pParent!=pNewParent ){
        if( pThis->pParent ) sqlite3pager_unref(pThis->pParent->aData);
        pThis->pParent = pNewParent;
        sqlite3pager_ref(pNewParent->aData);
      }
      pThis->idxParent = idx;
    }
Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.289 2007/03/19 05:54:49 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
#include "os.h"
#include "pager.h"
#include <assert.h>
#include <string.h>







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.290 2007/03/19 11:25:20 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
#include "os.h"
#include "pager.h"
#include <assert.h>
#include <string.h>
281
282
283
284
285
286
287

288
289
290
291
292
293
294
  void *pCodecArg;            /* First argument to xCodec() */
  int nHash;                  /* Size of the pager hash table */
  PgHdr **aHash;              /* Hash table to map page number to PgHdr */
#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  Pager *pNext;               /* Linked list of pagers in this thread */
#endif
  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */

};

/*
** If SQLITE_TEST is defined then increment the variable given in
** the argument
*/
#ifdef SQLITE_TEST







>







281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
  void *pCodecArg;            /* First argument to xCodec() */
  int nHash;                  /* Size of the pager hash table */
  PgHdr **aHash;              /* Hash table to map page number to PgHdr */
#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  Pager *pNext;               /* Linked list of pagers in this thread */
#endif
  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
  int doNotSync;
};

/*
** If SQLITE_TEST is defined then increment the variable given in
** the argument
*/
#ifdef SQLITE_TEST
624
625
626
627
628
629
630

631




632
633
634
635
636
637
638
639
640
641
642
643
644
645
** - 4 bytes: Initial database page count.
** - 4 bytes: Sector size used by the process that wrote this journal.
** 
** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space.
*/
static int writeJournalHdr(Pager *pPager){
  char zHeader[sizeof(aJournalMagic)+16];






  int rc = seekJournalHdr(pPager);
  if( rc ) return rc;

  pPager->journalHdr = pPager->journalOff;
  if( pPager->stmtHdrOff==0 ){
    pPager->stmtHdrOff = pPager->journalHdr;
  }
  pPager->journalOff += JOURNAL_HDR_SZ(pPager);

  /* FIX ME: 
  **
  ** Possibly for a pager not in no-sync mode, the journal magic should not
  ** be written until nRec is filled in as part of next syncJournal(). 
  **







>

>
>
>
>
|



<
<
<







625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641



642
643
644
645
646
647
648
** - 4 bytes: Initial database page count.
** - 4 bytes: Sector size used by the process that wrote this journal.
** 
** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space.
*/
static int writeJournalHdr(Pager *pPager){
  char zHeader[sizeof(aJournalMagic)+16];
  int rc;

  if( pPager->stmtHdrOff==0 ){
    pPager->stmtHdrOff = pPager->journalOff;
  }

  rc = seekJournalHdr(pPager);
  if( rc ) return rc;

  pPager->journalHdr = pPager->journalOff;



  pPager->journalOff += JOURNAL_HDR_SZ(pPager);

  /* FIX ME: 
  **
  ** Possibly for a pager not in no-sync mode, the journal magic should not
  ** be written until nRec is filled in as part of next syncJournal(). 
  **
1418
1419
1420
1421
1422
1423
1424
1425

1426
1427
1428
1429
1430
1431
1432
1433
    i64 os_szJ;
    rc = sqlite3OsFileSize(pPager->jfd, &os_szJ);
    if( rc!=SQLITE_OK ) return rc;
    assert( szJ==os_szJ );
  }
#endif

  /* Set hdrOff to be the offset to the first journal header written

  ** this statement transaction, or the end of the file if no journal
  ** header was written.
  */
  hdrOff = pPager->stmtHdrOff;
  assert( pPager->fullSync || !hdrOff );
  if( !hdrOff ){
    hdrOff = szJ;
  }







|
>
|







1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
    i64 os_szJ;
    rc = sqlite3OsFileSize(pPager->jfd, &os_szJ);
    if( rc!=SQLITE_OK ) return rc;
    assert( szJ==os_szJ );
  }
#endif

  /* Set hdrOff to be the offset just after the end of the last journal
  ** page written before the first journal-header for this statement
  ** transaction was written, or the end of the file if no journal
  ** header was written.
  */
  hdrOff = pPager->stmtHdrOff;
  assert( pPager->fullSync || !hdrOff );
  if( !hdrOff ){
    hdrOff = szJ;
  }
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
  */
  rc = sqlite3OsSeek(pPager->jfd, pPager->stmtJSize);
  if( rc!=SQLITE_OK ){
    goto end_stmt_playback;
  }
  pPager->journalOff = pPager->stmtJSize;
  pPager->cksumInit = pPager->stmtCksum;
  assert( JOURNAL_HDR_SZ(pPager)<(pPager->pageSize+8) );
  while( pPager->journalOff <= (hdrOff-(pPager->pageSize+8)) ){
    rc = pager_playback_one_page(pPager, pPager->jfd, 1);
    assert( rc!=SQLITE_DONE );
    if( rc!=SQLITE_OK ) goto end_stmt_playback;
  }

  while( pPager->journalOff < szJ ){
    u32 nJRec;         /* Number of Journal Records */







<
|







1471
1472
1473
1474
1475
1476
1477

1478
1479
1480
1481
1482
1483
1484
1485
  */
  rc = sqlite3OsSeek(pPager->jfd, pPager->stmtJSize);
  if( rc!=SQLITE_OK ){
    goto end_stmt_playback;
  }
  pPager->journalOff = pPager->stmtJSize;
  pPager->cksumInit = pPager->stmtCksum;

  while( pPager->journalOff < hdrOff ){
    rc = pager_playback_one_page(pPager, pPager->jfd, 1);
    assert( rc!=SQLITE_DONE );
    if( rc!=SQLITE_OK ) goto end_stmt_playback;
  }

  while( pPager->journalOff < szJ ){
    u32 nJRec;         /* Number of Journal Records */
2475
2476
2477
2478
2479
2480
2481

2482
2483
2484
2485
2486
2487
2488
      ** journal file. This is done to avoid ever modifying a journal
      ** header that is involved in the rollback of pages that have
      ** already been written to the database (in case the header is
      ** trashed when the nRec field is updated).
      */
      pPager->nRec = 0;
      assert( pPager->journalOff > 0 );

      rc = writeJournalHdr(pPager);
      if( rc!=0 ){
        return rc;
      }
    }
    pPg = pPager->pFirst;
  }







>







2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
      ** journal file. This is done to avoid ever modifying a journal
      ** header that is involved in the rollback of pages that have
      ** already been written to the database (in case the header is
      ** trashed when the nRec field is updated).
      */
      pPager->nRec = 0;
      assert( pPager->journalOff > 0 );
      assert( pPager->doNotSync==0 );
      rc = writeJournalHdr(pPager);
      if( rc!=0 ){
        return rc;
      }
    }
    pPg = pPager->pFirst;
  }
2727
2728
2729
2730
2731
2732
2733
2734


2735
2736
2737
2738
2739
2740
2741
      pPager->state = PAGER_SHARED;
    }
  }
  if( pPg==0 ){
    /* The requested page is not in the page cache. */
    int h;
    TEST_INCR(pPager->nMiss);
    if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 || MEMDB ){


      /* Create a new page */
      if( pPager->nPage>=pPager->nHash ){
        pager_resize_hash_table(pPager,
           pPager->nHash<256 ? 256 : pPager->nHash*2);
        if( pPager->nHash==0 ){
          return SQLITE_NOMEM;
        }







|
>
>







2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
      pPager->state = PAGER_SHARED;
    }
  }
  if( pPg==0 ){
    /* The requested page is not in the page cache. */
    int h;
    TEST_INCR(pPager->nMiss);
    if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 || MEMDB ||
        (pPager->pFirstSynced==0 && pPager->doNotSync)
    ){
      /* Create a new page */
      if( pPager->nPage>=pPager->nHash ){
        pager_resize_hash_table(pPager,
           pPager->nHash<256 ? 256 : pPager->nHash*2);
        if( pPager->nHash==0 ){
          return SQLITE_NOMEM;
        }
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
**
** If the journal file could not be written because the disk is full,
** then this routine returns SQLITE_FULL and does an immediate rollback.
** All subsequent write attempts also return SQLITE_FULL until there
** is a call to sqlite3pager_commit() or sqlite3pager_rollback() to
** reset.
*/
int sqlite3pager_write(void *pData){
  PgHdr *pPg = DATA_TO_PGHDR(pData);
  Pager *pPager = pPg->pPager;
  int rc = SQLITE_OK;

  /* Check for errors
  */
  if( pPager->errCode ){ 







|







3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
**
** If the journal file could not be written because the disk is full,
** then this routine returns SQLITE_FULL and does an immediate rollback.
** All subsequent write attempts also return SQLITE_FULL until there
** is a call to sqlite3pager_commit() or sqlite3pager_rollback() to
** reset.
*/
static int pager_write(void *pData){
  PgHdr *pPg = DATA_TO_PGHDR(pData);
  Pager *pPager = pPg->pPager;
  int rc = SQLITE_OK;

  /* Check for errors
  */
  if( pPager->errCode ){ 
3253
3254
3255
3256
3257
3258
3259







































































3260
3261
3262
3263
3264
3265
3266
    pPager->dbSize = pPg->pgno;
    if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
      pPager->dbSize++;
    }
  }
  return rc;
}








































































/*
** Return TRUE if the page given in the argument was previously passed
** to sqlite3pager_write().  In other words, return TRUE if it is ok
** to change the content of the page.
*/
#ifndef NDEBUG







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
    pPager->dbSize = pPg->pgno;
    if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
      pPager->dbSize++;
    }
  }
  return rc;
}

/*
** Mark the data-page pData as writable. The page is handed to
** pager_write(), which opens the journal file (if it is not already
** open) and writes the page to it.
**
** On top of what pager_write() does, this routine handles the case
** where a single disk sector holds 2 or more database pages. When that
** is so, every page that shares a sector with pData is also passed
** through pager_write(), so that all co-resident pages have been
** journaled by the time this function returns.
**
** The return value is SQLITE_OK, or the first error code reported by
** sqlite3pager_get() or pager_write().
*/
int sqlite3pager_write(void *pData){
  PgHdr *pPg = DATA_TO_PGHDR(pData);
  Pager *pPager = pPg->pPager;
  Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
  Pgno nPageCount;            /* Total number of pages in database file */
  Pgno pg1;                   /* First page of the sector pPg is located on */
  int nPage;                  /* Number of pages starting at pg1 to journal */
  int ii;                     /* Offset from pg1 of the page being visited */
  int rc = SQLITE_OK;

  /* Simple case: an in-memory database, or a sector that holds no more
  ** than one page. Only the requested page needs to be journaled.
  */
  if( MEMDB || nPagePerSector<=1 ){
    return pager_write(pData);
  }

  /* While doNotSync is set no journal header may be written. A header
  ** must not end up between the pages journaled below.
  */
  assert( pPager->doNotSync==0 );
  pPager->doNotSync = 1;

  /* Round (pgno-1) down to a multiple of nPagePerSector and add 1 to get
  ** the first page of the sector. The mask trick relies on both the
  ** page-size and the sector-size being an integer power of 2.
  */
  pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;

  /* Work out how many pages, starting at pg1, are to be visited. */
  nPageCount = sqlite3pager_pagecount(pPager);
  if( pPg->pgno>nPageCount ){
    /* pPg lies past the current end of file: stop at pPg itself. */
    nPage = (pPg->pgno - pg1)+1;
  }else if( (pg1+nPagePerSector-1)>nPageCount ){
    /* The sector extends past the end of file: stop at the last page. */
    nPage = nPageCount+1-pg1;
  }else{
    /* The whole sector lies inside the file. */
    nPage = nPagePerSector;
  }
  assert(nPage>0);
  assert(pg1<=pPg->pgno);
  assert((pg1+nPage)>pPg->pgno);

  for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
    Pgno pg = pg1+ii;
    void *pPage;

    /* Skip a page whose bit is already set in aInJournal[]. The page
    ** that was actually requested is never skipped on these grounds.
    */
    if( pPager->aInJournal
     && pg!=pPg->pgno
     && pg<=pPager->origDbSize
     && (pPager->aInJournal[pg/8]&(1<<(pg&7)))
    ){
      continue;
    }

    /* The page identified by PAGER_MJ_PGNO() is never fetched or
    ** journaled here.
    */
    if( pg==PAGER_MJ_PGNO(pPager) ){
      continue;
    }

    rc = sqlite3pager_get(pPager, pg, &pPage);
    if( rc==SQLITE_OK ){
      rc = pager_write(pPage);
      sqlite3pager_unref(pPage);
    }
  }

  assert( pPager->doNotSync==1 );
  pPager->doNotSync = 0;
  return rc;
}

/*
** Return TRUE if the page given in the argument was previously passed
** to sqlite3pager_write().  In other words, return TRUE if it is ok
** to change the content of the page.
*/
#ifndef NDEBUG