/ Check-in [4c7bf714]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allow SQLITE_PAGE_SIZE to be redefined on the compiler command-line. (CVS 1217)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:4c7bf714b5f3d2bb7366367ddf906141a7e36407
User & Date: drh 2004-02-10 01:54:28
Context
2004-02-10
02:27
Move the file-format-2 to file-format-3 conversion into sqliteInit(). (CVS 1218) check-in: dcbe2800 user: drh tags: trunk
01:54
Allow SQLITE_PAGE_SIZE to be redefined on the compiler command-line. (CVS 1217) check-in: 4c7bf714 user: drh tags: trunk
2004-02-09
14:37
After code is generated for a subquery, delete the Select structure in order to force the temporary table to be used and to prevent the subquery from being evaluated a second time. Ticket #601. (CVS 1216) check-in: 1cff1886 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/btree.c.

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
...
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
...
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
...
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
...
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
...
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
...
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
...
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
...
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
...
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
....
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
....
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
....
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
....
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
....
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
....
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
....
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
....
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
....
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
....
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
....
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
....
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
....
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.98 2004/01/01 12:33:43 drh Exp $
**
** This file implements an external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
................................................................................
*/
#define MIN_CELL_SIZE  (sizeof(CellHdr)+4)

/*
** The maximum number of database entries that can be held in a single
** page of the database.  This is the number of bytes left on a page
** after the page header, divided by the size of the smallest possible
** cell (MIN_CELL_SIZE).
*/
#define MX_CELL ((SQLITE_PAGE_SIZE-sizeof(PageHdr))/MIN_CELL_SIZE)

/*
** The amount of usable space on a single page of the BTree.  This is the
** page size (SQLITE_PAGE_SIZE) minus the overhead of the page header.
** Because the expression involves sizeof(), its value has the unsigned
** type size_t.
*/
#define USABLE_SPACE  (SQLITE_PAGE_SIZE - sizeof(PageHdr))

/*
** The maximum amount of payload (in bytes) that can be stored locally for
** a database entry.  If the entry contains more data than this, the
** extra goes onto overflow pages.
**
** This number is chosen so that at least 4 cells will fit on every page.
................................................................................
  u16 iSize;      /* Number of bytes in this block of free space */
  u16 iNext;      /* Index in MemPage.u.aDisk[] of the next free block */
};

/*
** The number of bytes of payload that will fit on a single overflow page.
** This is the page size less sizeof(Pgno) bytes.  The Pgno that is
** subtracted is presumably the link to the next overflow page; the
** overflow page structure is not visible in this excerpt, so confirm
** against its definition.
*/
#define OVERFLOW_SIZE (SQLITE_PAGE_SIZE-sizeof(Pgno))

/*
** When the key and data for a single entry in the BTree will not fit in
** the MX_LOCAL_PAYLOAD bytes of space available on the database page,
** then all extra bytes are written to a linked list of overflow pages.
** Each overflow page is an instance of the following structure.
**
................................................................................
**
** The pParent field points back to the parent page.  This allows us to
** walk up the BTree from any leaf to the root.  Care must be taken to
** unref() the parent page pointer when this page is no longer referenced.
** The pageDestructor() routine handles that chore.
*/
struct MemPage {
  union {
    char aDisk[SQLITE_PAGE_SIZE];  /* Page data stored on disk */
    PageHdr hdr;                   /* Overlay page header */
  } u;
  u8 isInit;                     /* True if auxiliary data is initialized */
  u8 idxShift;                   /* True if apCell[] indices have changed */
  u8 isOverfull;                 /* Some apCell[] points outside u.aDisk[] */
  MemPage *pParent;              /* The parent of this page.  NULL for root */
................................................................................
};

/*
** The in-memory image of a disk page has the auxiliary information appended
** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
** that extra information.  It is computed as the size of the whole MemPage
** structure minus the SQLITE_PAGE_SIZE bytes taken up by the on-disk image
** (MemPage.u.aDisk[]).
*/
#define EXTRA_SIZE (sizeof(MemPage)-SQLITE_PAGE_SIZE)

/*
** Everything we need to know about an open database
*/
struct Btree {
  BtOps *pOps;          /* Function table */
  Pager *pPager;        /* The page cache */
................................................................................
** Defragment the page given.  All Cells are moved to the
** beginning of the page and all free space is collected 
** into one big FreeBlk at the end of the page.
*/
static void defragmentPage(Btree *pBt, MemPage *pPage){
  /* pc is the offset at which the next cell will be placed, i indexes
  ** pPage->apCell[], and n is the size of the cell being moved.  newPage[]
  ** is a scratch image in which the header and the packed cells are
  ** assembled before being copied back over the page. */
  int pc, i, n;
  FreeBlk *pFBlk;
  char newPage[SQLITE_PAGE_SIZE];

  assert( sqlitepager_iswriteable(pPage) );
  assert( pPage->isInit );

  /* Cells are packed starting right after the page header.  firstCell is
  ** updated before the header bytes are copied so that the scratch image
  ** carries the new value. */
  pc = sizeof(PageHdr);
  pPage->u.hdr.firstCell = SWAB16(pBt, pc);
  memcpy(newPage, pPage->u.aDisk, pc);
  for(i=0; i<pPage->nCell; i++){
    Cell *pCell = pPage->apCell[i];

    /* This routine should never be called on an overfull page.  The
    ** following asserts verify that constraint. */
    assert( Addr(pCell) > Addr(pPage) );
    assert( Addr(pCell) < Addr(pPage) + SQLITE_PAGE_SIZE );

    /* Link this cell to the offset just past itself, which is where the
    ** next cell will be placed.  The link is written before the cell is
    ** copied so that the copy in newPage[] contains it.  apCell[i] is then
    ** redirected to the cell's final position inside u.aDisk[]. */
    n = cellSize(pBt, pCell);
    pCell->h.iNext = SWAB16(pBt, pc + n);
    memcpy(&newPage[pc], pCell, n);
    pPage->apCell[i] = (Cell*)&pPage->u.aDisk[pc];
    pc += n;
  }

  /* Everything past the packed cells must match the free space already
  ** recorded for this page. */
  assert( pPage->nFree==SQLITE_PAGE_SIZE-pc );

  /* Install the header and the packed cells over the original content. */
  memcpy(pPage->u.aDisk, newPage, pc);

  /* The last cell has no successor.  apCell[] already refers to the
  ** relocated cells, so this store lands in the packed image that was
  ** just copied back. */
  if( pPage->nCell>0 ){
    pPage->apCell[pPage->nCell-1]->h.iNext = 0;
  }

  /* Turn the bytes from offset pc up to SQLITE_PAGE_SIZE into a single
  ** free block, make it the only entry on the free list, and zero the
  ** part of it that follows the FreeBlk header. */
  pFBlk = (FreeBlk*)&pPage->u.aDisk[pc];
  pFBlk->iSize = SWAB16(pBt, SQLITE_PAGE_SIZE - pc);
  pFBlk->iNext = 0;
  pPage->u.hdr.firstFree = SWAB16(pBt, pc);
  memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk));
}

/*
** Allocate nByte bytes of space on a page.  nByte must be a 
** multiple of 4.
**
** Return the index into pPage->u.aDisk[] of the first byte of
................................................................................
  assert( sqlitepager_iswriteable(pPage) );
  assert( nByte==ROUNDUP(nByte) );
  assert( pPage->isInit );
  if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
  pIdx = &pPage->u.hdr.firstFree;
  p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
  while( (iSize = SWAB16(pBt, p->iSize))<nByte ){
    assert( cnt++ < SQLITE_PAGE_SIZE/4 );
    if( p->iNext==0 ){
      defragmentPage(pBt, pPage);
      pIdx = &pPage->u.hdr.firstFree;
    }else{
      pIdx = &p->iNext;
    }
    p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
................................................................................
  }
  if( pPage->isInit ) return SQLITE_OK;
  pPage->isInit = 1;
  pPage->nCell = 0;
  freeSpace = USABLE_SPACE;
  idx = SWAB16(pBt, pPage->u.hdr.firstCell);
  while( idx!=0 ){
    if( idx>SQLITE_PAGE_SIZE-MIN_CELL_SIZE ) goto page_format_error;
    if( idx<sizeof(PageHdr) ) goto page_format_error;
    if( idx!=ROUNDUP(idx) ) goto page_format_error;
    pCell = (Cell*)&pPage->u.aDisk[idx];
    sz = cellSize(pBt, pCell);
    if( idx+sz > SQLITE_PAGE_SIZE ) goto page_format_error;
    freeSpace -= sz;
    pPage->apCell[pPage->nCell++] = pCell;
    idx = SWAB16(pBt, pCell->h.iNext);
  }
  pPage->nFree = 0;
  idx = SWAB16(pBt, pPage->u.hdr.firstFree);
  while( idx!=0 ){
    int iNext;
    if( idx>SQLITE_PAGE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
    if( idx<sizeof(PageHdr) ) goto page_format_error;
    pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
    pPage->nFree += SWAB16(pBt, pFBlk->iSize);
    iNext = SWAB16(pBt, pFBlk->iNext);
    if( iNext>0 && iNext <= idx ) goto page_format_error;
    idx = iNext;
  }
................................................................................
** Set up a raw page so that it looks like a database page holding
** no entries.
*/
static void zeroPage(Btree *pBt, MemPage *pPage){
  /* The header overlays the start of the page image; the one and only
  ** free block begins immediately after it. */
  PageHdr *pHeader = &pPage->u.hdr;
  FreeBlk *pOnlyFree = (FreeBlk*)&pHeader[1];

  assert( sqlitepager_iswriteable(pPage) );

  /* Wipe the first SQLITE_PAGE_SIZE bytes of the page. */
  memset(pPage, 0, SQLITE_PAGE_SIZE);

  /* In-memory bookkeeping: no cells, not overfull, and all of the space
  ** after the header is free. */
  pPage->nCell = 0;
  pPage->isOverfull = 0;
  pPage->nFree = SQLITE_PAGE_SIZE - sizeof(*pHeader);

  /* Header: empty cell list; free list starts right after the header. */
  pHeader->firstCell = 0;
  pHeader->firstFree = SWAB16(pBt, sizeof(*pHeader));

  /* A single free block that spans all of the free space. */
  pOnlyFree->iNext = 0;
  pOnlyFree->iSize = SWAB16(pBt, pPage->nFree);
}

/*
** This routine is called when the reference count for a page
................................................................................
  */
  assert( sizeof(u32)==4 );
  assert( sizeof(u16)==2 );
  assert( sizeof(Pgno)==4 );
  assert( sizeof(PageHdr)==8 );
  assert( sizeof(CellHdr)==12 );
  assert( sizeof(FreeBlk)==4 );
  assert( sizeof(OverflowPage)==SQLITE_PAGE_SIZE );
  assert( sizeof(FreelistInfo)==OVERFLOW_SIZE );
  assert( sizeof(ptr)==sizeof(char*) );
  assert( sizeof(uptr)==sizeof(ptr) );

  pBt = sqliteMalloc( sizeof(*pBt) );
  if( pBt==0 ){
    *ppBtree = 0;
................................................................................
static void relinkCellList(Btree *pBt, MemPage *pPage){
  /* pLink always addresses the link field that must receive the offset
  ** of the next cell: first the header's firstCell, afterwards the iNext
  ** field of the cell visited most recently. */
  u16 *pLink = &pPage->u.hdr.firstCell;
  int iCell = 0;

  assert( sqlitepager_iswriteable(pPage) );
  while( iCell<pPage->nCell ){
    Cell *pCell = pPage->apCell[iCell];
    int offset = Addr(pCell) - Addr(pPage);
    assert( offset>0 && offset<SQLITE_PAGE_SIZE );
    *pLink = SWAB16(pBt, offset);
    pLink = &pCell->h.iNext;
    iCell++;
  }

  /* Terminate the list. */
  *pLink = 0;
}

/*
................................................................................
** pointers that point into pFrom->u.aDisk[] must be adjusted to point
** into pTo->u.aDisk[] instead.  But some pFrom->apCell[] entries might
** not point to pFrom->u.aDisk[].  Those are unchanged.
*/
static void copyPage(MemPage *pTo, MemPage *pFrom){
  uptr srcBase = Addr(pFrom);   /* Address of the source page */
  uptr dstBase = Addr(pTo);     /* Address of the destination page */
  int k;

  /* Duplicate the disk image, then the auxiliary fields.  The copy
  ** starts out marked as initialized and with no parent. */
  memcpy(pTo->u.aDisk, pFrom->u.aDisk, SQLITE_PAGE_SIZE);
  pTo->pParent = 0;
  pTo->isInit = 1;
  pTo->isOverfull = pFrom->isOverfull;
  pTo->nFree = pFrom->nFree;
  pTo->nCell = pFrom->nCell;

  for(k=0; k<pTo->nCell; k++){
    uptr cellAddr = Addr(pFrom->apCell[k]);
    if( cellAddr<=srcBase || cellAddr>=srcBase+SQLITE_PAGE_SIZE ){
      /* The cell lives outside the source page image: share the pointer. */
      pTo->apCell[k] = pFrom->apCell[k];
      continue;
    }
    /* The cell lives inside the source image: point at the same offset
    ** within the destination page. */
    *((uptr*)&pTo->apCell[k]) = cellAddr + dstBase - srcBase;
  }
}

................................................................................
  int szCell[(MX_CELL+2)*NB];  /* Local size of all cells */

  /* 
  ** Return without doing any work if pPage is neither overfull nor
  ** underfull.
  */
  assert( sqlitepager_iswriteable(pPage) );
  if( !pPage->isOverfull && pPage->nFree<SQLITE_PAGE_SIZE/2 
        && pPage->nCell>=2){
    relinkCellList(pBt, pPage);
    return SQLITE_OK;
  }

  /*
  ** Find the parent of the page to be balanced.
................................................................................
        ** The root page is empty.  Copy the one child page
        ** into the root page and return.  This reduces the depth
        ** of the BTree by one.
        */
        pgnoChild = SWAB32(pBt, pPage->u.hdr.rightChild);
        rc = sqlitepager_get(pBt->pPager, pgnoChild, (void**)&pChild);
        if( rc ) return rc;
        memcpy(pPage, pChild, SQLITE_PAGE_SIZE);
        pPage->isInit = 0;
        rc = initPage(pBt, pPage, sqlitepager_pagenumber(pPage), 0);
        assert( rc==SQLITE_OK );
        reparentChildPages(pBt, pPage);
        if( pCur && pCur->pPage==pChild ){
          sqlitepager_unref(pChild);
          pCur->pPage = pPage;
................................................................................
    rc = sqlitepager_get(pFromPager, ovfl, (void**)&pOvfl);
    if( rc ) return rc;
    nextOvfl = SWAB32(pBtFrom, pOvfl->iNext);
    rc = allocatePage(pBtTo, &pNew, &new, 0);
    if( rc==SQLITE_OK ){
      rc = sqlitepager_write(pNew);
      if( rc==SQLITE_OK ){
        memcpy(pNew, pOvfl, SQLITE_PAGE_SIZE);
        *pPrev = SWAB32(pBtTo, new);
        if( pPrevPg ){
          sqlitepager_unref(pPrevPg);
        }
        pPrev = &pOvfl->iNext;
        pPrevPg = pNew;
      }
................................................................................
  rc = sqlitepager_get(pBtFrom->pPager, pgno, (void**)&pPageFrom);
  if( rc ) return rc;
  rc = allocatePage(pBt, &pPage, pTo, 0);
  if( rc==SQLITE_OK ){
    rc = sqlitepager_write(pPage);
  }
  if( rc==SQLITE_OK ){
    memcpy(pPage, pPageFrom, SQLITE_PAGE_SIZE);
    idx = SWAB16(pBt, pPage->u.hdr.firstCell);
    while( idx>0 ){
      pCell = (Cell*)&pPage->u.aDisk[idx];
      idx = SWAB16(pBt, pCell->h.iNext);
      if( pCell->h.leftChild ){
        Pgno newChld;
        rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pCell->h.leftChild),
................................................................................
  rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage);
  if( rc ){
    return rc;
  }
  if( recursive ) printf("PAGE %d:\n", pgno);
  i = 0;
  idx = SWAB16(pBt, pPage->u.hdr.firstCell);
  while( idx>0 && idx<=SQLITE_PAGE_SIZE-MIN_CELL_SIZE ){
    Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
    int sz = cellSize(pBt, pCell);
    sprintf(range,"%d..%d", idx, idx+sz-1);
    sz = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h);
    if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
    memcpy(payload, pCell->aPayload, sz);
    for(j=0; j<sz; j++){
................................................................................
  if( idx!=0 ){
    printf("ERROR: next cell index out of range: %d\n", idx);
  }
  printf("right_child: %d\n", SWAB32(pBt, pPage->u.hdr.rightChild));
  nFree = 0;
  i = 0;
  idx = SWAB16(pBt, pPage->u.hdr.firstFree);
  while( idx>0 && idx<SQLITE_PAGE_SIZE ){
    FreeBlk *p = (FreeBlk*)&pPage->u.aDisk[idx];
    sprintf(range,"%d..%d", idx, idx+p->iSize-1);
    nFree += SWAB16(pBt, p->iSize);
    printf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
       i, range, SWAB16(pBt, p->iSize), nFree);
    idx = SWAB16(pBt, p->iNext);
    i++;
  }
  if( idx!=0 ){
    printf("ERROR: next freeblock index out of range: %d\n", idx);
  }
  if( recursive && pPage->u.hdr.rightChild!=0 ){
    idx = SWAB16(pBt, pPage->u.hdr.firstCell);
    while( idx>0 && idx<SQLITE_PAGE_SIZE-MIN_CELL_SIZE ){
      Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
      fileBtreePageDump(pBt, SWAB32(pBt, pCell->h.leftChild), 1);
      idx = SWAB16(pBt, pCell->h.iNext);
    }
    fileBtreePageDump(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1);
  }
  sqlitepager_unref(pPage);
................................................................................
  }else{
    aResult[3] = 0;
    aResult[6] = 0;
  }
  aResult[4] = pPage->nFree;
  cnt = 0;
  idx = SWAB16(pBt, pPage->u.hdr.firstFree);
  while( idx>0 && idx<SQLITE_PAGE_SIZE ){
    cnt++;
    idx = SWAB16(pBt, ((FreeBlk*)&pPage->u.aDisk[idx])->iNext);
  }
  aResult[5] = cnt;
  aResult[7] = SWAB32(pBt, pPage->u.hdr.rightChild);
  return SQLITE_OK;
}
................................................................................
  int i, rc, depth, d2, pgno;
  char *zKey1, *zKey2;
  int nKey1, nKey2;
  BtCursor cur;
  Btree *pBt;
  char zMsg[100];
  char zContext[100];
  char hit[SQLITE_PAGE_SIZE];

  /* Check that the page exists
  */
  cur.pBt = pBt = pCheck->pBt;
  if( iPage==0 ) return 0;
  if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  sprintf(zContext, "On tree page %d: ", iPage);
................................................................................
  checkTreePage(pCheck, pgno, pPage, zContext, zKey1,nKey1,zUpperBound,nUpper);
  sqliteFree(zKey1);
 
  /* Check for complete coverage of the page
  */
  memset(hit, 0, sizeof(hit));
  memset(hit, 1, sizeof(PageHdr));
  for(i=SWAB16(pBt, pPage->u.hdr.firstCell); i>0 && i<SQLITE_PAGE_SIZE; ){
    Cell *pCell = (Cell*)&pPage->u.aDisk[i];
    int j;
    for(j=i+cellSize(pBt, pCell)-1; j>=i; j--) hit[j]++;
    i = SWAB16(pBt, pCell->h.iNext);
  }
  for(i=SWAB16(pBt,pPage->u.hdr.firstFree); i>0 && i<SQLITE_PAGE_SIZE; ){
    FreeBlk *pFBlk = (FreeBlk*)&pPage->u.aDisk[i];
    int j;
    for(j=i+SWAB16(pBt,pFBlk->iSize)-1; j>=i; j--) hit[j]++;
    i = SWAB16(pBt,pFBlk->iNext);
  }
  for(i=0; i<SQLITE_PAGE_SIZE; i++){
    if( hit[i]==0 ){
      sprintf(zMsg, "Unused space at byte %d of page %d", i, iPage);
      checkAppendMsg(pCheck, zMsg, 0);
      break;
    }else if( hit[i]>1 ){
      sprintf(zMsg, "Multiple uses for byte %d of page %d", i, iPage);
      checkAppendMsg(pCheck, zMsg, 0);
................................................................................
      break;
    }
  }

  /* Check that free space is kept to a minimum
  */
#if 0
  if( pParent && pParent->nCell>2 && pPage->nFree>3*SQLITE_PAGE_SIZE/4 ){
    sprintf(zMsg, "free space (%d) greater than max (%d)", pPage->nFree,
       SQLITE_PAGE_SIZE/3);
    checkAppendMsg(pCheck, zContext, zMsg);
  }
#endif

  sqlitepager_unref(pPage);
  return depth;
}
................................................................................
static int fileBtreeCopyFile(Btree *pBtTo, Btree *pBtFrom){
  int rc = SQLITE_OK;
  Pgno i, nPage, nToPage;

  if( !pBtTo->inTrans || !pBtFrom->inTrans ) return SQLITE_ERROR;
  if( pBtTo->needSwab!=pBtFrom->needSwab ) return SQLITE_ERROR;
  if( pBtTo->pCursor ) return SQLITE_BUSY;
  memcpy(pBtTo->page1, pBtFrom->page1, SQLITE_PAGE_SIZE);
  rc = sqlitepager_overwrite(pBtTo->pPager, 1, pBtFrom->page1);
  nToPage = sqlitepager_pagecount(pBtTo->pPager);
  nPage = sqlitepager_pagecount(pBtFrom->pPager);
  for(i=2; rc==SQLITE_OK && i<=nPage; i++){
    void *pPage;
    rc = sqlitepager_get(pBtFrom->pPager, i, &pPage);
    if( rc ) break;







|







 







|





|







 







|







 







|







 







|







 







|












|







|





|


|







 







|







 







|




|








|







 







|





|







 







|







 







|







 







|









|







 







|







 







|







 







|







 







|







 







|







 







|













|







 







|







 







|







 







|





|





|







 







|

|







 







|







5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
...
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
...
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
...
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
...
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
...
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
...
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
...
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
...
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
...
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
....
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
....
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
....
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
....
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
....
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
....
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
....
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
....
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
....
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
....
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
....
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
....
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
....
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.99 2004/02/10 01:54:28 drh Exp $
**
** This file implements an external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
................................................................................
*/
#define MIN_CELL_SIZE  (sizeof(CellHdr)+4)

/*
** The maximum number of database entries that can be held in a single
** page of the database.  This is the number of bytes left within
** SQLITE_USABLE_SIZE after the page header, divided by the size of the
** smallest possible cell (MIN_CELL_SIZE).
*/
#define MX_CELL ((SQLITE_USABLE_SIZE-sizeof(PageHdr))/MIN_CELL_SIZE)

/*
** The amount of usable space on a single page of the BTree.  This is
** SQLITE_USABLE_SIZE minus the overhead of the page header.  Because the
** expression involves sizeof(), its value has the unsigned type size_t.
*/
#define USABLE_SPACE  (SQLITE_USABLE_SIZE - sizeof(PageHdr))

/*
** The maximum amount of payload (in bytes) that can be stored locally for
** a database entry.  If the entry contains more data than this, the
** extra goes onto overflow pages.
**
** This number is chosen so that at least 4 cells will fit on every page.
................................................................................
  u16 iSize;      /* Number of bytes in this block of free space */
  u16 iNext;      /* Index in MemPage.u.aDisk[] of the next free block */
};

/*
** The number of bytes of payload that will fit on a single overflow page.
** This is SQLITE_USABLE_SIZE less sizeof(Pgno) bytes.  The Pgno that is
** subtracted is presumably the link to the next overflow page; the
** overflow page structure is not visible in this excerpt, so confirm
** against its definition.
*/
#define OVERFLOW_SIZE (SQLITE_USABLE_SIZE-sizeof(Pgno))

/*
** When the key and data for a single entry in the BTree will not fit in
** the MX_LOCAL_PAYLOAD bytes of space available on the database page,
** then all extra bytes are written to a linked list of overflow pages.
** Each overflow page is an instance of the following structure.
**
................................................................................
**
** The pParent field points back to the parent page.  This allows us to
** walk up the BTree from any leaf to the root.  Care must be taken to
** unref() the parent page pointer when this page is no longer referenced.
** The pageDestructor() routine handles that chore.
*/
struct MemPage {
  union u_page_data {
    char aDisk[SQLITE_PAGE_SIZE];  /* Page data stored on disk */
    PageHdr hdr;                   /* Overlay page header */
  } u;
  u8 isInit;                     /* True if auxiliary data is initialized */
  u8 idxShift;                   /* True if apCell[] indices have changed */
  u8 isOverfull;                 /* Some apCell[] points outside u.aDisk[] */
  MemPage *pParent;              /* The parent of this page.  NULL for root */
................................................................................
};

/*
** The in-memory image of a disk page has the auxiliary information appended
** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
** that extra information.  It is computed as the size of the whole MemPage
** structure minus the size of the union (union u_page_data) that holds the
** on-disk image, so it does not depend on the page size constants directly.
*/
#define EXTRA_SIZE (sizeof(MemPage)-sizeof(union u_page_data))

/*
** Everything we need to know about an open database
*/
struct Btree {
  BtOps *pOps;          /* Function table */
  Pager *pPager;        /* The page cache */
................................................................................
** Defragment the page given.  All Cells are moved to the
** beginning of the page and all free space is collected 
** into one big FreeBlk at the end of the page.
*/
static void defragmentPage(Btree *pBt, MemPage *pPage){
  /* pc is the offset at which the next cell will be placed, i indexes
  ** pPage->apCell[], and n is the size of the cell being moved.  newPage[]
  ** is a scratch image in which the header and the packed cells are
  ** assembled before being copied back over the page. */
  int pc, i, n;
  FreeBlk *pFBlk;
  char newPage[SQLITE_USABLE_SIZE];

  assert( sqlitepager_iswriteable(pPage) );
  assert( pPage->isInit );

  /* Cells are packed starting right after the page header.  firstCell is
  ** updated before the header bytes are copied so that the scratch image
  ** carries the new value. */
  pc = sizeof(PageHdr);
  pPage->u.hdr.firstCell = SWAB16(pBt, pc);
  memcpy(newPage, pPage->u.aDisk, pc);
  for(i=0; i<pPage->nCell; i++){
    Cell *pCell = pPage->apCell[i];

    /* This routine should never be called on an overfull page.  The
    ** following asserts verify that constraint. */
    assert( Addr(pCell) > Addr(pPage) );
    assert( Addr(pCell) < Addr(pPage) + SQLITE_USABLE_SIZE );

    /* Link this cell to the offset just past itself, which is where the
    ** next cell will be placed.  The link is written before the cell is
    ** copied so that the copy in newPage[] contains it.  apCell[i] is then
    ** redirected to the cell's final position inside u.aDisk[]. */
    n = cellSize(pBt, pCell);
    pCell->h.iNext = SWAB16(pBt, pc + n);
    memcpy(&newPage[pc], pCell, n);
    pPage->apCell[i] = (Cell*)&pPage->u.aDisk[pc];
    pc += n;
  }

  /* Everything between the packed cells and SQLITE_USABLE_SIZE must match
  ** the free space already recorded for this page. */
  assert( pPage->nFree==SQLITE_USABLE_SIZE-pc );

  /* Install the header and the packed cells over the original content. */
  memcpy(pPage->u.aDisk, newPage, pc);

  /* The last cell has no successor.  apCell[] already refers to the
  ** relocated cells, so this store lands in the packed image that was
  ** just copied back. */
  if( pPage->nCell>0 ){
    pPage->apCell[pPage->nCell-1]->h.iNext = 0;
  }

  /* Turn the bytes from offset pc up to SQLITE_USABLE_SIZE into a single
  ** free block, make it the only entry on the free list, and zero the
  ** part of it that follows the FreeBlk header. */
  pFBlk = (FreeBlk*)&pPage->u.aDisk[pc];
  pFBlk->iSize = SWAB16(pBt, SQLITE_USABLE_SIZE - pc);
  pFBlk->iNext = 0;
  pPage->u.hdr.firstFree = SWAB16(pBt, pc);
  memset(&pFBlk[1], 0, SQLITE_USABLE_SIZE - pc - sizeof(FreeBlk));
}

/*
** Allocate nByte bytes of space on a page.  nByte must be a 
** multiple of 4.
**
** Return the index into pPage->u.aDisk[] of the first byte of
................................................................................
  assert( sqlitepager_iswriteable(pPage) );
  assert( nByte==ROUNDUP(nByte) );
  assert( pPage->isInit );
  if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
  pIdx = &pPage->u.hdr.firstFree;
  p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
  while( (iSize = SWAB16(pBt, p->iSize))<nByte ){
    assert( cnt++ < SQLITE_USABLE_SIZE/4 );
    if( p->iNext==0 ){
      defragmentPage(pBt, pPage);
      pIdx = &pPage->u.hdr.firstFree;
    }else{
      pIdx = &p->iNext;
    }
    p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
................................................................................
  }
  if( pPage->isInit ) return SQLITE_OK;
  pPage->isInit = 1;
  pPage->nCell = 0;
  freeSpace = USABLE_SPACE;
  idx = SWAB16(pBt, pPage->u.hdr.firstCell);
  while( idx!=0 ){
    if( idx>SQLITE_USABLE_SIZE-MIN_CELL_SIZE ) goto page_format_error;
    if( idx<sizeof(PageHdr) ) goto page_format_error;
    if( idx!=ROUNDUP(idx) ) goto page_format_error;
    pCell = (Cell*)&pPage->u.aDisk[idx];
    sz = cellSize(pBt, pCell);
    if( idx+sz > SQLITE_USABLE_SIZE ) goto page_format_error;
    freeSpace -= sz;
    pPage->apCell[pPage->nCell++] = pCell;
    idx = SWAB16(pBt, pCell->h.iNext);
  }
  pPage->nFree = 0;
  idx = SWAB16(pBt, pPage->u.hdr.firstFree);
  while( idx!=0 ){
    int iNext;
    if( idx>SQLITE_USABLE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
    if( idx<sizeof(PageHdr) ) goto page_format_error;
    pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
    pPage->nFree += SWAB16(pBt, pFBlk->iSize);
    iNext = SWAB16(pBt, pFBlk->iNext);
    if( iNext>0 && iNext <= idx ) goto page_format_error;
    idx = iNext;
  }
................................................................................
** Set up a raw page so that it looks like a database page holding
** no entries.
*/
static void zeroPage(Btree *pBt, MemPage *pPage){
  /* The header overlays the start of the page image; the one and only
  ** free block begins immediately after it. */
  PageHdr *pHeader = &pPage->u.hdr;
  FreeBlk *pOnlyFree = (FreeBlk*)&pHeader[1];

  assert( sqlitepager_iswriteable(pPage) );

  /* Wipe the first SQLITE_USABLE_SIZE bytes of the page. */
  memset(pPage, 0, SQLITE_USABLE_SIZE);

  /* In-memory bookkeeping: no cells, not overfull, and all of the usable
  ** space after the header is free. */
  pPage->nCell = 0;
  pPage->isOverfull = 0;
  pPage->nFree = SQLITE_USABLE_SIZE - sizeof(*pHeader);

  /* Header: empty cell list; free list starts right after the header. */
  pHeader->firstCell = 0;
  pHeader->firstFree = SWAB16(pBt, sizeof(*pHeader));

  /* A single free block that spans all of the free space. */
  pOnlyFree->iNext = 0;
  pOnlyFree->iSize = SWAB16(pBt, pPage->nFree);
}

/*
** This routine is called when the reference count for a page
................................................................................
  */
  assert( sizeof(u32)==4 );
  assert( sizeof(u16)==2 );
  assert( sizeof(Pgno)==4 );
  assert( sizeof(PageHdr)==8 );
  assert( sizeof(CellHdr)==12 );
  assert( sizeof(FreeBlk)==4 );
  assert( sizeof(OverflowPage)==SQLITE_USABLE_SIZE );
  assert( sizeof(FreelistInfo)==OVERFLOW_SIZE );
  assert( sizeof(ptr)==sizeof(char*) );
  assert( sizeof(uptr)==sizeof(ptr) );

  pBt = sqliteMalloc( sizeof(*pBt) );
  if( pBt==0 ){
    *ppBtree = 0;
................................................................................
static void relinkCellList(Btree *pBt, MemPage *pPage){
  /* pLink always addresses the link field that must receive the offset
  ** of the next cell: first the header's firstCell, afterwards the iNext
  ** field of the cell visited most recently. */
  u16 *pLink = &pPage->u.hdr.firstCell;
  int iCell = 0;

  assert( sqlitepager_iswriteable(pPage) );
  while( iCell<pPage->nCell ){
    Cell *pCell = pPage->apCell[iCell];
    int offset = Addr(pCell) - Addr(pPage);
    assert( offset>0 && offset<SQLITE_USABLE_SIZE );
    *pLink = SWAB16(pBt, offset);
    pLink = &pCell->h.iNext;
    iCell++;
  }

  /* Terminate the list. */
  *pLink = 0;
}

/*
................................................................................
** pointers that point into pFrom->u.aDisk[] must be adjusted to point
** into pTo->u.aDisk[] instead.  But some pFrom->apCell[] entries might
** not point to pFrom->u.aDisk[].  Those are unchanged.
*/
static void copyPage(MemPage *pTo, MemPage *pFrom){
  uptr srcBase = Addr(pFrom);   /* Address of the source page */
  uptr dstBase = Addr(pTo);     /* Address of the destination page */
  int k;

  /* Duplicate the on-disk image and the per-page counters.  The copy is
  ** marked initialized and is given no parent.
  */
  memcpy(pTo->u.aDisk, pFrom->u.aDisk, SQLITE_USABLE_SIZE);
  pTo->isOverfull = pFrom->isOverfull;
  pTo->nFree = pFrom->nFree;
  pTo->nCell = pFrom->nCell;
  pTo->isInit = 1;
  pTo->pParent = 0;

  for(k=0; k<pTo->nCell; k++){
    uptr addr = Addr(pFrom->apCell[k]);
    if( addr<=srcBase || addr>=srcBase+SQLITE_USABLE_SIZE ){
      /* The cell does not live inside the source page: share the pointer. */
      pTo->apCell[k] = pFrom->apCell[k];
      continue;
    }
    /* The cell lives inside the source page: rebase the pointer so that it
    ** refers to the same offset within the destination page.
    */
    *((uptr*)&pTo->apCell[k]) = addr + dstBase - srcBase;
  }
}

................................................................................
  int szCell[(MX_CELL+2)*NB];  /* Local size of all cells */

  /* 
  ** Return without doing any work if pPage is neither overfull nor
  ** underfull.
  */
  assert( sqlitepager_iswriteable(pPage) );
  if( !pPage->isOverfull && pPage->nFree<SQLITE_USABLE_SIZE/2 
        && pPage->nCell>=2){
    relinkCellList(pBt, pPage);
    return SQLITE_OK;
  }

  /*
  ** Find the parent of the page to be balanced.
................................................................................
        ** The root page is empty.  Copy the one child page
        ** into the root page and return.  This reduces the depth
        ** of the BTree by one.
        */
        pgnoChild = SWAB32(pBt, pPage->u.hdr.rightChild);
        rc = sqlitepager_get(pBt->pPager, pgnoChild, (void**)&pChild);
        if( rc ) return rc;
        memcpy(pPage, pChild, SQLITE_USABLE_SIZE);
        pPage->isInit = 0;
        rc = initPage(pBt, pPage, sqlitepager_pagenumber(pPage), 0);
        assert( rc==SQLITE_OK );
        reparentChildPages(pBt, pPage);
        if( pCur && pCur->pPage==pChild ){
          sqlitepager_unref(pChild);
          pCur->pPage = pPage;
................................................................................
    rc = sqlitepager_get(pFromPager, ovfl, (void**)&pOvfl);
    if( rc ) return rc;
    nextOvfl = SWAB32(pBtFrom, pOvfl->iNext);
    rc = allocatePage(pBtTo, &pNew, &new, 0);
    if( rc==SQLITE_OK ){
      rc = sqlitepager_write(pNew);
      if( rc==SQLITE_OK ){
        memcpy(pNew, pOvfl, SQLITE_USABLE_SIZE);
        *pPrev = SWAB32(pBtTo, new);
        if( pPrevPg ){
          sqlitepager_unref(pPrevPg);
        }
        pPrev = &pOvfl->iNext;
        pPrevPg = pNew;
      }
................................................................................
  rc = sqlitepager_get(pBtFrom->pPager, pgno, (void**)&pPageFrom);
  if( rc ) return rc;
  rc = allocatePage(pBt, &pPage, pTo, 0);
  if( rc==SQLITE_OK ){
    rc = sqlitepager_write(pPage);
  }
  if( rc==SQLITE_OK ){
    memcpy(pPage, pPageFrom, SQLITE_USABLE_SIZE);
    idx = SWAB16(pBt, pPage->u.hdr.firstCell);
    while( idx>0 ){
      pCell = (Cell*)&pPage->u.aDisk[idx];
      idx = SWAB16(pBt, pCell->h.iNext);
      if( pCell->h.leftChild ){
        Pgno newChld;
        rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pCell->h.leftChild),
................................................................................
  rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage);
  if( rc ){
    return rc;
  }
  if( recursive ) printf("PAGE %d:\n", pgno);
  i = 0;
  idx = SWAB16(pBt, pPage->u.hdr.firstCell);
  while( idx>0 && idx<=SQLITE_USABLE_SIZE-MIN_CELL_SIZE ){
    Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
    int sz = cellSize(pBt, pCell);
    sprintf(range,"%d..%d", idx, idx+sz-1);
    sz = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h);
    if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
    memcpy(payload, pCell->aPayload, sz);
    for(j=0; j<sz; j++){
................................................................................
  if( idx!=0 ){
    printf("ERROR: next cell index out of range: %d\n", idx);
  }
  printf("right_child: %d\n", SWAB32(pBt, pPage->u.hdr.rightChild));
  nFree = 0;
  i = 0;
  idx = SWAB16(pBt, pPage->u.hdr.firstFree);
  while( idx>0 && idx<SQLITE_USABLE_SIZE ){
    FreeBlk *p = (FreeBlk*)&pPage->u.aDisk[idx];
    sprintf(range,"%d..%d", idx, idx+p->iSize-1);
    nFree += SWAB16(pBt, p->iSize);
    printf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
       i, range, SWAB16(pBt, p->iSize), nFree);
    idx = SWAB16(pBt, p->iNext);
    i++;
  }
  if( idx!=0 ){
    printf("ERROR: next freeblock index out of range: %d\n", idx);
  }
  if( recursive && pPage->u.hdr.rightChild!=0 ){
    idx = SWAB16(pBt, pPage->u.hdr.firstCell);
    while( idx>0 && idx<SQLITE_USABLE_SIZE-MIN_CELL_SIZE ){
      Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
      fileBtreePageDump(pBt, SWAB32(pBt, pCell->h.leftChild), 1);
      idx = SWAB16(pBt, pCell->h.iNext);
    }
    fileBtreePageDump(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1);
  }
  sqlitepager_unref(pPage);
................................................................................
  }else{
    aResult[3] = 0;
    aResult[6] = 0;
  }
  aResult[4] = pPage->nFree;
  cnt = 0;
  idx = SWAB16(pBt, pPage->u.hdr.firstFree);
  while( idx>0 && idx<SQLITE_USABLE_SIZE ){
    cnt++;
    idx = SWAB16(pBt, ((FreeBlk*)&pPage->u.aDisk[idx])->iNext);
  }
  aResult[5] = cnt;
  aResult[7] = SWAB32(pBt, pPage->u.hdr.rightChild);
  return SQLITE_OK;
}
................................................................................
  int i, rc, depth, d2, pgno;
  char *zKey1, *zKey2;
  int nKey1, nKey2;
  BtCursor cur;
  Btree *pBt;
  char zMsg[100];
  char zContext[100];
  char hit[SQLITE_USABLE_SIZE];

  /* Check that the page exists
  */
  cur.pBt = pBt = pCheck->pBt;
  if( iPage==0 ) return 0;
  if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  sprintf(zContext, "On tree page %d: ", iPage);
................................................................................
  checkTreePage(pCheck, pgno, pPage, zContext, zKey1,nKey1,zUpperBound,nUpper);
  sqliteFree(zKey1);
 
  /* Check for complete coverage of the page
  */
  memset(hit, 0, sizeof(hit));
  memset(hit, 1, sizeof(PageHdr));
  for(i=SWAB16(pBt, pPage->u.hdr.firstCell); i>0 && i<SQLITE_USABLE_SIZE; ){
    Cell *pCell = (Cell*)&pPage->u.aDisk[i];
    int j;
    for(j=i+cellSize(pBt, pCell)-1; j>=i; j--) hit[j]++;
    i = SWAB16(pBt, pCell->h.iNext);
  }
  for(i=SWAB16(pBt,pPage->u.hdr.firstFree); i>0 && i<SQLITE_USABLE_SIZE; ){
    FreeBlk *pFBlk = (FreeBlk*)&pPage->u.aDisk[i];
    int j;
    for(j=i+SWAB16(pBt,pFBlk->iSize)-1; j>=i; j--) hit[j]++;
    i = SWAB16(pBt,pFBlk->iNext);
  }
  for(i=0; i<SQLITE_USABLE_SIZE; i++){
    if( hit[i]==0 ){
      sprintf(zMsg, "Unused space at byte %d of page %d", i, iPage);
      checkAppendMsg(pCheck, zMsg, 0);
      break;
    }else if( hit[i]>1 ){
      sprintf(zMsg, "Multiple uses for byte %d of page %d", i, iPage);
      checkAppendMsg(pCheck, zMsg, 0);
................................................................................
      break;
    }
  }

  /* Check that free space is kept to a minimum
  */
#if 0
  if( pParent && pParent->nCell>2 && pPage->nFree>3*SQLITE_USABLE_SIZE/4 ){
    sprintf(zMsg, "free space (%d) greater than max (%d)", pPage->nFree,
       SQLITE_USABLE_SIZE/3);
    checkAppendMsg(pCheck, zContext, zMsg);
  }
#endif

  sqlitepager_unref(pPage);
  return depth;
}
................................................................................
static int fileBtreeCopyFile(Btree *pBtTo, Btree *pBtFrom){
  int rc = SQLITE_OK;
  Pgno i, nPage, nToPage;

  if( !pBtTo->inTrans || !pBtFrom->inTrans ) return SQLITE_ERROR;
  if( pBtTo->needSwab!=pBtFrom->needSwab ) return SQLITE_ERROR;
  if( pBtTo->pCursor ) return SQLITE_BUSY;
  memcpy(pBtTo->page1, pBtFrom->page1, SQLITE_USABLE_SIZE);
  rc = sqlitepager_overwrite(pBtTo->pPager, 1, pBtFrom->page1);
  nToPage = sqlitepager_pagecount(pBtTo->pPager);
  nPage = sqlitepager_pagecount(pBtFrom->pPager);
  for(i=2; rc==SQLITE_OK && i<=nPage; i++){
    void *pPage;
    rc = sqlitepager_get(pBtFrom->pPager, i, &pPage);
    if( rc ) break;

Changes to src/pager.c.

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
..
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
...
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
...
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
...
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
...
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
...
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
...
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
...
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
...
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
...
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
....
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
....
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
....
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
....
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
....
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
....
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
....
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
....
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.96 2004/02/09 01:20:37 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

................................................................................
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif

/*
** Number of extra bytes of data allocated at the end of each page and
** stored on disk but not used by the higher level btree layer.
*/
#ifndef SQLITE_PAGE_RESERVE
#define SQLITE_PAGE_RESERVE 0
#endif

/*
** The total number of bytes stored on disk for each page.
*/
#define SQLITE_BLOCK_SIZE (SQLITE_PAGE_SIZE+SQLITE_PAGE_RESERVE)


/*
** The page cache as a whole is always in one of the following
** states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or 
**                       writing the database file.  There is no
................................................................................
  PgHdr *pNextCkpt, *pPrevCkpt;  /* List of pages in the checkpoint journal */
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 needSync;                   /* Sync journal before writing this page */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
  /* SQLITE_BLOCK_SIZE bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};

/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_BLOCK_SIZE])

/*
** How big to make the hash table used for locating in-memory pages
** by page number.
*/
#define N_PG_HASH 2048

................................................................................
** Actually, this structure is the complete page record for pager
** formats less than 3.  Beginning with format 3, this record is surrounded
** by two checksums.
*/
typedef struct PageRecord PageRecord;
struct PageRecord {
  Pgno pgno;                      /* The page number */
  char aData[SQLITE_BLOCK_SIZE];  /* Original data for page pgno */
};

/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are three journal formats (so far). The 1st journal format writes
................................................................................
** the journal file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the 3rd journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the SQLITE_BLOCK_SIZE bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to random value which
** is different for every journal, we minimize that risk.
................................................................................
** The size of the header and of each page in the journal varies according
** to which journal format is being used.  The following macros figure out
** the sizes based on format numbers.
*/
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))
#define JOURNAL_PG_SZ(X) \
   (SQLITE_BLOCK_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))

/*
** Enable reference count tracking here:
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
................................................................................
  }

  /* Playback the page.  Update the in-memory copy of the page
  ** at the same time, if there is one.
  */
  pPg = pager_lookup(pPager, pgRec.pgno);
  TRACE2("PLAYBACK %d\n", pgRec.pgno);
  sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_BLOCK_SIZE);
  rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_BLOCK_SIZE);
  if( pPg ){
    /* No page should ever be rolled back that is in use, except for page
    ** 1 which is held in use in order to keep the lock on the database
    ** active.
    */
    assert( pPg->nRef==0 || pPg->pgno==1 );
    memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_BLOCK_SIZE);
    memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
    pPg->dirty = 0;
    pPg->needSync = 0;
    if( pPager->xCodec ){
      pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pPg), 0);
    }
  }
  return rc;
}

/*
** Playback the journal and thus restore the database file to
................................................................................
**       in format 3 only.
**    *  4 byte big-endian integer which is the initial value for the 
**       sanity checksum.  This field appears in format 3 only.
**    *  4 byte integer which is the number of pages to truncate the
**       database to during a rollback.
**    *  Zero or more page instances, each as follows:
**        +  4 byte page number.
**        +  SQLITE_BLOCK_SIZE bytes of data.
**        +  4 byte checksum (format 3 only)
**
** When we speak of the journal header, we mean the first 4 bullets above.
** Each entry in the journal is an instance of the 5th bullet.  Note that
** bullets 2 and 3 only appear in format-3 journals.
**
** Call the value from the second bullet "nRec".  nRec is the number of
................................................................................
    assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
  }
  rc = read32bits(format, &pPager->jfd, &mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_BLOCK_SIZE*(off_t)mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  pPager->dbSize = mxPg;
  
  /* Copy original pages out of the journal and back into the database file.
  */
................................................................................
  /* Pages that have been written to the journal but never synced
  ** were not restored by the loop above.  We have to restore those
  ** pages by reading them back from the original database.
  */
  if( rc==SQLITE_OK ){
    PgHdr *pPg;
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
      char zBuf[SQLITE_BLOCK_SIZE];
      if( !pPg->dirty ) continue;
      if( (int)pPg->pgno <= pPager->origDbSize ){
        sqliteOsSeek(&pPager->fd, SQLITE_BLOCK_SIZE*(off_t)(pPg->pgno-1));
        rc = sqliteOsRead(&pPager->fd, zBuf, SQLITE_BLOCK_SIZE);
        if( rc ) break;
        if( pPager->xCodec ){
          pPager->xCodec(pPager->pCodecArg, zBuf, 0);
        }
      }else{
        memset(zBuf, 0, SQLITE_BLOCK_SIZE);
      }
      if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_BLOCK_SIZE) ){
        memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_BLOCK_SIZE);
        memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
      }
      pPg->needSync = 0;
      pPg->dirty = 0;
    }
  }

................................................................................
  off_t szJ;               /* Size of the full journal */
  int nRec;                /* Number of Records */
  int i;                   /* Loop counter */
  int rc;

  /* Truncate the database back to its original size.
  */
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_BLOCK_SIZE*(off_t)pPager->ckptSize);
  pPager->dbSize = pPager->ckptSize;

  /* Figure out how many records are in the checkpoint journal.
  */
  assert( pPager->ckptInUse && pPager->journalOpen );
  sqliteOsSeek(&pPager->cpfd, 0);
  nRec = pPager->ckptNRec;
................................................................................
  if( pPager->dbSize>=0 ){
    return pPager->dbSize;
  }
  if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
    pPager->errMask |= PAGER_ERR_DISK;
    return 0;
  }
  n /= SQLITE_BLOCK_SIZE;
  if( pPager->state!=SQLITE_UNLOCK ){
    pPager->dbSize = n;
  }
  return n;
}

/*
................................................................................
    rc = pager_errcode(pPager);
    return rc;
  }
  if( nPage>=(unsigned)pPager->dbSize ){
    return SQLITE_OK;
  }
  syncJournal(pPager);
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_BLOCK_SIZE*(off_t)nPage);
  if( rc==SQLITE_OK ){
    pPager->dbSize = nPage;
  }
  return rc;
}

/*
................................................................................
  Pager *pPager;
  int rc;

  if( pList==0 ) return SQLITE_OK;
  pPager = pList->pPager;
  while( pList ){
    assert( pList->dirty );
    sqliteOsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_BLOCK_SIZE);
    if( pPager->xCodec ){
      pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pList), 1);
    }
    rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_BLOCK_SIZE);
    if( pPager->xCodec ){
      pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pList), 0);
    }
    if( rc ) return rc;
    pList->dirty = 0;
    pList = pList->pDirty;
  }
................................................................................
  }
  if( pPg==0 ){
    /* The requested page is not in the page cache. */
    int h;
    pPager->nMiss++;
    if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
      /* Create a new page */
      pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_BLOCK_SIZE 
                              + sizeof(u32) + pPager->nExtra );
      if( pPg==0 ){
        pager_unwritelock(pPager);
        pPager->errMask |= PAGER_ERR_MEM;
        return SQLITE_NOMEM;
      }
      memset(pPg, 0, sizeof(*pPg));
................................................................................
    if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
    if( pPager->errMask!=0 ){
      sqlitepager_unref(PGHDR_TO_DATA(pPg));
      rc = pager_errcode(pPager);
      return rc;
    }
    if( pPager->dbSize<(int)pgno ){
      memset(PGHDR_TO_DATA(pPg), 0, SQLITE_BLOCK_SIZE);
    }else{
      int rc;
      sqliteOsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_BLOCK_SIZE);
      rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_BLOCK_SIZE);
      if( rc!=SQLITE_OK ){
        off_t fileSize;
        if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
               || fileSize>=pgno*SQLITE_BLOCK_SIZE ){
          sqlitepager_unref(PGHDR_TO_DATA(pPg));
          return rc;
        }else{
          memset(PGHDR_TO_DATA(pPg), 0, SQLITE_BLOCK_SIZE);
        }
      }else if( pPager->xCodec ){
        pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pPg), 0);
      }
    }
  }else{
    /* The requested page is in the page cache. */
................................................................................
        store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
        szPg = SQLITE_PAGE_SIZE+8;
      }else{
        szPg = SQLITE_PAGE_SIZE+4;
      }
      store32bits(pPg->pgno, pPg, -4);
      if( pPager->xCodec ){
        pPager->xCodec(pPager->pCodecArg, pData, 1);
      }
      rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
      if( pPager->xCodec ){
        pPager->xCodec(pPager->pCodecArg, pData, 0);
      }
      if( journal_format>=JOURNAL_FORMAT_3 ){
        *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
      }
      if( rc!=SQLITE_OK ){
        sqlitepager_rollback(pPager);
        pPager->errMask |= PAGER_ERR_FULL;
................................................................................
  ** then write the current page to the checkpoint journal.  Note that
  ** the checkpoint journal always uses the simpler format 2 that lacks
  ** checksums.  The header is also omitted from the checkpoint journal.
  */
  if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
    assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
    store32bits(pPg->pgno, pPg, -4);
    rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_BLOCK_SIZE+4);
    if( rc!=SQLITE_OK ){
      sqlitepager_rollback(pPager);
      pPager->errMask |= PAGER_ERR_FULL;
      return rc;
    }
    pPager->ckptNRec++;
    assert( pPager->aInCkpt!=0 );
................................................................................
  void *pPage;
  int rc;

  rc = sqlitepager_get(pPager, pgno, &pPage);
  if( rc==SQLITE_OK ){
    rc = sqlitepager_write(pPage);
    if( rc==SQLITE_OK ){
      memcpy(pPage, pData, SQLITE_BLOCK_SIZE);
    }
    sqlitepager_unref(pPage);
  }
  return rc;
}

/*







|







 







<
<
<
<
<
<
<
<
<
<
<
<
<







 







|









|







 







|







 







|







 







|







 







|
|






|




|







 







|







 







|







 







|


|
|





|

|
|







 







|







 







|







 







|







 







|



|







 







|







 







|


|
|



|



|







 







|



|







 







|







 







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
..
40
41
42
43
44
45
46













47
48
49
50
51
52
53
...
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
...
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
...
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
...
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
...
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
...
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
...
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
...
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
...
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
....
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
....
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
....
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
....
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
....
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
....
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
....
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
....
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.97 2004/02/10 01:54:28 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

................................................................................
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif















/*
** The page cache as a whole is always in one of the following
** states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or 
**                       writing the database file.  There is no
................................................................................
  PgHdr *pNextCkpt, *pPrevCkpt;  /* List of pages in the checkpoint journal */
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 needSync;                   /* Sync journal before writing this page */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
  /* SQLITE_PAGE_SIZE bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};

/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_PAGE_SIZE])

/*
** How big to make the hash table used for locating in-memory pages
** by page number.
*/
#define N_PG_HASH 2048

................................................................................
** Actually, this structure is the complete page record for pager
** formats less than 3.  Beginning with format 3, this record is surrounded
** by two checksums.
*/
typedef struct PageRecord PageRecord;
struct PageRecord {
  Pgno pgno;                      /* The page number */
  char aData[SQLITE_PAGE_SIZE];   /* Original data for page pgno */
};

/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are three journal formats (so far). The 1st journal format writes
................................................................................
** the journal file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the 3rd journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the SQLITE_PAGE_SIZE bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to random value which
** is different for every journal, we minimize that risk.
................................................................................
** The size of the header and of each page in the journal varies according
** to which journal format is being used.  The following macros figure out
** the sizes based on format numbers.
*/
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))
#define JOURNAL_PG_SZ(X) \
   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))

/*
** Enable reference count tracking here:
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
................................................................................
  }

  /* Playback the page.  Update the in-memory copy of the page
  ** at the same time, if there is one.
  */
  pPg = pager_lookup(pPager, pgRec.pgno);
  TRACE2("PLAYBACK %d\n", pgRec.pgno);
  sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
  rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
  if( pPg ){
    /* No page should ever be rolled back that is in use, except for page
    ** 1 which is held in use in order to keep the lock on the database
    ** active.
    */
    assert( pPg->nRef==0 || pPg->pgno==1 );
    memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
    memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
    pPg->dirty = 0;
    pPg->needSync = 0;
    if( pPager->xCodec ){
      pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pPg), 2);
    }
  }
  return rc;
}

/*
** Playback the journal and thus restore the database file to
................................................................................
**       in format 3 only.
**    *  4 byte big-endian integer which is the initial value for the 
**       sanity checksum.  This field appears in format 3 only.
**    *  4 byte integer which is the number of pages to truncate the
**       database to during a rollback.
**    *  Zero or more page instances, each as follows:
**        +  4 byte page number.
**        +  SQLITE_PAGE_SIZE bytes of data.
**        +  4 byte checksum (format 3 only)
**
** When we speak of the journal header, we mean the first 4 bullets above.
** Each entry in the journal is an instance of the 5th bullet.  Note that
** bullets 2 and 3 only appear in format-3 journals.
**
** Call the value from the second bullet "nRec".  nRec is the number of
................................................................................
    assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
  }
  rc = read32bits(format, &pPager->jfd, &mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  pPager->dbSize = mxPg;
  
  /* Copy original pages out of the journal and back into the database file.
  */
................................................................................
  /* Pages that have been written to the journal but never synced
  ** were not restored by the loop above.  We have to restore those
  ** pages by reading them back from the original database.
  */
  if( rc==SQLITE_OK ){
    PgHdr *pPg;
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
      char zBuf[SQLITE_PAGE_SIZE];
      if( !pPg->dirty ) continue;
      if( (int)pPg->pgno <= pPager->origDbSize ){
        sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
        rc = sqliteOsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
        if( rc ) break;
        if( pPager->xCodec ){
          pPager->xCodec(pPager->pCodecArg, zBuf, 0);
        }
      }else{
        memset(zBuf, 0, SQLITE_PAGE_SIZE);
      }
      if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){
        memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
        memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
      }
      pPg->needSync = 0;
      pPg->dirty = 0;
    }
  }

................................................................................
  off_t szJ;               /* Size of the full journal */
  int nRec;                /* Number of Records */
  int i;                   /* Loop counter */
  int rc;

  /* Truncate the database back to its original size.
  */
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->ckptSize);
  pPager->dbSize = pPager->ckptSize;

  /* Figure out how many records are in the checkpoint journal.
  */
  assert( pPager->ckptInUse && pPager->journalOpen );
  sqliteOsSeek(&pPager->cpfd, 0);
  nRec = pPager->ckptNRec;
................................................................................
  if( pPager->dbSize>=0 ){
    return pPager->dbSize;
  }
  if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
    pPager->errMask |= PAGER_ERR_DISK;
    return 0;
  }
  n /= SQLITE_PAGE_SIZE;
  if( pPager->state!=SQLITE_UNLOCK ){
    pPager->dbSize = n;
  }
  return n;
}

/*
................................................................................
    rc = pager_errcode(pPager);
    return rc;
  }
  if( nPage>=(unsigned)pPager->dbSize ){
    return SQLITE_OK;
  }
  syncJournal(pPager);
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
  if( rc==SQLITE_OK ){
    pPager->dbSize = nPage;
  }
  return rc;
}

/*
................................................................................
  Pager *pPager;
  int rc;

  if( pList==0 ) return SQLITE_OK;
  pPager = pList->pPager;
  while( pList ){
    assert( pList->dirty );
    sqliteOsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
    if( pPager->xCodec ){
      pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pList), 1);
    }
    rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
    if( pPager->xCodec ){
      pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pList), 0);
    }
    if( rc ) return rc;
    pList->dirty = 0;
    pList = pList->pDirty;
  }
................................................................................
  }
  if( pPg==0 ){
    /* The requested page is not in the page cache. */
    int h;
    pPager->nMiss++;
    if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
      /* Create a new page */
      pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE 
                              + sizeof(u32) + pPager->nExtra );
      if( pPg==0 ){
        pager_unwritelock(pPager);
        pPager->errMask |= PAGER_ERR_MEM;
        return SQLITE_NOMEM;
      }
      memset(pPg, 0, sizeof(*pPg));
................................................................................
    if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
    if( pPager->errMask!=0 ){
      sqlitepager_unref(PGHDR_TO_DATA(pPg));
      rc = pager_errcode(pPager);
      return rc;
    }
    if( pPager->dbSize<(int)pgno ){
      memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
    }else{
      int rc;
      sqliteOsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
      rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
      if( rc!=SQLITE_OK ){
        off_t fileSize;
        if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
               || fileSize>=pgno*SQLITE_PAGE_SIZE ){
          sqlitepager_unref(PGHDR_TO_DATA(pPg));
          return rc;
        }else{
          memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
        }
      }else if( pPager->xCodec ){
        pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pPg), 0);
      }
    }
  }else{
    /* The requested page is in the page cache. */
................................................................................
        store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
        szPg = SQLITE_PAGE_SIZE+8;
      }else{
        szPg = SQLITE_PAGE_SIZE+4;
      }
      store32bits(pPg->pgno, pPg, -4);
      if( pPager->xCodec ){
        pPager->xCodec(pPager->pCodecArg, pData, 3);
      }
      rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
      if( pPager->xCodec ){
        pPager->xCodec(pPager->pCodecArg, pData, 2);
      }
      if( journal_format>=JOURNAL_FORMAT_3 ){
        *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
      }
      if( rc!=SQLITE_OK ){
        sqlitepager_rollback(pPager);
        pPager->errMask |= PAGER_ERR_FULL;
................................................................................
  ** then write the current page to the checkpoint journal.  Note that
  ** the checkpoint journal always uses the simpler format 2 that lacks
  ** checksums.  The header is also omitted from the checkpoint journal.
  */
  if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
    assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
    store32bits(pPg->pgno, pPg, -4);
    rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
    if( rc!=SQLITE_OK ){
      sqlitepager_rollback(pPager);
      pPager->errMask |= PAGER_ERR_FULL;
      return rc;
    }
    pPager->ckptNRec++;
    assert( pPager->aInCkpt!=0 );
................................................................................
  void *pPage;
  int rc;

  rc = sqlitepager_get(pPager, pgno, &pPage);
  if( rc==SQLITE_OK ){
    rc = sqlitepager_write(pPage);
    if( rc==SQLITE_OK ){
      memcpy(pPage, pData, SQLITE_PAGE_SIZE);
    }
    sqlitepager_unref(pPage);
  }
  return rc;
}

/*

Changes to src/pager.h.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


24
25



26
27
28
29
30
















31
32
33
34
35
36
37
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface to the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.24 2004/02/09 01:20:37 drh Exp $
*/

/*
** The size of one page
**
** You can change this value to another (reasonable) power of two
** such as 512, 2048, 4096, or 8192 and things will still work.  But


** experiments show that a page size of 1024 gives the best speed.
** (The speed differences are minimal.)



*/
#if !defined(SQLITE_PAGE_SIZE)
# define SQLITE_PAGE_SIZE 1024   /* Default when the build does not set one */
#endif

















/*
** Maximum number of pages in one database.  (This is a limitation
** imposed by 4GB file size limits.)
*/
#define SQLITE_MAX_PAGE 1073741823  /* == 2^30 - 1 */

/*







|





|
|
>
>
|
|
>
>
>





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface to the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.25 2004/02/10 01:54:28 drh Exp $
*/

/*
** The size of one page
**
** You can change this value to any other (reasonable) value you want.
** It need not be a power of two, though the interface to the disk
** will likely be faster if it is.
**
** Experiments show that a page size of 1024 gives the best speed
** for common usages.  The speed differences for different sizes
** such as 512, 2048, 4096, and so forth, are minimal.  Note, however,
** that changing the page size results in a completely incompatible
** file format.
*/
#if !defined(SQLITE_PAGE_SIZE)
# define SQLITE_PAGE_SIZE 1024   /* Default when the build does not set one */
#endif

/*
** Number of extra bytes of data allocated at the end of each page and
** stored on disk but not used by the higher level btree layer.  Changing
** this value results in a completely incompatible file format.
*/
#if !defined(SQLITE_PAGE_RESERVE)
# define SQLITE_PAGE_RESERVE 0   /* Default: no bytes reserved per page */
#endif

/*
** The total number of usable bytes stored on disk for each page.
** The usable bytes come at the beginning of the page and the reserve
** bytes come at the end.
*/
/* Both operands may be overridden on the compiler command line, possibly
** with an expression (for example -DSQLITE_PAGE_RESERVE=4+4), so each is
** parenthesized to keep the subtraction correct after macro expansion. */
#define SQLITE_USABLE_SIZE ((SQLITE_PAGE_SIZE)-(SQLITE_PAGE_RESERVE))

/*
** Maximum number of pages in one database.  (This is a limitation
** imposed by 4GB file size limits.)
*/
#define SQLITE_MAX_PAGE 1073741823  /* == 2^30 - 1 */

/*

Changes to src/test2.c.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
...
521
522
523
524
525
526
527

528
529
530
531
532
533
534
...
552
553
554
555
556
557
558






559
560
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the pager.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test2.c,v 1.15 2003/02/11 14:55:41 drh Exp $
*/
#include "os.h"
#include "sqliteInt.h"
#include "pager.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
................................................................................
  }
  if( Tcl_GetInt(interp, argv[1], (int*)&pPage) ) return TCL_ERROR;
  rc = sqlitepager_write(pPage);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  strncpy((char*)pPage, argv[2], SQLITE_PAGE_SIZE-1);
  ((char*)pPage)[SQLITE_PAGE_SIZE-1] = 0;
  return TCL_OK;
}

/*
** Usage:   fake_big_file  N  FILENAME
**
** Write a few bytes at the N megabyte point of FILENAME.  This will
................................................................................
}

/*
** Register commands with the TCL interpreter.
*/
int Sqlitetest2_Init(Tcl_Interp *interp){
  extern int sqlite_io_error_pending;

  static struct {
    char *zName;
    Tcl_CmdProc *xProc;
  } aCmd[] = {
    { "pager_open",              (Tcl_CmdProc*)pager_open          },
    { "pager_close",             (Tcl_CmdProc*)pager_close         },
    { "pager_commit",            (Tcl_CmdProc*)pager_commit        },
................................................................................
  }
  Tcl_LinkVar(interp, "sqlite_io_error_pending",
     (char*)&sqlite_io_error_pending, TCL_LINK_INT);
#ifdef SQLITE_TEST
  Tcl_LinkVar(interp, "journal_format",
     (char*)&journal_format, TCL_LINK_INT);
#endif






  return TCL_OK;
}







|







 







|
|







 







>







 







>
>
>
>
>
>


9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
...
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
...
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the pager.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test2.c,v 1.16 2004/02/10 01:54:28 drh Exp $
*/
#include "os.h"
#include "sqliteInt.h"
#include "pager.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
................................................................................
  }
  if( Tcl_GetInt(interp, argv[1], (int*)&pPage) ) return TCL_ERROR;
  rc = sqlitepager_write(pPage);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  strncpy((char*)pPage, argv[2], SQLITE_USABLE_SIZE-1);
  ((char*)pPage)[SQLITE_USABLE_SIZE-1] = 0;
  return TCL_OK;
}

/*
** Usage:   fake_big_file  N  FILENAME
**
** Write a few bytes at the N megabyte point of FILENAME.  This will
................................................................................
}

/*
** Register commands with the TCL interpreter.
*/
int Sqlitetest2_Init(Tcl_Interp *interp){
  extern int sqlite_io_error_pending;
  char zBuf[100];
  static struct {
    char *zName;
    Tcl_CmdProc *xProc;
  } aCmd[] = {
    { "pager_open",              (Tcl_CmdProc*)pager_open          },
    { "pager_close",             (Tcl_CmdProc*)pager_close         },
    { "pager_commit",            (Tcl_CmdProc*)pager_commit        },
................................................................................
  }
  Tcl_LinkVar(interp, "sqlite_io_error_pending",
     (char*)&sqlite_io_error_pending, TCL_LINK_INT);
#ifdef SQLITE_TEST
  Tcl_LinkVar(interp, "journal_format",
     (char*)&journal_format, TCL_LINK_INT);
#endif
  sprintf(zBuf, "%d", SQLITE_PAGE_SIZE);
  Tcl_SetVar(interp, "SQLITE_PAGE_SIZE", zBuf, TCL_GLOBAL_ONLY); 
  sprintf(zBuf, "%d", SQLITE_PAGE_RESERVE);
  Tcl_SetVar(interp, "SQLITE_PAGE_RESERVE", zBuf, TCL_GLOBAL_ONLY); 
  sprintf(zBuf, "%d", SQLITE_USABLE_SIZE);
  Tcl_SetVar(interp, "SQLITE_USABLE_SIZE", zBuf, TCL_GLOBAL_ONLY); 
  return TCL_OK;
}

Changes to test/btree.test.

7
8
9
10
11
12
13
14
15
16
17
18
19
20

21
22
23
24
25
26
27
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is btree database backend
#
# $Id: btree.test,v 1.14 2003/02/12 14:09:45 drh Exp $


set testdir [file dirname $argv0]
source $testdir/tester.tcl

if {[info commands btree_open]!=""} {


# Basic functionality.  Open and close a database.
#
do_test btree-1.1 {
  file delete -force test1.bt
  file delete -force test1.bt-journal
  set rc [catch {btree_open test1.bt} ::b1]







|





|
>







7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is btree database backend
#
# $Id: btree.test,v 1.15 2004/02/10 01:54:28 drh Exp $


set testdir [file dirname $argv0]
source $testdir/tester.tcl

if {[info commands btree_open]!="" && $SQLITE_PAGE_SIZE==1024 
     && $SQLITE_USABLE_SIZE==1024} {

# Basic functionality.  Open and close a database.
#
do_test btree-1.1 {
  file delete -force test1.bt
  file delete -force test1.bt-journal
  set rc [catch {btree_open test1.bt} ::b1]