SQLite

Check-in [f63fb6dd4e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Changes to btree and pager in preparation for moving to run-time page size determination. (CVS 1374)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: f63fb6dd4e8e33d4c1983396b1a0305836ee4df7
User & Date: drh 2004-05-14 01:58:12.000
Context
2004-05-14
11:00
Implement type affinity for table and index records (CVS 1375) (check-in: dbfe6e9316 user: danielk1977 tags: trunk)
01:58
Changes to btree and pager in preparation for moving to run-time page size determination. (CVS 1374) (check-in: f63fb6dd4e user: drh tags: trunk)
2004-05-13
13:38
Changes to make regression tests in rowid.test pass. (CVS 1373) (check-in: 790226c944 user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to main.mk.
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
  $(TOP)/src/vdbe.h \
  $(TOP)/src/vdbeaux.c \
  $(TOP)/src/vdbeInt.h \
  $(TOP)/src/where.c

# Source code to the test files.
#
TESTSRC = \
   $(TOP)/src/os.c \
   $(TOP)/src/pager.c \
   $(TOP)/src/test1.c \
   $(TOP)/src/test2.c \
   $(TOP)/src/test3.c \
   $(TOP)/src/test5.c \
   $(TOP)/src/md5.c

TESTSRC_ORIG = \
  $(TOP)/src/btree.c \
  $(TOP)/src/func.c \
  $(TOP)/src/os.c \
  $(TOP)/src/pager.c \
  $(TOP)/src/test1.c \
  $(TOP)/src/test2.c \
  $(TOP)/src/test3.c \







|








|







105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
  $(TOP)/src/vdbe.h \
  $(TOP)/src/vdbeaux.c \
  $(TOP)/src/vdbeInt.h \
  $(TOP)/src/where.c

# Source code to the test files.
#
TESTSRC_SUBSET = \
   $(TOP)/src/os.c \
   $(TOP)/src/pager.c \
   $(TOP)/src/test1.c \
   $(TOP)/src/test2.c \
   $(TOP)/src/test3.c \
   $(TOP)/src/test5.c \
   $(TOP)/src/md5.c

TESTSRC = \
  $(TOP)/src/btree.c \
  $(TOP)/src/func.c \
  $(TOP)/src/os.c \
  $(TOP)/src/pager.c \
  $(TOP)/src/test1.c \
  $(TOP)/src/test2.c \
  $(TOP)/src/test3.c \
Changes to src/btree.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.133 2004/05/13 13:38:52 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.











|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.134 2004/05/14 01:58:13 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
263
264
265
266
267
268
269

270
271
272
273
274
275
276
277
struct Btree {
  Pager *pPager;        /* The page cache */
  BtCursor *pCursor;    /* A list of all open cursors */
  MemPage *pPage1;      /* First page of the database */
  u8 inTrans;           /* True if a transaction is in progress */
  u8 inStmt;            /* True if there is a checkpoint on the transaction */
  u8 readOnly;          /* True if the underlying file is readonly */

  int pageSize;         /* Number of usable bytes on each page */
  int maxLocal;         /* Maximum local payload in non-LEAFDATA tables */
  int minLocal;         /* Minimum local payload in non-LEAFDATA tables */
  int maxLeaf;          /* Maximum local payload in a LEAFDATA table */
  int minLeaf;          /* Minimum local payload in a LEAFDATA table */
  u8 maxEmbedFrac;      /* Maximum payload as % of total page size */
  u8 minEmbedFrac;      /* Minimum payload as % of total page size */
  u8 minLeafFrac;       /* Minimum leaf payload as % of total page size */







>
|







263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
struct Btree {
  Pager *pPager;        /* The page cache */
  BtCursor *pCursor;    /* A list of all open cursors */
  MemPage *pPage1;      /* First page of the database */
  u8 inTrans;           /* True if a transaction is in progress */
  u8 inStmt;            /* True if there is a checkpoint on the transaction */
  u8 readOnly;          /* True if the underlying file is readonly */
  int pageSize;         /* Total number of bytes on a page */
  int usableSize;       /* Number of usable bytes on each page */
  int maxLocal;         /* Maximum local payload in non-LEAFDATA tables */
  int minLocal;         /* Minimum local payload in non-LEAFDATA tables */
  int maxLeaf;          /* Maximum local payload in a LEAFDATA table */
  int minLeaf;          /* Minimum local payload in a LEAFDATA table */
  u8 maxEmbedFrac;      /* Maximum payload as % of total page size */
  u8 minEmbedFrac;      /* Minimum payload as % of total page size */
  u8 minLeafFrac;       /* Minimum leaf payload as % of total page size */
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
static void put4byte(unsigned char *p, u32 v){
  p[0] = v>>24;
  p[1] = v>>16;
  p[2] = v>>8;
  p[3] = v;
}

#if 0 /* NOT_USED */
static u64 get8byte(unsigned char *p){
  u64 v = get4byte(p);
  return (v<<32) | get4byte(&p[4]);
}
static void put8byte(unsigned char *p, u64 v){
  put4byte(&p[4], v>>32);
  put4byte(p, v);
}
#endif

/*
** Read a variable-length integer.  Store the result in *pResult.
** Return the number of bytes in the integer.
*/
static unsigned int getVarint(unsigned char *p, u64 *pResult){
  u64 x = 0;
  int n = 0;







<
<
<
<
<
<
<
<
<
<
<







329
330
331
332
333
334
335











336
337
338
339
340
341
342
static void put4byte(unsigned char *p, u32 v){
  p[0] = v>>24;
  p[1] = v>>16;
  p[2] = v>>8;
  p[3] = v;
}












/*
** Read a variable-length integer.  Store the result in *pResult.
** Return the number of bytes in the integer.
*/
static unsigned int getVarint(unsigned char *p, u64 *pResult){
  u64 x = 0;
  int n = 0;
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
  nPayload = pInfo->nData;
  if( !pPage->intKey ){
    nPayload += pInfo->nKey;
  }
  pBt = pPage->pBt;
  if( pPage->leafData ){
    minLocal = pBt->minLeaf;
    maxLocal = pBt->pageSize - 23;
  }else{
    minLocal = pBt->minLocal;
    maxLocal = pBt->maxLocal;
  }
  if( nPayload<=maxLocal ){
    pInfo->nLocal = nPayload;
    pInfo->iOverflow = 0;
    pInfo->nSize = nPayload + n;
  }else{
    int surplus = minLocal + (nPayload - minLocal)%(pBt->pageSize - 4);
    if( surplus <= maxLocal ){
      pInfo->nLocal = surplus;
    }else{
      pInfo->nLocal = minLocal;
    }
    pInfo->iOverflow = pInfo->nLocal + n;
    pInfo->nSize = pInfo->iOverflow + 4;







|









|







406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
  nPayload = pInfo->nData;
  if( !pPage->intKey ){
    nPayload += pInfo->nKey;
  }
  pBt = pPage->pBt;
  if( pPage->leafData ){
    minLocal = pBt->minLeaf;
    maxLocal = pBt->usableSize - 23;
  }else{
    minLocal = pBt->minLocal;
    maxLocal = pBt->maxLocal;
  }
  if( nPayload<=maxLocal ){
    pInfo->nLocal = nPayload;
    pInfo->iOverflow = 0;
    pInfo->nSize = nPayload + n;
  }else{
    int surplus = minLocal + (nPayload - minLocal)%(pBt->usableSize - 4);
    if( surplus <= maxLocal ){
      pInfo->nLocal = surplus;
    }else{
      pInfo->nLocal = minLocal;
    }
    pInfo->iOverflow = pInfo->nLocal + n;
    pInfo->nSize = pInfo->iOverflow + 4;
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
** not right.
**
** This routine is used for internal error checking only.  It is omitted
** from most builds.
*/
#if defined(BTREE_DEBUG) && !defined(NDEBUG) && 0
static void _pageIntegrity(MemPage *pPage){
  int pageSize;
  u8 *data;
  int i, idx, c, pc, hdr, nFree;
  u8 used[MX_PAGE_SIZE];

  pageSize = pPage->pBt->pageSize;
  assert( pPage->aData==&((unsigned char*)pPage)[-pageSize] );
  hdr = pPage->hdrOffset;
  assert( hdr==(pPage->pgno==1 ? 100 : 0) );
  assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
  c = pPage->aData[hdr];
  if( pPage->isInit ){
    assert( pPage->leaf == ((c & PTF_LEAF)!=0) );
    assert( pPage->zeroData == ((c & PTF_ZERODATA)!=0) );
    assert( pPage->leafData == ((c & PTF_LEAFDATA)!=0) );
    assert( pPage->intKey == ((c & (PTF_INTKEY|PTF_LEAFDATA))!=0) );
    assert( pPage->hasData ==
             !(pPage->zeroData || (!pPage->leaf && pPage->leafData)) );
  }
  data = pPage->aData;
  memset(used, 0, pageSize);
  for(i=0; i<hdr+10-pPage->leaf*4; i++) used[i] = 1;
  nFree = 0;
  pc = get2byte(&data[hdr+1]);
  while( pc ){
    int size;
    assert( pc>0 && pc<pageSize-4 );
    size = get2byte(&data[pc+2]);
    assert( pc+size<=pageSize );
    nFree += size;
    for(i=pc; i<pc+size; i++){
      assert( used[i]==0 );
      used[i] = 1;
    }
    pc = get2byte(&data[pc]);
  }
  assert( pPage->isInit==0 || pPage->nFree==nFree+data[hdr+5] );
  idx = 0;
  pc = get2byte(&data[hdr+3]);
  while( pc ){
    int size;
    assert( pPage->isInit==0 || idx<pPage->nCell );
    assert( pc>0 && pc<pageSize-4 );
    assert( pPage->isInit==0 || pPage->aCell[idx]==&data[pc] );
    size = cellSize(pPage, &data[pc]);
    assert( pc+size<=pageSize );
    for(i=pc; i<pc+size; i++){
      assert( used[i]==0 );
      used[i] = 1;
    }
    pc = get2byte(&data[pc]);
    idx++;
  }
  assert( idx==pPage->nCell );
  nFree = 0;
  for(i=0; i<pageSize; i++){
    assert( used[i]<=1 );
    if( used[i]==0 ) nFree++;
  }
  assert( nFree==data[hdr+5] );
}
#define pageIntegrity(X) _pageIntegrity(X)
#else







|




|
|













|





|

|













|


|









|







450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
** not right.
**
** This routine is used for internal error checking only.  It is omitted
** from most builds.
*/
#if defined(BTREE_DEBUG) && !defined(NDEBUG) && 0
static void _pageIntegrity(MemPage *pPage){
  int usableSize;
  u8 *data;
  int i, idx, c, pc, hdr, nFree;
  u8 used[MX_PAGE_SIZE];

  usableSize = pPage->pBt->usableSize;
  assert( pPage->aData==&((unsigned char*)pPage)[-pPage->pBt->pageSize] );
  hdr = pPage->hdrOffset;
  assert( hdr==(pPage->pgno==1 ? 100 : 0) );
  assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
  c = pPage->aData[hdr];
  if( pPage->isInit ){
    assert( pPage->leaf == ((c & PTF_LEAF)!=0) );
    assert( pPage->zeroData == ((c & PTF_ZERODATA)!=0) );
    assert( pPage->leafData == ((c & PTF_LEAFDATA)!=0) );
    assert( pPage->intKey == ((c & (PTF_INTKEY|PTF_LEAFDATA))!=0) );
    assert( pPage->hasData ==
             !(pPage->zeroData || (!pPage->leaf && pPage->leafData)) );
  }
  data = pPage->aData;
  memset(used, 0, usableSize);
  for(i=0; i<hdr+10-pPage->leaf*4; i++) used[i] = 1;
  nFree = 0;
  pc = get2byte(&data[hdr+1]);
  while( pc ){
    int size;
    assert( pc>0 && pc<usableSize-4 );
    size = get2byte(&data[pc+2]);
    assert( pc+size<=usableSize );
    nFree += size;
    for(i=pc; i<pc+size; i++){
      assert( used[i]==0 );
      used[i] = 1;
    }
    pc = get2byte(&data[pc]);
  }
  assert( pPage->isInit==0 || pPage->nFree==nFree+data[hdr+5] );
  idx = 0;
  pc = get2byte(&data[hdr+3]);
  while( pc ){
    int size;
    assert( pPage->isInit==0 || idx<pPage->nCell );
    assert( pc>0 && pc<usableSize-4 );
    assert( pPage->isInit==0 || pPage->aCell[idx]==&data[pc] );
    size = cellSize(pPage, &data[pc]);
    assert( pc+size<=usableSize );
    for(i=pc; i<pc+size; i++){
      assert( used[i]==0 );
      used[i] = 1;
    }
    pc = get2byte(&data[pc]);
    idx++;
  }
  assert( idx==pPage->nCell );
  nFree = 0;
  for(i=0; i<usableSize; i++){
    assert( used[i]<=1 );
    if( used[i]==0 ) nFree++;
  }
  assert( nFree==data[hdr+5] );
}
#define pageIntegrity(X) _pageIntegrity(X)
#else
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
  int start, hdr, size;
  int leftover;
  unsigned char *oldPage;
  unsigned char newPage[MX_PAGE_SIZE];

  assert( sqlite3pager_iswriteable(pPage->aData) );
  assert( pPage->pBt!=0 );
  assert( pPage->pBt->pageSize <= MX_PAGE_SIZE );
  assert( !pPage->needRelink );
  assert( !pPage->isOverfull );
  oldPage = pPage->aData;
  hdr = pPage->hdrOffset;
  addr = 3+hdr;
  n = 6+hdr;
  if( !pPage->leaf ){
    n += 4;
  }
  memcpy(&newPage[hdr], &oldPage[hdr], n-hdr);
  start = n;
  pc = get2byte(&oldPage[addr]);
  i = 0;
  while( pc>0 ){
    assert( n<pPage->pBt->pageSize );
    size = cellSize(pPage, &oldPage[pc]);
    memcpy(&newPage[n], &oldPage[pc], size);
    put2byte(&newPage[addr],n);
    assert( pPage->aCell[i]==&oldPage[pc] );
    pPage->aCell[i++] = &oldPage[n];
    addr = n;
    n += size;
    pc = get2byte(&oldPage[pc]);
  }
  assert( i==pPage->nCell );
  leftover = pPage->pBt->pageSize - n;
  assert( leftover>=0 );
  assert( pPage->nFree==leftover );
  if( leftover<4 ){
    oldPage[hdr+5] = leftover;
    leftover = 0;
    n = pPage->pBt->pageSize;
  }
  memcpy(&oldPage[hdr], &newPage[hdr], n-hdr);
  if( leftover==0 ){
    put2byte(&oldPage[hdr+1], 0);
  }else if( leftover>=4 ){
    put2byte(&oldPage[hdr+1], n);
    put2byte(&oldPage[n], 0);







|














|










|





|







530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
  int start, hdr, size;
  int leftover;
  unsigned char *oldPage;
  unsigned char newPage[MX_PAGE_SIZE];

  assert( sqlite3pager_iswriteable(pPage->aData) );
  assert( pPage->pBt!=0 );
  assert( pPage->pBt->usableSize <= MX_PAGE_SIZE );
  assert( !pPage->needRelink );
  assert( !pPage->isOverfull );
  oldPage = pPage->aData;
  hdr = pPage->hdrOffset;
  addr = 3+hdr;
  n = 6+hdr;
  if( !pPage->leaf ){
    n += 4;
  }
  memcpy(&newPage[hdr], &oldPage[hdr], n-hdr);
  start = n;
  pc = get2byte(&oldPage[addr]);
  i = 0;
  while( pc>0 ){
    assert( n<pPage->pBt->usableSize );
    size = cellSize(pPage, &oldPage[pc]);
    memcpy(&newPage[n], &oldPage[pc], size);
    put2byte(&newPage[addr],n);
    assert( pPage->aCell[i]==&oldPage[pc] );
    pPage->aCell[i++] = &oldPage[n];
    addr = n;
    n += size;
    pc = get2byte(&oldPage[pc]);
  }
  assert( i==pPage->nCell );
  leftover = pPage->pBt->usableSize - n;
  assert( leftover>=0 );
  assert( pPage->nFree==leftover );
  if( leftover<4 ){
    oldPage[hdr+5] = leftover;
    leftover = 0;
    n = pPage->pBt->usableSize;
  }
  memcpy(&oldPage[hdr], &newPage[hdr], n-hdr);
  if( leftover==0 ){
    put2byte(&oldPage[hdr+1], 0);
  }else if( leftover>=4 ){
    put2byte(&oldPage[hdr+1], n);
    put2byte(&oldPage[n], 0);
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
  nFrag = data[hdr+5];
  if( nFrag>=60 || nFrag>pPage->nFree-nByte ){
    defragmentPage(pPage);
  }
  addr = hdr+1;
  pc = get2byte(&data[addr]);
  assert( addr<pc );
  assert( pc<=pPage->pBt->pageSize-4 );
  while( (size = get2byte(&data[pc+2]))<nByte ){
    addr = pc;
    pc = get2byte(&data[addr]);
    assert( pc<=pPage->pBt->pageSize-4 );
    assert( pc>=addr+size+4 || pc==0 );
    if( pc==0 ){
      assert( (cnt++)==0 );
      defragmentPage(pPage);
      assert( data[hdr+5]==0 );
      addr = pPage->hdrOffset+1;
      pc = get2byte(&data[addr]);
    }
  }
  assert( pc>0 && size>=nByte );
  assert( pc+size<=pPage->pBt->pageSize );
  if( size>nByte+4 ){
    int newStart = pc+nByte;
    put2byte(&data[addr], newStart);
    put2byte(&data[newStart], get2byte(&data[pc]));
    put2byte(&data[newStart+2], size-nByte);
  }else{
    put2byte(&data[addr], get2byte(&data[pc]));







|



|










|







614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
  nFrag = data[hdr+5];
  if( nFrag>=60 || nFrag>pPage->nFree-nByte ){
    defragmentPage(pPage);
  }
  addr = hdr+1;
  pc = get2byte(&data[addr]);
  assert( addr<pc );
  assert( pc<=pPage->pBt->usableSize-4 );
  while( (size = get2byte(&data[pc+2]))<nByte ){
    addr = pc;
    pc = get2byte(&data[addr]);
    assert( pc<=pPage->pBt->usableSize-4 );
    assert( pc>=addr+size+4 || pc==0 );
    if( pc==0 ){
      assert( (cnt++)==0 );
      defragmentPage(pPage);
      assert( data[hdr+5]==0 );
      addr = pPage->hdrOffset+1;
      pc = get2byte(&data[addr]);
    }
  }
  assert( pc>0 && size>=nByte );
  assert( pc+size<=pPage->pBt->usableSize );
  if( size>nByte+4 ){
    int newStart = pc+nByte;
    put2byte(&data[addr], newStart);
    put2byte(&data[newStart], get2byte(&data[pc]));
    put2byte(&data[newStart+2], size-nByte);
  }else{
    put2byte(&data[addr], get2byte(&data[pc]));
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
  int tsize = 0;          /* Total size of all freeblocks */
#endif
  unsigned char *data = pPage->aData;

  assert( pPage->pBt!=0 );
  assert( sqlite3pager_iswriteable(data) );
  assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) );
  assert( end<=pPage->pBt->pageSize );
  if( size<4 ) size = 4;

  /* Add the space back into the linked list of freeblocks */
  addr = pPage->hdrOffset + 1;
  while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){
    assert( pbegin<=pPage->pBt->pageSize-4 );
    assert( pbegin>addr );
    addr = pbegin;
  }
  assert( pbegin<=pPage->pBt->pageSize-4 );
  assert( pbegin>addr || pbegin==0 );
  put2byte(&data[addr], start);
  put2byte(&data[start], pbegin);
  put2byte(&data[start+2], size);
  pPage->nFree += size;

  /* Coalesce adjacent free blocks */
  addr = pPage->hdrOffset + 1;
  while( (pbegin = get2byte(&data[addr]))>0 ){
    int pnext, psize;
    assert( pbegin>addr );
    assert( pbegin<pPage->pBt->pageSize-4 );
    pnext = get2byte(&data[pbegin]);
    psize = get2byte(&data[pbegin+2]);
    if( pbegin + psize + 3 >= pnext && pnext>0 ){
      int frag = pnext - (pbegin+psize);
      assert( frag<=data[pPage->hdrOffset+5] );
      data[pPage->hdrOffset+5] -= frag;
      put2byte(&data[pbegin], get2byte(&data[pnext]));







|





|



|











|







663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
  int tsize = 0;          /* Total size of all freeblocks */
#endif
  unsigned char *data = pPage->aData;

  assert( pPage->pBt!=0 );
  assert( sqlite3pager_iswriteable(data) );
  assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) );
  assert( end<=pPage->pBt->usableSize );
  if( size<4 ) size = 4;

  /* Add the space back into the linked list of freeblocks */
  addr = pPage->hdrOffset + 1;
  while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){
    assert( pbegin<=pPage->pBt->usableSize-4 );
    assert( pbegin>addr );
    addr = pbegin;
  }
  assert( pbegin<=pPage->pBt->usableSize-4 );
  assert( pbegin>addr || pbegin==0 );
  put2byte(&data[addr], start);
  put2byte(&data[start], pbegin);
  put2byte(&data[start+2], size);
  pPage->nFree += size;

  /* Coalesce adjacent free blocks */
  addr = pPage->hdrOffset + 1;
  while( (pbegin = get2byte(&data[addr]))>0 ){
    int pnext, psize;
    assert( pbegin>addr );
    assert( pbegin<pPage->pBt->usableSize-4 );
    pnext = get2byte(&data[pbegin]);
    psize = get2byte(&data[pbegin+2]);
    if( pbegin + psize + 3 >= pnext && pnext>0 ){
      int frag = pnext - (pbegin+psize);
      assert( frag<=data[pPage->hdrOffset+5] );
      data[pPage->hdrOffset+5] -= frag;
      put2byte(&data[pbegin], get2byte(&data[pnext]));
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
*/
static int initPage(
  MemPage *pPage,        /* The page to be initialized */
  MemPage *pParent       /* The parent.  Might be NULL */
){
  int c, pc, i, hdr;
  unsigned char *data;
  int pageSize;
  int sumCell = 0;       /* Total size of all cells */

  assert( pPage->pBt!=0 );
  assert( pParent==0 || pParent->pBt==pPage->pBt );
  assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
  assert( pPage->aData == &((unsigned char*)pPage)[-pPage->pBt->pageSize] );
  assert( pPage->pParent==0 || pPage->pParent==pParent );







|







736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
*/
static int initPage(
  MemPage *pPage,        /* The page to be initialized */
  MemPage *pParent       /* The parent.  Might be NULL */
){
  int c, pc, i, hdr;
  unsigned char *data;
  int usableSize;
  int sumCell = 0;       /* Total size of all cells */

  assert( pPage->pBt!=0 );
  assert( pParent==0 || pParent->pBt==pPage->pBt );
  assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
  assert( pPage->aData == &((unsigned char*)pPage)[-pPage->pBt->pageSize] );
  assert( pPage->pParent==0 || pPage->pParent==pParent );
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
  pPage->zeroData = (c & PTF_ZERODATA)!=0;
  pPage->leafData = (c & PTF_LEAFDATA)!=0;
  pPage->leaf = (c & PTF_LEAF)!=0;
  pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
  pPage->isOverfull = 0;
  pPage->needRelink = 0;
  pPage->idxShift = 0;
  pageSize = pPage->pBt->pageSize;

  /* Initialize the cell count and cell pointers */
  pc = get2byte(&data[hdr+3]);
  while( pc>0 ){
    if( pc>=pageSize ) return SQLITE_CORRUPT;
    if( pPage->nCell>pageSize ) return SQLITE_CORRUPT;
    pPage->nCell++;
    pc = get2byte(&data[pc]);
  }
  if( resizeCellArray(pPage, pPage->nCell) ){
    return SQLITE_NOMEM;
  }
  pc = get2byte(&data[hdr+3]);
  for(i=0; pc>0; i++){
    pPage->aCell[i] = &data[pc];
    sumCell += cellSize(pPage, &data[pc]);
    pc = get2byte(&data[pc]);
  }

  /* Compute the total free space on the page */
  pPage->nFree = data[hdr+5];
  pc = get2byte(&data[hdr+1]);
  while( pc>0 ){
    int next, size;
    if( pc>=pageSize ) return SQLITE_CORRUPT;
    next = get2byte(&data[pc]);
    size = get2byte(&data[pc+2]);
    if( next>0 && next<=pc+size+3 ) return SQLITE_CORRUPT;
    pPage->nFree += size;
    pc = next;
  }
  if( pPage->nFree>=pageSize ) return SQLITE_CORRUPT;

  /* Sanity check:  Cells and freespace and header must sum to the size
  ** a page. */
  if( sumCell+pPage->nFree+hdr+10-pPage->leaf*4 != pageSize ){
    return SQLITE_CORRUPT;
  }

  pPage->isInit = 1;
  pageIntegrity(pPage);
  return SQLITE_OK;
}







|




|
|


















|






|



|







762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
  pPage->zeroData = (c & PTF_ZERODATA)!=0;
  pPage->leafData = (c & PTF_LEAFDATA)!=0;
  pPage->leaf = (c & PTF_LEAF)!=0;
  pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
  pPage->isOverfull = 0;
  pPage->needRelink = 0;
  pPage->idxShift = 0;
  usableSize = pPage->pBt->usableSize;

  /* Initialize the cell count and cell pointers */
  pc = get2byte(&data[hdr+3]);
  while( pc>0 ){
    if( pc>=usableSize ) return SQLITE_CORRUPT;
    if( pPage->nCell>usableSize ) return SQLITE_CORRUPT;
    pPage->nCell++;
    pc = get2byte(&data[pc]);
  }
  if( resizeCellArray(pPage, pPage->nCell) ){
    return SQLITE_NOMEM;
  }
  pc = get2byte(&data[hdr+3]);
  for(i=0; pc>0; i++){
    pPage->aCell[i] = &data[pc];
    sumCell += cellSize(pPage, &data[pc]);
    pc = get2byte(&data[pc]);
  }

  /* Compute the total free space on the page */
  pPage->nFree = data[hdr+5];
  pc = get2byte(&data[hdr+1]);
  while( pc>0 ){
    int next, size;
    if( pc>=usableSize ) return SQLITE_CORRUPT;
    next = get2byte(&data[pc]);
    size = get2byte(&data[pc+2]);
    if( next>0 && next<=pc+size+3 ) return SQLITE_CORRUPT;
    pPage->nFree += size;
    pc = next;
  }
  if( pPage->nFree>=usableSize ) return SQLITE_CORRUPT;

  /* Sanity check:  Cells and freespace and header must sum to the size
  ** a page. */
  if( sumCell+pPage->nFree+hdr+10-pPage->leaf*4 != usableSize ){
    return SQLITE_CORRUPT;
  }

  pPage->isInit = 1;
  pageIntegrity(pPage);
  return SQLITE_OK;
}
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
  Btree *pBt = pPage->pBt;
  int hdr = pPage->hdrOffset;
  int first;

  assert( sqlite3pager_pagenumber(data)==pPage->pgno );
  assert( &data[pBt->pageSize] == (unsigned char*)pPage );
  assert( sqlite3pager_iswriteable(data) );
  memset(&data[hdr], 0, pBt->pageSize - hdr);
  data[hdr] = flags;
  first = hdr + 6 + 4*((flags&PTF_LEAF)==0);
  put2byte(&data[hdr+1], first);
  put2byte(&data[first+2], pBt->pageSize - first);
  sqliteFree(pPage->aCell);
  pPage->aCell = 0;
  pPage->nCell = 0;
  pPage->nCellAlloc = 0;
  pPage->nFree = pBt->pageSize - first;
  pPage->intKey = (flags & (PTF_INTKEY|PTF_LEAFDATA))!=0;
  pPage->zeroData = (flags & PTF_ZERODATA)!=0;
  pPage->leafData = (flags & PTF_LEAFDATA)!=0;
  pPage->leaf = (flags & PTF_LEAF)!=0;
  pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
  pPage->hdrOffset = hdr;
  pPage->isOverfull = 0;







|



|




|







820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
  Btree *pBt = pPage->pBt;
  int hdr = pPage->hdrOffset;
  int first;

  assert( sqlite3pager_pagenumber(data)==pPage->pgno );
  assert( &data[pBt->pageSize] == (unsigned char*)pPage );
  assert( sqlite3pager_iswriteable(data) );
  memset(&data[hdr], 0, pBt->usableSize - hdr);
  data[hdr] = flags;
  first = hdr + 6 + 4*((flags&PTF_LEAF)==0);
  put2byte(&data[hdr+1], first);
  put2byte(&data[first+2], pBt->usableSize - first);
  sqliteFree(pPage->aCell);
  pPage->aCell = 0;
  pPage->nCell = 0;
  pPage->nCellAlloc = 0;
  pPage->nFree = pBt->usableSize - first;
  pPage->intKey = (flags & (PTF_INTKEY|PTF_LEAFDATA))!=0;
  pPage->zeroData = (flags & PTF_ZERODATA)!=0;
  pPage->leafData = (flags & PTF_LEAFDATA)!=0;
  pPage->leaf = (flags & PTF_LEAF)!=0;
  pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
  pPage->hdrOffset = hdr;
  pPage->isOverfull = 0;
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
}

/*
** This routine is called when the reference count for a page
** reaches zero.  We need to unref the pParent pointer when that
** happens.
*/
static void pageDestructor(void *pData){
  MemPage *pPage = (MemPage*)&((char*)pData)[SQLITE_PAGE_SIZE];
  assert( pPage->isInit==0 || pPage->needRelink==0 );
  if( pPage->pParent ){
    MemPage *pParent = pPage->pParent;
    pPage->pParent = 0;
    releasePage(pParent);
  }
  sqliteFree(pPage->aCell);







|
|







899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
}

/*
** This routine is called when the reference count for a page
** reaches zero.  We need to unref the pParent pointer when that
** happens.
*/
static void pageDestructor(void *pData, int pageSize){
  MemPage *pPage = (MemPage*)&((char*)pData)[pageSize];
  assert( pPage->isInit==0 || pPage->needRelink==0 );
  if( pPage->pParent ){
    MemPage *pParent = pPage->pParent;
    pPage->pParent = 0;
    releasePage(pParent);
  }
  sqliteFree(pPage->aCell);
974
975
976
977
978
979
980

981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
    return rc;
  }
  sqlite3pager_set_destructor(pBt->pPager, pageDestructor);
  pBt->pCursor = 0;
  pBt->pPage1 = 0;
  pBt->readOnly = sqlite3pager_isreadonly(pBt->pPager);
  pBt->pageSize = SQLITE_PAGE_SIZE;  /* FIX ME - read from header */

  pBt->maxEmbedFrac = 64;            /* FIX ME - read from header */
  pBt->minEmbedFrac = 32;            /* FIX ME - read from header */
  pBt->minLeafFrac = 32;             /* FIX ME - read from header */

  /* maxLocal is the maximum amount of payload to store locally for
  ** a cell.  Make sure it is small enough so that at least minFanout
  ** cells can will fit on one page.  We assume a 10-byte page header.
  ** Besides the payload, the cell must store:
  **     2-byte pointer to next cell
  **     4-byte child pointer
  **     9-byte nKey value
  **     4-byte nData value
  **     4-byte overflow page pointer
  ** So a cell consists of a header which is as much as 19 bytes long,
  ** 0 to N bytes of payload, and an optional 4 byte overflow page pointer.
  */
  assert(pBt->maxEmbedFrac>0 && 255/pBt->maxEmbedFrac>=3 );
  pBt->maxLocal = (pBt->pageSize-10)*pBt->maxEmbedFrac/255 - 23;
  pBt->minLocal = (pBt->pageSize-10)*pBt->minEmbedFrac/255 - 23;
  pBt->maxLeaf = pBt->pageSize - 33;
  pBt->minLeaf = (pBt->pageSize-10)*pBt->minLeafFrac/255 - 23;

  assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE );
  *ppBtree = pBt;
  return SQLITE_OK;
}

/*
** Close an open database and invalidate all cursors.
*/







>




<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







964
965
966
967
968
969
970
971
972
973
974
975



















976
977
978
979
980
981
982
    return rc;
  }
  sqlite3pager_set_destructor(pBt->pPager, pageDestructor);
  pBt->pCursor = 0;
  pBt->pPage1 = 0;
  pBt->readOnly = sqlite3pager_isreadonly(pBt->pPager);
  pBt->pageSize = SQLITE_PAGE_SIZE;  /* FIX ME - read from header */
  pBt->usableSize = pBt->pageSize;
  pBt->maxEmbedFrac = 64;            /* FIX ME - read from header */
  pBt->minEmbedFrac = 32;            /* FIX ME - read from header */
  pBt->minLeafFrac = 32;             /* FIX ME - read from header */




















  *ppBtree = pBt;
  return SQLITE_OK;
}

/*
** Close an open database and invalidate all cursors.
*/
1067
1068
1069
1070
1071
1072
1073

1074

1075





1076


1077
1078




1079


















1080

1081
1082
1083
1084
1085
1086
1087
1088
1089
  rc = getPage(pBt, 1, &pPage1);
  if( rc!=SQLITE_OK ) return rc;
  

  /* Do some checking to help insure the file we opened really is
  ** a valid database file. 
  */

  if( sqlite3pager_pagecount(pBt->pPager)>0 ){

    if( memcmp(pPage1->aData, zMagicHeader, 16)!=0 ){





      rc = SQLITE_NOTADB;


      goto page1_init_failed;
    }




    /*** TBD:  Other header checks such as page size ****/


















  }

  pBt->pPage1 = pPage1;
  return rc;

page1_init_failed:
  releasePage(pPage1);
  pBt->pPage1 = 0;
  return rc;
}








>

>
|
>
>
>
>
>
|
>
>


>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>

|







1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
  rc = getPage(pBt, 1, &pPage1);
  if( rc!=SQLITE_OK ) return rc;
  

  /* Do some checking to help insure the file we opened really is
  ** a valid database file. 
  */
  rc = SQLITE_NOTADB;
  if( sqlite3pager_pagecount(pBt->pPager)>0 ){
    u8 *page1 = pPage1->aData;
    if( memcmp(page1, zMagicHeader, 16)!=0 ){
      goto page1_init_failed;
    }
    if( page1[18]>1 || page1[19]>1 ){
      goto page1_init_failed;
    }
    pBt->pageSize = get2byte(&page1[16]);
    pBt->usableSize = pBt->pageSize - page1[20];
    if( pBt->usableSize<500 ){
      goto page1_init_failed;
    }
    pBt->maxEmbedFrac = page1[21];
    pBt->minEmbedFrac = page1[22];
    pBt->minLeafFrac = page1[23];
  }

  /* maxLocal is the maximum amount of payload to store locally for
  ** a cell.  Make sure it is small enough so that at least minFanout
  ** cells can will fit on one page.  We assume a 10-byte page header.
  ** Besides the payload, the cell must store:
  **     2-byte pointer to next cell
  **     4-byte child pointer
  **     9-byte nKey value
  **     4-byte nData value
  **     4-byte overflow page pointer
  ** So a cell consists of a header which is as much as 19 bytes long,
  ** 0 to N bytes of payload, and an optional 4 byte overflow page pointer.
  */
  pBt->maxLocal = (pBt->usableSize-10)*pBt->maxEmbedFrac/255 - 23;
  pBt->minLocal = (pBt->usableSize-10)*pBt->minEmbedFrac/255 - 23;
  pBt->maxLeaf = pBt->usableSize - 33;
  pBt->minLeaf = (pBt->usableSize-10)*pBt->minLeafFrac/255 - 23;
  if( pBt->minLocal>pBt->maxLocal || pBt->maxLocal<0 ){
    goto page1_init_failed;
  }
  assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE );
  pBt->pPage1 = pPage1;
  return SQLITE_OK;

page1_init_failed:
  releasePage(pPage1);
  pBt->pPage1 = 0;
  return rc;
}

1118
1119
1120
1121
1122
1123
1124
1125
1126
1127


1128


1129
1130
1131
1132
1133
1134
1135
  pP1 = pBt->pPage1;
  assert( pP1!=0 );
  data = pP1->aData;
  rc = sqlite3pager_write(data);
  if( rc ) return rc;
  memcpy(data, zMagicHeader, sizeof(zMagicHeader));
  assert( sizeof(zMagicHeader)==16 );
  put2byte(&data[16], SQLITE_PAGE_SIZE);
  data[18] = 1;
  data[19] = 1;


  put2byte(&data[22], (SQLITE_PAGE_SIZE-10)/4-12);


  zeroPage(pP1, PTF_INTKEY|PTF_LEAF );
  return SQLITE_OK;
}

/*
** Attempt to start a new transaction.
**







|


>
>
|
>
>







1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
  pP1 = pBt->pPage1;
  assert( pP1!=0 );
  data = pP1->aData;
  rc = sqlite3pager_write(data);
  if( rc ) return rc;
  memcpy(data, zMagicHeader, sizeof(zMagicHeader));
  assert( sizeof(zMagicHeader)==16 );
  put2byte(&data[16], pBt->pageSize);
  data[18] = 1;
  data[19] = 1;
  data[20] = pBt->pageSize - pBt->usableSize;
  data[21] = pBt->maxEmbedFrac;
  data[22] = pBt->minEmbedFrac;
  data[23] = pBt->minLeafFrac;
  memset(&data[24], 0, 100-24);
  zeroPage(pP1, PTF_INTKEY|PTF_LEAF );
  return SQLITE_OK;
}

/*
** Attempt to start a new transaction.
**
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
    amt -= a;
  }else{
    offset -= info.nLocal;
  }
  if( amt>0 ){
    nextPage = get4byte(&aPayload[info.nLocal]);
  }
  ovflSize = pBt->pageSize - 4;
  while( amt>0 && nextPage ){
    rc = sqlite3pager_get(pBt->pPager, nextPage, (void**)&aPayload);
    if( rc!=0 ){
      return rc;
    }
    nextPage = get4byte(aPayload);
    if( offset<ovflSize ){







|







1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
    amt -= a;
  }else{
    offset -= info.nLocal;
  }
  if( amt>0 ){
    nextPage = get4byte(&aPayload[info.nLocal]);
  }
  ovflSize = pBt->usableSize - 4;
  while( amt>0 && nextPage ){
    rc = sqlite3pager_get(pBt->pPager, nextPage, (void**)&aPayload);
    if( rc!=0 ){
      return rc;
    }
    nextPage = get4byte(aPayload);
    if( offset<ovflSize ){
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
  }else{
    /* Other free pages already exist.  Retrive the first trunk page
    ** of the freelist and find out how many leaves it has. */
    MemPage *pTrunk;
    rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk);
    if( rc ) return rc;
    k = get4byte(&pTrunk->aData[4]);
    if( k==pBt->pageSize/4 - 8 ){
      /* The trunk is full.  Turn the page being freed into a new
      ** trunk page with no leaves. */
      rc = sqlite3pager_write(pPage->aData);
      if( rc ) return rc;
      put4byte(pPage->aData, pTrunk->pgno);
      put4byte(&pPage->aData[4], 0);
      put4byte(&pPage1->aData[32], pPage->pgno);







|







2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
  }else{
    /* Other free pages already exist.  Retrive the first trunk page
    ** of the freelist and find out how many leaves it has. */
    MemPage *pTrunk;
    rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk);
    if( rc ) return rc;
    k = get4byte(&pTrunk->aData[4]);
    if( k==pBt->usableSize/4 - 8 ){
      /* The trunk is full.  Turn the page being freed into a new
      ** trunk page with no leaves. */
      rc = sqlite3pager_write(pPage->aData);
      if( rc ) return rc;
      put4byte(pPage->aData, pTrunk->pgno);
      put4byte(&pPage->aData[4], 0);
      put4byte(&pPage1->aData[32], pPage->pgno);
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
      }
      put4byte(pPrior, pgnoOvfl);
      releasePage(pToRelease);
      pToRelease = pOvfl;
      pPrior = pOvfl->aData;
      put4byte(pPrior, 0);
      pPayload = &pOvfl->aData[4];
      spaceLeft = pBt->pageSize - 4;
    }
    n = nPayload;
    if( n>spaceLeft ) n = spaceLeft;
    if( n>nSrc ) n = nSrc;
    memcpy(pPayload, pSrc, n);
    nPayload -= n;
    pPayload += n;







|







2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
      }
      put4byte(pPrior, pgnoOvfl);
      releasePage(pToRelease);
      pToRelease = pOvfl;
      pPrior = pOvfl->aData;
      put4byte(pPrior, 0);
      pPayload = &pOvfl->aData[4];
      spaceLeft = pBt->usableSize - 4;
    }
    n = nPayload;
    if( n>spaceLeft ) n = spaceLeft;
    if( n>nSrc ) n = nSrc;
    memcpy(pPayload, pSrc, n);
    nPayload -= n;
    pPayload += n;
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
  MemPage *pThis;
  unsigned char *aData;

  if( pgno==0 ) return;
  assert( pBt->pPager!=0 );
  aData = sqlite3pager_lookup(pBt->pPager, pgno);
  if( aData ){
    pThis = (MemPage*)&aData[pBt->pageSize];
    if( pThis->isInit ){
      if( pThis->pParent!=pNewParent ){
        if( pThis->pParent ) sqlite3pager_unref(pThis->pParent->aData);
        pThis->pParent = pNewParent;
        if( pNewParent ) sqlite3pager_ref(pNewParent->aData);
      }
      pThis->idxParent = idx;







|







2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
  MemPage *pThis;
  unsigned char *aData;

  if( pgno==0 ) return;
  assert( pBt->pPager!=0 );
  aData = sqlite3pager_lookup(pBt->pPager, pgno);
  if( aData ){
    pThis = (MemPage*)&aData[pBt->usableSize];
    if( pThis->isInit ){
      if( pThis->pParent!=pNewParent ){
        if( pThis->pParent ) sqlite3pager_unref(pThis->pParent->aData);
        pThis->pParent = pNewParent;
        if( pNewParent ) sqlite3pager_ref(pNewParent->aData);
      }
      pThis->idxParent = idx;
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
static void dropCell(MemPage *pPage, int idx, int sz){
  int j, pc;
  u8 *data;
  assert( idx>=0 && idx<pPage->nCell );
  assert( sz==cellSize(pPage, pPage->aCell[idx]) );
  assert( sqlite3pager_iswriteable(pPage->aData) );
  assert( pPage->aCell[idx]>=pPage->aData );
  assert( pPage->aCell[idx]<=&pPage->aData[pPage->pBt->pageSize-sz] );
  data = pPage->aData;
  pc = Addr(pPage->aCell[idx]) - Addr(data);
  assert( pc>pPage->hdrOffset && pc+sz<=pPage->pBt->pageSize );
  freeSpace(pPage, pc, sz);
  for(j=idx; j<pPage->nCell-1; j++){
    pPage->aCell[j] = pPage->aCell[j+1];
  }
  pPage->nCell--;
  if( !pPage->isOverfull && !pPage->needRelink ){
    u8 *pPrev;







|


|







2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
static void dropCell(MemPage *pPage, int idx, int sz){
  int j, pc;
  u8 *data;
  assert( idx>=0 && idx<pPage->nCell );
  assert( sz==cellSize(pPage, pPage->aCell[idx]) );
  assert( sqlite3pager_iswriteable(pPage->aData) );
  assert( pPage->aCell[idx]>=pPage->aData );
  assert( pPage->aCell[idx]<=&pPage->aData[pPage->pBt->usableSize-sz] );
  data = pPage->aData;
  pc = Addr(pPage->aCell[idx]) - Addr(data);
  assert( pc>pPage->hdrOffset && pc+sz<=pPage->pBt->usableSize );
  freeSpace(pPage, pc, sz);
  for(j=idx; j<pPage->nCell-1; j++){
    pPage->aCell[j] = pPage->aCell[j+1];
  }
  pPage->nCell--;
  if( !pPage->isOverfull && !pPage->needRelink ){
    u8 *pPrev;
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
static void relinkCellList(MemPage *pPage){
  int i, idxFrom;
  assert( sqlite3pager_iswriteable(pPage->aData) );
  if( !pPage->needRelink ) return;
  idxFrom = pPage->hdrOffset+3;
  for(i=0; i<pPage->nCell; i++){
    int idx = Addr(pPage->aCell[i]) - Addr(pPage->aData);
    assert( idx>pPage->hdrOffset && idx<pPage->pBt->pageSize );
    put2byte(&pPage->aData[idxFrom], idx);
    idxFrom = idx;
  }
  put2byte(&pPage->aData[idxFrom], 0);
  pPage->needRelink = 0;
}








|







2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
static void relinkCellList(MemPage *pPage){
  int i, idxFrom;
  assert( sqlite3pager_iswriteable(pPage->aData) );
  if( !pPage->needRelink ) return;
  idxFrom = pPage->hdrOffset+3;
  for(i=0; i<pPage->nCell; i++){
    int idx = Addr(pPage->aCell[i]) - Addr(pPage->aData);
    assert( idx>pPage->hdrOffset && idx<pPage->pBt->usableSize );
    put2byte(&pPage->aData[idxFrom], idx);
    idxFrom = idx;
  }
  put2byte(&pPage->aData[idxFrom], 0);
  pPage->needRelink = 0;
}

2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
** not point to pFrom->aData[].  Those are unchanged.
**
** Over this operation completes, the meta data for pFrom is zeroed.
*/
static void movePage(MemPage *pTo, MemPage *pFrom){
  uptr from, to;
  int i;
  int pageSize;
  int ofst;

  assert( pTo->hdrOffset==0 );
  assert( pFrom->isInit );
  ofst = pFrom->hdrOffset;
  pageSize = pFrom->pBt->pageSize;
  sqliteFree(pTo->aCell);
  memcpy(pTo->aData, &pFrom->aData[ofst], pageSize - ofst);
  memcpy(pTo, pFrom, offsetof(MemPage, aData));
  pFrom->isInit = 0;
  pFrom->aCell = 0;
  assert( pTo->aData[5]<155 );
  pTo->aData[5] += ofst;
  pTo->isOverfull = pFrom->isOverfull;
  to = Addr(pTo->aData);
  from = Addr(&pFrom->aData[ofst]);
  for(i=0; i<pTo->nCell; i++){
    uptr x = Addr(pTo->aCell[i]);
    if( x>from && x<from+pageSize-ofst ){
      *((uptr*)&pTo->aCell[i]) = x + to - from;
    }
  }
}

/*
** The following parameters determine how many adjacent pages get involved







|





|

|










|







2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
** not point to pFrom->aData[].  Those are unchanged.
**
** Over this operation completes, the meta data for pFrom is zeroed.
*/
static void movePage(MemPage *pTo, MemPage *pFrom){
  uptr from, to;
  int i;
  int usableSize;
  int ofst;

  assert( pTo->hdrOffset==0 );
  assert( pFrom->isInit );
  ofst = pFrom->hdrOffset;
  usableSize = pFrom->pBt->usableSize;
  sqliteFree(pTo->aCell);
  memcpy(pTo->aData, &pFrom->aData[ofst], usableSize - ofst);
  memcpy(pTo, pFrom, offsetof(MemPage, aData));
  pFrom->isInit = 0;
  pFrom->aCell = 0;
  assert( pTo->aData[5]<155 );
  pTo->aData[5] += ofst;
  pTo->isOverfull = pFrom->isOverfull;
  to = Addr(pTo->aData);
  from = Addr(&pFrom->aData[ofst]);
  for(i=0; i<pTo->nCell; i++){
    uptr x = Addr(pTo->aCell[i]);
    if( x>from && x<from+usableSize-ofst ){
      *((uptr*)&pTo->aCell[i]) = x + to - from;
    }
  }
}

/*
** The following parameters determine how many adjacent pages get involved
2768
2769
2770
2771
2772
2773
2774

2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789

2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
  int nxDiv;                   /* Next divider slot in pParent->aCell[] */
  int rc;                      /* The return code */
  int leafCorrection;          /* 4 if pPage is a leaf.  0 if not */
  int leafData;                /* True if pPage is a leaf of a LEAFDATA tree */
  int usableSpace;             /* Bytes in pPage beyond the header */
  int pageFlags;               /* Value of pPage->aData[0] */
  int subtotal;                /* Subtotal of bytes in cells on one page */

  MemPage *extraUnref = 0;     /* Unref this page if not zero */
  MemPage *apOld[NB];          /* pPage and up to two siblings */
  Pgno pgnoOld[NB];            /* Page numbers for each page in apOld[] */
  MemPage *apCopy[NB];         /* Private copies of apOld[] pages */
  MemPage *apNew[NB+1];        /* pPage and up to NB siblings after balancing */
  Pgno pgnoNew[NB+1];          /* Page numbers for each page in apNew[] */
  int idxDiv[NB];              /* Indices of divider cells in pParent */
  u8 *apDiv[NB];               /* Divider cells in pParent */
  u8 aTemp[NB][MX_CELL_SIZE];  /* Temporary holding area for apDiv[] */
  u8 aInsBuf[NB][MX_CELL_SIZE];/* Space to hold dividers cells during insert */
  int cntNew[NB+1];            /* Index in aCell[] of cell after i-th page */
  int szNew[NB+1];             /* Combined size of cells place on i-th page */
  u8 *apCell[(MX_CELL+2)*NB];  /* All cells from pages being balanced */
  int szCell[(MX_CELL+2)*NB];  /* Local size of all cells */
  u8 aCopy[NB][MX_PAGE_SIZE+sizeof(MemPage)];  /* Space for apCopy[] */


  /* 
  ** Return without doing any work if pPage is neither overfull nor
  ** underfull.
  */
  assert( pPage->isInit );
  assert( sqlite3pager_iswriteable(pPage->aData) );
  pBt = pPage->pBt;
  if( !pPage->isOverfull && pPage->nFree<pBt->pageSize*2/3 && pPage->nCell>=2){
    relinkCellList(pPage);
    return SQLITE_OK;
  }

  /*
  ** Find the parent of the page to be balanced.  If there is no parent,
  ** it means this page is the root page and special rules apply.







>








<
<





>








|







2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791


2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
  int nxDiv;                   /* Next divider slot in pParent->aCell[] */
  int rc;                      /* The return code */
  int leafCorrection;          /* 4 if pPage is a leaf.  0 if not */
  int leafData;                /* True if pPage is a leaf of a LEAFDATA tree */
  int usableSpace;             /* Bytes in pPage beyond the header */
  int pageFlags;               /* Value of pPage->aData[0] */
  int subtotal;                /* Subtotal of bytes in cells on one page */
  int iSpace = 0;              /* First unused byte of aSpace[] */
  MemPage *extraUnref = 0;     /* Unref this page if not zero */
  MemPage *apOld[NB];          /* pPage and up to two siblings */
  Pgno pgnoOld[NB];            /* Page numbers for each page in apOld[] */
  MemPage *apCopy[NB];         /* Private copies of apOld[] pages */
  MemPage *apNew[NB+1];        /* pPage and up to NB siblings after balancing */
  Pgno pgnoNew[NB+1];          /* Page numbers for each page in apNew[] */
  int idxDiv[NB];              /* Indices of divider cells in pParent */
  u8 *apDiv[NB];               /* Divider cells in pParent */


  int cntNew[NB+1];            /* Index in aCell[] of cell after i-th page */
  int szNew[NB+1];             /* Combined size of cells place on i-th page */
  u8 *apCell[(MX_CELL+2)*NB];  /* All cells from pages being balanced */
  int szCell[(MX_CELL+2)*NB];  /* Local size of all cells */
  u8 aCopy[NB][MX_PAGE_SIZE+sizeof(MemPage)];  /* Space for apCopy[] */
  u8 aSpace[MX_PAGE_SIZE*4];   /* Space to copies of divider cells */

  /* 
  ** Return without doing any work if pPage is neither overfull nor
  ** underfull.
  */
  assert( pPage->isInit );
  assert( sqlite3pager_iswriteable(pPage->aData) );
  pBt = pPage->pBt;
  if( !pPage->isOverfull && pPage->nFree<pBt->usableSize*2/3 && pPage->nCell>=2){
    relinkCellList(pPage);
    return SQLITE_OK;
  }

  /*
  ** Find the parent of the page to be balanced.  If there is no parent,
  ** it means this page is the root page and special rules apply.
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
            TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno));
          }else{
            /* The child has more information that will fit on the root.
            ** The tree is already balanced.  Do nothing. */
            TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
          }
        }else{
          memcpy(pPage->aData, pChild->aData, pBt->pageSize);
          pPage->isInit = 0;
          pPage->pParent = 0;
          rc = initPage(pPage, 0);
          assert( rc==SQLITE_OK );
          freePage(pChild);
          TRACE(("BALANCE: transfer child %d into root %d\n",
                  pChild->pgno, pPage->pgno));







|







2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
            TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno));
          }else{
            /* The child has more information that will fit on the root.
            ** The tree is already balanced.  Do nothing. */
            TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
          }
        }else{
          memcpy(pPage->aData, pChild->aData, pBt->usableSize);
          pPage->isInit = 0;
          pPage->pParent = 0;
          rc = initPage(pPage, 0);
          assert( rc==SQLITE_OK );
          freePage(pChild);
          TRACE(("BALANCE: transfer child %d into root %d\n",
                  pChild->pgno, pPage->pgno));
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986

2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007

3008
3009

3010



3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
  ** Make copies of the content of pPage and its siblings into aOld[].
  ** The rest of this function will use data from the copies rather
  ** that the original pages since the original pages will be in the
  ** process of being overwritten.
  */
  for(i=0; i<nOld; i++){
    MemPage *p = apCopy[i] = (MemPage*)&aCopy[i+1][-sizeof(MemPage)];
    p->aData = &((u8*)p)[-pBt->pageSize];
    p->aCell = 0;
    p->hdrOffset = 0;
    movePage(p, apOld[i]);
  }

  /*
  ** Load pointers to all cells on sibling pages and the divider cells
  ** into the local apCell[] array.  Make copies of the divider cells
  ** into aTemp[] and remove the the divider Cells from pParent.

  **
  ** If the siblings are on leaf pages, then the child pointers of the
  ** divider cells are stripped from the cells before they are copied
  ** into aTemp[].  In this wall, all cells in apCell[] are without
  ** child pointers.  If siblings are not leaves, then all cell in
  ** apCell[] include child pointers.  Either way, all cells in apCell[]
  ** are alike.
  */
  nCell = 0;
  leafCorrection = pPage->leaf*4;
  leafData = pPage->leafData && pPage->leaf;
  for(i=0; i<nOld; i++){
    MemPage *pOld = apCopy[i];
    for(j=0; j<pOld->nCell; j++){
      apCell[nCell] = pOld->aCell[j];
      szCell[nCell] = cellSize(pOld, apCell[nCell]);
      nCell++;
    }
    if( i<nOld-1 ){
      if( leafData ){
        int sz = cellSize(pParent, apDiv[i]);

        dropCell(pParent, nxDiv, sz);
      }else{

        szCell[nCell] = cellSize(pParent, apDiv[i]);



        memcpy(aTemp[i], apDiv[i], szCell[nCell]);
        apCell[nCell] = &aTemp[i][leafCorrection];
        dropCell(pParent, nxDiv, szCell[nCell]);
        szCell[nCell] -= leafCorrection;
        assert( get4byte(&aTemp[i][2])==pgnoOld[i] );
        if( !pOld->leaf ){
          assert( leafCorrection==0 );
          /* The right pointer of the child page pOld becomes the left
          ** pointer of the divider cell */
          memcpy(&apCell[nCell][2], &pOld->aData[pOld->hdrOffset+6], 4);
        }else{
          assert( leafCorrection==4 );







|








|
>



|















<
|
>


>
|
>
>
>
|
|
|

|







2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014

3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
  ** Make copies of the content of pPage and its siblings into aOld[].
  ** The rest of this function will use data from the copies rather
  ** that the original pages since the original pages will be in the
  ** process of being overwritten.
  */
  for(i=0; i<nOld; i++){
    MemPage *p = apCopy[i] = (MemPage*)&aCopy[i+1][-sizeof(MemPage)];
    p->aData = &((u8*)p)[-pBt->usableSize];
    p->aCell = 0;
    p->hdrOffset = 0;
    movePage(p, apOld[i]);
  }

  /*
  ** Load pointers to all cells on sibling pages and the divider cells
  ** into the local apCell[] array.  Make copies of the divider cells
  ** into space obtained form aSpace[] and remove the the divider Cells
  ** from pParent.
  **
  ** If the siblings are on leaf pages, then the child pointers of the
  ** divider cells are stripped from the cells before they are copied
  ** into aSpace[].  In this wall, all cells in apCell[] are without
  ** child pointers.  If siblings are not leaves, then all cell in
  ** apCell[] include child pointers.  Either way, all cells in apCell[]
  ** are alike.
  */
  nCell = 0;
  leafCorrection = pPage->leaf*4;
  leafData = pPage->leafData && pPage->leaf;
  for(i=0; i<nOld; i++){
    MemPage *pOld = apCopy[i];
    for(j=0; j<pOld->nCell; j++){
      apCell[nCell] = pOld->aCell[j];
      szCell[nCell] = cellSize(pOld, apCell[nCell]);
      nCell++;
    }
    if( i<nOld-1 ){

      int sz = cellSize(pParent, apDiv[i]);
      if( leafData ){
        dropCell(pParent, nxDiv, sz);
      }else{
        u8 *pTemp;
        szCell[nCell] = sz;
        pTemp = &aSpace[iSpace];
        iSpace += sz;
        assert( iSpace<=sizeof(aSpace) );
        memcpy(pTemp, apDiv[i], sz);
        apCell[nCell] = pTemp+leafCorrection;
        dropCell(pParent, nxDiv, sz);
        szCell[nCell] -= leafCorrection;
        assert( get4byte(pTemp+2)==pgnoOld[i] );
        if( !pOld->leaf ){
          assert( leafCorrection==0 );
          /* The right pointer of the child page pOld becomes the left
          ** pointer of the divider cell */
          memcpy(&apCell[nCell][2], &pOld->aData[pOld->hdrOffset+6], 4);
        }else{
          assert( leafCorrection==4 );
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
  ** size of all cells on the i-th page and cntNew[] which is the index
  ** in apCell[] of the cell that divides page i from page i+1.  
  ** cntNew[k] should equal nCell.
  **
  ** This little patch of code is critical for keeping the tree
  ** balanced. 
  */
  usableSpace = pBt->pageSize - 10 + leafCorrection;
  for(subtotal=k=i=0; i<nCell; i++){
    subtotal += szCell[i];
    if( subtotal > usableSpace ){
      szNew[k] = subtotal - szCell[i];
      cntNew[k] = i;
      if( leafData ){ i--; }
      subtotal = 0;







|







3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
  ** size of all cells on the i-th page and cntNew[] which is the index
  ** in apCell[] of the cell that divides page i from page i+1.  
  ** cntNew[k] should equal nCell.
  **
  ** This little patch of code is critical for keeping the tree
  ** balanced. 
  */
  usableSpace = pBt->usableSize - 10 + leafCorrection;
  for(subtotal=k=i=0; i<nCell; i++){
    subtotal += szCell[i];
    if( subtotal > usableSpace ){
      szNew[k] = subtotal - szCell[i];
      cntNew[k] = i;
      if( leafData ){ i--; }
      subtotal = 0;
3161
3162
3163
3164
3165
3166
3167
3168
3169


3170
3171
3172
3173


3174
3175
3176
3177
3178
3179
3180
      if( !pNew->leaf ){
        memcpy(&pNew->aData[6], pCell+2, 4);
        pTemp = 0;
      }else if( leafData ){
        CellInfo info;
        j--;
        parseCell(pNew, apCell[j], &info);
        pCell = aInsBuf[i];
        fillInCell(pParent, pCell, 0, info.nKey, 0, 0, &sz);


        pTemp = 0;
      }else{
        pCell -= 4;
        pTemp = aInsBuf[i];


      }
      insertCell(pParent, nxDiv, pCell, sz, pTemp);
      put4byte(&pParent->aCell[nxDiv][2], pNew->pgno);
      j++;
      nxDiv++;
    }
  }







|

>
>



|
>
>







3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
      if( !pNew->leaf ){
        memcpy(&pNew->aData[6], pCell+2, 4);
        pTemp = 0;
      }else if( leafData ){
        CellInfo info;
        j--;
        parseCell(pNew, apCell[j], &info);
        pCell = &aSpace[iSpace];
        fillInCell(pParent, pCell, 0, info.nKey, 0, 0, &sz);
        iSpace += sz;
        assert( iSpace<=sizeof(aSpace) );
        pTemp = 0;
      }else{
        pCell -= 4;
        pTemp = &aSpace[iSpace];
        iSpace += sz;
        assert( iSpace<=sizeof(aSpace) );
      }
      insertCell(pParent, nxDiv, pCell, sz, pTemp);
      put4byte(&pParent->aCell[nxDiv][2], pNew->pgno);
      j++;
      nxDiv++;
    }
  }
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
  pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
  printf("PAGE %d:  flags=0x%02x  frag=%d   parent=%d\n", pgno,
    data[hdr], data[hdr+5], 
    (pPage->isInit && pPage->pParent) ? pPage->pParent->pgno : 0);
  i = 0;
  assert( hdr == (pgno==1 ? 100 : 0) );
  idx = get2byte(&data[hdr+3]);
  while( idx>0 && idx<=pBt->pageSize ){
    CellInfo info;
    Pgno child;
    unsigned char *pCell = &data[idx];
    int sz;

    pCell = &data[idx];
    parseCell(pPage, pCell, &info);







|







3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
  pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
  printf("PAGE %d:  flags=0x%02x  frag=%d   parent=%d\n", pgno,
    data[hdr], data[hdr+5], 
    (pPage->isInit && pPage->pParent) ? pPage->pParent->pgno : 0);
  i = 0;
  assert( hdr == (pgno==1 ? 100 : 0) );
  idx = get2byte(&data[hdr+3]);
  while( idx>0 && idx<=pBt->usableSize ){
    CellInfo info;
    Pgno child;
    unsigned char *pCell = &data[idx];
    int sz;

    pCell = &data[idx];
    parseCell(pPage, pCell, &info);
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
  }
  if( !pPage->leaf ){
    printf("right_child: %d\n", get4byte(&data[hdr+6]));
  }
  nFree = 0;
  i = 0;
  idx = get2byte(&data[hdr+1]);
  while( idx>0 && idx<pPage->pBt->pageSize ){
    int sz = get2byte(&data[idx+2]);
    sprintf(range,"%d..%d", idx, idx+sz-1);
    nFree += sz;
    printf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
       i, range, sz, nFree);
    idx = get2byte(&data[idx]);
    i++;
  }
  if( idx!=0 ){
    printf("ERROR: next freeblock index out of range: %d\n", idx);
  }
  if( recursive && !pPage->leaf ){
    idx = get2byte(&data[hdr+3]);
    while( idx>0 && idx<pBt->pageSize ){
      unsigned char *pCell = &data[idx];
      sqlite3BtreePageDump(pBt, get4byte(&pCell[2]), 1);
      idx = get2byte(pCell);
    }
    sqlite3BtreePageDump(pBt, get4byte(&data[hdr+6]), 1);
  }
  sqlite3pager_unref(data);







|













|







3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
  }
  if( !pPage->leaf ){
    printf("right_child: %d\n", get4byte(&data[hdr+6]));
  }
  nFree = 0;
  i = 0;
  idx = get2byte(&data[hdr+1]);
  while( idx>0 && idx<pPage->pBt->usableSize ){
    int sz = get2byte(&data[idx+2]);
    sprintf(range,"%d..%d", idx, idx+sz-1);
    nFree += sz;
    printf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
       i, range, sz, nFree);
    idx = get2byte(&data[idx]);
    i++;
  }
  if( idx!=0 ){
    printf("ERROR: next freeblock index out of range: %d\n", idx);
  }
  if( recursive && !pPage->leaf ){
    idx = get2byte(&data[hdr+3]);
    while( idx>0 && idx<pBt->usableSize ){
      unsigned char *pCell = &data[idx];
      sqlite3BtreePageDump(pBt, get4byte(&pCell[2]), 1);
      idx = get2byte(pCell);
    }
    sqlite3BtreePageDump(pBt, get4byte(&data[hdr+6]), 1);
  }
  sqlite3pager_unref(data);
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
  }else{
    aResult[3] = 0;
    aResult[6] = 0;
  }
  aResult[4] = pPage->nFree;
  cnt = 0;
  idx = get2byte(&pPage->aData[pPage->hdrOffset+1]);
  while( idx>0 && idx<pPage->pBt->pageSize ){
    cnt++;
    idx = get2byte(&pPage->aData[idx]);
  }
  aResult[5] = cnt;
  aResult[7] = pPage->leaf ? 0 : get4byte(&pPage->aData[pPage->hdrOffset+6]);
  return SQLITE_OK;
}







|







3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
  }else{
    aResult[3] = 0;
    aResult[6] = 0;
  }
  aResult[4] = pPage->nFree;
  cnt = 0;
  idx = get2byte(&pPage->aData[pPage->hdrOffset+1]);
  while( idx>0 && idx<pPage->pBt->usableSize ){
    cnt++;
    idx = get2byte(&pPage->aData[idx]);
  }
  aResult[5] = cnt;
  aResult[7] = pPage->leaf ? 0 : get4byte(&pPage->aData[pPage->hdrOffset+6]);
  return SQLITE_OK;
}
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
){
  MemPage *pPage;
  int i, rc, depth, d2, pgno, cnt;
  int hdr;
  u8 *data;
  BtCursor cur;
  Btree *pBt;
  int maxLocal, pageSize;
  char zMsg[100];
  char zContext[100];
  char hit[MX_PAGE_SIZE];

  /* Check that the page exists
  */
  cur.pBt = pBt = pCheck->pBt;
  pageSize = pBt->pageSize;
  if( iPage==0 ) return 0;
  if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  if( (rc = getPage(pBt, (Pgno)iPage, &pPage))!=0 ){
    sprintf(zMsg, "unable to get the page. error code=%d", rc);
    checkAppendMsg(pCheck, zContext, zMsg);
    return 0;
  }







|







|







3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
){
  MemPage *pPage;
  int i, rc, depth, d2, pgno, cnt;
  int hdr;
  u8 *data;
  BtCursor cur;
  Btree *pBt;
  int maxLocal, usableSize;
  char zMsg[100];
  char zContext[100];
  char hit[MX_PAGE_SIZE];

  /* Check that the page exists
  */
  cur.pBt = pBt = pCheck->pBt;
  usableSize = pBt->usableSize;
  if( iPage==0 ) return 0;
  if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  if( (rc = getPage(pBt, (Pgno)iPage, &pPage))!=0 ){
    sprintf(zMsg, "unable to get the page. error code=%d", rc);
    checkAppendMsg(pCheck, zContext, zMsg);
    return 0;
  }
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
    */
    sprintf(zContext, "On tree page %d cell %d: ", iPage, i);
    pCell = pPage->aCell[i];
    parseCell(pPage, pCell, &info);
    sz = info.nData;
    if( !pPage->intKey ) sz += info.nKey;
    if( sz>info.nLocal ){
      int nPage = (sz - info.nLocal + pageSize - 5)/(pageSize - 4);
      checkList(pCheck, 0, get4byte(&pCell[info.iOverflow]),nPage,zContext);
    }

    /* Check sanity of left child page.
    */
    if( !pPage->leaf ){
      pgno = get4byte(&pCell[2]);







|







3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
    */
    sprintf(zContext, "On tree page %d cell %d: ", iPage, i);
    pCell = pPage->aCell[i];
    parseCell(pPage, pCell, &info);
    sz = info.nData;
    if( !pPage->intKey ) sz += info.nKey;
    if( sz>info.nLocal ){
      int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4);
      checkList(pCheck, 0, get4byte(&pCell[info.iOverflow]),nPage,zContext);
    }

    /* Check sanity of left child page.
    */
    if( !pPage->leaf ){
      pgno = get4byte(&pCell[2]);
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
    pgno = get4byte(&pPage->aData[pPage->hdrOffset+6]);
    sprintf(zContext, "On page %d at right child: ", iPage);
    checkTreePage(pCheck, pgno, pPage, zContext,0,0,0,0);
  }
 
  /* Check for complete coverage of the page
  */
  memset(hit, 0, pageSize);
  memset(hit, 1, pPage->hdrOffset+10-4*(pPage->leaf));
  data = pPage->aData;
  hdr = pPage->hdrOffset;
  for(cnt=0, i=get2byte(&data[hdr+3]); i>0 && i<pageSize && cnt<10000; cnt++){
    int size = cellSize(pPage, &data[i]);
    int j;
    for(j=i+size-1; j>=i; j--) hit[j]++;
    i = get2byte(&data[i]);
  }
  for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<pageSize && cnt<10000; cnt++){
    int size = get2byte(&data[i+2]);
    int j;
    for(j=i+size-1; j>=i; j--) hit[j]++;
    i = get2byte(&data[i]);
  }
  for(i=cnt=0; i<pageSize; i++){
    if( hit[i]==0 ){
      cnt++;
    }else if( hit[i]>1 ){
      sprintf(zMsg, "Multiple uses for byte %d of page %d", i, iPage);
      checkAppendMsg(pCheck, zMsg, 0);
      break;
    }







|



|





|





|







3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
    pgno = get4byte(&pPage->aData[pPage->hdrOffset+6]);
    sprintf(zContext, "On page %d at right child: ", iPage);
    checkTreePage(pCheck, pgno, pPage, zContext,0,0,0,0);
  }
 
  /* Check for complete coverage of the page
  */
  memset(hit, 0, usableSize);
  memset(hit, 1, pPage->hdrOffset+10-4*(pPage->leaf));
  data = pPage->aData;
  hdr = pPage->hdrOffset;
  for(cnt=0, i=get2byte(&data[hdr+3]); i>0 && i<usableSize && cnt<10000; cnt++){
    int size = cellSize(pPage, &data[i]);
    int j;
    for(j=i+size-1; j>=i; j--) hit[j]++;
    i = get2byte(&data[i]);
  }
  for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000; cnt++){
    int size = get2byte(&data[i+2]);
    int j;
    for(j=i+size-1; j>=i; j--) hit[j]++;
    i = get2byte(&data[i]);
  }
  for(i=cnt=0; i<usableSize; i++){
    if( hit[i]==0 ){
      cnt++;
    }else if( hit[i]>1 ){
      sprintf(zMsg, "Multiple uses for byte %d of page %d", i, iPage);
      checkAppendMsg(pCheck, zMsg, 0);
      break;
    }
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
*/
int sqlite3BtreeCopyFile(Btree *pBtTo, Btree *pBtFrom){
  int rc = SQLITE_OK;
  Pgno i, nPage, nToPage;

  if( !pBtTo->inTrans || !pBtFrom->inTrans ) return SQLITE_ERROR;
  if( pBtTo->pCursor ) return SQLITE_BUSY;
  memcpy(pBtTo->pPage1, pBtFrom->pPage1, pBtFrom->pageSize);
  rc = sqlite3pager_overwrite(pBtTo->pPager, 1, pBtFrom->pPage1);
  nToPage = sqlite3pager_pagecount(pBtTo->pPager);
  nPage = sqlite3pager_pagecount(pBtFrom->pPager);
  for(i=2; rc==SQLITE_OK && i<=nPage; i++){
    void *pPage;
    rc = sqlite3pager_get(pBtFrom->pPager, i, &pPage);
    if( rc ) break;







|







4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
*/
int sqlite3BtreeCopyFile(Btree *pBtTo, Btree *pBtFrom){
  int rc = SQLITE_OK;
  Pgno i, nPage, nToPage;

  if( !pBtTo->inTrans || !pBtFrom->inTrans ) return SQLITE_ERROR;
  if( pBtTo->pCursor ) return SQLITE_BUSY;
  memcpy(pBtTo->pPage1, pBtFrom->pPage1, pBtFrom->usableSize);
  rc = sqlite3pager_overwrite(pBtTo->pPager, 1, pBtFrom->pPage1);
  nToPage = sqlite3pager_pagecount(pBtTo->pPager);
  nPage = sqlite3pager_pagecount(pBtFrom->pPager);
  for(i=2; rc==SQLITE_OK && i<=nPage; i++){
    void *pPage;
    rc = sqlite3pager_get(pBtFrom->pPager, i, &pPage);
    if( rc ) break;
Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.107 2004/05/12 13:30:08 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>








|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.108 2004/05/14 01:58:13 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

177
178
179
180
181
182
183
184

185
186
187
188
189
190
191
192
193
194
195
196
197
198
  int origDbSize;             /* dbSize before the current change */
  int stmtSize;               /* Size of database (in pages) at stmt_begin() */
  off_t stmtJSize;            /* Size of journal at stmt_begin() */
  int nRec;                   /* Number of pages written to the journal */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  int stmtNRec;               /* Number of records in stmt subjournal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  void (*xDestructor)(void*); /* Call this routine when freeing pages */

  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */
  int pageSize;               /* Page size in bytes */
  u8 journalOpen;             /* True if journal file descriptors is valid */
  u8 journalStarted;          /* True if header of journal is synced */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 stmtOpen;                /* True if the statement subjournal is open */
  u8 stmtInUse;               /* True we are in a statement subtransaction */
  u8 stmtAutoopen;            /* Open stmt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */







|
>






<







177
178
179
180
181
182
183
184
185
186
187
188
189
190
191

192
193
194
195
196
197
198
  int origDbSize;             /* dbSize before the current change */
  int stmtSize;               /* Size of database (in pages) at stmt_begin() */
  off_t stmtJSize;            /* Size of journal at stmt_begin() */
  int nRec;                   /* Number of pages written to the journal */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  int stmtNRec;               /* Number of records in stmt subjournal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  void (*xDestructor)(void*,int); /* Call this routine when freeing pages */
  int pageSize;               /* Number of bytes in a page */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */

  u8 journalOpen;             /* True if journal file descriptors is valid */
  u8 journalStarted;          /* True if header of journal is synced */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 stmtOpen;                /* True if the statement subjournal is open */
  u8 stmtInUse;               /* True we are in a statement subtransaction */
  u8 stmtAutoopen;            /* Open stmt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */
583
584
585
586
587
588
589

590
591

592
593
594
595
596
597
598
599
600
601
602
603
604
  sqlite3OsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
  rc = sqlite3OsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
  if( pPg ){
    /* No page should ever be rolled back that is in use, except for page
    ** 1 which is held in use in order to keep the lock on the database
    ** active.
    */

    assert( pPg->nRef==0 || pPg->pgno==1 );
    memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);

    if( pPager->xDestructor ){
      pPager->xDestructor(PGHDR_TO_DATA(pPg));
    }
    pPg->dirty = 0;
    pPg->needSync = 0;
    CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
  }
  return rc;
}

/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.  







>

|
>

|



|







583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
  sqlite3OsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
  rc = sqlite3OsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
  if( pPg ){
    /* No page should ever be rolled back that is in use, except for page
    ** 1 which is held in use in order to keep the lock on the database
    ** active.
    */
    void *pData;
    assert( pPg->nRef==0 || pPg->pgno==1 );
    pData = PGHDR_TO_DATA(pPg);
    memcpy(pData, pgRec.aData, pPager->pageSize);
    if( pPager->xDestructor ){
      pPager->xDestructor(pData, pPager->pageSize);
    }
    pPg->dirty = 0;
    pPg->needSync = 0;
    CODEC(pPager, pData, pPg->pgno, 3);
  }
  return rc;
}

/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.  
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
** Set the destructor for this pager.  If not NULL, the destructor is called
** when the reference count on each page reaches zero.  The destructor can
** be used to clean up information in the extra segment appended to each page.
**
** The destructor is not called as a result sqlite3pager_close().  
** Destructors are only called by sqlite3pager_unref().
*/
void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
  pPager->xDestructor = xDesc;
}

/*
** Return the total number of pages in the disk file associated with
** pPager.
*/







|







1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
** Set the destructor for this pager.  If not NULL, the destructor is called
** when the reference count on each page reaches zero.  The destructor can
** be used to clean up information in the extra segment appended to each page.
**
** The destructor is not called as a result sqlite3pager_close().  
** Destructors are only called by sqlite3pager_unref().
*/
void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*,int)){
  pPager->xDestructor = xDesc;
}

/*
** Return the total number of pages in the disk file associated with
** pPager.
*/
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
    }else{
      pPager->pFirst = pPg;
    }
    if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
      pPager->pFirstSynced = pPg;
    }
    if( pPager->xDestructor ){
      pPager->xDestructor(pData);
    }
  
    /* When all pages reach the freelist, drop the read lock from
    ** the database file.
    */
    pPager->nRef--;
    assert( pPager->nRef>=0 );







|







1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
    }else{
      pPager->pFirst = pPg;
    }
    if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
      pPager->pFirstSynced = pPg;
    }
    if( pPager->xDestructor ){
      pPager->xDestructor(pData, pPager->pageSize);
    }
  
    /* When all pages reach the freelist, drop the read lock from
    ** the database file.
    */
    pPager->nRef--;
    assert( pPager->nRef>=0 );
Changes to src/pager.h.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.28 2004/05/08 08:23:30 danielk1977 Exp $
*/

/*
** The size of one page
**
** You can change this value to another (reasonable) value you want.
** It need not be a power of two, though the interface to the disk
** will likely be faster if it is.
**
** Experiments show that a page size of 1024 gives the best speed
** for common usages.  The speed differences for different sizes







|



|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.29 2004/05/14 01:58:13 drh Exp $
*/

/*
** The size of a page.
**
** You can change this value to another (reasonable) value you want.
** It need not be a power of two, though the interface to the disk
** will likely be faster if it is.
**
** Experiments show that a page size of 1024 gives the best speed
** for common usages.  The speed differences for different sizes
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

/*
** See source code comments for a detailed description of the following
** routines:
*/
int sqlite3pager_open(Pager **ppPager, const char *zFilename,
                     int nPage, int nExtra, int useJournal);
void sqlite3pager_set_destructor(Pager*, void(*)(void*));
void sqlite3pager_set_cachesize(Pager*, int);
int sqlite3pager_close(Pager *pPager);
int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage);
void *sqlite3pager_lookup(Pager *pPager, Pgno pgno);
int sqlite3pager_ref(void*);
int sqlite3pager_unref(void*);
Pgno sqlite3pager_pagenumber(void*);







|







67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

/*
** See source code comments for a detailed description of the following
** routines:
*/
int sqlite3pager_open(Pager **ppPager, const char *zFilename,
                     int nPage, int nExtra, int useJournal);
void sqlite3pager_set_destructor(Pager*, void(*)(void*,int));
void sqlite3pager_set_cachesize(Pager*, int);
int sqlite3pager_close(Pager *pPager);
int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage);
void *sqlite3pager_lookup(Pager *pPager, Pgno pgno);
int sqlite3pager_ref(void*);
int sqlite3pager_unref(void*);
Pgno sqlite3pager_pagenumber(void*);
99
100
101
102
103
104
105
106
107
108
int sqlite3pager_rename(Pager*, const char *zNewName);
void sqlite3pager_set_codec(Pager*,void(*)(void*,void*,Pgno,int),void*);

#ifdef SQLITE_TEST
void sqlite3pager_refdump(Pager*);
int pager3_refinfo_enable;
#endif










<
<
<
99
100
101
102
103
104
105



int sqlite3pager_rename(Pager*, const char *zNewName);
void sqlite3pager_set_codec(Pager*,void(*)(void*,void*,Pgno,int),void*);

#ifdef SQLITE_TEST
void sqlite3pager_refdump(Pager*);
int pager3_refinfo_enable;
#endif