/ Check-in [d4be4709]
Login
Overview
Comment:All BTree code is in place. Now we just have to make it work. (CVS 225)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:d4be4709ee32bab6e78104861ed4e02d153779aa
User & Date: drh 2001-06-10 19:56:59
Context
2001-06-22
19:15
The BTree code compiles and links now, but it does not work yet. (CVS 226) check-in: b31c4902 user: drh tags: trunk
2001-06-10
19:56
All BTree code is in place. Now we just have to make it work. (CVS 225) check-in: d4be4709 user: drh tags: trunk
2001-06-08
00:25
documentation update (CVS 224) check-in: d1e211fa user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/btree.c.

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
..
65
66
67
68
69
70
71

72
73

74
75
76
77
78
79
80
...
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
...
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
...
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
...
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253









254
255
256
257
258
259

260


261
262
263
264

265
266
267
268
269
270
271
272
...
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
...
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
...
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398

399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
...
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
...
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
...
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
...
753
754
755
756
757
758
759
760

761
762
763
764

765
766
767
768
769
770
771
772
773
774
...
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
...
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
....
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
....
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
....
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
....
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
....
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
....
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
....
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
....
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409


































































































1410
1411
1412

1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423









1424
1425
1426
1427



1428
1429
1430
1431
1432
1433

1434
1435
1436
1437
1438
1439
1440







1441
1442



1443
1444
1445


1446
1447

1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459

1460
1461
1462
1463
1464

1465
1466
1467
1468
1469


1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483

1484
1485

1486
1487
1488
1489
1490
1491
1492


1493
1494
1495
1496
1497
1498
1499
1500

1501


1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512



1513
1514
1515
1516

1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535

1536
1537










1538
1539
1540
1541

1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557



1558
1559
1560
1561
1562









1563




1564
1565
















1566
1567
1568
1569
1570
1571


1572
1573


1574




1575
1576
1577
1578
1579


1580
1581
1582
1583
1584
1585
1586
1587

1588
1589
1590
1591
1592
1593
1594
1595

1596
1597
1598
1599
1600

1601

1602
1603
1604
1605
1606
1607



1608
1609
1610
1611

1612
1613
1614
1615
1616



1617
1618

1619
1620
1621
1622
1623


1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713

1714
1715
1716



1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736

1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749


1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818

1819
1820
1821
1822
1823
1824
1825
1826

1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866

1867
1868
1869
1870
1871
1872
1873



1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887


1888


1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982

1983
1984
1985
1986
1987
1988
1989
1990

1991
1992
1993
1994

1995
1996
1997
1998
1999
2000

2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015

2016
2017
2018


2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065

2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
....
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127



2128
2129
2130
2131
2132
2133
2134
....
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
** Boston, MA  02111-1307, USA.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*************************************************************************
** $Id: btree.c,v 1.11 2001/06/08 00:21:52 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
................................................................................
#include "pager.h"
#include "btree.h"
#include <assert.h>


/*
** Primitive data types.  u32 must be 4 bytes and u16 must be 2 bytes.

** Change these typedefs when porting to new architectures.
*/

typedef unsigned int u32;
typedef unsigned short int u16;
typedef unsigned char u8;

/*
** Forward declarations of structures used only in this file.
*/
................................................................................
};

/*
** Each database page has a header that is an instance of this
** structure.
**
** PageHdr.firstFree is 0 if there is no free space on this page.
** Otherwise, PageHdr.firstFree is the index in MemPage.aDisk[] of a 
** FreeBlk structure that describes the first block of free space.  
** All free space is defined by a linked list of FreeBlk structures.
**
** Data is stored in a linked list of Cell structures.  PageHdr.firstCell
** is the index into MemPage.aDisk[] of the first cell on the page.  The
** Cells are kept in sorted order.
**
** A Cell contains all information about a database entry and a pointer
** to a child page that contains other entries less than itself.  In
** other words, the i-th Cell contains both Ptr(i) and Key(i).  The
** right-most pointer of the page is contained in PageHdr.rightChild.
*/
struct PageHdr {
  Pgno rightChild;  /* Child page that comes after all cells on this page */
  u16 firstCell;    /* Index in MemPage.aDisk[] of the first cell */
  u16 firstFree;    /* Index in MemPage.aDisk[] of the first free block */
};

/*
** Entries on a page of the database are called "Cells".  Each Cell
** has a header and data.  This structure defines the header.  The
** key and data (collectively the "payload") follow this header on
** the database page.
................................................................................
** A definition of the complete Cell structure is given below.  The
** header for the cell must be defined separately in order to do some
** of the sizing #defines that follow.
*/
struct CellHdr {
  Pgno leftChild; /* Child page that comes before this cell */
  u16 nKey;       /* Number of bytes in the key */
  u16 iNext;      /* Index in MemPage.aDisk[] of next cell in sorted order */
  u32 nData;      /* Number of bytes of data */
}

/*
** The minimum size of a complete Cell.  The Cell must contain a header
** and at least 4 bytes of payload.
*/
................................................................................
** Free space on a page is remembered using a linked list of the FreeBlk
** structures.  Space on a database page is allocated in increments of
** at least 4 bytes and is always aligned to a 4-byte boundry.  The
** linked list of FreeBlks is always kept in order by address.
*/
struct FreeBlk {
  u16 iSize;      /* Number of bytes in this block of free space */
  u16 iNext;      /* Index in MemPage.aDisk[] of the next free block */
};

/*
** Number of bytes on a single overflow page.
*/
#define OVERFLOW_SIZE (SQLITE_PAGE_SIZE-sizeof(Pgno))

/*
** When the key and data for a single entry in the BTree will not fit in
** the MX_LOACAL_PAYLOAD bytes of space available on the database page,
** then all extra bytes are written to a linked list of overflow pages.
................................................................................
**
** Unused pages in the database are also represented by instances of
** the OverflowPage structure.  The PageOne.freeList field is the
** page number of the first page in a linked list of unused database
** pages.
*/
struct OverflowPage {
  Pgno next;
  char aPayload[OVERFLOW_SIZE];
};

/*
** For every page in the database file, an instance of the following structure
** is stored in memory.  The aDisk[] array contains the raw bits read from
** the disk.  The rest is auxiliary information that held in memory only. The
** auxiliary info is only valid for regular database pages - it is not
** used for overflow pages and pages on the freelist.
**
** Of particular interest in the auxiliary info is the apCell[] entry.  Each
** apCell[] entry is a pointer to a Cell structure in aDisk[].  The cells are
** put in this array so that they can be accessed in constant time, rather
** than in linear time which would be needed if we had to walk the linked 
** list on every access.
**









** The pParent field points back to the parent page.  This allows us to
** walk up the BTree from any leaf to the root.  Care must be taken to
** unref() the parent page pointer when this page is no longer referenced.
** The pageDestructor() routine handles that chore.
*/
struct MemPage {

  char aDisk[SQLITE_PAGE_SIZE];  /* Page data stored on disk */


  int isInit;                    /* True if auxiliary data is initialized */
  MemPage *pParent;              /* The parent of this page.  NULL for root */
  int nFree;                     /* Number of free bytes in aDisk[] */
  int nCell;                     /* Number of entries on this page */

  Cell *apCell[MX_CELL+1];       /* All data entires in sorted order */
}

/*
** The in-memory image of a disk page has the auxiliary information appended
** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
** that extra information.
*/
................................................................................
/*
** A cursor is a pointer to a particular entry in the BTree.
** The entry is identified by its MemPage and the index in
** MemPage.apCell[] of the entry.
*/
struct BtCursor {
  Btree *pBt;               /* The Btree to which this cursor belongs */
  BtCursor *pPrev, *pNext;  /* List of all cursors */
  Pgno pgnoRoot;            /* The root page of this tree */
  MemPage *pPage;           /* Page that contains the entry */
  u16 idx;                  /* Index of the entry in pPage->apCell[] */
  u8 bSkipNext;             /* sqliteBtreeNext() is no-op if true */
  u8 iMatch;                /* compare result from last sqliteBtreeMoveto() */
};

................................................................................

/*
** Defragment the page given.  All Cells are moved to the
** beginning of the page and all free space is collected 
** into one big FreeBlk at the end of the page.
*/
static void defragmentPage(MemPage *pPage){
  int pc;
  int i, n;
  FreeBlk *pFBlk;
  char newPage[SQLITE_PAGE_SIZE];

  pc = sizeof(PageHdr);
  ((PageHdr*)pPage)->firstCell = pc;
  memcpy(newPage, pPage->aDisk, pc);
  for(i=0; i<pPage->nCell; i++){
    Cell *pCell = &pPage->apCell[i];
    n = cellSize(pCell);
    pCell->h.iNext = i<pPage->nCell ? pc + n : 0;
    memcpy(&newPage[pc], pCell, n);
    pPage->apCell[i] = (Cell*)&pPage->aDisk[pc];
    pc += n;
  }
  assert( pPage->nFree==SQLITE_PAGE_SIZE-pc );
  memcpy(pPage->aDisk, newPage, pc);
  pFBlk = &pPage->aDisk[pc];
  pFBlk->iSize = SQLITE_PAGE_SIZE - pc;
  pFBlk->iNext = 0;
  ((PageHdr*)pPage)->firstFree = pc;
  memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk));
}

/*
** Allocate nByte bytes of space on a page.  nByte must be a 
** multiple of 4.
**
** Return the index into pPage->aDisk[] of the first byte of
** the new allocation. Or return 0 if there is not enough free
** space on the page to satisfy the allocation request.
**
** If the page contains nBytes of free space but does not contain
** nBytes of contiguous free space, then this routine automatically
** calls defragementPage() to consolidate all free space before 
** allocating the new chunk.
................................................................................
*/
static int allocateSpace(MemPage *pPage, int nByte){
  FreeBlk *p;
  u16 *pIdx;
  int start;

  assert( nByte==ROUNDUP(nByte) );
  if( pPage->nFree<nByte ) return 0;
  pIdx = &((PageHdr*)pPage)->firstFree;
  p = (FreeBlk*)&pPage->aDisk[*pIdx];
  while( p->iSize<nByte ){
    if( p->iNext==0 ){
      defragmentPage(pPage);
      pIdx = &((PageHdr*)pPage)->firstFree;
    }else{
      pIdx = &p->iNext;
    }
    p = (FreeBlk*)&pPage->aDisk[*pIdx];
  }
  if( p->iSize==nByte ){
    start = *pIdx;
    *pIdx = p->iNext;
  }else{
    start = *pIdx;
    FreeBlk *pNew = (FreeBlk*)&pPage->aDisk[start + nByte];
    pNew->iNext = p->iNext;
    pNew->iSize = p->iSize - nByte;
    *pIdx = start + nByte;
  }
  pPage->nFree -= nByte;
  return start;
}

/*
** Return a section of the MemPage.aDisk[] to the freelist.
** The first byte of the new free block is pPage->aDisk[start]
** and the size of the block is "size" bytes.

**
** Most of the effort here is involved in coalesing adjacent
** free blocks into a single big free block.
*/
static void freeSpace(MemPage *pPage, int start, int size){
  int end = start + size;
  u16 *pIdx, idx;
  FreeBlk *pFBlk;
  FreeBlk *pNew;
  FreeBlk *pNext;

  assert( size == ROUNDUP(size) );
  assert( start == ROUNDUP(start) );
  pIdx = &((PageHdr*)pPage)->firstFree;
  idx = *pIdx;
  while( idx!=0 && idx<start ){
    pFBlk = (FreeBlk*)&pPage->aDisk[idx];
    if( idx + pFBlk->iSize == start ){
      pFBlk->iSize += size;
      if( idx + pFBlk->iSize == pFBlk->iNext ){
        pNext = (FreeBlk*)&pPage->aDisk[pFblk->iNext];
        pFBlk->iSize += pNext->iSize;
        pFBlk->iNext = pNext->iNext;
      }
      pPage->nFree += size;
      return;
    }
    pIdx = &pFBlk->iNext;
    idx = *pIdx;
  }
  pNew = (FreeBlk*)&pPage->aDisk[start];
  if( idx != end ){
    pNew->iSize = size;
    pNew->iNext = idx;
  }else{
    pNext = (FreeBlk*)&pPage->aDisk[idx];
    pNew->iSize = size + pNext->iSize;
    pNew->iNext = pNext->iNext;
  }
  *pIdx = start;
  pPage->nFree += size;
}

................................................................................
** Return SQLITE_OK on success.  If we see that the page does
** not contained a well-formed database page, then return 
** SQLITE_CORRUPT.  Note that a return of SQLITE_OK does not
** guarantee that the page is well-formed.  It only shows that
** we failed to detect any corruption.
*/
static int initPage(MemPage *pPage, Pgno pgnoThis, MemPage *pParent){
  int idx;           /* An index into pPage->aDisk[] */
  Cell *pCell;       /* A pointer to a Cell in pPage->aDisk[] */
  FreeBlk *pFBlk;    /* A pointer to a free block in pPage->aDisk[] */
  int sz;            /* The size of a Cell in bytes */
  int freeSpace;     /* Amount of free space on the page */

  if( pPage->pParent ){
    assert( pPage->pParent==pParent );
    return SQLITE_OK;
  }
................................................................................
    pPage->pParent = pParent;
    sqlitepager_ref(pParent);
  }
  if( pPage->isInit ) return SQLITE_OK;
  pPage->isInit = 1;
  pPage->nCell = 0;
  freeSpace = SQLITE_PAGE_SIZE - sizeof(PageHdr);
  idx = ((PageHdr*)pPage)->firstCell;
  while( idx!=0 ){
    if( idx>SQLITE_PAGE_SIZE-MN_CELL_SIZE ) goto page_format_error;
    if( idx<sizeof(PageHdr) ) goto page_format_error;
    pCell = (Cell*)&pPage->aDisk[idx];
    sz = cellSize(pCell);
    if( idx+sz > SQLITE_PAGE_SIZE ) goto page_format_error;
    freeSpace -= sz;
    pPage->apCell[pPage->nCell++] = pCell;
    idx = pCell->h.iNext;
  }
  pPage->nFree = 0;
  idx = ((PageHdr*)pPage)->firstFree;
  while( idx!=0 ){
    if( idx>SQLITE_PAGE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
    if( idx<sizeof(PageHdr) ) goto page_format_error;
    pFBlk = (FreeBlk*)&pPage->aDisk[idx];
    pPage->nFree += pFBlk->iSize;
    if( pFBlk->iNext <= idx ) goto page_format_error;
    idx = pFBlk->iNext;
  }
  if( pPage->nCell==0 && pPage->nFree==0 ){
    /* As a special case, an uninitialized root page appears to be
    ** an empty database */
................................................................................
  if( pPage->nFree!=freeSpace ) goto page_format_error;
  return SQLITE_OK;

page_format_error:
  return SQLITE_CORRUPT;
}

/*
** Recompute the MemPage.apCell[], MemPage.nCell, and MemPage.nFree parameters
** for a cell after the MemPage.aDisk[] content has be changed significantly.
**
** The computation here is similar to initPage() except that in this case
** the MemPage.aDisk[] field has been set up internally (instead of 
** having been read from disk) so we do not need to do as much error
** checking.
*/
static void reinitPage(MemPage *pPage){
  Cell *pCell;

  pPage->nCell = 0;
  idx = ((PageHdr*)pPage)->firstCell;
  while( idx!=0 ){
    pCell = (Cell*)&pPage->aDisk[idx];
    sz = cellSize(pCell);
    pPage->apCell[pPage->nCell++] = pCell;
    idx = pCell->h.iNext;
  }
  pPage->nFree = 0;
  idx = ((PageHdr*)pPage)->firstFree;
  while( idx!=0 ){
    pFBlk = (FreeBlk*)&pPage->aDisk[idx];
    pPage->nFree += pFBlk->iSize;
    idx = pFBlk->iNext;
  }
  return SQLITE_OK;
}

/*
** Set up a raw page so that it looks like a database page holding
** no entries.
*/
static void zeroPage(MemPage *pPage){
  PageHdr *pHdr;
  FreeBlk *pFBlk;
  memset(pPage, 0, SQLITE_PAGE_SIZE);
  pHdr = (PageHdr*)pPage;
  pHdr->firstCell = 0;
  pHdr->firstFree = sizeof(*pHdr);
  pFBlk = (FreeBlk*)&pHdr[1];
  pFBlk->iNext = 0;
  pFBlk->iSize = SQLITE_PAGE_SIZE - sizeof(*pHdr);
}

................................................................................
  if( rc!=SQLITE_OK ){
    goto create_cursor_exception;
  }
  rc = initPage(pCur->pPage, pCur->pgnoRoot, 0);
  if( rc!=SQLITE_OK ){
    goto create_cursor_exception;
  }
  pCur->pPrev = 0;

  pCur->pNext = pBt->pCursor;
  if( pCur->pNext ){
    pCur->pNext->pPrev = pCur;
  }

  pBt->pCursor = pCur;
  pCur->pBt = pBt;
  pCur->idx = 0;
  *ppCur = pCur;
  return SQLITE_OK;

create_cursor_exception:
  *ppCur = 0;
  if( pCur ){
    if( pCur->pPage ) sqlitepager_unref(pCur->pPage);
................................................................................
  sqliteFree(pCur);
}

/*
** Make a temporary cursor by filling in the fields of pTempCur.
** The temporary cursor is not on the cursor list for the Btree.
*/
static void CreateTemporaryCursor(BtCursor *pCur, BtCursor *pTempCur){
  memcpy(pTempCur, pCur, sizeof(*pCur));
  pTempCur->pNext = 0;
  pTempCur->pPrev = 0;
  sqlitepager_ref(pTempCur->pPage);
}

/*
** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
** function above.
*/
static void DestroyTemporaryCursor(BeCursor *pCur){
  sqlitepager_unref(pCur->pPage);
}

/*
** Set *pSize to the number of bytes of key in the entry the
** cursor currently points to.  Always return SQLITE_OK.
** Failure is not possible.  If the cursor is not currently
................................................................................
  }
  while( amt>0 && nextPage ){
    OverflowPage *pOvfl;
    rc = sqlitepager_get(pCur->pBt->pPager, nextPage, &pOvfl);
    if( rc!=0 ){
      return rc;
    }
    nextPage = pOvfl->next;
    if( offset<OVERFLOW_SIZE ){
      int a = amt;
      if( a + offset > OVERFLOW_SIZE ){
        a = OVERFLOW_SIZE - offset;
      }
      memcpy(zBuf, &pOvfl->aPayload[offset], a);
      amt -= a;
................................................................................
    if( nextPage==0 ){
      return SQLITE_CORRUPT;
    }
    rc = sqlitepager_get(pCur->pBt->pPager, nextPage, &pOvfl);
    if( rc ){
      return rc;
    }
    nextPage = pOvfl->next;
    n = nKey;
    if( n>OVERFLOW_SIZE ){
      n = OVERFLOW_SIZE;
    }
    c = memcmp(pOvfl->aPayload, pKey, n);
    sqlitepager_unref(pOvfl);
    if( c!=0 ){
................................................................................
        lwr = pCur->idx+1;
      }else{
        upr = pCur->idx-1;
      }
    }
    assert( lwr==upr+1 );
    if( lwr>=pPage->nCell ){
      chldPg = ((PageHdr*)pPage)->rightChild;
    }else{
      chldPg = pPage->apCell[lwr]->h.leftChild;
    }
    if( chldPg==0 ){
      pCur->iMatch = c;
      if( pRes ) *pRes = c;
      return SQLITE_OK;
................................................................................
  if( pCur->bSkipNext ){
    pCur->bSkipNext = 0;
    if( pRes ) *pRes = 0;
    return SQLITE_OK;
  }
  pCur->idx++;
  if( pCur->idx>=pCur->pPage->nCell ){
    if( ((PageHdr*)pPage)->rightChild ){
      rc = moveToChild(pCur, ((PageHdr*)pPage)->rightChild);
      if( rc ) return rc;
      rc = moveToLeftmost(pCur);
      if( rc ) return rc;
      if( pRes ) *pRes = 0;
      return SQLITE_OK;
    }
    do{
................................................................................
    rc = sqlitepager_get(pBt->pPager, pPage1->freeList, &pOvfl);
    if( rc ) return rc;
    rc = sqlitepager_write(pOvfl);
    if( rc ){
      sqlitepager_unref(pOvfl);
      return rc;
    }
    pPage1->freeList = pOvfl->next;
    *ppPage = (MemPage*)pOvfl;
  }else{
    *pPgno = sqlitepager_pagecount(pBt->pPager);
    rc = sqlitepager_get(pBt->pPager, *pPgno, ppPage);
    if( rc ) return rc;
    rc = sqlitepager_write(*ppPage);
  }
................................................................................
    needOvflUnref = 1;
  }
  rc = sqlitepager_write(pOvfl);
  if( rc ){
    if( needOvflUnref ) sqlitepager_unref(pOvfl);
    return rc;
  }
  pOvfl->next = pPage1->freeList;
  pPage1->freeList = pgno;
  memset(pOvfl->aPayload, 0, OVERFLOW_SIZE);
  pPage->isInit = 0;
  assert( pPage->pParent==0 );
  rc = sqlitepager_unref(pOvfl);
  return rc;
}
................................................................................
    return SQLITE_OK;
  }
  ovfl = pCell->ovfl;
  pCell->ovfl = 0;
  while( ovfl ){
    rc = sqlitepager_get(pPager, ovfl, &pOvfl);
    if( rc ) return rc;
    nextOvfl = pOvfl->next;
    rc = freePage(pBt, pOvfl, ovfl);
    if( rc ) return rc;
    ovfl = nextOvfl;
    sqlitepager_unref(pOvfl);
  }
  return SQLITE_OK;
}
................................................................................
      if( rc ){
        *pNext = 0;
        clearCell(pBt, pCell);
        return rc;
      }
      spaceLeft = OVERFLOW_SIZE;
      pSpace = pOvfl->aPayload;
      pNextPg = &pOvfl->next;
    }
    n = nPayload;
    if( n>spaceLeft ) n = spaceLeft;
    memcpy(pSpace, pPayload, n);
    nPayload -= n;
    if( nPayload==0 && pData ){
      pPayload = pData;
................................................................................
** another.
*/
static void reparentChildPages(Pager *pPager, Page *pPage){
  int i;
  for(i=0; i<pPage->nCell; i++){
    reparentPage(pPager, pPage->apCell[i]->leftChild, pPage);
  }
  reparentPage(pPager, ((PageHdr*)pPage)->rightChild, pPage);
}

/*


































































































** This routine redistributes Cells on pPage and up to two siblings
** of pPage so that all pages have about the same amount of free space.
** Usually one siblings on either side of pPage are used in the repack,

** though both siblings might come from one side if pPage is the first
** or last child of its parent.  If pPage has fewer than two siblings
** (something which can only happen if pPage is the root page or a 
** child of root) then all siblings participate in the repack.
**
** The number of siblings of pPage might be increased or decreased by
** one in order to keep all pages between 2/3 and completely full.  If
** pPage is the root page, then the depth of the tree might be increased
** or decreased by one, as necessary, to keep the root page from being
** overfull or empty.
**









** Note that when this routine is called, some of the Cells on pPage
** might not actually be stored in pPage->aDisk[].  This can happen
** if the page is overfull.  Part of the job of this routine is to
** make sure all Cells for pPage once again fit in pPage->aDisk[].



*/
static int repack(Btree *pBt, MemPage *pPage){
  MemPage *pParent;            /* The parent of pPage */
  MemPage *apOld[3];           /* pPage and up to two siblings before repack */
  Pgno pgnoOld[3];             /* Page numbers for each page in apOld[] */
  MemPage *apNew[4];           /* pPage and up to 3 siblings after repack */

  int idxDiv[3];               /* Indices of divider cells in pParent */
  Cell *apDiv[3];              /* Divider cells in pParent */
  int nCell;                   /* Number of cells in apCell[] */
  int nOld;                    /* Number of pages in apOld[] */
  int nNew;                    /* Number of pages in apNew[] */
  int perPage;                 /* Approximate number of bytes per page */
  int nDiv;                    /* Number of cells in apDiv[] */







  Cell *apCell[MX_CELL*3+5];   /* All cells from pages being repacked */
  int unrefPage = 0;           /* If true, then unref pPage when done */




  /*
  ** Early out if no repacking is needed.


  */
  if( pPage->nFree>=0 && pPage->nFree<SQLITE_PAGE_SIZE/2 ){

    return SQLITE_OK;
  }

  /*
  ** Find the parent of the page to be repacked.
  */
  pParent = pPage->pParent;

  /*
  ** If there is no parent, it means the page is the root page.
  ** special rules apply.
  */

  if( pParent==0 ){
    Pgno pgnoChild;
    Page *pChild;
    if( pPage->nCell==0 ){
      if( ((PageHdr*)pPage)->rightChild ){

        /* The root page is under full.  Copy the one child page
        ** into the root page and return.  This reduces the depth
        ** of the BTree by one.
        */
        pgnoChild = ((PageHdr*)pPage->rightChild;


        rc = sqlitepager_get(pBt, pgnoChild, &pChild);
        if( rc ) return rc;
        memcpy(pPage, pChild, SQLITE_PAGE_SIZE);
        pPage->isInit = 0;
        initPage(pPage, sqlitepager_pagenumber(pPage), 0);
        reparentChildPages(pBt->pPager, pPage);
        freePage(pBt, pChild, pgnoChild);
        sqlitepager_unref(pChild);
      }
      return SQLITE_OK;
    }
    if( pPage->nFree>=0 ){
      /* It is OK for the root page to be less than half full.
      */

      return SQLITE_OK;
    }

    /* If we get to here, it means the root page is over full.
    ** When this happens, Create a new child page and copy the
    ** contents of the root into the child.  Then make the root
    ** page and empty page with rightChild pointing to the new
    ** child.  Then fall thru to the code below which will cause
    ** the overfull child page to be split.
    */


    rc = allocatePage(pBt, &pChild, &pgnoChild);
    if( rc ) return rc;
    memcpy(pChild, pPage, SQLITE_PAGE_SIZE);
    for(i=0; i<pPage->nCell; i++){
      if( pPage->apCell[i]>(Cell*)pPage && pPage->apCell[i]<(Cell*)&pPage[1] ){
        int offset = (int)pPage->apCell[i] - (int)pPage;
        pChild->apCell[i] = (Cell*)((int)pChild + offset);
      }else{

        pChild->apCell[i] = pPage->apCell[i];


      }
    }
    pChild->isInit = 1;
    pChild->nCell = pPage->nCell;
    pChild->nFree = pPage->nFree;
    /* reparentChildPages(pBt->pPager, pChild); */
    zeroPage(pPage);
    ((PageHdr*)pPage)->rightChild = pgnoChild;
    pParent = pPage;
    pPage = pChild;
    unrefPage = 1;



  }

  /*
  ** Find the Cell in the parent page that refers to the page

  ** to be repacked.
  */
  idx = -1;
  pgno = sqlitepager_pagenumber(pPage);
  for(i=0; i<pParent->nCell; i++){
    if( pParent->apCell[i]->h.leftChild==pgno ){
      idx = i;
      break;
    }
  }
  if( idx<0 && ((PageHdr*)pPage)->rightChild==pgno ){
    idx = pPage->nCell;
  }
  if( idx<0 ){
    rc = SQLITE_CORRUPT;
    goto end_of_repack;
  }

  /*

  ** Get sibling pages and their dividers
  */










  if( idx==pPage->nCell ){
    idx -= 2;
  }else{
    idx--;

  }
  if( idx<0 ) idx = 0;
  nDiv = 0;
  nOld = 0;
  for(i=0; i<3; i++){
    if( i+idx<pParent->nCell ){
      idxDiv[i] = i+idx;
      apDiv[i] = pParent->apCell[i+idx];
      nDiv++;
      pgnoOld[i] = apDiv[i]->h.leftChild;
      rc = sqlitepager_get(pBt, pgnoOld[i], &apOld[i]);
      if( rc ) goto end_of_repack;
      nOld++;
    }
    if( i+idx==pParent->nCell ){
      pgnoOld[i] = pParent->rightChild;



      rc = sqlitepager_get(pBt, pgnoOld[i], &apOld[i]);
      if( rc ) goto end_of_repack;
      nOld++;
    }
  }














  /*
  ** Get all cells
















  */
  nCell = 0;
  for(i=0; i<nOld; i++){
    MemPage *pOld = apOld[i];
    for(j=0; j<pOld->nCell; j++){
      apCell[nCell++] = pOld->apCell[j];


    }
    if( i<nOld-1 ){


      apCell[nCell++] = apDiv[i];




    }
  }

  /*
  ** Estimate the number of pages needed


  */
  totalSize = 0;
  for(i=0; i<nCell; i++){
    totalSize += cellSize(apCell[i]);
  }
  nNew = (totalSize + (SQLITE_PAGE_SIZE - sizeof(PageHdr) - 1)) /
            (SQLITE_PAGE_SIZE - sizeof(PageHdr));
  perPage = totalSize/nNew;

  

  /*
  ** Allocate new pages
  */
  for(i=0; i<nNew; i++){
    rc = allocatePage(pBt, &apNew[i], &pgnoNew[i]);
    if( rc ) goto end_of_repack;

    zeroPage(apNew[i]);
  }

  /*
  ** Copy data into the new pages

  */

  for(i=0; i<nNew; i++){
  }

  /*
  ** Reparent children of all cells
  */




  /*
  ** Release the old pages
  */

  for(i=0; i<nOld; i++){
    releasePage(pBt, apOld[i], 0);
  }

  /*



  ** Repack the parent page, if necessary
  */

  if( needToRepackParent ){
    return repack(pParent);
  }
  rc = SQLITE_OK;



end_of_repack:
  if( unrefPage ){
    sqlitepager_unref(pPage);
  }
  return rc;
}

/*
** Attempt to move N or more bytes out of the page that the cursor
** points to into the left sibling page.  (The left sibling page
** contains cells that are less than the cells on this page.)  The
** entry that the cursor is pointing to cannot be moved.  Return
** TRUE if successful and FALSE if not.
**
** Reasons for not being successful include: 
**
**    (1) there is no left sibling,
**    (2) we could only move N-1 bytes or less,
**    (3) some kind of file I/O error occurred
**
** Note that a partial rotation may have occurred even if this routine
** returns FALSE.  Failure means we could not rotation a full N bytes.
** If it is possible to rotation some smaller number M, then the 
** rotation occurs but we still return false.
**
** Example:  Consider a segment of the Btree that looks like the
** figure below prior to rotation.  The cursor is pointing to the
** entry *.  The sort order of the entries is A B C D E * F Y.
**
**
**            -------------------------
**                ... | C | Y | ...
**            -------------------------
**                     /     \
**            ---------       -----------------
**            | A | B |       | D | E | * | F |
**            ---------       -----------------
**
** After rotation of two cells (D and E), the same Btree segment 
** looks like this:
**
**            -------------------------
**                ... | E | Y | ...
**            -------------------------
**                     /     \
**    -----------------       ---------
**    | A | B | C | D |       | * | F |
**    -----------------       ---------
**
** The size of this rotation is the size by which the page containing
** the cursor was reduced.  In this case, the size of D and E.
**
*/
static int rotateLeft(BtCursor *pCur, int N){
  return 0;
}

/*
** This routine is the same as rotateLeft() except that it move data
** to the right instead of to the left.  See comments on the rotateLeft()
** routine for additional information.
*/
static int rotateRight(BtCursor *pCur, int N){
  return 0;
}

/*
** Append a cell onto the end of a page.
**
** The child page of the cell is reparented if pPager!=NULL.
*/
static void appendCell(
  Pager *pPager,      /* The page cache.  Needed for reparenting */
  Cell *pSrc,         /* The Cell to be copied onto a new page */
  MemPage *pPage      /* The page into which the cell is copied */
){
  int pc;
  int sz;
  Cell *pDest;

  sz = cellSize(pSrc);
  pc = allocateSpace(pPage, sz);
  assert( pc>0 ){
  pDest = pPage->apCell[pPage->nCell] = &pPage->aDisk[pc];
  memcpy(pDest, pSrc, sz);
  pDest->h.iNext = 0;
  if( pPage->nCell>0 ){
    pPage->apCell[pPage->nCell-1]->h.iNext = pc;
  }else{
    ((PageHdr*)pPage)->firstCell = pc;

  }
  if( pPager && pDest->h.leftChild ){
    reparentPage(pPager, pDest->h.leftChild, pPage);



  }
}

/*
** Split a single database page into two roughly equal-sized pages.
**
** The input is an existing page and a new Cell.  The Cell might contain
** a valid Cell.h.leftChild field pointing to a child page.
**
** The output is the Cell that divides the two new pages.  The content
** of this divider Cell is written into *pCenter.  pCenter->h.leftChild
** holds the page number of the new page that was created to hold the 
** smaller of the cells from the divided page.  The larger cells from
** the divided page are written to a newly allocated page and *ppOut 
** is made to point to that page.  Or if ppOut==NULL then the larger cells
** remain on pIn.
**
** Upon return, pCur should be pointing to the same cell, even if that
** cell has moved to a new page.  The cell that pCur points to cannot
** be the pCenter cell.

*/
static int split(
  BtCursor *pCur,     /* A cursor pointing at a cell on the page to be split */
  Cell *pNewCell,     /* A new cell to add to pIn before dividing it up */
  Cell *pCenter,      /* Write the cell that divides the two pages here */
  MemPage **ppOut     /* If not NULL, put larger cells in new page at *ppOut */
){
  MemPage *pLeft, *pRight;
  Pgno pgnoLeft, pgnoRight;
  PageHdr *pHdr;
  int rc;
  Pager *pPager = pCur->pBt->pPager;
  MemPage tempPage;



  /* Allocate pages to hold cells after the split and make pRight and 
  ** pLeft point to the newly allocated pages.
  */
  rc = allocatePage(pCur->pBt, &pLeft, &pgnoLeft);
  if( rc ) return rc;
  if( ppOut ){
    rc = allocatePage(pCur->pBt, &pRight, &pgnoRight);
    if( rc ){
      freePage(pCur->pBt, pLeft, pgnoLeft);
      return rc;
    }
    *ppOut = pRight;
  }else{
    *ppOut = tempPage;
  }

  /* Copy the smaller cells from the original page into the left page
  ** of the split.
  */
  zeroPage(pLeft);
  if( pCur->idx==0 && pCur->match>0 ){
    appendCell(pPager, pNewCell, pLeft);
  }
  do{
    assert( i<pPage->nCell );
    appendCell(pPager, pPage->apCell[i++], pLeft);
    if( pCur->idx==i && pCur->iMatch>0 ){
      appendCell(pPager, pNewCell, Left);
    }
  }while( pc < SQLITE_PAGE_SIZE/2 );

  /* Copy the middle entry into *pCenter
  */
  assert( i<pPage->nCell );
  memcpy(pCenter, pPage->aCell[i], cellSize(pPage->aCell[i]));
  i++;
  pHdr = (PageHdr*)pLeft;
  pHdr->rightChild = pCenter->h.leftChild;
  if( pHdr->rightChild ){
    reparentPage(pPager, pHdr->rightChild, pLeft);
  }
  pCenter->h.leftChild = pgnoLeft;
 
  /* Copy the larger cells from the original page into the right
  ** page of the split
  */
  zeroPage(pRight);
  while( i<pPage->nCell ){
    appendCell(0, pPage->apCell[i++], pRight);
  }

  /* If ppOut==NULL then copy the temporary right page over top of
  ** the original input page.
  */
  if( ppOut==0 ){
    pRight->pParent = pPage->pParent;
    pRight->isInit = 1;
    memcpy(pPage, pRight, sizeof(*pPage));
  }
  reparentChildPages(pPager, pPage);
}

/*
** Unlink a cell from a database page.  Add the space used by the cell
** back to the freelist for the database page on which the cell used to
** reside.
**
** This operation overwrites the cell header and content.

*/
static void unlinkCell(BtCursor *pCur){
  MemPage *pPage;    /* Page containing cell to be unlinked */
  int idx;           /* The index of the cell to be unlinked */
  Cell *pCell;       /* Pointer to the cell to be unlinked */
  u16 *piCell;       /* iNext pointer from prior cell */
  int iCell;         /* Index in pPage->aDisk[] of cell to be unlinked */
  int i;             /* Loop counter */


  pPage = pCur->pPage;
  sqlitepager_write(pPage);
  idx = pCur->idx;
  pCell = pPage->apCell[idx];
  if( idx==0 ){
    piCell = &pPage->pHdr->firstCell;
  }else{
    piCell = &pPage->apCell[idx-1]->h.iNext;
  }
  iCell = *piCell;
  *piCell = pCell->h.iNext;
  freeSpace(pPage, iCell, cellSize(pCell));
  pPage->nCell--;
  for(i=idx; i<pPage->nCell; i++){
    pPage->apCell[i] = pPage->apCell[i+1];
  }
}

/*
** Add a Cell to a database page at the spot indicated by the cursor.
**
** With this routine, we know that the Cell pNewCell will fit into the
** database page that pCur points to.  The calling routine has made
** sure it will fit.  All this routine needs to do is add the Cell
** to the page.  The addToPage() routine should be used for cases
** were it is not known if the new cell will fit.
**
** The new cell is added to the page either before or after the cell
** to which the cursor is pointing.  The new cell is added before
** the cursor cell if pCur->iMatch>0 and the new cell is added after
** the cursor cell if pCur->iMatch<0.  pCur->iMatch should have been set
** by a prior call to sqliteBtreeMoveto() where the key was the key
** of the cell being inserted.  If sqliteBtreeMoveto() ended up on a
** cell that is larger than the key, then pCur->iMatch was set to a
** positive number, hence we insert the new record before the pointer
** if pCur->iMatch is positive.  If sqliteBtreeMaveto() ended up on a
** cell that is smaller than the key then pCur->iMatch was set to a
** negative number, hence we insert the new record after then pointer
** if pCur->iMatch is negative.

*/
static int insertCell(BtCursor *pCur, Cell *pNewCell){
  int sz;
  int idx;
  int i;
  Cell *pCell, *pIdx;
  MemPage *pPage;




  pPage = pCur->pPage;
  sz = cellSize(pNewCell);
  idx = allocateSpace(pPage, sz);
  assert( idx>0 && idx<=SQLITE_PAGE_SIZE - sz );
  pCell = (Cell*)&pPage->aDisk[idx];
  memcpy(pCell, pNewCell, sz);
  pIdx = pPage->aDisk[pCur->idx];
  if( pCur->iMatch<0 ){
    /* Insert the new cell after the cell pCur points to */
    pCell->h.iNext = pIdx->h.iNext;
    pIdx->h.iNext = idx;
    for(i=pPage->nCell-1; i>pCur->idx; i--){
      pPage->apCell[i+1] = pPage->apCell[i];


    }


    pPage->apCell[pCur->idx+1] = pCell;
  }else{
    /* Insert the new cell before the cell pCur points to */
    pCell->h.iNext = pPage->pHdr->firstCell;
    pPage->pHdr->firstCell = idx;
    for(i=pPage->nCell; i>0; i++){
      pPage->apCell[i] = pPage->apCell[i-1];
    }
    pPage->apCell[0] = pCell;
  }
  pPage->nCell++;
  if( pCell->h.leftChild ){
    MemPage *pChild = sqlitepager_lookup(pCur->pBt, pCell->h.leftChild);
    if( pChild && pChild->pParent ){
      sqlitepager_unref(pChild->pParent);
      pChild->pParent = pPage;
      sqlitepager_ref(pChild->pParent);
    }
  }
  return SQLITE_OK;
}

/*
** Insert pNewCell into the database page that pCur is pointing to at
** the place where pCur is pointing.  
**
** This routine works just like insertCell() except that the cell
** to be inserted need not fit on the page.  If the new cell does 
** not fit, then the page sheds data to its siblings to try to get 
** down to a size where the new cell will fit.  If that effort fails,
** then the page is split.
*/
static int addToPage(BtCursor *pCur, Cell *pNewCell){
  Cell tempCell;
  Cell centerCell;

  for(;;){
    MemPage *pPage = pCur->pPage;
    rc = sqlitepager_write(pPage);
    if( rc ) return rc;
    int sz = cellSize(pNewCell);
    if( sz<=pPage->nFree ){
      insertCell(pCur, pNewCell);
      return SQLITE_OK;
    }
    if( pPage->pParent==0 ){
      MemPage *pRight;
      PageHdr *pHdr;
      FreeBlk *pFBlk;
      int pc;
      rc = split(pCur, pNewCell, &centerCell, &pRight);
      if( rc ) return rc;
      pHdr = pPage->pHdr;
      pHdr->right = sqlitepager_pagenumber(pRight);
      sqlitepager_unref(pRight);
      pHdr->firstCell = pc = sizeof(*pHdr);
      sz = cellSize(&centerCell);
      memcpy(&pPage->aDisk[pc], &centerCell, sz);
      pc += sz;
      pHdr->firstFree = pc;
      pFBlk = (FreeBlk*)&pPage->aDisk[pc];
      pFBlk->iSize = SQLITE_PAGE_SIZE - pc;
      pFBlk->iNext = 0;
      memset(&pFBlk[1], 0, pFBlk->iSize-sizeof(*pFBlk));
      return SQLITE_OK;
    }
    if( rotateLeft(pCur, sz - pPage->nFree) 
           || rotateRight(pCur, sz - pPage->nFree) ){
      insertCell(pCur, pNewCell);
      return SQLITE_OK;
    }
    rc = split(pCur, pNewCell, &centerCell, 0);
    if( rc ) return rc;
    moveToParent(pCur);
    tempCell = centerCell;
    pNewPage = &tempCell;
  }
  /* NOT REACHED */
}

/*
** Insert a new record into the BTree.  The key is given by (pKey,nKey)
** and the data is given by (pData,nData).  The cursor is used only to
** define what database the record should be inserted into.  The cursor
** is NOT left pointing at the new record.
*/
int sqliteBtreeInsert(
  BtCursor *pCur,            /* Insert data into the table of this cursor */
  void *pKey,  int nKey,     /* The key of the new record */
  void *pData, int nData     /* The data of the new record */
){
  Cell newCell;
  int rc;
  int loc;

  MemPage *pPage;
  Btree *pBt = pCur->pBt;

  if( !pCur->pBt->inTrans ){
    return SQLITE_ERROR;  /* Must start a transaction first */
  }
  rc = sqliteBtreeMoveTo(pCur, pKey, nKey, &loc);
  if( rc ) return rc;

  rc = sqlitepager_write(pCur->pPage);
  if( rc ) return rc;
  rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData);
  if( rc ) return rc;

  if( loc==0 ){
    newCell.h.leftChild = pCur->pPage->apCell[pCur->idx]->h.leftChild;
    rc = clearCell(pBt, pCur->pPage->apCell[pCur->idx]);
    if( rc ) return rc;
    unlinkCell(pCur);
  }

  return addToPage(pCur, &newCell);
}

/*
** Check the page at which the cursor points to see if it is less than
** half full.  If it is less than half full, then try to increase
** its fill factor by grabbing cells from siblings or by merging
** the page with siblings.
*/
static int refillPage(BtCursor *pCur){
  MemPage *pPage;
  BtCursor tempCur;
  int rc;
  Pager *pPager;


  pPage = pCur->pPage;
  if( pPage->nFree < SQLITE_PAGE_SIZE/2 ){
    return SQLITE_OK;


  }
  rc = sqlitepager_write(pPage);
  if( rc ) return rc;
  pPager = pCur->pBt->pPager;

  if( pPage->nCell==0 ){
    /* The page being refilled is the root of the BTree and it has
    ** no entries of its own.  If there is a child page, then make the
    ** child become the new root.
    */
    MemPage *pChild;
    Pgno pgnoChild;
    assert( pPage->pParent==0 );
    assert( sqlitepager_pagenumber(pPage)==pCur->pgnoRoot );
    pgnoChild = ((PageHdr*)pPage)->rightChild;
    if( pgnoChild==0 ){
      return SQLITE_OK;
    }
    rc = sqlitepager_get(pPager, pgno, &pChild);
    if( rc ) return rc;
    memcpy(pPage, pChild, SQLITE_PAGE_SIZE);
    memset(&pPage->aDisk[SQLITE_PAGE_SIZE], 0, EXTRA_SIZE);
    freePage(pCur->pBt, pChild, pgnoChild);
    sqlitepager_unref(pChild);
    rc = initPage(pPage, pCur->pgnoRoot, 0);
    reparentChildPages(pPager, pPage);
    return SQLITE_OK;
  }

  /** merge with siblings **/

  /** borrow from siblings **/
}

/*
** Replace the content of the cell that pCur is pointing to with the content
** in pNewContent.  The pCur cell is not unlinked or moved in the Btree,
** its content is just replaced.
**
** If the size of pNewContent is greater than the current size of the
** cursor cell then the page that cursor points to might have to split.
*/
static int ReplaceContentOfCell(BtCursor *pCur, Cell *pNewContent){
  Cell *pCell;       /* The cell whose content will be changed */
  Pgno pgno;         /* Temporary storage for a page number */

  pCell = pCur->pPage->apCell[pCur->idx];

  rc = clearCell(pCur->pBt, pCell);
  if( rc ) return rc;
  pgno = pNewCell->h.leftChild;
  pNewCell->h.leftChild = pCell->h.leftChild;
  unlinkCell(pCur);
  rc = addToPage(pCur, pNewCell);
  pNewCell->h.leftChild = pgno;
  return rc;
}

/*
** Delete the entry that the cursor is pointing to.
**
** The cursor is left pointing at either the next or the previous
................................................................................
  }
  if( pCur->idx >= pPage->nCell ){
    return SQLITE_ERROR;  /* The cursor is not pointing to anything */
  }
  rc = sqlitepager_write(pPage);
  if( rc ) return rc;
  pCell = pPage->apCell[pCur->idx];
  if( pPage->pHdr->rightChild ){
    /* The entry to be deleted is not on a leaf page.  Non-leaf entries 
    ** cannot be deleted directly because they have to be present to
    ** hold pointers to subpages.  So what we do is look at the next 
    ** entry in sequence.  The next entry is guaranteed to exist and 
    ** be a leaf.  We copy the payload from the next entry into this
    ** entry, then delete the next entry.
    */
    BtCursor origCur;
    CreateTemporaryCursor(pCur, &origCur);
    rc = sqliteBtreeNext(pCur, 0);
    if( rc==SQLITE_OK ){
      pPage = pCur->pPage;
      pCell = pPage->apCell[pCur->idx];
      rc = ReplaceContentOfCell(&origCur, pCell);
    }
    DestroyTemporaryCursor(&origCur);
    if( rc ) return rc;
  }
  rc = clearCell(pCell);
  if( rc ) return rc;
  unlinkCell(pCur->pBt, pCell);
  if( pCur->idx == 0 ){
    pCur->bSkipNext = 1;
  }else{
    pCur->idx--;
  }
  rc = refillPage(pCur);



  return rc;
}

/*
** Create a new BTree in the same file.  Write into *piTable the index
** of the root page of the new table.
*/
................................................................................
  int rc;
  int i;
  Cell *pCell;
  int idx;

  rc = sqlitepager_get(pBt->pPager, pgno, &pPage);
  if( rc ) return rc;
  idx = ((PageHdr*)pPage)->firstCell;
  while( idx>0 ){
    pCell = (Cell*)&pPage->aDisk[idx];
    idx = pCell->h.iNext;
    if( pCell->h.leftChild ){
      rc = clearDatabasePage(pBt, pCell->h.leftChild);
      if( rc ) return rc;
    }
    rc = clearCell(pCell);
    if( rc ) return rc;







|







 







>


>







 







|




|









|
|







 







|







 







|



|







 







|





|





|




>
>
>
>
>
>
>
>
>






>
|
>
>


|

>
|







 







|







 







|
<




|
|



|

|



|
|


|







|







 







|
|
|



|



|






|









|
|
|
>













|


|



|









|




|







 







|
|
|







 







|



|







|



|







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<








|







 







|
>




>

<
<







 







|










|







 







|







 







|







 







|







 







|
|







 







|







 







|







 







|







 







|







 







|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


<
>



|







>
>
>
>
>
>
>
>
>

|

|
>
>
>

|

|

|
>







>
>
>
>
>
>
>
|
<
>
>
>

|
<
>
>

|
>




|
<
<
<
<
|


>




|
>
|



|
>
>











|


>


>
|


|



>
>


|
<
<
|
|
<
>
|
>
>
|
<
<
<
<
<

|


<
>
>
>

|

|
>
|









|



|
<



>
|

>
>
>
>
>
>
>
>
>
>
|
|

<
>

<
|
|
|
|
|
|


<
<
<
<
|

>
>
>
|
|
|
|
|
>
>
>
>
>
>
>
>
>
|
>
>
>
>

<
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|
>
>


>
>
|
>
>
>
>




|
>
>



|

|

|
>





|

|
>




|
>

>

<
<
|
|
<
>
>
>
|
<
<
<
>
|
|
<
<
<
>
>
>
|
<
>
|
<
|
<
|
>
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

<
>

<
<
>
>
>

|
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>
|
<
<
<
<
<
<
<
<
<
<
<
<
>
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
<
|
<
<
<
<
<
>
|
<
<
<
<
<
<
<
>

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>
|
<
<
<
<
<
<
>
>
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
>
>
|
>
>
|

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
|
<
<
<
|
<
|
<




|









>






|

>
|



>

|
|

<
<
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>
|
<
<
>
>

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>
|
<
<
<
<
<
<







 







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>







 







|

|







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
..
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
...
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
...
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
...
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
...
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
...
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
...
333
334
335
336
337
338
339
340

341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
...
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
...
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
...
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
...
516
517
518
519
520
521
522






























523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
...
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752


753
754
755
756
757
758
759
...
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
...
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
...
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
....
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
....
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
....
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
....
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
....
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
....
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
....
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494

1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544

1545
1546
1547
1548
1549

1550
1551
1552
1553
1554
1555
1556
1557
1558
1559




1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606


1607
1608

1609
1610
1611
1612
1613





1614
1615
1616
1617

1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640

1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659

1660
1661

1662
1663
1664
1665
1666
1667
1668
1669




1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694

1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761


1762
1763

1764
1765
1766
1767



1768
1769
1770



1771
1772
1773
1774

1775
1776

1777

1778
1779
1780
1781























































































1782

1783
1784


1785
1786
1787
1788
1789

1790
















1791
1792












1793
1794
1795



























































1796
1797

1798





1799
1800







1801
1802


















1803




















1804
1805






1806
1807
1808
1809













1810
1811
1812
1813
1814
1815
1816




















































1817










1818






1819



1820

1821

1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854


1855
1856














1857
1858


1859
1860
1861














































1862
1863






1864
1865
1866
1867
1868
1869
1870
....
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
....
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
** Boston, MA  02111-1307, USA.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*************************************************************************
** $Id: btree.c,v 1.12 2001/06/10 19:56:59 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
................................................................................
#include "pager.h"
#include "btree.h"
#include <assert.h>


/*
** Primitive data types.  u32 must be 4 bytes and u16 must be 2 bytes.
** The uptr type must be big enough to hold a pointer.
** Change these typedefs when porting to new architectures.
*/
typedef unsigned int uptr;
typedef unsigned int u32;
typedef unsigned short int u16;
typedef unsigned char u8;

/*
** Forward declarations of structures used only in this file.
*/
................................................................................
};

/*
** Each database page has a header that is an instance of this
** structure.
**
** PageHdr.firstFree is 0 if there is no free space on this page.
** Otherwise, PageHdr.firstFree is the index in MemPage.u.aDisk[] of a 
** FreeBlk structure that describes the first block of free space.  
** All free space is defined by a linked list of FreeBlk structures.
**
** Data is stored in a linked list of Cell structures.  PageHdr.firstCell
** is the index into MemPage.u.aDisk[] of the first cell on the page.  The
** Cells are kept in sorted order.
**
** A Cell contains all information about a database entry and a pointer
** to a child page that contains other entries less than itself.  In
** other words, the i-th Cell contains both Ptr(i) and Key(i).  The
** right-most pointer of the page is contained in PageHdr.rightChild.
*/
struct PageHdr {
  Pgno rightChild;  /* Child page that comes after all cells on this page */
  u16 firstCell;    /* Index in MemPage.u.aDisk[] of the first cell */
  u16 firstFree;    /* Index in MemPage.u.aDisk[] of the first free block */
};

/*
** Entries on a page of the database are called "Cells".  Each Cell
** has a header and data.  This structure defines the header.  The
** key and data (collectively the "payload") follow this header on
** the database page.
................................................................................
** A definition of the complete Cell structure is given below.  The
** header for the cell must be defined separately in order to do some
** of the sizing #defines that follow.
*/
struct CellHdr {
  Pgno leftChild; /* Child page that comes before this cell */
  u16 nKey;       /* Number of bytes in the key */
  u16 iNext;      /* Index in MemPage.u.aDisk[] of next cell in sorted order */
  u32 nData;      /* Number of bytes of data */
}

/*
** The minimum size of a complete Cell.  The Cell must contain a header
** and at least 4 bytes of payload.
*/
................................................................................
** Free space on a page is remembered using a linked list of the FreeBlk
** structures.  Space on a database page is allocated in increments of
** at least 4 bytes and is always aligned to a 4-byte boundry.  The
** linked list of FreeBlks is always kept in order by address.
*/
struct FreeBlk {
  u16 iSize;      /* Number of bytes in this block of free space */
  u16 iNext;      /* Index in MemPage.u.aDisk[] of the next free block */
};

/*
** The number of bytes of payload that will fit on a single overflow page.
*/
#define OVERFLOW_SIZE (SQLITE_PAGE_SIZE-sizeof(Pgno))

/*
** When the key and data for a single entry in the BTree will not fit in
** the MX_LOACAL_PAYLOAD bytes of space available on the database page,
** then all extra bytes are written to a linked list of overflow pages.
................................................................................
**
** Unused pages in the database are also represented by instances of
** the OverflowPage structure.  The PageOne.freeList field is the
** page number of the first page in a linked list of unused database
** pages.
*/
struct OverflowPage {
  Pgno iNext;
  char aPayload[OVERFLOW_SIZE];
};

/*
** For every page in the database file, an instance of the following structure
** is stored in memory.  The u.aDisk[] array contains the raw bits read from
** the disk.  The rest is auxiliary information that held in memory only. The
** auxiliary info is only valid for regular database pages - it is not
** used for overflow pages and pages on the freelist.
**
** Of particular interest in the auxiliary info is the apCell[] entry.  Each
** apCell[] entry is a pointer to a Cell structure in u.aDisk[].  The cells are
** put in this array so that they can be accessed in constant time, rather
** than in linear time which would be needed if we had to walk the linked 
** list on every access.
**
** Note that apCell[] contains enough space to hold up to two more Cells
** than can possibly fit on one page.  In the steady state, every apCell[]
** points to memory inside u.aDisk[].  But in the middle of an insert
** operation, some apCell[] entries may temporarily point to data space
** outside of u.aDisk[].  This is a transient situation that is quickly
** resolved.  But while it is happening, it is possible for a database
** page to hold as many as two more cells than it might otherwise hold.
** The extra too entries in apCell[] are an allowance for this situation.
**
** The pParent field points back to the parent page.  This allows us to
** walk up the BTree from any leaf to the root.  Care must be taken to
** unref() the parent page pointer when this page is no longer referenced.
** The pageDestructor() routine handles that chore.
*/
struct MemPage {
  union {
    char aDisk[SQLITE_PAGE_SIZE];  /* Page data stored on disk */
    PageHdr hdr;                   /* Overlay page header */
  } u;
  int isInit;                    /* True if auxiliary data is initialized */
  MemPage *pParent;              /* The parent of this page.  NULL for root */
  int nFree;                     /* Number of free bytes in u.aDisk[] */
  int nCell;                     /* Number of entries on this page */
  int isOverfull;                /* Some apCell[] points outside u.aDisk[] */
  Cell *apCell[MX_CELL+2];       /* All data entires in sorted order */
}

/*
** The in-memory image of a disk page has the auxiliary information appended
** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
** that extra information.
*/
................................................................................
/*
** A cursor is a pointer to a particular entry in the BTree.
** The entry is identified by its MemPage and the index in
** MemPage.apCell[] of the entry.
*/
struct BtCursor {
  Btree *pBt;               /* The Btree to which this cursor belongs */
  BtCursor *pNext, *pPrev;  /* Forms a linked list of all cursors */
  Pgno pgnoRoot;            /* The root page of this tree */
  MemPage *pPage;           /* Page that contains the entry */
  u16 idx;                  /* Index of the entry in pPage->apCell[] */
  u8 bSkipNext;             /* sqliteBtreeNext() is no-op if true */
  u8 iMatch;                /* compare result from last sqliteBtreeMoveto() */
};

................................................................................

/*
** Defragment the page given.  All Cells are moved to the
** beginning of the page and all free space is collected 
** into one big FreeBlk at the end of the page.
*/
static void defragmentPage(MemPage *pPage){
  int pc, i, n;

  FreeBlk *pFBlk;
  char newPage[SQLITE_PAGE_SIZE];

  pc = sizeof(PageHdr);
  pPage->u.hdr.firstCell = pc;
  memcpy(newPage, pPage->u.aDisk, pc);
  for(i=0; i<pPage->nCell; i++){
    Cell *pCell = &pPage->apCell[i];
    n = cellSize(pCell);
    pCell->h.iNext = i<pPage->nCell-1 ? pc + n : 0;
    memcpy(&newPage[pc], pCell, n);
    pPage->apCell[i] = (Cell*)&pPage->u.aDisk[pc];
    pc += n;
  }
  assert( pPage->nFree==SQLITE_PAGE_SIZE-pc );
  memcpy(pPage->u.aDisk, newPage, pc);
  pFBlk = &pPage->u.aDisk[pc];
  pFBlk->iSize = SQLITE_PAGE_SIZE - pc;
  pFBlk->iNext = 0;
  pPage->u.hdr.firstFree = pc;
  memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk));
}

/*
** Allocate nByte bytes of space on a page.  nByte must be a 
** multiple of 4.
**
** Return the index into pPage->u.aDisk[] of the first byte of
** the new allocation. Or return 0 if there is not enough free
** space on the page to satisfy the allocation request.
**
** If the page contains nBytes of free space but does not contain
** nBytes of contiguous free space, then this routine automatically
** calls defragementPage() to consolidate all free space before 
** allocating the new chunk.
................................................................................
*/
static int allocateSpace(MemPage *pPage, int nByte){
  FreeBlk *p;
  u16 *pIdx;
  int start;

  assert( nByte==ROUNDUP(nByte) );
  if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
  pIdx = &pPage->u.hdr.firstFree;
  p = (FreeBlk*)&pPage->u.aDisk[*pIdx];
  while( p->iSize<nByte ){
    if( p->iNext==0 ){
      defragmentPage(pPage);
      pIdx = &pPage->u.hdr.firstFree;
    }else{
      pIdx = &p->iNext;
    }
    p = (FreeBlk*)&pPage->u.aDisk[*pIdx];
  }
  if( p->iSize==nByte ){
    start = *pIdx;
    *pIdx = p->iNext;
  }else{
    start = *pIdx;
    FreeBlk *pNew = (FreeBlk*)&pPage->u.aDisk[start + nByte];
    pNew->iNext = p->iNext;
    pNew->iSize = p->iSize - nByte;
    *pIdx = start + nByte;
  }
  pPage->nFree -= nByte;
  return start;
}

/*
** Return a section of the MemPage.u.aDisk[] to the freelist.
** The first byte of the new free block is pPage->u.aDisk[start]
** and the size of the block is "size" bytes.  Size must be
** a multiple of 4.
**
** Most of the effort here is involved in coalesing adjacent
** free blocks into a single big free block.
*/
static void freeSpace(MemPage *pPage, int start, int size){
  int end = start + size;
  u16 *pIdx, idx;
  FreeBlk *pFBlk;
  FreeBlk *pNew;
  FreeBlk *pNext;

  assert( size == ROUNDUP(size) );
  assert( start == ROUNDUP(start) );
  pIdx = &pPage->u.hdr.firstFree;
  idx = *pIdx;
  while( idx!=0 && idx<start ){
    pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
    if( idx + pFBlk->iSize == start ){
      pFBlk->iSize += size;
      if( idx + pFBlk->iSize == pFBlk->iNext ){
        pNext = (FreeBlk*)&pPage->u.aDisk[pFblk->iNext];
        pFBlk->iSize += pNext->iSize;
        pFBlk->iNext = pNext->iNext;
      }
      pPage->nFree += size;
      return;
    }
    pIdx = &pFBlk->iNext;
    idx = *pIdx;
  }
  pNew = (FreeBlk*)&pPage->u.aDisk[start];
  if( idx != end ){
    pNew->iSize = size;
    pNew->iNext = idx;
  }else{
    pNext = (FreeBlk*)&pPage->u.aDisk[idx];
    pNew->iSize = size + pNext->iSize;
    pNew->iNext = pNext->iNext;
  }
  *pIdx = start;
  pPage->nFree += size;
}

................................................................................
** Return SQLITE_OK on success.  If we see that the page does
** not contained a well-formed database page, then return 
** SQLITE_CORRUPT.  Note that a return of SQLITE_OK does not
** guarantee that the page is well-formed.  It only shows that
** we failed to detect any corruption.
*/
static int initPage(MemPage *pPage, Pgno pgnoThis, MemPage *pParent){
  int idx;           /* An index into pPage->u.aDisk[] */
  Cell *pCell;       /* A pointer to a Cell in pPage->u.aDisk[] */
  FreeBlk *pFBlk;    /* A pointer to a free block in pPage->u.aDisk[] */
  int sz;            /* The size of a Cell in bytes */
  int freeSpace;     /* Amount of free space on the page */

  if( pPage->pParent ){
    assert( pPage->pParent==pParent );
    return SQLITE_OK;
  }
................................................................................
    pPage->pParent = pParent;
    sqlitepager_ref(pParent);
  }
  if( pPage->isInit ) return SQLITE_OK;
  pPage->isInit = 1;
  pPage->nCell = 0;
  freeSpace = SQLITE_PAGE_SIZE - sizeof(PageHdr);
  idx = pPage->u.hdr.firstCell;
  while( idx!=0 ){
    if( idx>SQLITE_PAGE_SIZE-MN_CELL_SIZE ) goto page_format_error;
    if( idx<sizeof(PageHdr) ) goto page_format_error;
    pCell = (Cell*)&pPage->u.aDisk[idx];
    sz = cellSize(pCell);
    if( idx+sz > SQLITE_PAGE_SIZE ) goto page_format_error;
    freeSpace -= sz;
    pPage->apCell[pPage->nCell++] = pCell;
    idx = pCell->h.iNext;
  }
  pPage->nFree = 0;
  idx = pPage->u.hdr.firstFree;
  while( idx!=0 ){
    if( idx>SQLITE_PAGE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
    if( idx<sizeof(PageHdr) ) goto page_format_error;
    pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
    pPage->nFree += pFBlk->iSize;
    if( pFBlk->iNext <= idx ) goto page_format_error;
    idx = pFBlk->iNext;
  }
  if( pPage->nCell==0 && pPage->nFree==0 ){
    /* As a special case, an uninitialized root page appears to be
    ** an empty database */
................................................................................
  if( pPage->nFree!=freeSpace ) goto page_format_error;
  return SQLITE_OK;

page_format_error:
  return SQLITE_CORRUPT;
}































/*
** Set up a raw page so that it looks like a database page holding
** no entries.
*/
static void zeroPage(MemPage *pPage){
  PageHdr *pHdr;
  FreeBlk *pFBlk;
  memset(pPage, 0, SQLITE_PAGE_SIZE);
  pHdr = &pPage->u.hdr;
  pHdr->firstCell = 0;
  pHdr->firstFree = sizeof(*pHdr);
  pFBlk = (FreeBlk*)&pHdr[1];
  pFBlk->iNext = 0;
  pFBlk->iSize = SQLITE_PAGE_SIZE - sizeof(*pHdr);
}

................................................................................
  if( rc!=SQLITE_OK ){
    goto create_cursor_exception;
  }
  rc = initPage(pCur->pPage, pCur->pgnoRoot, 0);
  if( rc!=SQLITE_OK ){
    goto create_cursor_exception;
  }
  pCur->pBt = pBt;
  pCur->idx = 0;
  pCur->pNext = pBt->pCursor;
  if( pCur->pNext ){
    pCur->pNext->pPrev = pCur;
  }
  pCur->pPrev = 0;
  pBt->pCursor = pCur;


  *ppCur = pCur;
  return SQLITE_OK;

create_cursor_exception:
  *ppCur = 0;
  if( pCur ){
    if( pCur->pPage ) sqlitepager_unref(pCur->pPage);
................................................................................
  sqliteFree(pCur);
}

/*
** Make a temporary cursor by filling in the fields of pTempCur.
** The temporary cursor is not on the cursor list for the Btree.
*/
static void getTempCursor(BtCursor *pCur, BtCursor *pTempCur){
  memcpy(pTempCur, pCur, sizeof(*pCur));
  pTempCur->pNext = 0;
  pTempCur->pPrev = 0;
  sqlitepager_ref(pTempCur->pPage);
}

/*
** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
** function above.
*/
static void releaseTempCursor(BtCursor *pCur){
  sqlitepager_unref(pCur->pPage);
}

/*
** Set *pSize to the number of bytes of key in the entry the
** cursor currently points to.  Always return SQLITE_OK.
** Failure is not possible.  If the cursor is not currently
................................................................................
  }
  while( amt>0 && nextPage ){
    OverflowPage *pOvfl;
    rc = sqlitepager_get(pCur->pBt->pPager, nextPage, &pOvfl);
    if( rc!=0 ){
      return rc;
    }
    nextPage = pOvfl->iNext;
    if( offset<OVERFLOW_SIZE ){
      int a = amt;
      if( a + offset > OVERFLOW_SIZE ){
        a = OVERFLOW_SIZE - offset;
      }
      memcpy(zBuf, &pOvfl->aPayload[offset], a);
      amt -= a;
................................................................................
    if( nextPage==0 ){
      return SQLITE_CORRUPT;
    }
    rc = sqlitepager_get(pCur->pBt->pPager, nextPage, &pOvfl);
    if( rc ){
      return rc;
    }
    nextPage = pOvfl->iNext;
    n = nKey;
    if( n>OVERFLOW_SIZE ){
      n = OVERFLOW_SIZE;
    }
    c = memcmp(pOvfl->aPayload, pKey, n);
    sqlitepager_unref(pOvfl);
    if( c!=0 ){
................................................................................
        lwr = pCur->idx+1;
      }else{
        upr = pCur->idx-1;
      }
    }
    assert( lwr==upr+1 );
    if( lwr>=pPage->nCell ){
      chldPg = pPage->u.hdr.rightChild;
    }else{
      chldPg = pPage->apCell[lwr]->h.leftChild;
    }
    if( chldPg==0 ){
      pCur->iMatch = c;
      if( pRes ) *pRes = c;
      return SQLITE_OK;
................................................................................
  if( pCur->bSkipNext ){
    pCur->bSkipNext = 0;
    if( pRes ) *pRes = 0;
    return SQLITE_OK;
  }
  pCur->idx++;
  if( pCur->idx>=pCur->pPage->nCell ){
    if( pPage->u.hdr.rightChild ){
      rc = moveToChild(pCur, pPage->u.hdr.rightChild);
      if( rc ) return rc;
      rc = moveToLeftmost(pCur);
      if( rc ) return rc;
      if( pRes ) *pRes = 0;
      return SQLITE_OK;
    }
    do{
................................................................................
    rc = sqlitepager_get(pBt->pPager, pPage1->freeList, &pOvfl);
    if( rc ) return rc;
    rc = sqlitepager_write(pOvfl);
    if( rc ){
      sqlitepager_unref(pOvfl);
      return rc;
    }
    pPage1->freeList = pOvfl->iNext;
    *ppPage = (MemPage*)pOvfl;
  }else{
    *pPgno = sqlitepager_pagecount(pBt->pPager);
    rc = sqlitepager_get(pBt->pPager, *pPgno, ppPage);
    if( rc ) return rc;
    rc = sqlitepager_write(*ppPage);
  }
................................................................................
    needOvflUnref = 1;
  }
  rc = sqlitepager_write(pOvfl);
  if( rc ){
    if( needOvflUnref ) sqlitepager_unref(pOvfl);
    return rc;
  }
  pOvfl->iNext = pPage1->freeList;
  pPage1->freeList = pgno;
  memset(pOvfl->aPayload, 0, OVERFLOW_SIZE);
  pPage->isInit = 0;
  assert( pPage->pParent==0 );
  rc = sqlitepager_unref(pOvfl);
  return rc;
}
................................................................................
    return SQLITE_OK;
  }
  ovfl = pCell->ovfl;
  pCell->ovfl = 0;
  while( ovfl ){
    rc = sqlitepager_get(pPager, ovfl, &pOvfl);
    if( rc ) return rc;
    nextOvfl = pOvfl->iNext;
    rc = freePage(pBt, pOvfl, ovfl);
    if( rc ) return rc;
    ovfl = nextOvfl;
    sqlitepager_unref(pOvfl);
  }
  return SQLITE_OK;
}
................................................................................
      if( rc ){
        *pNext = 0;
        clearCell(pBt, pCell);
        return rc;
      }
      spaceLeft = OVERFLOW_SIZE;
      pSpace = pOvfl->aPayload;
      pNextPg = &pOvfl->iNext;
    }
    n = nPayload;
    if( n>spaceLeft ) n = spaceLeft;
    memcpy(pSpace, pPayload, n);
    nPayload -= n;
    if( nPayload==0 && pData ){
      pPayload = pData;
................................................................................
** another.
*/
static void reparentChildPages(Pager *pPager, Page *pPage){
  int i;
  for(i=0; i<pPage->nCell; i++){
    reparentPage(pPager, pPage->apCell[i]->leftChild, pPage);
  }
  reparentPage(pPager, pPage->u.hdr.rightChild, pPage);
}

/*
** Remove the i-th cell from pPage.  This routine effects pPage only.
** The cell content is not freed or deallocated.  It is assumed that
** the cell content has been copied someplace else.  This routine just
** removes the reference to the cell from pPage.
**
** "sz" must be the number of bytes in the cell.
**
** Do not bother maintaining the integrity of the linked list of Cells.
** Only pPage->apCell[] is important.  The relinkCellList() routine
** will be called soon after this routine in order to rebuild the
** linked list.
*/
static void dropCell(MemPage *pPage, int i, int sz){
  int j;
  assert( i>=0 && i<pPage->nCell );
  assert( sz==cellSize(pPage->apCell[i]);
  freeSpace(pPage, idx, sz);
  for(j=i, j<pPage->nCell-2; j++){
    pPage->apCell[j] = pPage->apCell[j+1];
  }
  pPage->nCell--;
}

/*
** Insert a new cell on pPage at cell index "i".  pCell points to the
** content of the cell.
**
** If the cell content will fit on the page, then put it there.  If it
** will not fit, then just make pPage->apCell[i] point to the content
** and set pPage->isOverfull.  
**
** Do not bother maintaining the integrity of the linked list of Cells.
** Only pPage->apCell[] is important.  The relinkCellList() routine
** will be called soon after this routine in order to rebuild the
** linked list.
*/
static void insertCell(MemPage *pPage, int i, Cell *pCell, int sz){
  int idx, j;
  assert( i>=0 && i<=pPage->nCell );
  assert( sz==cellSize(pCell) );
  for(j=pPage->nCell; j>i; j--){
    pPage->apCell[j] = pPage->apCell[j-1];
  }
  pPage->nCell++;
  idx = allocateSpace(pPage, sz);
  if( idx<=0 ){
    pPage->isOverfull = 1;
    pPage->apCell[i] = pCell;
  }else{
    memcpy(&pPage->u.aDisk[idx], pCell, sz);
    pPage->apCell[i] = (Cell*)&pPage->u.aDisk[idx]);
  }
}

/*
** Rebuild the linked list of cells on a page so that the cells
** occur in the order specified by pPage->apCell[].  Invoke this
** routine once to repair damage after one or more invocations
** of either insertCell() or dropCell().
*/
static void relinkCellList(MemPage *pPage){
  int i;
  u16 *pIdx;
  pIdx = &pPage->u.hdr.firstCell;
  for(i=0; i<pPage->nCell; i++){
    int idx = ((uptr)pPage->apCell[i]) - (uptr)pPage;
    *pIdx = idx;
    pIdx = &pPage->apCell[i]->h.iNext;
  }
  *pIdx = 0;
}

/*
** Make a copy of the contents of pFrom into pTo.  The pFrom->apCell[]
** pointers that point intto pFrom->u.aDisk[] must be adjusted to point
** intto pTo->u.aDisk[] instead.  But some pFrom->apCell[] entries might
** not point to pFrom->u.aDisk[].  Those are unchanged.
*/
static void copyPage(MemPage *pTo, MemPage *pFrom){
  uptr from, to;
  int i;
  memcpy(pTo->u.aDisk, pFrom->u.aDisk, SQLITE_PAGE_SIZE);
  pTo->pParent = pFrom->pParent;
  pTo->isInit = 1;
  pTo->nCell = pFrom->nCell;
  pTo->nFree = pFrom->nFree;
  pTo->isOverfull = pFrom->isOverfull;
  to = (unsigned int)pTo;
  from = (unsigned int)pFrom;
  for(i=0; i<pTo->nCell; i++){
    uptr addr = (uptr)(pFrom->apCell[i]);
    if( addr>from && addr<from+SQLITE_PAGE_SIZE ){
      *((uptr*)&pTo->apCell[i]) = addr + to - from;
    }
  }
}

/*
** This routine redistributes Cells on pPage and up to two siblings
** of pPage so that all pages have about the same amount of free space.

** Usually one sibling on either side of pPage is used in the balancing,
** though both siblings might come from one side if pPage is the first
** or last child of its parent.  If pPage has fewer than two siblings
** (something which can only happen if pPage is the root page or a 
** child of root) then all available siblings participate in the balancing.
**
** The number of siblings of pPage might be increased or decreased by
** one in order to keep all pages between 2/3 and completely full.  If
** pPage is the root page, then the depth of the tree might be increased
** or decreased by one, as necessary, to keep the root page from being
** overfull or empty.
**
** This routine calls relinkCellList() on its input page regardless of
** whether or not it does any real balancing.  Client routines will typically
** invoke insertCell() or dropCell() before calling this routine, so we
** need to call relinkCellList() to clean up the mess that those other
** routines left behind.
**
** pCur is left pointing to the same cell as when this routine was called
** event if that cell gets moved to a different page.  pCur may be NULL.
**
** Note that when this routine is called, some of the Cells on pPage
** might not actually be stored in pPage->u.aDisk[].  This can happen
** if the page is overfull.  Part of the job of this routine is to
** make sure all Cells for pPage once again fit in pPage->u.aDisk[].
**
** If this routine fails for any reason, it means the database may have
** been left in a corrupted state and should be rolled back.
*/
static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
  MemPage *pParent;            /* The parent of pPage */
  MemPage *apOld[3];           /* pPage and up to two siblings */
  Pgno pgnoOld[3];             /* Page numbers for each page in apOld[] */
  MemPage *apNew[4];           /* pPage and up to 3 siblings after balancing */
  Pgno pgnoNew[4];             /* Page numbers for each page in apNew[] */
  int idxDiv[3];               /* Indices of divider cells in pParent */
  Cell *apDiv[3];              /* Divider cells in pParent */
  int nCell;                   /* Number of cells in apCell[] */
  int nOld;                    /* Number of pages in apOld[] */
  int nNew;                    /* Number of pages in apNew[] */
  int perPage;                 /* Approximate number of bytes per page */
  int nDiv;                    /* Number of cells in apDiv[] */
  int i, j, k;                 /* Loop counters */
  int idx;                     /* Index of pPage in pParent->apCell[] */
  int nxDiv;                   /* Next divider slot in pParent->apCell[] */
  int rc;                      /* The return code */
  int iCur;                    /* apCell[iCur] is the cell of the cursor */
  int usedPerPage;             /* Memory needed for each page */
  int freePerPage;             /* Average free space per page */
  Cell *apCell[MX_CELL*3+5];   /* All cells from pages being balanceed */

  int szCell[MX_CELL*3+5];     /* Local size of all cells */
  Cell aTemp[2];               /* Temporary holding area for apDiv[] */
  MemPage aOld[3];             /* Temporary copies of pPage and its siblings */

  /* 

  ** Return without doing any work if pPage is neither overfull nor
  ** underfull.
  */
  if( !pPage->isOverfull && pPage->nFree<SQLITE_PAGE_SIZE/2 ){
    relinkCellList(pPage);
    return SQLITE_OK;
  }

  /*
  ** Find the parent of the page to be balanceed.




  ** If there is no parent, it means this page is the root page and
  ** special rules apply.
  */
  pParent = pPage->pParent;
  if( pParent==0 ){
    Pgno pgnoChild;
    Page *pChild;
    if( pPage->nCell==0 ){
      if( pPage->u.hdr.rightChild ){
        /*
        ** The root page is empty.  Copy the one child page
        ** into the root page and return.  This reduces the depth
        ** of the BTree by one.
        */
        rc = sqlitepager_write(pPage);
        if( rc ) return rc;
        pgnoChild = pPage->u.hdr.rightChild;
        rc = sqlitepager_get(pBt, pgnoChild, &pChild);
        if( rc ) return rc;
        memcpy(pPage, pChild, SQLITE_PAGE_SIZE);
        pPage->isInit = 0;
        initPage(pPage, sqlitepager_pagenumber(pPage), 0);
        reparentChildPages(pBt->pPager, pPage);
        freePage(pBt, pChild, pgnoChild);
        sqlitepager_unref(pChild);
      }
      return SQLITE_OK;
    }
    if( !pPage->isOverfull ){
      /* It is OK for the root page to be less than half full.
      */
      relinkCellList(pPage);
      return SQLITE_OK;
    }
    /*
    ** If we get to here, it means the root page is overfull.
    ** When this happens, Create a new child page and copy the
    ** contents of the root into the child.  Then make the root
    ** page an empty page with rightChild pointing to the new
    ** child.  Then fall thru to the code below which will cause
    ** the overfull child page to be split.
    */
    rc = sqlitepager_write(pPage);
    if( rc ) return rc;
    rc = allocatePage(pBt, &pChild, &pgnoChild);
    if( rc ) return rc;
    copyPage(pChild, pPage);


    pChild->pParent = pPage;
    pChild->isOverfull = 1;

    if( pCur ){
      sqlitepager_ref(pChild);
      sqlitepager_unref(pCur->pPage);
      pCur->pPage = pChild;
    }





    zeroPage(pPage);
    pPage->u.hdr.rightChild = pgnoChild;
    pParent = pPage;
    pPage = pChild;

  }else{
    rc = sqlitepager_write(pPage);
    if( rc ) return rc;
  }
  
  /*
  ** Find the Cell in the parent page whose h.leftChild points back
  ** to pPage.  The "idx" variable is the index of that cell.  If pPage
  ** is the rightmost child of pParent then set idx to pParent->nCell 
  */
  idx = -1;
  pgno = sqlitepager_pagenumber(pPage);
  for(i=0; i<pParent->nCell; i++){
    if( pParent->apCell[i]->h.leftChild==pgno ){
      idx = i;
      break;
    }
  }
  if( idx<0 && pPage->u.hdr.rightChild==pgno ){
    idx = pPage->nCell;
  }
  if( idx<0 ){
    return SQLITE_CORRUPT;

  }

  /*
  ** Initialize variables so that it will be safe to jump
  ** directory to balance_cleanup at any moment.
  */
  nOld = nNew = 0;
  sqlitepager_ref(pParent);

  /*
  ** Find sibling pages to pPage and the Cells in pParent that divide
  ** the siblings.  An attempt is made to find one sibling on either
  ** side of pPage.  Both siblings are taken from one side, however, if
  ** pPage is either the first or last child of its parent.  If pParent
  ** has 3 or fewer children then all children of pParent are taken.
  */
  if( idx==pParent->nCell ){
    nxDiv = idx - 2;
  }else{

    nxDiv = idx - 1;
  }

  if( nxDiv<0 ) nxDiv = 0;
  nDiv = 0;
  for(i=0, k=nxDiv; i<3; i++, k++){
    if( k<pParent->nCell ){
      idxDiv[i] = k;
      apDiv[i] = pParent->apCell[k];
      nDiv++;
      pgnoOld[i] = apDiv[i]->h.leftChild;




    }else if( k==pParent->nCell ){
      pgnoOld[i] = pParent->rightChild;
    }else{
      break;
    }
    rc = sqlitepager_get(pBt, pgnoOld[i], &apOld[i]);
    if( rc ) goto balance_cleanup;
    nOld++;
  }

  /*
  ** Set iCur to be the index in apCell[] of the cell that the cursor
  ** is pointing to.  We will need this later on in order to keep the
  ** cursor pointing at the same cell.
  */
  if( pCur ){
    iCur = pCur->idx;
    for(i=0; idxDiv[i]<idx; i++){
      iCur += apOld[i]->nCell + 1;
    }
    sqlitepager_unref(pCur->pPage);
    pCur->pPage = 0;
  }

  /*

  ** Make copies of the content of pPage and its siblings into aOld[].
  ** The rest of this function will use data from the copies rather
  ** that the original pages since the original pages will be in the
  ** process of being overwritten.
  */
  for(i=0; i<nOld; i++){
    copyPage(&aOld[i], apOld[i]);
    rc = freePage(pBt, apOld[i], pgnoOld[i]);
    if( rc ) goto balance_cleanup;
    apOld[i] = &aOld[i];
  }

  /*
  ** Load pointers to all cells on sibling pages and the divider cells
  ** into the local apCell[] array.  Make copies of the divider cells
  ** into aTemp[] and remove the the divider Cells from pParent.
  */
  nCell = 0;
  for(i=0; i<nOld; i++){
    MemPage *pOld = apOld[i];
    for(j=0; j<pOld->nCell; j++){
      apCell[nCell] = pOld->apCell[j];
      szCell[nCell] = cellSize(apCell[nCell]);
      nCell++;
    }
    if( i<nOld-1 ){
      szCell[nCell] = cellSize(apDiv[i]);
      memcpy(aTemp[i], apDiv[i], szCell[nCell]);
      apCell[nCell] = &aTemp[i];
      dropCell(pParent, nxDiv, szCell[nCell]);
      assert( apCell[nCell]->h.leftChild==pgnoOld[i] );
      apCell[nCell]->h.leftChild = pOld->u.hdr.rightChild;
      nCell++;
    }
  }

  /*
  ** Estimate the number of pages needed.  Record this number in "k"
  ** for now.  It will get transferred to nNew as we allocate the
  ** new pages.
  */
  totalSize = 0;
  for(i=0; i<nCell; i++){
    totalSize += szCell[i];
  }
  k = (totalSize + (SQLITE_PAGE_SIZE - sizeof(PageHdr) - 1)) /
            (SQLITE_PAGE_SIZE - sizeof(PageHdr));
  usedPerPage = (totalSize+k-1)/k;
  freePerPage = SQLITE_PAGE_SIZE - usedPerPage;
  

  /*
  ** Allocate new pages
  */
  for(i=0; i<k; i++){
    rc = allocatePage(pBt, &apNew[i], &pgnoNew[i]);
    if( rc ) goto balance_cleanup;
    nNew++;
    zeroPage(apNew[i]);
  }

  /*
  ** Evenly distribute the data in apCell[] across the new pages.
  ** Insert divider cells into pParent as necessary.
  */
  j = 0;
  for(i=0; i<nNew; i++){


    MemPage *pNew = apNew[i];
    while( j<nCell && pNew->nFree<freePerPage && szCell[j]<=pNew->nFree ){

      if( pCur && iCur==j ){ pCur->pPage = pNew; pCur->idx = pNew->nCell; }
      insertCell(pNew, pNew->nCell, apCell[j], szCell[j]);
      j++;
    }



    assert( !pNew->isOverfull );
    relinkCellList(pNew);
    if( i<nNew-1 && j<nCell ){



      pNew->u.hdr.rightChild = apCell[j]->h.leftChild;
      apCell[j]->h.leftChild = pgnoNew[i];
      if( pCur && iCur==j ){ pCur->pPage = pParent; pCur->idx = nxDiv; }
      insertCell(pParent, nxDiv, apCell[j], szCell[j]);

      j++;
      nxDiv++;

    }

  }
  apNew[nNew-1]->u.hdr.rightChild = apOld[nOld-1]->u.hdr.rightChild;
  if( nxDiv==pParent->nCell ){
    pParent->u.hdr.rightChild = pgnoNew[nNew-1];























































































  }else{

    pParent->apCell[nxDiv]->h.leftChild = pgnoNew[nNew-1];
  }


  if( pCur ){
    assert( pCur->pPage!=0 );
    sqlitepager_ref(pCur->pPage);
  }


  /*
















  ** Reparent children of all cells.
  */












  for(i=0; i<nNew; i++){
    reparentChildPages(pBt->pPager, apNew[i]);
  }



























































  reparentChildPages(pBt->pPager, pParent);


  /*





  ** balance the parent page.
  */







  rc = balance(pBt, pParent, 0);



















  /*




















  ** Cleanup before returning.
  */






balance_cleanup:
  for(i=0; i<nOld; i++){
    sqlitepager_unref(apOld[i]);
  }













  for(i=0; i<nNew; i++){
    sqlitepager_unref(apNew[i]);
  }
  if( pCur && pCur->pPage==0 ){
    pCur->pPage = pParent;
    pCur->idx = 0;
  }else{




















































    sqlitepager_unref(pParent);










  }






  return rc;



}



/*
** Insert a new record into the BTree.  The key is given by (pKey,nKey)
** and the data is given by (pData,nData).  The cursor is used only to
** define what database the record should be inserted into.  The cursor
** is left pointing at the new record.
*/
int sqliteBtreeInsert(
  BtCursor *pCur,            /* Insert data into the table of this cursor */
  void *pKey,  int nKey,     /* The key of the new record */
  void *pData, int nData     /* The data of the new record */
){
  Cell newCell;
  int rc;
  int loc;
  int szNew;
  MemPage *pPage;
  Btree *pBt = pCur->pBt;

  if( !pCur->pBt->inTrans ){
    return SQLITE_ERROR;  /* Must start a transaction first */
  }
  rc = sqliteBtreeMoveto(pCur, pKey, nKey, &loc);
  if( rc ) return rc;
  pPage = pCur->pPage;
  rc = sqlitepager_write(pPage);
  if( rc ) return rc;
  rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData);
  if( rc ) return rc;
  szNew = cellSize(&newCell);
  if( loc==0 ){
    newCell.h.leftChild = pPage->apCell[pCur->idx]->h.leftChild;
    rc = clearCell(pBt, pPage->apCell[pCur->idx]);
    if( rc ) return rc;


    dropCell(pPage, pCur->idx, cellSize(pPage->apCell[pCur->idx]));
  }else if( loc>0 ){














    assert( pPage->u.hdr.rightChild==0 );  /* Must be a leaf page */
    pCur->idx++;


  }else{
    assert( pPage->u.hdr.rightChild==0 );  /* Must be a leaf page */
  }














































  insertCell(pPage, pCur->idx, &newCell, cellSize(&newCell));
  rc = balance(pCur->pBt, pPage, pCur);






  return rc;
}

/*
** Delete the entry that the cursor is pointing to.
**
** The cursor is left pointing at either the next or the previous
................................................................................
  }
  if( pCur->idx >= pPage->nCell ){
    return SQLITE_ERROR;  /* The cursor is not pointing to anything */
  }
  rc = sqlitepager_write(pPage);
  if( rc ) return rc;
  pCell = pPage->apCell[pCur->idx];
  pgnoChild = pCell->h.leftChild;
  clearCell(pCell);
  dropCell(pPage, pCur->idx, cellSize(pCell));
  if( pgnoChild ){
    /*
    ** If the entry we just deleted is not a leaf, then we've left a
    ** whole in an internal page.  We have to fill the whole by moving
    ** in a page from a leaf.  The next Cell after the one just deleted
    ** is guaranteed to exist and to be a leaf so we can use it.
    */
    BtCursor leafCur;
    Cell *pNext;
    int szNext;
    getTempCursor(pCur, &leafCur);
    rc = sqliteBtreeNext(&leafCur, 0);
    if( rc!=SQLITE_OK ){
      return SQLITE_CORRUPT;
    }
    pNext = leafCur.pPage->apCell[leafCur.idx]
    szNext = cellSize(pNext);
    insertCell(pPage, pCur->idx, pNext, szNext);
    rc = balance(pCur->pBt, pPage, pCur);
    if( rc ) return rc;
    pCur->bSkipNext = 1;
    dropCell(leafCur.pPage, leafCur.idx, szNext);
    rc = balance(pCur->pBt, leafCur.pPage, 0);
    releaseTempCur(&leafCur);
  }else{
    rc = balance(pCur->pBt, pPage, pCur);
    pCur->bSkipNext = 1;
  }
  return rc;
}

/*
** Create a new BTree in the same file.  Write into *piTable the index
** of the root page of the new table.
*/
................................................................................
  int rc;
  int i;
  Cell *pCell;
  int idx;

  rc = sqlitepager_get(pBt->pPager, pgno, &pPage);
  if( rc ) return rc;
  idx = pPage->u.hdr.firstCell;
  while( idx>0 ){
    pCell = (Cell*)&pPage->u.aDisk[idx];
    idx = pCell->h.iNext;
    if( pCell->h.leftChild ){
      rc = clearDatabasePage(pBt, pCell->h.leftChild);
      if( rc ) return rc;
    }
    rc = clearCell(pCell);
    if( rc ) return rc;