Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | incremental update (CVS 223) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
7108b699cc03d5d4dfb222ceab0196a8 |
User & Date: | drh 2001-06-08 00:21:52.000 |
Context
2001-06-08
| ||
00:25 | documentation update (CVS 224) (check-in: d1e211fad9 user: drh tags: trunk) | |
00:21 | incremental update (CVS 223) (check-in: 7108b699cc user: drh tags: trunk) | |
2001-06-02
| ||
02:40 | continued work on btree (CVS 222) (check-in: d07e0e80a0 user: drh tags: trunk) | |
Changes
Changes to src/btree.c.
︙ | ︙ | |||
17 18 19 20 21 22 23 | ** Boston, MA 02111-1307, USA. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ************************************************************************* | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | ** Boston, MA 02111-1307, USA. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ************************************************************************* ** $Id: btree.c,v 1.11 2001/06/08 00:21:52 drh Exp $ ** ** This file implements a external (disk-based) database using BTrees. ** For a detailed discussion of BTrees, refer to ** ** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3: ** "Sorting And Searching", pages 473-480. Addison-Wesley ** Publishing Company, Reading, Massachusetts. ** ** The basic idea is that each page of the file contains N database ** entries and N+1 pointers to subpages. ** ** ---------------------------------------------------------------- ** | Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N) | Ptr(N+1) | ** ---------------------------------------------------------------- ** ** All of the keys on the page that Ptr(0) points to have values less ** than Key(0). All of the keys on page Ptr(1) and its subpages have ** values greater than Key(0) and less than Key(1). All of the keys ** on Ptr(N+1) and its subpages have values greater than Key(N). And ** so forth. ** ** Finding a particular key requires reading O(log(M)) pages from the file ** where M is the number of entries in the tree. ** ** In this implementation, a single file can hold one or more separate ** BTrees. Each BTree is identified by the index of its root page. The ** key and data for any entry are combined to form the "payload". Up to ** MX_LOCAL_PAYLOAD bytes of payload can be carried directly on the ** database page. If the payload is larger than MX_LOCAL_PAYLOAD bytes ** then surplus bytes are stored on overflow pages. The payload for an ** entry and the preceding pointer are combined to form a "Cell". Each ** page has a smaller header which contains the Ptr(N+1) pointer. ** ** The first page of the file contains a magic string used to verify that ** the file really is a valid BTree database, a pointer to a list of unused ** pages in the file, and some meta information. The root of the first ** BTree begins on page 2 of the file. (Pages are numbered beginning with ** 1, not 0.) Thus a minimum database contains 2 pages. */ #include "sqliteInt.h" #include "pager.h" #include "btree.h" #include <assert.h> |
︙ | ︙ | |||
65 66 67 68 69 70 71 | "** This file contains an SQLite 2.0 database **" #define MAGIC_SIZE (sizeof(zMagicHeader)) /* ** The first page of the database file contains a magic header string ** to identify the file as an SQLite database file. It also contains ** a pointer to the first free page of the file. Page 2 contains the | | > > > > > < | | | | | | > > > > > | 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | "** This file contains an SQLite 2.0 database **" #define MAGIC_SIZE (sizeof(zMagicHeader)) /* ** The first page of the database file contains a magic header string ** to identify the file as an SQLite database file. It also contains ** a pointer to the first free page of the file. Page 2 contains the ** root of the principle BTree. The file might contain other BTrees ** rooted on pages above 2. ** ** The first page also contains SQLITE_N_BTREE_META integers that ** can be used by higher-level routines. ** ** Remember that pages are numbered beginning with 1. (See pager.c ** for additional information.) Page 0 does not exist and a page ** number of 0 is used to mean "no such page". */ struct PageOne { char zMagic[MAGIC_SIZE]; /* String that identifies the file as a database */ Pgno firstList; /* First free page in a list of all free pages */ int aMeta[SQLITE_N_BTREE_META]; /* User defined integers */ }; /* ** Each database page has a header that is an instance of this ** structure. ** ** PageHdr.firstFree is 0 if there is no free space on this page. ** Otherwise, PageHdr.firstFree is the index in MemPage.aDisk[] of a ** FreeBlk structure that describes the first block of free space. ** All free space is defined by a linked list of FreeBlk structures. ** ** Data is stored in a linked list of Cell structures. PageHdr.firstCell ** is the index into MemPage.aDisk[] of the first cell on the page. The ** Cells are kept in sorted order. ** ** A Cell contains all information about a database entry and a pointer ** to a child page that contains other entries less than itself. In ** other words, the i-th Cell contains both Ptr(i) and Key(i). The ** right-most pointer of the page is contained in PageHdr.rightChild. */ struct PageHdr { Pgno rightChild; /* Child page that comes after all cells on this page */ u16 firstCell; /* Index in MemPage.aDisk[] of the first cell */ u16 firstFree; /* Index in MemPage.aDisk[] of the first free block */ }; |
︙ | ︙ | |||
159 160 161 162 163 164 165 | Pgno ovfl; /* The first overflow page */ }; /* ** Free space on a page is remembered using a linked list of the FreeBlk ** structures. Space on a database page is allocated in increments of ** at least 4 bytes and is always aligned to a 4-byte boundry. The | | | | 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 | Pgno ovfl; /* The first overflow page */ }; /* ** Free space on a page is remembered using a linked list of the FreeBlk ** structures. Space on a database page is allocated in increments of ** at least 4 bytes and is always aligned to a 4-byte boundry. The ** linked list of FreeBlks is always kept in order by address. */ struct FreeBlk { u16 iSize; /* Number of bytes in this block of free space */ u16 iNext; /* Index in MemPage.aDisk[] of the next free block */ }; /* ** Number of bytes on a single overflow page. */ #define OVERFLOW_SIZE (SQLITE_PAGE_SIZE-sizeof(Pgno)) /* ** When the key and data for a single entry in the BTree will not fit in ** the MX_LOACAL_PAYLOAD bytes of space available on the database page, ** then all extra bytes are written to a linked list of overflow pages. ** Each overflow page is an instance of the following structure. ** ** Unused pages in the database are also represented by instances of ** the OverflowPage structure. The PageOne.freeList field is the ** page number of the first page in a linked list of unused database ** pages. */ |
︙ | ︙ | |||
211 212 213 214 215 216 217 | */ struct MemPage { char aDisk[SQLITE_PAGE_SIZE]; /* Page data stored on disk */ int isInit; /* True if auxiliary data is initialized */ MemPage *pParent; /* The parent of this page. NULL for root */ int nFree; /* Number of free bytes in aDisk[] */ int nCell; /* Number of entries on this page */ | | | 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 | */ struct MemPage { char aDisk[SQLITE_PAGE_SIZE]; /* Page data stored on disk */ int isInit; /* True if auxiliary data is initialized */ MemPage *pParent; /* The parent of this page. NULL for root */ int nFree; /* Number of free bytes in aDisk[] */ int nCell; /* Number of entries on this page */ Cell *apCell[MX_CELL+1]; /* All data entires in sorted order */ } /* ** The in-memory image of a disk page has the auxiliary information appended ** to the end. EXTRA_SIZE is the number of bytes of space needed to hold ** that extra information. */ |
︙ | ︙ | |||
240 241 242 243 244 245 246 247 248 249 250 251 252 253 | ** A cursor is a pointer to a particular entry in the BTree. ** The entry is identified by its MemPage and the index in ** MemPage.apCell[] of the entry. */ struct BtCursor { Btree *pBt; /* The Btree to which this cursor belongs */ BtCursor *pPrev, *pNext; /* List of all cursors */ MemPage *pPage; /* Page that contains the entry */ u16 idx; /* Index of the entry in pPage->apCell[] */ u8 bSkipNext; /* sqliteBtreeNext() is no-op if true */ u8 iMatch; /* compare result from last sqliteBtreeMoveto() */ }; /* | > | 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 | ** A cursor is a pointer to a particular entry in the BTree. ** The entry is identified by its MemPage and the index in ** MemPage.apCell[] of the entry. */ struct BtCursor { Btree *pBt; /* The Btree to which this cursor belongs */ BtCursor *pPrev, *pNext; /* List of all cursors */ Pgno pgnoRoot; /* The root page of this tree */ MemPage *pPage; /* Page that contains the entry */ u16 idx; /* Index of the entry in pPage->apCell[] */ u8 bSkipNext; /* sqliteBtreeNext() is no-op if true */ u8 iMatch; /* compare result from last sqliteBtreeMoveto() */ }; /* |
︙ | ︙ | |||
296 297 298 299 300 301 302 | pFBlk->iSize = SQLITE_PAGE_SIZE - pc; pFBlk->iNext = 0; ((PageHdr*)pPage)->firstFree = pc; memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk)); } /* | | | | | | | 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 | pFBlk->iSize = SQLITE_PAGE_SIZE - pc; pFBlk->iNext = 0; ((PageHdr*)pPage)->firstFree = pc; memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk)); } /* ** Allocate nByte bytes of space on a page. nByte must be a ** multiple of 4. ** ** Return the index into pPage->aDisk[] of the first byte of ** the new allocation. Or return 0 if there is not enough free ** space on the page to satisfy the allocation request. ** ** If the page contains nBytes of free space but does not contain ** nBytes of contiguous free space, then this routine automatically ** calls defragementPage() to consolidate all free space before ** allocating the new chunk. */ static int allocateSpace(MemPage *pPage, int nByte){ FreeBlk *p; u16 *pIdx; int start; assert( nByte==ROUNDUP(nByte) ); |
︙ | ︙ | |||
343 344 345 346 347 348 349 | pPage->nFree -= nByte; return start; } /* ** Return a section of the MemPage.aDisk[] to the freelist. ** The first byte of the new free block is pPage->aDisk[start] | | | 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 | pPage->nFree -= nByte; return start; } /* ** Return a section of the MemPage.aDisk[] to the freelist. ** The first byte of the new free block is pPage->aDisk[start] ** and the size of the block is "size" bytes. ** ** Most of the effort here is involved in coalesing adjacent ** free blocks into a single big free block. */ static void freeSpace(MemPage *pPage, int start, int size){ int end = start + size; u16 *pIdx, idx; |
︙ | ︙ | |||
392 393 394 395 396 397 398 | } /* ** Initialize the auxiliary information for a disk block. ** ** The pParent parameter must be a pointer to the MemPage which ** is the parent of the page being initialized. The root of the | | > | 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 | } /* ** Initialize the auxiliary information for a disk block. ** ** The pParent parameter must be a pointer to the MemPage which ** is the parent of the page being initialized. The root of the ** BTree (usually page 2) has no parent and so for that page, ** pParent==NULL. ** ** Return SQLITE_OK on success. If we see that the page does ** not contained a well-formed database page, then return ** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not ** guarantee that the page is well-formed. It only shows that ** we failed to detect any corruption. */ |
︙ | ︙ | |||
439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 | while( idx!=0 ){ if( idx>SQLITE_PAGE_SIZE-sizeof(FreeBlk) ) goto page_format_error; if( idx<sizeof(PageHdr) ) goto page_format_error; pFBlk = (FreeBlk*)&pPage->aDisk[idx]; pPage->nFree += pFBlk->iSize; if( pFBlk->iNext <= idx ) goto page_format_error; idx = pFBlk->iNext; } if( pPage->nFree!=freeSpace ) goto page_format_error; return SQLITE_OK; page_format_error: return SQLITE_CORRUPT; } /* ** Recompute the MemPage.apCell[], MemPage.nCell, and MemPage.nFree parameters | > > > > > | | 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 | while( idx!=0 ){ if( idx>SQLITE_PAGE_SIZE-sizeof(FreeBlk) ) goto page_format_error; if( idx<sizeof(PageHdr) ) goto page_format_error; pFBlk = (FreeBlk*)&pPage->aDisk[idx]; pPage->nFree += pFBlk->iSize; if( pFBlk->iNext <= idx ) goto page_format_error; idx = pFBlk->iNext; } if( pPage->nCell==0 && pPage->nFree==0 ){ /* As a special case, an uninitialized root page appears to be ** an empty database */ return SQLITE_OK; } if( pPage->nFree!=freeSpace ) goto page_format_error; return SQLITE_OK; page_format_error: return SQLITE_CORRUPT; } /* ** Recompute the MemPage.apCell[], MemPage.nCell, and MemPage.nFree parameters ** for a cell after the MemPage.aDisk[] content has be changed significantly. ** ** The computation here is similar to initPage() except that in this case ** the MemPage.aDisk[] field has been set up internally (instead of ** having been read from disk) so we do not need to do as much error ** checking. */ static void reinitPage(MemPage *pPage){ |
︙ | ︙ | |||
478 479 480 481 482 483 484 | pPage->nFree += pFBlk->iSize; idx = pFBlk->iNext; } return SQLITE_OK; } /* | > | | 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 | pPage->nFree += pFBlk->iSize; idx = pFBlk->iNext; } return SQLITE_OK; } /* ** Set up a raw page so that it looks like a database page holding ** no entries. */ static void zeroPage(MemPage *pPage){ PageHdr *pHdr; FreeBlk *pFBlk; memset(pPage, 0, SQLITE_PAGE_SIZE); pHdr = (PageHdr*)pPage; pHdr->firstCell = 0; |
︙ | ︙ | |||
580 581 582 583 584 585 586 587 588 589 590 591 592 | return rc; page1_init_failed: sqlitepager_unref(pBt->page1); pBt->page1 = 0; return rc; } /* ** Attempt to start a new transaction. */ int sqliteBtreeBeginTrans(Btree *pBt){ int rc; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < < < < < | | 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 | return rc; page1_init_failed: sqlitepager_unref(pBt->page1); pBt->page1 = 0; return rc; } /* ** Create a new database by initializing the first two pages. */ static int newDatabase(Btree *pBt){ MemPage *pRoot; PageOne *pP1; if( sqlitepager_pagecount(pBt->pPager)>0 ) return SQLITE_OK; pP1 = pBt->page1; rc = sqlitepager_write(pBt->page1); if( rc ) return rc; rc = sqlitepager_get(pBt->pPager, 2, &pRoot); if( rc ) return rc; rc = sqlitepager_write(pRoot); if( rc ){ sqlitepager_unref(pRoot); return rc; } strcpy(pP1->zMagic, zMagicHeader); zeroPage(pRoot); sqlitepager_unref(pRoot); return SQLITE_OK; } /* ** Attempt to start a new transaction. ** ** A transaction must be started before attempting any changes ** to the database. None of the following routines will work ** unless a transaction is started first: ** ** sqliteBtreeCreateTable() ** sqliteBtreeClearTable() ** sqliteBtreeDropTable() ** sqliteBtreeInsert() ** sqliteBtreeDelete() ** sqliteBtreeUpdateMeta() */ int sqliteBtreeBeginTrans(Btree *pBt){ int rc; PageOne *pP1; if( pBt->inTrans ) return SQLITE_ERROR; if( pBt->page1==0 ){ rc = lockBtree(pBt); if( rc!=SQLITE_OK ) return rc; } rc = sqlitepager_write(pBt->page1); if( rc==SQLITE_OK ){ pBt->inTrans = 1; } return newDatabase(pBt); } /* ** Remove the last reference to the database file. This will ** remove the read lock. */ static void unlockBtree(Btree *pBt){ |
︙ | ︙ | |||
641 642 643 644 645 646 647 | if( pBt->pCursor!=0 ) return SQLITE_ERROR; rc = sqlitepager_rollback(pBt->pPager); unlockBtree(pBt); return rc; } /* | > | | | > | | | 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 | if( pBt->pCursor!=0 ) return SQLITE_ERROR; rc = sqlitepager_rollback(pBt->pPager); unlockBtree(pBt); return rc; } /* ** Create a new cursor for the BTree whose root is on the page ** iTable. The act of acquiring a cursor gets a read lock on ** the database file. */ int sqliteBtreeCursor(Btree *pBt, int iTable, BtCursor **ppCur){ int rc; BtCursor *pCur; if( pBt->page1==0 ){ rc = lockBtree(pBt); if( rc!=SQLITE_OK ){ *ppCur = 0; return rc; } } pCur = sqliteMalloc( sizeof(*pCur) ); if( pCur==0 ){ rc = SQLITE_NOMEM; goto create_cursor_exception; } pCur->pgnoRoot = (Pgno)iTable; rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, &pCur->pPage); if( rc!=SQLITE_OK ){ goto create_cursor_exception; } rc = initPage(pCur->pPage, pCur->pgnoRoot, 0); if( rc!=SQLITE_OK ){ goto create_cursor_exception; } pCur->pPrev = 0; pCur->pNext = pBt->pCursor; if( pCur->pNext ){ pCur->pNext->pPrev = pCur; |
︙ | ︙ | |||
991 992 993 994 995 996 997 | /* ** Move the cursor to the root page */ static int moveToRoot(BtCursor *pCur){ MemPage *pNew; int rc; | | | 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 | /* ** Move the cursor to the root page */ static int moveToRoot(BtCursor *pCur){ MemPage *pNew; int rc; rc = sqlitepager_get(pCur->pBt->pPager, pCur->pgnoRoot, &pNew); if( rc ) return rc; sqlitepager_unref(pCur->pPage); pCur->pPage = pNew; pCur->idx = 0; return SQLITE_OK; } |
︙ | ︙ | |||
1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 | int rc; rc = moveToRoot(pCur); if( rc ) return rc; for(;;){ int lwr, upr; Pgno chldPg; MemPage *pPage = pCur->pPage; lwr = 0; upr = pPage->nCell-1; while( lwr<=upr ){ | > < | 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 | int rc; rc = moveToRoot(pCur); if( rc ) return rc; for(;;){ int lwr, upr; Pgno chldPg; MemPage *pPage = pCur->pPage; int c = -1; lwr = 0; upr = pPage->nCell-1; while( lwr<=upr ){ pCur->idx = (lwr+upr)/2; rc = compareKey(pCur, pKey, nKey, &c); if( rc ) return rc; if( c==0 ){ pCur->iMatch = c; if( pRes ) *pRes = 0; return SQLITE_OK; |
︙ | ︙ | |||
1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 | ** needs to do that. */ static int freePage(Btree *pBt, void *pPage, Pgno pgno){ PageOne *pPage1 = pBt->page1; OverflowPage *pOvfl = (OverflowPage*)pPage; int rc; int needOvflUnref = 0; if( pgno==0 ){ assert( pOvfl!=0 ); pgno = sqlitepager_pagenumber(pOvfl); } rc = sqlitepager_write(pPage1); if( rc ){ return rc; | > | 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 | ** needs to do that. */ static int freePage(Btree *pBt, void *pPage, Pgno pgno){ PageOne *pPage1 = pBt->page1; OverflowPage *pOvfl = (OverflowPage*)pPage; int rc; int needOvflUnref = 0; if( pgno==0 ){ assert( pOvfl!=0 ); pgno = sqlitepager_pagenumber(pOvfl); } rc = sqlitepager_write(pPage1); if( rc ){ return rc; |
︙ | ︙ | |||
1284 1285 1286 1287 1288 1289 1290 | pSpace += n; } return SQLITE_OK; } /* ** Change the MemPage.pParent pointer on the page whose number is | | | 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 | pSpace += n; } return SQLITE_OK; } /* ** Change the MemPage.pParent pointer on the page whose number is ** given in the second argument so that MemPage.pParent holds the ** pointer in the third argument. */ static void reparentPage(Pager *pPager, Pgno pgno, MemPage *pNewParent){ MemPage *pThis; assert( pPager!=0 && pgno!=0 ); pThis = sqlitepager_lookup(pPager, pgno); |
︙ | ︙ | |||
1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 | static void reparentChildPages(Pager *pPager, Page *pPage){ int i; for(i=0; i<pPage->nCell; i++){ reparentPage(pPager, pPage->apCell[i]->leftChild, pPage); } reparentPage(pPager, ((PageHdr*)pPage)->rightChild, pPage); } /* ** Attempt to move N or more bytes out of the page that the cursor ** points to into the left sibling page. (The left sibling page ** contains cells that are less than the cells on this page.) The ** entry that the cursor is pointing to cannot be moved. Return ** TRUE if successful and FALSE if not. | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 | static void reparentChildPages(Pager *pPager, Page *pPage){ int i; for(i=0; i<pPage->nCell; i++){ reparentPage(pPager, pPage->apCell[i]->leftChild, pPage); } reparentPage(pPager, ((PageHdr*)pPage)->rightChild, pPage); } /* ** This routine redistributes Cells on pPage and up to two siblings ** of pPage so that all pages have about the same amount of free space. ** Usually one siblings on either side of pPage are used in the repack, ** though both siblings might come from one side if pPage is the first ** or last child of its parent. If pPage has fewer than two siblings ** (something which can only happen if pPage is the root page or a ** child of root) then all siblings participate in the repack. ** ** The number of siblings of pPage might be increased or decreased by ** one in order to keep all pages between 2/3 and completely full. If ** pPage is the root page, then the depth of the tree might be increased ** or decreased by one, as necessary, to keep the root page from being ** overfull or empty. ** ** Note that when this routine is called, some of the Cells on pPage ** might not actually be stored in pPage->aDisk[]. This can happen ** if the page is overfull. Part of the job of this routine is to ** make sure all Cells for pPage once again fit in pPage->aDisk[]. */ static int repack(Btree *pBt, MemPage *pPage){ MemPage *pParent; /* The parent of pPage */ MemPage *apOld[3]; /* pPage and up to two siblings before repack */ Pgno pgnoOld[3]; /* Page numbers for each page in apOld[] */ MemPage *apNew[4]; /* pPage and up to 3 siblings after repack */ int idxDiv[3]; /* Indices of divider cells in pParent */ Cell *apDiv[3]; /* Divider cells in pParent */ int nCell; /* Number of cells in apCell[] */ int nOld; /* Number of pages in apOld[] */ int nNew; /* Number of pages in apNew[] */ int perPage; /* Approximate number of bytes per page */ int nDiv; /* Number of cells in apDiv[] */ Cell *apCell[MX_CELL*3+5]; /* All cells from pages being repacked */ int unrefPage = 0; /* If true, then unref pPage when done */ /* ** Early out if no repacking is needed. */ if( pPage->nFree>=0 && pPage->nFree<SQLITE_PAGE_SIZE/2 ){ return SQLITE_OK; } /* ** Find the parent of the page to be repacked. */ pParent = pPage->pParent; /* ** If there is no parent, it means the page is the root page. ** special rules apply. */ if( pParent==0 ){ Pgno pgnoChild; Page *pChild; if( pPage->nCell==0 ){ if( ((PageHdr*)pPage)->rightChild ){ /* The root page is under full. Copy the one child page ** into the root page and return. This reduces the depth ** of the BTree by one. */ pgnoChild = ((PageHdr*)pPage->rightChild; rc = sqlitepager_get(pBt, pgnoChild, &pChild); if( rc ) return rc; memcpy(pPage, pChild, SQLITE_PAGE_SIZE); pPage->isInit = 0; initPage(pPage, sqlitepager_pagenumber(pPage), 0); reparentChildPages(pBt->pPager, pPage); freePage(pBt, pChild, pgnoChild); sqlitepager_unref(pChild); } return SQLITE_OK; } if( pPage->nFree>=0 ){ /* It is OK for the root page to be less than half full. */ return SQLITE_OK; } /* If we get to here, it means the root page is over full. ** When this happens, Create a new child page and copy the ** contents of the root into the child. Then make the root ** page and empty page with rightChild pointing to the new ** child. Then fall thru to the code below which will cause ** the overfull child page to be split. */ rc = allocatePage(pBt, &pChild, &pgnoChild); if( rc ) return rc; memcpy(pChild, pPage, SQLITE_PAGE_SIZE); for(i=0; i<pPage->nCell; i++){ if( pPage->apCell[i]>(Cell*)pPage && pPage->apCell[i]<(Cell*)&pPage[1] ){ int offset = (int)pPage->apCell[i] - (int)pPage; pChild->apCell[i] = (Cell*)((int)pChild + offset); }else{ pChild->apCell[i] = pPage->apCell[i]; } } pChild->isInit = 1; pChild->nCell = pPage->nCell; pChild->nFree = pPage->nFree; /* reparentChildPages(pBt->pPager, pChild); */ zeroPage(pPage); ((PageHdr*)pPage)->rightChild = pgnoChild; pParent = pPage; pPage = pChild; unrefPage = 1; } /* ** Find the Cell in the parent page that refers to the page ** to be repacked. */ idx = -1; pgno = sqlitepager_pagenumber(pPage); for(i=0; i<pParent->nCell; i++){ if( pParent->apCell[i]->h.leftChild==pgno ){ idx = i; break; } } if( idx<0 && ((PageHdr*)pPage)->rightChild==pgno ){ idx = pPage->nCell; } if( idx<0 ){ rc = SQLITE_CORRUPT; goto end_of_repack; } /* ** Get sibling pages and their dividers */ if( idx==pPage->nCell ){ idx -= 2; }else{ idx--; } if( idx<0 ) idx = 0; nDiv = 0; nOld = 0; for(i=0; i<3; i++){ if( i+idx<pParent->nCell ){ idxDiv[i] = i+idx; apDiv[i] = pParent->apCell[i+idx]; nDiv++; pgnoOld[i] = apDiv[i]->h.leftChild; rc = sqlitepager_get(pBt, pgnoOld[i], &apOld[i]); if( rc ) goto end_of_repack; nOld++; } if( i+idx==pParent->nCell ){ pgnoOld[i] = pParent->rightChild; rc = sqlitepager_get(pBt, pgnoOld[i], &apOld[i]); if( rc ) goto end_of_repack; nOld++; } } /* ** Get all cells */ nCell = 0; for(i=0; i<nOld; i++){ MemPage *pOld = apOld[i]; for(j=0; j<pOld->nCell; j++){ apCell[nCell++] = pOld->apCell[j]; } if( i<nOld-1 ){ apCell[nCell++] = apDiv[i]; } } /* ** Estimate the number of pages needed */ totalSize = 0; for(i=0; i<nCell; i++){ totalSize += cellSize(apCell[i]); } nNew = (totalSize + (SQLITE_PAGE_SIZE - sizeof(PageHdr) - 1)) / (SQLITE_PAGE_SIZE - sizeof(PageHdr)); perPage = totalSize/nNew; /* ** Allocate new pages */ for(i=0; i<nNew; i++){ rc = allocatePage(pBt, &apNew[i], &pgnoNew[i]); if( rc ) goto end_of_repack; zeroPage(apNew[i]); } /* ** Copy data into the new pages */ for(i=0; i<nNew; i++){ } /* ** Reparent children of all cells */ /* ** Release the old pages */ for(i=0; i<nOld; i++){ releasePage(pBt, apOld[i], 0); } /* ** Repack the parent page, if necessary */ if( needToRepackParent ){ return repack(pParent); } rc = SQLITE_OK; end_of_repack: if( unrefPage ){ sqlitepager_unref(pPage); } return rc; } /* ** Attempt to move N or more bytes out of the page that the cursor ** points to into the left sibling page. (The left sibling page ** contains cells that are less than the cells on this page.) The ** entry that the cursor is pointing to cannot be moved. Return ** TRUE if successful and FALSE if not. |
︙ | ︙ | |||
1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 | ){ Cell newCell; int rc; int loc; MemPage *pPage; Btree *pBt = pCur->pBt; rc = sqliteBtreeMoveTo(pCur, pKey, nKey, &loc); if( rc ) return rc; rc = sqlitepager_write(pCur->pPage); if( rc ) return rc; rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData); if( rc ) return rc; if( loc==0 ){ | > > > | 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 | ){ Cell newCell; int rc; int loc; MemPage *pPage; Btree *pBt = pCur->pBt; if( !pCur->pBt->inTrans ){ return SQLITE_ERROR; /* Must start a transaction first */ } rc = sqliteBtreeMoveTo(pCur, pKey, nKey, &loc); if( rc ) return rc; rc = sqlitepager_write(pCur->pPage); if( rc ) return rc; rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData); if( rc ) return rc; if( loc==0 ){ |
︙ | ︙ | |||
1713 1714 1715 1716 1717 1718 1719 | /* The page being refilled is the root of the BTree and it has ** no entries of its own. If there is a child page, then make the ** child become the new root. */ MemPage *pChild; Pgno pgnoChild; assert( pPage->pParent==0 ); | | | | 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 | /* The page being refilled is the root of the BTree and it has ** no entries of its own. If there is a child page, then make the ** child become the new root. */ MemPage *pChild; Pgno pgnoChild; assert( pPage->pParent==0 ); assert( sqlitepager_pagenumber(pPage)==pCur->pgnoRoot ); pgnoChild = ((PageHdr*)pPage)->rightChild; if( pgnoChild==0 ){ return SQLITE_OK; } rc = sqlitepager_get(pPager, pgno, &pChild); if( rc ) return rc; memcpy(pPage, pChild, SQLITE_PAGE_SIZE); memset(&pPage->aDisk[SQLITE_PAGE_SIZE], 0, EXTRA_SIZE); freePage(pCur->pBt, pChild, pgnoChild); sqlitepager_unref(pChild); rc = initPage(pPage, pCur->pgnoRoot, 0); reparentChildPages(pPager, pPage); return SQLITE_OK; } /** merge with siblings **/ /** borrow from siblings **/ |
︙ | ︙ | |||
1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 | ** sqliteBtreeNext() after a delete and the cursor will be left ** pointing to the first entry after the deleted entry. */ int sqliteBtreeDelete(BtCursor *pCur){ MemPage *pPage = pCur->pPage; Cell *pCell; int rc; if( pCur->idx >= pPage->nCell ){ return SQLITE_ERROR; /* The cursor is not pointing to anything */ } rc = sqlitepager_write(pPage); if( rc ) return rc; pCell = pPage->apCell[pCur->idx]; if( pPage->pHdr->rightChild ){ | > > > > | 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 | ** sqliteBtreeNext() after a delete and the cursor will be left ** pointing to the first entry after the deleted entry. */ int sqliteBtreeDelete(BtCursor *pCur){ MemPage *pPage = pCur->pPage; Cell *pCell; int rc; if( !pCur->pBt->inTrans ){ return SQLITE_ERROR; /* Must start a transaction first */ } if( pCur->idx >= pPage->nCell ){ return SQLITE_ERROR; /* The cursor is not pointing to anything */ } rc = sqlitepager_write(pPage); if( rc ) return rc; pCell = pPage->apCell[pCur->idx]; if( pPage->pHdr->rightChild ){ |
︙ | ︙ | |||
1807 1808 1809 1810 1811 1812 1813 | pCur->bSkipNext = 1; }else{ pCur->idx--; } rc = refillPage(pCur); return rc; } | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 | pCur->bSkipNext = 1; }else{ pCur->idx--; } rc = refillPage(pCur); return rc; } /* ** Create a new BTree in the same file. Write into *piTable the index ** of the root page of the new table. */ int sqliteBtreeCreateTable(Btree *pBt, int *piTable){ MemPage *pRoot; Pgno pgnoRoot; int rc; if( !pBt->inTrans ){ return SQLITE_ERROR; /* Must start a transaction first */ } rc = allocatePage(pBt, &pRoot, &pgnoRoot); if( rc ) return rc; sqlitepager_write(pRoot); zeroPage(pRoot); sqlitepager_unref(pRoot); *piTable = (int)pgnoRoot; return SQLITE_OK; } /* ** Erase the given database page and all its children. Return ** the page to the freelist. */ static int clearDatabasePage(Btree *pBt, Pgno pgno){ MemPage *pPage; int rc; int i; Cell *pCell; int idx; rc = sqlitepager_get(pBt->pPager, pgno, &pPage); if( rc ) return rc; idx = ((PageHdr*)pPage)->firstCell; while( idx>0 ){ pCell = (Cell*)&pPage->aDisk[idx]; idx = pCell->h.iNext; if( pCell->h.leftChild ){ rc = clearDatabasePage(pBt, pCell->h.leftChild); if( rc ) return rc; } rc = clearCell(pCell); if( rc ) return rc; } return freePage(pBt, pPage, pgno); } /* ** Delete all information from a single table in the database. */ int sqliteBtreeClearTable(Btree *pBt, int iTable){ int rc; if( !pBt->inTrans ){ return SQLITE_ERROR; /* Must start a transaction first */ } rc = clearDatabasePage(pBt, (Pgno)iTable); if( rc ){ sqliteBtreeRollback(pBt); return rc; } } /* ** Erase all information in a table and add the root of the table to ** the freelist. Except, the root of the principle table (the one on ** page 2) is never added to the freelist. */ int sqliteBtreeDropTable(Btree *pBt, int iTable){ int rc; MemPage *pPage; if( !pBt->inTrans ){ return SQLITE_ERROR; /* Must start a transaction first */ } rc = sqlitepager_get(pBt->pPager, (Pgno)iTable, &pPage); if( rc==SQLITE_OK ){ rc = sqliteBtreeClearTable(pBt, iTable); } if( rc==SQLITE_OK && iTable!=2 ){ rc = freePage(pBt, pPage, (Pgno)iTable); } sqlitepager_unref(pPage); return rc; } /* ** Read the meta-information out of a database file. */ int sqliteBtreeGetMeta(Btree *pBt, int *aMeta){ PageOne *pP1; int rc; rc = sqlitepager_get(pBt->pPager, 1, &pP1); if( rc ) return rc; memcpy(aMeta, pP1->aMeta, sizeof(pP1->aMeta)); sqlitepager_unref(pP1); return SQLITE_OK; } /* ** Write meta-information back into the database. */ int sqliteBtreeUpdateMeta(Btree *pBt, int *aMeta){ PageOne *pP1; int rc; if( !pBt->inTrans ){ return SQLITE_ERROR; /* Must start a transaction first */ } pP1 = pBt->page1; rc = sqlitepager_write(pP1); if( rc ) return rc; memcpy(pP1->aMeta, aMeta, sizeof(pP1->aMeta)); return SQLITE_OK; } |
Changes to src/btree.h.
︙ | ︙ | |||
20 21 22 23 24 25 26 | ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ************************************************************************* ** This header file defines the interface that the sqlite B-Tree file ** subsystem. ** | | > > > > > | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ************************************************************************* ** This header file defines the interface that the sqlite B-Tree file ** subsystem. ** ** @(#) $Id: btree.h,v 1.4 2001/06/08 00:21:53 drh Exp $ */ typedef struct Btree Btree; typedef struct BtCursor BtCursor; int sqliteBtreeOpen(const char *zFilename, int mode, Btree **ppBtree); int sqliteBtreeClose(Btree*); int sqliteBtreeBeginTrans(Btree*); int sqliteBtreeCommit(Btree*); int sqliteBtreeRollback(Btree*); int sqliteBtreeCreateTable(Btree*, int*); int sqliteBtreeDropTable(Btree*, int); int sqliteBtreeClearTable(Btree*, int); int sqliteBtreeCursor(Btree*, int iTable, BtCursor **ppCur); int sqliteBtreeMoveto(BtCursor*, void *pKey, int nKey, *pRes); int sqliteBtreeDelete(BtCursor*); int sqliteBtreeInsert(BtCursor*, void *pKey, int nKey, void *pData, int nData); int sqliteBtreeNext(BtCursor*, int *pRes); int sqliteBtreeKeySize(BtCursor*, int *pSize); int sqliteBtreeKey(BtCursor*, int offset, int amt, char *zBuf); int sqliteBtreeDataSize(BtCursor*, int *pSize); int sqliteBtreeData(BtCursor*, int offset, int amt, char *zBuf); int sqliteBtreeCloseCursor(BtCursor*); #define SQLITE_N_BTREE_META 3 int sqliteBtreeGetMeta(Btree*, int*); int sqliteBtreeUpdateMeta(Btree*, int*); |