Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Incremental btree.c changes. (CVS 1312) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
fdc629dbbf974024215969e0bd3def45 |
User & Date: | drh 2004-05-03 19:49:33.000 |
Context
2004-05-04
| ||
15:00 | Added template for the utf.c file containing conversion routines. (CVS 1313) (check-in: 89b42c468f user: drh tags: trunk) | |
2004-05-03
| ||
19:49 | Incremental btree.c changes. (CVS 1312) (check-in: fdc629dbbf user: drh tags: trunk) | |
2004-05-02
| ||
21:12 | Changes to btree for the new file format are mostly complete. Still need to test and debug. (CVS 1311) (check-in: 0eee3b5cd4 user: drh tags: trunk) | |
Changes
Changes to src/btree.c.
1 2 3 4 5 6 7 8 9 10 11 | /* ** 2004 April 6 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | /* ** 2004 April 6 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** $Id: btree.c,v 1.108 2004/05/03 19:49:33 drh Exp $ ** ** This file implements a external (disk-based) database using BTrees. ** For a detailed discussion of BTrees, refer to ** ** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3: ** "Sorting And Searching", pages 473-480. Addison-Wesley ** Publishing Company, Reading, Massachusetts. |
︙ | ︙ | |||
463 464 465 466 467 468 469 | data = pPage->aData; assert( sqlitepager_iswriteable(data->aData) ); assert( pPage->pBt ); if( nByte<4 ) nByte = 4; if( pPage->nFree<nByte || pPage->isOverfull ) return 0; hdr = pPage->hdrOffset; | | | 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 | data = pPage->aData; assert( sqlitepager_iswriteable(data->aData) ); assert( pPage->pBt ); if( nByte<4 ) nByte = 4; if( pPage->nFree<nByte || pPage->isOverfull ) return 0; hdr = pPage->hdrOffset; if( data[hdr+5]>=60 ){ defragmentPage(pPage); } addr = hdr+1; pc = get2byte(&data[addr]); assert( addr<pc ); assert( pc<=pPage->pageSize-4 ); while( (size = get2byte(&data[pc+2]))<nByte ){ |
︙ | ︙ | |||
1094 1095 1096 1097 1098 1099 1100 | ** to start a new checkpoint if another checkpoint is already active. */ int sqlite3BtreeBeginStmt(Btree *pBt){ int rc; if( !pBt->inTrans || pBt->inStmt ){ return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; } | | | | | 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 | ** to start a new checkpoint if another checkpoint is already active. */ int sqlite3BtreeBeginStmt(Btree *pBt){ int rc; if( !pBt->inTrans || pBt->inStmt ){ return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR; } rc = pBt->readOnly ? SQLITE_OK : sqlitepager_stmt_begin(pBt->pPager); pBt->inStmt = 1; return rc; } /* ** Commit a checkpoint to transaction currently in progress. If no ** checkpoint is active, this is a no-op. */ int sqlite3BtreeCommitStmt(Btree *pBt){ int rc; if( pBt->inStmt && !pBt->readOnly ){ rc = sqlitepager_stmt_commit(pBt->pPager); }else{ rc = SQLITE_OK; } pBt->inStmt = 0; return rc; } /* ** Rollback the checkpoint to the current transaction. If there ** is no active checkpoint or transaction, this routine is a no-op. ** ** All cursors will be invalided by this operation. Any attempt ** to use a cursor that was open at the beginning of this operation ** will result in an error. */ int sqlite3BtreeRollbackStmt(Btree *pBt){ int rc; BtCursor *pCur; if( pBt->inStmt==0 || pBt->readOnly ) return SQLITE_OK; rc = sqlitepager_stmt_rollback(pBt->pPager); for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ MemPage *pPage = pCur->pPage; if( pPage && !pPage->isInit ){ releasePage(pPage); pCur->pPage = 0; } } |
︙ | ︙ | |||
1987 1988 1989 1990 1991 1992 1993 | int rc; int n; /* Number of pages on the freelist */ int k; /* Number of leaves on the trunk of the freelist */ pPage1 = pBt->pPage1; n = get4byte(&pPage1->aData[36]); if( n>0 ){ | | | 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 | int rc; int n; /* Number of pages on the freelist */ int k; /* Number of leaves on the trunk of the freelist */ pPage1 = pBt->pPage1; n = get4byte(&pPage1->aData[36]); if( n>0 ){ /* There are pages on the freelist. Reuse one of those pages. */ MemPage *pTrunk; rc = sqlitepager_write(pPage1->aData); if( rc ) return rc; put4byte(&pPage1->aData[36], n-1); rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk); if( rc ) return rc; rc = sqlitepager_write(pTrunk->aData); |
︙ | ︙ | |||
2129 2130 2131 2132 2133 2134 2135 | if( rc ) return rc; sqlitepager_unref(pOvfl->aData); } return SQLITE_OK; } /* | | | > > > | < | < < < < | < < < < < < < | | < < < > | > > > > > > > > > > > > | | 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 | if( rc ) return rc; sqlitepager_unref(pOvfl->aData); } return SQLITE_OK; } /* ** Create the byte sequence used to represent a cell on page pPage ** and write that byte sequence into pCell[]. Overflow pages are ** allocated and filled in as necessary. The calling procedure ** is responsible for making sure sufficient space has been allocated ** for pCell[]. ** ** Note that pCell does not necessary need to point to the pPage->aData ** area. pCell might point to some temporary storage. The cell will ** be constructed in this temporary area then copied into pPage->aData ** later. */ static int fillInCell( MemPage *pPage, /* The page that contains the cell */ unsigned char *pCell, /* Complete text of the cell */ const void *pKey, u64 nKey, /* The key */ const void *pData,int nData, /* The data */ int *pnSize /* Write cell size here */ ){ int nPayload; const void *pSrc; int nSrc, nSrc2; int spaceLeft; MemPage *pOvfl = 0; unsigned char *pPrior; unsigned char *pPayload; Btree *pBt = pPage->pBt; Pgno pgnoOvfl = 0; int nHeader; /* Fill in the header. */ nHeader = 2; if( !pPage->leaf ){ nHeader += 4; } if( !pPage->zeroData ){ nHeader += putVarint(&pCell[nHeader], nData); } nHeader += putVarint(&pCell[nHeader], nKey); /* Fill in the payload */ if( pPage->zeroData ){ nData = 0; } nPayload = nData; if( pPage->intKey ){ pSrc = pData; nSrc = nData; nData = 0; }else{ nPayload += nKey; pSrc = pKey; nSrc = nKey; } if( nPayload>pBt->maxLocal ){ *pnSize = nHeader + pBt->maxLocal + 4; |
︙ | ︙ | |||
2350 2351 2352 2353 2354 2355 2356 | put2byte(&pPage->aData[idxFrom], idx); idxFrom = idx; } put2byte(&pPage->aData[idxFrom], 0); } /* | | > > > | < < < | < | | | < < | 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 | put2byte(&pPage->aData[idxFrom], idx); idxFrom = idx; } put2byte(&pPage->aData[idxFrom], 0); } /* ** Move the content of the page at pFrom over to pTo. The pFrom->aCell[] ** pointers that point into pFrom->aData[] must be adjusted to point ** into pTo->aData[] instead. But some pFrom->aCell[] entries might ** not point to pFrom->aData[]. Those are unchanged. ** ** Over this operation completes, the meta data for pFrom is zeroed. */ static void copyPage(MemPage *pTo, MemPage *pFrom){ uptr from, to; int i; int pageSize; int ofst; assert( pTo->hdrOffset==0 ); ofst = pFrom->hdrOffset; pageSize = pTo->pBt->pageSize; sqliteFree(pTo->aCell); memcpy(pTo->aData, &pFrom->aData[ofst], pageSize - ofst + sizeof(MemPage)); memset(pFrom, 0, sizeof(MemPage)); assert( pTo->aData[5]<155 ); pTo->aData[5] += ofst; pTo->isOverfull = pFrom->isOverfull; to = Addr(pTo->aData); from = Addr(&pFrom->aData[ofst]); for(i=0; i<pTo->nCell; i++){ uptr x = Addr(pTo->aCell[i]); if( x>from && x<from+pageSize-ofst ){ *((uptr*)&pTo->aCell[i]) = x + to - from; } } } /* ** The following parameters determine how many adjacent pages get involved ** in a balancing operation. NN is the number of neighbors on either side |
︙ | ︙ | |||
2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 | int nNew; /* Number of pages in apNew[] */ int nDiv; /* Number of cells in apDiv[] */ int i, j, k; /* Loop counters */ int idx; /* Index of pPage in pParent->apCell[] */ int nxDiv; /* Next divider slot in pParent->apCell[] */ int rc; /* The return code */ int iCur; /* apCell[iCur] is the cell of the cursor */ MemPage *pOldCurPage; /* The cursor originally points to this page */ int subtotal; /* Subtotal of bytes in cells on one page */ MemPage *apOld[NB]; /* pPage and up to two siblings */ Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */ MemPage *apCopy[NB]; /* Private copies of apOld[] pages */ MemPage *apNew[NB+1]; /* pPage and up to NB siblings after balancing */ Pgno pgnoNew[NB+1]; /* Page numbers for each page in apNew[] */ | > > > | 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 | int nNew; /* Number of pages in apNew[] */ int nDiv; /* Number of cells in apDiv[] */ int i, j, k; /* Loop counters */ int idx; /* Index of pPage in pParent->apCell[] */ int nxDiv; /* Next divider slot in pParent->apCell[] */ int rc; /* The return code */ int iCur; /* apCell[iCur] is the cell of the cursor */ int leafCorrection; /* 4 if pPage is a leaf. 0 if not */ int usableSpace; /* Bytes in pPage beyond the header */ int pageFlags; /* Value of pPage->aData[0] */ MemPage *pOldCurPage; /* The cursor originally points to this page */ int subtotal; /* Subtotal of bytes in cells on one page */ MemPage *apOld[NB]; /* pPage and up to two siblings */ Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */ MemPage *apCopy[NB]; /* Private copies of apOld[] pages */ MemPage *apNew[NB+1]; /* pPage and up to NB siblings after balancing */ Pgno pgnoNew[NB+1]; /* Page numbers for each page in apNew[] */ |
︙ | ︙ | |||
2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 | break; } rc = getPage(pBt, pgnoOld[i], &apOld[i]); if( rc ) goto balance_cleanup; rc = initPage(apOld[i], pParent); if( rc ) goto balance_cleanup; apOld[i]->idxParent = k; nOld++; } /* ** Make copies of the content of pPage and its siblings into aOld[]. ** The rest of this function will use data from the copies rather ** that the original pages since the original pages will be in the | > > | 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 | break; } rc = getPage(pBt, pgnoOld[i], &apOld[i]); if( rc ) goto balance_cleanup; rc = initPage(apOld[i], pParent); if( rc ) goto balance_cleanup; apOld[i]->idxParent = k; apCopy[i] = 0; assert( i==nOld ); nOld++; } /* ** Make copies of the content of pPage and its siblings into aOld[]. ** The rest of this function will use data from the copies rather ** that the original pages since the original pages will be in the |
︙ | ︙ | |||
2840 2841 2842 2843 2844 2845 2846 | rc = balance(pParent); /* ** Cleanup before returning. */ balance_cleanup: for(i=0; i<nOld; i++){ | | > > > | > | | | 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 | rc = balance(pParent); /* ** Cleanup before returning. */ balance_cleanup: for(i=0; i<nOld; i++){ releasePage(apOld[i]); if( apCopy[i] ){ releasePage(apCopy[i]->pParent); sqliteFree(apCopy[i]->aCell); } } for(i=0; i<nNew; i++){ releasePage(apNew[i]); } releasePage(pParent); return rc; } /* ** This routine checks all cursors that point to the same table ** as pCur points to. If any of those cursors were opened with ** wrFlag==0 then this routine returns SQLITE_LOCKED. If all |
︙ | ︙ | |||
2870 2871 2872 2873 2874 2875 2876 2877 | */ static int checkReadLocks(BtCursor *pCur){ BtCursor *p; assert( pCur->wrFlag ); for(p=pCur->pShared; p!=pCur; p=p->pShared){ assert( p ); assert( p->pgnoRoot==pCur->pgnoRoot ); if( p->wrFlag==0 ) return SQLITE_LOCKED; | > | | | 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 | */ static int checkReadLocks(BtCursor *pCur){ BtCursor *p; assert( pCur->wrFlag ); for(p=pCur->pShared; p!=pCur; p=p->pShared){ assert( p ); assert( p->pgnoRoot==pCur->pgnoRoot ); assert( p->pPage->pgno==sqlitepager_pagenumber(p->pPage->aData); if( p->wrFlag==0 ) return SQLITE_LOCKED; if( p->pPage->pgno!=p->pgnoRoot ){ moveToRoot(p); } } return SQLITE_OK; } /* ** Insert a new record into the BTree. The key is given by (pKey,nKey) ** and the data is given by (pData,nData). The cursor is used only to ** define what table the record should be inserted into. The cursor ** is left pointing at a random location. ** ** For an INTKEY table, only the nKey value of the key is used. pKey is ** ignored. For a ZERODATA table, the pData and nData are both ignored. */ int sqlite3BtreeInsert( BtCursor *pCur, /* Insert data into the table of this cursor */ |
︙ | ︙ | |||
2916 2917 2918 2919 2920 2921 2922 | } if( checkReadLocks(pCur) ){ return SQLITE_LOCKED; /* The table pCur points to has a read lock */ } rc = sqlite3BtreeMoveto(pCur, pKey, nKey, &loc); if( rc ) return rc; pPage = pCur->pPage; | < | | 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 | } if( checkReadLocks(pCur) ){ return SQLITE_LOCKED; /* The table pCur points to has a read lock */ } rc = sqlite3BtreeMoveto(pCur, pKey, nKey, &loc); if( rc ) return rc; pPage = pCur->pPage; assert( pPage->isInit ); rc = sqlitepager_write(pPage->aData); if( rc ) return rc; rc = fillInCell(pPage, &newCell, pKey, nKey, pData, nData, &szNew); if( rc ) return rc; assert( szNew==cellSize(pPage, newCell) ); if( loc==0 ){ int szOld assert( pCur->idx>=0 && pCur->idx<pPage->nPage ); |
︙ | ︙ | |||
2978 2979 2980 2981 2982 2983 2984 | } if( !pCur->wrFlag ){ return SQLITE_PERM; /* Did not open this cursor for writing */ } if( checkReadLocks(pCur) ){ return SQLITE_LOCKED; /* The table pCur points to has a read lock */ } | | | 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 | } if( !pCur->wrFlag ){ return SQLITE_PERM; /* Did not open this cursor for writing */ } if( checkReadLocks(pCur) ){ return SQLITE_LOCKED; /* The table pCur points to has a read lock */ } rc = sqlitepager_write(pPage->aData); if( rc ) return rc; pCell = pPage->aCell[pCur->idx]; if( !pPage->leaf ){ pgnoChild = get4byte(&pCell[2]); } clearCell(pPage, pCell); if( !pPage->leaf ){ |
︙ | ︙ | |||
3003 3004 3005 3006 3007 3008 3009 | int notUsed; getTempCursor(pCur, &leafCur); rc = sqlite3BtreeNext(&leafCur, ¬Used); if( rc!=SQLITE_OK ){ if( rc!=SQLITE_NOMEM ) rc = SQLITE_CORRUPT; return rc; } | | | 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 | int notUsed; getTempCursor(pCur, &leafCur); rc = sqlite3BtreeNext(&leafCur, ¬Used); if( rc!=SQLITE_OK ){ if( rc!=SQLITE_NOMEM ) rc = SQLITE_CORRUPT; return rc; } rc = sqlitepager_write(leafCur.pPage->aData); if( rc ) return rc; dropCell(pPage, pCur->idx, cellSize(pPage, pCell)); pNext = leafCur.pPage->aCell[leafCur.idx]; szNext = cellSize(leafCur.pPage, pNext); insertCell(pPage, pCur->idx, &pNext[-4], szNext+4); put4byte(&pNext[-2], pgnoChild); rc = balance(pPage); |
︙ | ︙ | |||
3048 3049 3050 3051 3052 3053 3054 | if( pBt->readOnly ){ return SQLITE_READONLY; } rc = allocatePage(pBt, &pRoot, &pgnoRoot, 0); if( rc ) return rc; assert( sqlitepager_iswriteable(pRoot->aData) ); zeroPage(pBt, pRoot); | | | 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 | if( pBt->readOnly ){ return SQLITE_READONLY; } rc = allocatePage(pBt, &pRoot, &pgnoRoot, 0); if( rc ) return rc; assert( sqlitepager_iswriteable(pRoot->aData) ); zeroPage(pBt, pRoot); sqlitepager_unref(pRoot->aData); *piTable = (int)pgnoRoot; return SQLITE_OK; } /* ** Erase the given database page and all its children. Return ** the page to the freelist. |
︙ | ︙ | |||
3272 3273 3274 3275 3276 3277 3278 | while( idx>0 && idx<SQLITE_USABLE_SIZE-MIN_CELL_SIZE ){ unsigned char *pCell = &pPage->aData[idx]; fileBtreePageDump(pBt, get4byte(&pPage->aData[idx+2]), 1); idx = get2byte(&pPage->aData[idx]); } fileBtreePageDump(pBt, get4byte(&pPage->aData[hdrOffset+6]), 1); } | | | 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 | while( idx>0 && idx<SQLITE_USABLE_SIZE-MIN_CELL_SIZE ){ unsigned char *pCell = &pPage->aData[idx]; fileBtreePageDump(pBt, get4byte(&pPage->aData[idx+2]), 1); idx = get2byte(&pPage->aData[idx]); } fileBtreePageDump(pBt, get4byte(&pPage->aData[hdrOffset+6]), 1); } sqlitepager_unref(pPage->aData); return SQLITE_OK; } #endif #ifdef SQLITE_TEST /* ** Fill aResult[] with information about the entry and page that the |
︙ | ︙ | |||
3298 3299 3300 3301 3302 3303 3304 | ** This routine is used for testing and debugging only. */ static int fileBtreeCursorDump(BtCursor *pCur, int *aResult){ int cnt, idx; MemPage *pPage = pCur->pPage; Btree *pBt = pCur->pBt; assert( pPage->isInit ); | | > | 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 | ** This routine is used for testing and debugging only. */ static int fileBtreeCursorDump(BtCursor *pCur, int *aResult){ int cnt, idx; MemPage *pPage = pCur->pPage; Btree *pBt = pCur->pBt; assert( pPage->isInit ); aResult[0] = sqlitepager_pagenumber(pPage->aData); assert( aResult[0]==pPage->pgno ); aResult[1] = pCur->idx; aResult[2] = pPage->nCell; if( pCur->idx>=0 && pCur->idx<pPage->nCell ){ aResult[3] = cellSize(pPage, pPage->aCell[pCur->idx]); aResult[6] = pPage->leaf ? 0 : get4byte(&pPage->aCell[pCur->idx][2]); }else{ aResult[3] = 0; |
︙ | ︙ | |||
3488 3489 3490 3491 3492 3493 3494 | sprintf(zMsg, "unable to get the page. error code=%d", rc); checkAppendMsg(pCheck, zContext, zMsg); return 0; } if( (rc = initPage(pPage, pParent))!=0 ){ sprintf(zMsg, "initPage() returns error code %d", rc); checkAppendMsg(pCheck, zContext, zMsg); | | | 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 | sprintf(zMsg, "unable to get the page. error code=%d", rc); checkAppendMsg(pCheck, zContext, zMsg); return 0; } if( (rc = initPage(pPage, pParent))!=0 ){ sprintf(zMsg, "initPage() returns error code %d", rc); checkAppendMsg(pCheck, zContext, zMsg); releasePage(pPage); return 0; } #if 0 /* Check out all the cells. */ |
︙ | ︙ |