Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Remove the psAligned value from the BTree structure - the pageSize is now always aligned to an 8-byte boundary. Add comments on a confusing bit of code. Ticket #1231. (CVS 2451) |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
535523e1be692adc940d256a7b3d23c6 |
User & Date: | drh 2005-05-01 22:52:42.000 |
Context
2005-05-03
| ||
12:30 | Make sure all data structures have 8-byte alignment - necessary for the sparc architecture and helpful on other 64-bit platforms. Ticket #1232. Also update some comments in build.c. (CVS 2452) (check-in: d9418851ce user: drh tags: trunk) | |
2005-05-01
| ||
22:52 | Remove the psAligned value from the BTree structure - the pageSize is now always aligned to an 8-byte boundary. Add comments on a confusing bit of code. Ticket #1231. (CVS 2451) (check-in: 535523e1be user: drh tags: trunk) | |
2005-04-29
| ||
02:10 | Prevent a segfault described by ticket #1229. (CVS 2450) (check-in: 0667eae9a9 user: drh tags: trunk) | |
Changes
Changes to src/btree.c.
1 2 3 4 5 6 7 8 9 10 11 | /* ** 2004 April 6 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | /* ** 2004 April 6 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** $Id: btree.c,v 1.257 2005/05/01 22:52:42 drh Exp $ ** ** This file implements a external (disk-based) database using BTrees. ** For a detailed discussion of BTrees, refer to ** ** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3: ** "Sorting And Searching", pages 473-480. Addison-Wesley ** Publishing Company, Reading, Massachusetts. |
︙ | ︙ | |||
207 208 209 210 211 212 213 | */ #include "sqliteInt.h" #include "pager.h" #include "btree.h" #include "os.h" #include <assert.h> | < < < < < < | 207 208 209 210 211 212 213 214 215 216 217 218 219 220 | */ #include "sqliteInt.h" #include "pager.h" #include "btree.h" #include "os.h" #include <assert.h> /* The following value is the maximum cell size assuming a maximum page ** size give above. */ #define MX_CELL_SIZE(pBt) (pBt->pageSize-8) /* The maximum number of cells on a single page of the database. This ** assumes a minimum cell size of 3 bytes. Such small cells will be |
︙ | ︙ | |||
304 305 306 307 308 309 310 | u8 minEmbedFrac; /* Minimum payload as % of total page size */ u8 minLeafFrac; /* Minimum leaf payload as % of total page size */ u8 pageSizeFixed; /* True if the page size can no longer be changed */ #ifndef SQLITE_OMIT_AUTOVACUUM u8 autoVacuum; /* True if database supports auto-vacuum */ #endif u16 pageSize; /* Total number of bytes on a page */ | < | 298 299 300 301 302 303 304 305 306 307 308 309 310 311 | u8 minEmbedFrac; /* Minimum payload as % of total page size */ u8 minLeafFrac; /* Minimum leaf payload as % of total page size */ u8 pageSizeFixed; /* True if the page size can no longer be changed */ #ifndef SQLITE_OMIT_AUTOVACUUM u8 autoVacuum; /* True if database supports auto-vacuum */ #endif u16 pageSize; /* Total number of bytes on a page */ u16 usableSize; /* Number of usable bytes on each page */ int maxLocal; /* Maximum local payload in non-LEAFDATA tables */ int minLocal; /* Minimum local payload in non-LEAFDATA tables */ int maxLeaf; /* Maximum local payload in a LEAFDATA table */ int minLeaf; /* Minimum local payload in a LEAFDATA table */ BusyHandler *pBusyHandler; /* Callback for when there is lock contention */ }; |
︙ | ︙ | |||
710 711 712 713 714 715 716 | int cellOffset; int nCell, cellLimit; u8 *used; used = sqliteMallocRaw( pPage->pBt->pageSize ); if( used==0 ) return; usableSize = pPage->pBt->usableSize; | | | 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 | int cellOffset; int nCell, cellLimit; u8 *used; used = sqliteMallocRaw( pPage->pBt->pageSize ); if( used==0 ) return; usableSize = pPage->pBt->usableSize; assert( pPage->aData==&((unsigned char*)pPage)[-pPage->pBt->pageSize] ); hdr = pPage->hdrOffset; assert( hdr==(pPage->pgno==1 ? 100 : 0) ); assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) ); c = pPage->aData[hdr]; if( pPage->isInit ){ assert( pPage->leaf == ((c & PTF_LEAF)!=0) ); assert( pPage->zeroData == ((c & PTF_ZERODATA)!=0) ); |
︙ | ︙ | |||
1013 1014 1015 1016 1017 1018 1019 | int nFree; /* Number of unused bytes on the page */ int top; /* First byte of the cell content area */ pBt = pPage->pBt; assert( pBt!=0 ); assert( pParent==0 || pParent->pBt==pBt ); assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) ); | | | 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 | int nFree; /* Number of unused bytes on the page */ int top; /* First byte of the cell content area */ pBt = pPage->pBt; assert( pBt!=0 ); assert( pParent==0 || pParent->pBt==pBt ); assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) ); assert( pPage->aData == &((unsigned char*)pPage)[-pBt->pageSize] ); if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){ /* The parent page should never change unless the file is corrupt */ return SQLITE_CORRUPT; /* bkpt-CORRUPT */ } if( pPage->isInit ) return SQLITE_OK; if( pPage->pParent==0 && pParent!=0 ){ pPage->pParent = pParent; |
︙ | ︙ | |||
1081 1082 1083 1084 1085 1086 1087 | static void zeroPage(MemPage *pPage, int flags){ unsigned char *data = pPage->aData; Btree *pBt = pPage->pBt; int hdr = pPage->hdrOffset; int first; assert( sqlite3pager_pagenumber(data)==pPage->pgno ); | | | 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 | static void zeroPage(MemPage *pPage, int flags){ unsigned char *data = pPage->aData; Btree *pBt = pPage->pBt; int hdr = pPage->hdrOffset; int first; assert( sqlite3pager_pagenumber(data)==pPage->pgno ); assert( &data[pBt->pageSize] == (unsigned char*)pPage ); assert( sqlite3pager_iswriteable(data) ); memset(&data[hdr], 0, pBt->usableSize - hdr); data[hdr] = flags; first = hdr + 8 + 4*((flags&PTF_LEAF)==0); memset(&data[hdr+1], 0, 4); data[hdr+7] = 0; put2byte(&data[hdr+5], pBt->usableSize); |
︙ | ︙ | |||
1110 1111 1112 1113 1114 1115 1116 | */ static int getPage(Btree *pBt, Pgno pgno, MemPage **ppPage){ int rc; unsigned char *aData; MemPage *pPage; rc = sqlite3pager_get(pBt->pPager, pgno, (void**)&aData); if( rc ) return rc; | | | 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 | */ static int getPage(Btree *pBt, Pgno pgno, MemPage **ppPage){ int rc; unsigned char *aData; MemPage *pPage; rc = sqlite3pager_get(pBt->pPager, pgno, (void**)&aData); if( rc ) return rc; pPage = (MemPage*)&aData[pBt->pageSize]; pPage->aData = aData; pPage->pBt = pBt; pPage->pgno = pgno; pPage->hdrOffset = pPage->pgno==1 ? 100 : 0; *ppPage = pPage; return SQLITE_OK; } |
︙ | ︙ | |||
1149 1150 1151 1152 1153 1154 1155 | ** Release a MemPage. This should be called once for each prior ** call to getPage. */ static void releasePage(MemPage *pPage){ if( pPage ){ assert( pPage->aData ); assert( pPage->pBt ); | | > > | > > | | 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 | ** Release a MemPage. This should be called once for each prior ** call to getPage. */ static void releasePage(MemPage *pPage){ if( pPage ){ assert( pPage->aData ); assert( pPage->pBt ); assert( &pPage->aData[pPage->pBt->pageSize]==(unsigned char*)pPage ); sqlite3pager_unref(pPage->aData); } } /* ** This routine is called when the reference count for a page ** reaches zero. We need to unref the pParent pointer when that ** happens. */ static void pageDestructor(void *pData, int pageSize){ MemPage *pPage; assert( (pageSize & 7)==0 ); pPage = (MemPage*)&((char*)pData)[pageSize]; if( pPage->pParent ){ MemPage *pParent = pPage->pParent; pPage->pParent = 0; releasePage(pParent); } pPage->isInit = 0; } /* ** During a rollback, when the pager reloads information into the cache ** so that the cache is restored to its original state at the start of ** the transaction, for each page restored this routine is called. ** ** This routine needs to reset the extra data section at the end of the ** page to agree with the restored data. */ static void pageReinit(void *pData, int pageSize){ MemPage *pPage; assert( (pageSize & 7)==0 ); pPage = (MemPage*)&((char*)pData)[pageSize]; if( pPage->isInit ){ pPage->isInit = 0; initPage(pPage, pPage->pParent); } } /* |
︙ | ︙ | |||
1234 1235 1236 1237 1238 1239 1240 | sqlite3pager_set_destructor(pBt->pPager, pageDestructor); sqlite3pager_set_reiniter(pBt->pPager, pageReinit); pBt->pCursor = 0; pBt->pPage1 = 0; pBt->readOnly = sqlite3pager_isreadonly(pBt->pPager); sqlite3pager_read_fileheader(pBt->pPager, sizeof(zDbHeader), zDbHeader); pBt->pageSize = get2byte(&zDbHeader[16]); | | > | 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 | sqlite3pager_set_destructor(pBt->pPager, pageDestructor); sqlite3pager_set_reiniter(pBt->pPager, pageReinit); pBt->pCursor = 0; pBt->pPage1 = 0; pBt->readOnly = sqlite3pager_isreadonly(pBt->pPager); sqlite3pager_read_fileheader(pBt->pPager, sizeof(zDbHeader), zDbHeader); pBt->pageSize = get2byte(&zDbHeader[16]); if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){ pBt->pageSize = SQLITE_DEFAULT_PAGE_SIZE; pBt->maxEmbedFrac = 64; /* 25% */ pBt->minEmbedFrac = 32; /* 12.5% */ pBt->minLeafFrac = 32; /* 12.5% */ #ifndef SQLITE_OMIT_AUTOVACUUM /* If the magic name ":memory:" will create an in-memory database, then ** do not set the auto-vacuum flag, even if SQLITE_DEFAULT_AUTOVACUUM |
︙ | ︙ | |||
1266 1267 1268 1269 1270 1271 1272 | pBt->minLeafFrac = zDbHeader[23]; pBt->pageSizeFixed = 1; #ifndef SQLITE_OMIT_AUTOVACUUM pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0); #endif } pBt->usableSize = pBt->pageSize - nReserve; | | | 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 | pBt->minLeafFrac = zDbHeader[23]; pBt->pageSizeFixed = 1; #ifndef SQLITE_OMIT_AUTOVACUUM pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0); #endif } pBt->usableSize = pBt->pageSize - nReserve; assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */ sqlite3pager_set_pagesize(pBt->pPager, pBt->pageSize); *ppBtree = pBt; return SQLITE_OK; } /* ** Close an open database and invalidate all cursors. |
︙ | ︙ | |||
1353 1354 1355 1356 1357 1358 1359 1360 | return SQLITE_READONLY; } if( nReserve<0 ){ nReserve = pBt->pageSize - pBt->usableSize; } if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE && ((pageSize-1)&pageSize)==0 ){ pBt->pageSize = pageSize; | > < | 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 | return SQLITE_READONLY; } if( nReserve<0 ){ nReserve = pBt->pageSize - pBt->usableSize; } if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE && ((pageSize-1)&pageSize)==0 ){ assert( (pageSize & 7)==0 ); pBt->pageSize = pageSize; sqlite3pager_set_pagesize(pBt->pPager, pageSize); } pBt->usableSize = pBt->pageSize - nReserve; return SQLITE_OK; } /* |
︙ | ︙ | |||
1414 1415 1416 1417 1418 1419 1420 | ** SQLITE_OK is returned on success. If the file is not a ** well-formed database file, then SQLITE_CORRUPT is returned. ** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM ** is returned if we run out of memory. SQLITE_PROTOCOL is returned ** if there is a locking protocol violation. */ static int lockBtree(Btree *pBt){ | | | > > > > > | < | 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 | ** SQLITE_OK is returned on success. If the file is not a ** well-formed database file, then SQLITE_CORRUPT is returned. ** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM ** is returned if we run out of memory. SQLITE_PROTOCOL is returned ** if there is a locking protocol violation. */ static int lockBtree(Btree *pBt){ int rc, pageSize; MemPage *pPage1; if( pBt->pPage1 ) return SQLITE_OK; rc = getPage(pBt, 1, &pPage1); if( rc!=SQLITE_OK ) return rc; /* Do some checking to help insure the file we opened really is ** a valid database file. */ rc = SQLITE_NOTADB; if( sqlite3pager_pagecount(pBt->pPager)>0 ){ u8 *page1 = pPage1->aData; if( memcmp(page1, zMagicHeader, 16)!=0 ){ goto page1_init_failed; } if( page1[18]>1 || page1[19]>1 ){ goto page1_init_failed; } pageSize = get2byte(&page1[16]); if( ((pageSize-1)&pageSize)!=0 ){ goto page1_init_failed; } assert( (pageSize & 7)==0 ); pBt->pageSize = pageSize; pBt->usableSize = pageSize - page1[20]; if( pBt->usableSize<500 ){ goto page1_init_failed; } pBt->maxEmbedFrac = page1[21]; pBt->minEmbedFrac = page1[22]; pBt->minLeafFrac = page1[23]; #ifndef SQLITE_OMIT_AUTOVACUUM pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0); #endif } |
︙ | ︙ | |||
1505 1506 1507 1508 1509 1510 1511 | ** ** If there is a transaction in progress, this routine is a no-op. */ static void unlockBtreeIfUnused(Btree *pBt){ if( pBt->inTrans==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){ if( pBt->pPage1->aData==0 ){ MemPage *pPage = pBt->pPage1; | | | 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 | ** ** If there is a transaction in progress, this routine is a no-op. */ static void unlockBtreeIfUnused(Btree *pBt){ if( pBt->inTrans==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){ if( pBt->pPage1->aData==0 ){ MemPage *pPage = pBt->pPage1; pPage->aData = &((char*)pPage)[-pBt->pageSize]; pPage->pBt = pBt; pPage->pgno = 1; } releasePage(pBt->pPage1); pBt->pPage1 = 0; pBt->inStmt = 0; } |
︙ | ︙ | |||
3377 3378 3379 3380 3381 3382 3383 | MemPage *pThis; unsigned char *aData; if( pgno==0 ) return SQLITE_OK; assert( pBt->pPager!=0 ); aData = sqlite3pager_lookup(pBt->pPager, pgno); if( aData ){ | | | 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 | MemPage *pThis; unsigned char *aData; if( pgno==0 ) return SQLITE_OK; assert( pBt->pPager!=0 ); aData = sqlite3pager_lookup(pBt->pPager, pgno); if( aData ){ pThis = (MemPage*)&aData[pBt->pageSize]; assert( pThis->aData==aData ); if( pThis->isInit ){ if( pThis->pParent!=pNewParent ){ if( pThis->pParent ) sqlite3pager_unref(pThis->pParent->aData); pThis->pParent = pNewParent; if( pNewParent ) sqlite3pager_ref(pNewParent->aData); } |
︙ | ︙ | |||
3891 3892 3893 3894 3895 3896 3897 | /* ** Allocate space for memory structures */ apCell = sqliteMallocRaw( nMaxCells*sizeof(u8*) /* apCell */ + nMaxCells*sizeof(int) /* szCell */ + sizeof(MemPage)*NB /* aCopy */ | | | | | | > | | > > > | | 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 | /* ** Allocate space for memory structures */ apCell = sqliteMallocRaw( nMaxCells*sizeof(u8*) /* apCell */ + nMaxCells*sizeof(int) /* szCell */ + sizeof(MemPage)*NB /* aCopy */ + pBt->pageSize*(5+NB) /* aSpace */ + (ISAUTOVACUUM ? nMaxCells : 0) /* aFrom */ ); if( apCell==0 ){ rc = SQLITE_NOMEM; goto balance_cleanup; } szCell = (int*)&apCell[nMaxCells]; aCopy[0] = (u8*)&szCell[nMaxCells]; for(i=1; i<NB; i++){ aCopy[i] = &aCopy[i-1][pBt->pageSize+sizeof(MemPage)]; } aSpace = &aCopy[NB-1][pBt->pageSize+sizeof(MemPage)]; #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ aFrom = &aSpace[5*pBt->pageSize]; } #endif /* ** Make copies of the content of pPage and its siblings into aOld[]. ** The rest of this function will use data from the copies rather ** that the original pages since the original pages will be in the ** process of being overwritten. */ for(i=0; i<nOld; i++){ MemPage *p = apCopy[i] = (MemPage*)&aCopy[i][pBt->pageSize]; assert( (((long long unsigned int)p) & 7)==0 ); p->aData = &((u8*)p)[-pBt->pageSize]; memcpy(p->aData, apOld[i]->aData, pBt->pageSize + sizeof(MemPage)); /* The memcpy() above changes the value of p->aData so we have to ** set it again. */ assert( (((long long unsigned int)p) & 7)==0 ); p->aData = &((u8*)p)[-pBt->pageSize]; } /* ** Load pointers to all cells on sibling pages and the divider cells ** into the local apCell[] array. Make copies of the divider cells ** into space obtained form aSpace[] and remove the the divider Cells ** from pParent. |
︙ | ︙ | |||
3978 3979 3980 3981 3982 3983 3984 | dropCell(pParent, nxDiv, sz); }else{ u8 *pTemp; assert( nCell<nMaxCells ); szCell[nCell] = sz; pTemp = &aSpace[iSpace]; iSpace += sz; | | | 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 | dropCell(pParent, nxDiv, sz); }else{ u8 *pTemp; assert( nCell<nMaxCells ); szCell[nCell] = sz; pTemp = &aSpace[iSpace]; iSpace += sz; assert( iSpace<=pBt->pageSize*5 ); memcpy(pTemp, apDiv[i], sz); apCell[nCell] = pTemp+leafCorrection; #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ aFrom[nCell] = 0xFF; } #endif |
︙ | ︙ | |||
4203 4204 4205 4206 4207 4208 4209 | */ CellInfo info; j--; parseCellPtr(pNew, apCell[j], &info); pCell = &aSpace[iSpace]; fillInCell(pParent, pCell, 0, info.nKey, 0, 0, &sz); iSpace += sz; | | | | 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 | */ CellInfo info; j--; parseCellPtr(pNew, apCell[j], &info); pCell = &aSpace[iSpace]; fillInCell(pParent, pCell, 0, info.nKey, 0, 0, &sz); iSpace += sz; assert( iSpace<=pBt->pageSize*5 ); pTemp = 0; }else{ pCell -= 4; pTemp = &aSpace[iSpace]; iSpace += sz; assert( iSpace<=pBt->pageSize*5 ); } rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4); if( rc!=SQLITE_OK ) goto balance_cleanup; put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno); #ifndef SQLITE_OMIT_AUTOVACUUM /* If this is an auto-vacuum database, and not a leaf-data tree, ** then update the pointer map with an entry for the overflow page |
︙ | ︙ |