Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Change the way that balance_nonroot() partitions cells between the sibling pages such that a scan of the cell size array is not required. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | btree-opt2 |
Files: | files | file ages | folders |
SHA1: |
168728715156d756ac8c0df45710d054 |
User & Date: | drh 2015-06-22 20:02:04.079 |
Context
2015-06-23
| ||
02:37 | Avoid computing cell sizes in balance_nonroot() until they are really needed. This gives an overall 1.7% performance gain for about 1000 extra bytes of code space. (check-in: 43844537e8 user: drh tags: btree-opt2) | |
2015-06-22
| ||
20:02 | Change the way that balance_nonroot() partitions cells between the sibling pages such that a scan of the cell size array is not required. (check-in: 1687287151 user: drh tags: btree-opt2) | |
2015-06-20
| ||
14:11 | Update the fuzztest data using the latest test vectors discovered by AFL. (check-in: b97f9cf73e user: drh tags: trunk) | |
Changes
Changes to src/btree.c.
︙ | ︙ | |||
3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 | u8 *pCell = findCell(pPage, i); ptrmapPutOvflPtr(pPage, pCell, &rc); if( !pPage->leaf ){ Pgno childPgno = get4byte(pCell); ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); } } if( !pPage->leaf ){ Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); } | > | 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 | u8 *pCell = findCell(pPage, i); ptrmapPutOvflPtr(pPage, pCell, &rc); if( !pPage->leaf ){ Pgno childPgno = get4byte(pCell); ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); if( rc ) return rc; } } if( !pPage->leaf ){ Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); } |
︙ | ︙ | |||
6291 6292 6293 6294 6295 6296 6297 | ** Some of the cells in apCell[] may currently be stored in pPg. This ** function works around problems caused by this by making a copy of any ** such cells before overwriting the page data. ** ** The MemPage.nFree field is invalidated by this function. It is the ** responsibility of the caller to set it correctly. */ | | | 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 | ** Some of the cells in apCell[] may currently be stored in pPg. This ** function works around problems caused by this by making a copy of any ** such cells before overwriting the page data. ** ** The MemPage.nFree field is invalidated by this function. It is the ** responsibility of the caller to set it correctly. */ static int rebuildPage( MemPage *pPg, /* Edit this page */ int nCell, /* Final number of cells on page */ u8 **apCell, /* Array of cells */ u16 *szCell /* Array of cell sizes */ ){ const int hdr = pPg->hdrOffset; /* Offset of header on pPg */ u8 * const aData = pPg->aData; /* Pointer to data for pPg */ |
︙ | ︙ | |||
6316 6317 6318 6319 6320 6321 6322 | pData = pEnd; for(i=0; i<nCell; i++){ u8 *pCell = apCell[i]; if( pCell>aData && pCell<pEnd ){ pCell = &pTmp[pCell - aData]; } pData -= szCell[i]; | < > > > | 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 | pData = pEnd; for(i=0; i<nCell; i++){ u8 *pCell = apCell[i]; if( pCell>aData && pCell<pEnd ){ pCell = &pTmp[pCell - aData]; } pData -= szCell[i]; put2byte(pCellptr, (pData - aData)); pCellptr += 2; if( pData < pCellptr ) return SQLITE_CORRUPT_BKPT; memcpy(pData, pCell, szCell[i]); assert( szCell[i]==pPg->xCellSize(pPg, pCell) || CORRUPT_DB ); testcase( szCell[i]==pPg->xCellSize(pPg,pCell) ); } /* The pPg->nFree field is now set incorrectly. The caller will fix it. */ pPg->nCell = nCell; pPg->nOverflow = 0; put2byte(&aData[hdr+1], 0); put2byte(&aData[hdr+3], pPg->nCell); put2byte(&aData[hdr+5], pData - aData); aData[hdr+7] = 0x00; return SQLITE_OK; } /* ** Array apCell[] contains nCell pointers to b-tree cells. Array szCell ** contains the size in bytes of each such cell. This function attempts to ** add the cells stored in the array to page pPg. If it cannot (because ** the page needs to be defragmented before the cells will fit), non-zero |
︙ | ︙ | |||
6450 6451 6452 6453 6454 6455 6456 | ** ** This routine makes the necessary adjustments to pPg so that it contains ** the correct cells after being balanced. ** ** The pPg->nFree field is invalid when this function returns. It is the ** responsibility of the caller to set it correctly. */ | | | 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 | ** ** This routine makes the necessary adjustments to pPg so that it contains ** the correct cells after being balanced. ** ** The pPg->nFree field is invalid when this function returns. It is the ** responsibility of the caller to set it correctly. */ static int editPage( MemPage *pPg, /* Edit this page */ int iOld, /* Index of first cell currently on page */ int iNew, /* Index of new first cell on page */ int nNew, /* Final number of cells on page */ u8 **apCell, /* Array of cells */ u16 *szCell /* Array of cell sizes */ ){ |
︙ | ︙ | |||
6541 6542 6543 6544 6545 6546 6547 | if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){ pCell = &pTmp[pCell - aData]; } assert( 0==memcmp(pCell, &aData[iOff], szCell[i+iNew]) ); } #endif | | | | 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 | if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){ pCell = &pTmp[pCell - aData]; } assert( 0==memcmp(pCell, &aData[iOff], szCell[i+iNew]) ); } #endif return SQLITE_OK; editpage_fail: /* Unable to edit this page. Rebuild it from scratch instead. */ return rebuildPage(pPg, nNew, &apCell[iNew], &szCell[iNew]); } /* ** The following parameters determine how many adjacent pages get involved ** in a balancing operation. NN is the number of neighbors on either side ** of the page that participate in the balancing operation. NB is the ** total number of pages that participate, including the target page and |
︙ | ︙ | |||
6616 6617 6618 6619 6620 6621 6622 | u8 *pCell = pPage->apOvfl[0]; u16 szCell = pPage->xCellSize(pPage, pCell); u8 *pStop; assert( sqlite3PagerIswriteable(pNew->pDbPage) ); assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) ); zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF); | | > | 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 | u8 *pCell = pPage->apOvfl[0]; u16 szCell = pPage->xCellSize(pPage, pCell); u8 *pStop; assert( sqlite3PagerIswriteable(pNew->pDbPage) ); assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) ); zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF); rc = rebuildPage(pNew, 1, &pCell, &szCell); if( rc ) return rc; pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell; /* If this is an auto-vacuum database, update the pointer map ** with entries for the new page, and any pointer from the ** cell on the page to an overflow page. If either of these ** operations fails, the return code is set, but the contents ** of the parent page are still manipulated by thh code below. |
︙ | ︙ | |||
6831 6832 6833 6834 6835 6836 6837 | int i, j, k; /* Loop counters */ int nxDiv; /* Next divider slot in pParent->aCell[] */ int rc = SQLITE_OK; /* The return code */ u16 leafCorrection; /* 4 if pPage is a leaf. 0 if not */ int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ int usableSpace; /* Bytes in pPage beyond the header */ int pageFlags; /* Value of pPage->aData[0] */ | < | 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 | int i, j, k; /* Loop counters */ int nxDiv; /* Next divider slot in pParent->aCell[] */ int rc = SQLITE_OK; /* The return code */ u16 leafCorrection; /* 4 if pPage is a leaf. 0 if not */ int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ int usableSpace; /* Bytes in pPage beyond the header */ int pageFlags; /* Value of pPage->aData[0] */ int iSpace1 = 0; /* First unused byte of aSpace1[] */ int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */ int szScratch; /* Size of scratch memory requested */ MemPage *apOld[NB]; /* pPage and up to two siblings */ MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */ u8 *pRight; /* Location in parent of right-sibling pointer */ u8 *apDiv[NB-1]; /* Divider cells in pParent */ |
︙ | ︙ | |||
7077 7078 7079 7080 7081 7082 7083 | ** szNew[i]: Spaced used on the i-th sibling page. ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to ** the right of the i-th sibling page. ** usableSpace: Number of bytes of space available on each sibling. ** */ usableSpace = pBt->usableSize - 12 + leafCorrection; | | < | | > > | > | > > > > > | | < | > > | > > > > | | | > > > > > | > > > > > > > > > > > > | 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 | ** szNew[i]: Spaced used on the i-th sibling page. ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to ** the right of the i-th sibling page. ** usableSpace: Number of bytes of space available on each sibling. ** */ usableSpace = pBt->usableSize - 12 + leafCorrection; for(i=0; i<nOld; i++){ MemPage *p = apOld[i]; szNew[i] = usableSpace - p->nFree; if( szNew[i]<0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } for(j=0; j<p->nOverflow; j++){ szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]); } cntNew[i] = cntOld[i]; } k = nOld; for(i=0; i<k; i++){ int sz; while( szNew[i]>usableSpace ){ if( i+1>=k ){ k = i+2; if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } szNew[k-1] = 0; cntNew[k-1] = nCell; } sz = 2+szCell[cntNew[i]-1]; szNew[i] -= sz; if( !leafData ){ sz = cntNew[i]<nCell ? 2+szCell[cntNew[i]] : 0; } szNew[i+1] += sz; cntNew[i]--; } while( cntNew[i]<nCell ){ sz = 2+szCell[cntNew[i]]; if( szNew[i]+sz>usableSpace ) break; szNew[i] += sz; cntNew[i]++; if( !leafData ){ sz = cntNew[i]<nCell ? 2+szCell[cntNew[i]] : 0; } szNew[i+1] -= sz; } if( cntNew[i]>=nCell ){ k = i+1; }else if( cntNew[i] - (i>0 ? cntNew[i-1] : 0) <= 0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } } /* ** The packing computed by the previous block is biased toward the siblings ** on the left side (siblings with smaller keys). The left siblings are ** always nearly full, while the right-most sibling might be nearly empty. ** The next block of code attempts to adjust the packing of siblings to ** get a better balance. |
︙ | ︙ | |||
7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 | assert( r<nMaxCells ); while( szRight==0 || (!bBulk && szRight+szCell[d]+2<=szLeft-(szCell[r]+2)) ){ szRight += szCell[d] + 2; szLeft -= szCell[r] + 2; cntNew[i-1]--; r = cntNew[i-1] - 1; d = r + 1 - leafData; } szNew[i] = szRight; szNew[i-1] = szLeft; } | > > > > | 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 | assert( r<nMaxCells ); while( szRight==0 || (!bBulk && szRight+szCell[d]+2<=szLeft-(szCell[r]+2)) ){ szRight += szCell[d] + 2; szLeft -= szCell[r] + 2; cntNew[i-1]--; if( cntNew[i-1] <= 0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } r = cntNew[i-1] - 1; d = r + 1 - leafData; } szNew[i] = szRight; szNew[i-1] = szLeft; } |
︙ | ︙ | |||
7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 | if( iOld>=nNew || pNew->pgno!=aPgno[iOld] || pCell<aOld || pCell>=&aOld[usableSize] ){ if( !leafCorrection ){ ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc); } if( szCell[i]>pNew->minLocal ){ ptrmapPutOvflPtr(pNew, pCell, &rc); } } } } /* Insert new divider cells into pParent. */ for(i=0; i<nNew-1; i++){ | > > | 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 | if( iOld>=nNew || pNew->pgno!=aPgno[iOld] || pCell<aOld || pCell>=&aOld[usableSize] ){ if( !leafCorrection ){ ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc); if( rc ) goto balance_cleanup; } if( szCell[i]>pNew->minLocal ){ ptrmapPutOvflPtr(pNew, pCell, &rc); if( rc ) goto balance_cleanup; } } } } /* Insert new divider cells into pParent. */ for(i=0; i<nNew-1; i++){ |
︙ | ︙ | |||
7400 7401 7402 7403 7404 7405 7406 | nNewCell = cntNew[0]; }else{ iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : nCell; iNew = cntNew[iPg-1] + !leafData; nNewCell = cntNew[iPg] - iNew; } | | > | 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 | nNewCell = cntNew[0]; }else{ iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : nCell; iNew = cntNew[iPg-1] + !leafData; nNewCell = cntNew[iPg] - iNew; } rc = editPage(apNew[iPg], iOld, iNew, nNewCell, apCell, szCell); if( rc ) goto balance_cleanup; abDone[iPg]++; apNew[iPg]->nFree = usableSpace-szNew[iPg]; assert( apNew[iPg]->nOverflow==0 ); assert( apNew[iPg]->nCell==nNewCell ); } } |
︙ | ︙ |