SQLite

Check-in [0da4820914]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid journalling an extra page when a btree insert operation uses the 'quick-balance' trick. (CVS 4257)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0da48209140b6c1ba47df799ce6065054b994f02
User & Date: danielk1977 2007-08-21 13:11:01.000
Context
2007-08-21
13:30
Add the tests that go with the previous commit. (CVS 4258) (check-in: a87f32e13e user: danielk1977 tags: trunk)
13:11
Avoid journalling an extra page when a btree insert operation uses the 'quick-balance' trick. (CVS 4257) (check-in: 0da4820914 user: danielk1977 tags: trunk)
13:07
Fix some problems in the crash-test backend. (CVS 4256) (check-in: 5bced2392a user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btree.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.404 2007/08/20 23:50:25 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"












|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.405 2007/08/21 13:11:01 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"

4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514





4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
  int hdr;          /* Offset into data[] of the page header */
  int cellOffset;   /* Address of first cell pointer in data[] */
  u8 *data;         /* The content of the whole page */
  u8 *ptr;          /* Used for moving information around in data[] */

  assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
  assert( sz==cellSizePtr(pPage, pCell) );
  assert( sqlite3PagerIswriteable(pPage->pDbPage) );
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  if( pPage->nOverflow || sz+2>pPage->nFree ){
    if( pTemp ){
      memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip);
      pCell = pTemp;
    }
    j = pPage->nOverflow++;
    assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) );
    pPage->aOvfl[j].pCell = pCell;
    pPage->aOvfl[j].idx = i;
    pPage->nFree = 0;
  }else{





    data = pPage->aData;
    hdr = pPage->hdrOffset;
    top = get2byte(&data[hdr+5]);
    cellOffset = pPage->cellOffset;
    end = cellOffset + 2*pPage->nCell + 2;
    ins = cellOffset + 2*i;
    if( end > top - sz ){
      int rc = defragmentPage(pPage);
      if( rc!=SQLITE_OK ) return rc;
      top = get2byte(&data[hdr+5]);
      assert( end + sz <= top );
    }
    idx = allocateSpace(pPage, sz);
    assert( idx>0 );
    assert( end <= get2byte(&data[hdr+5]) );







<












>
>
>
>
>







|







4495
4496
4497
4498
4499
4500
4501

4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
  int hdr;          /* Offset into data[] of the page header */
  int cellOffset;   /* Address of first cell pointer in data[] */
  u8 *data;         /* The content of the whole page */
  u8 *ptr;          /* Used for moving information around in data[] */

  assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
  assert( sz==cellSizePtr(pPage, pCell) );

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  if( pPage->nOverflow || sz+2>pPage->nFree ){
    if( pTemp ){
      memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip);
      pCell = pTemp;
    }
    j = pPage->nOverflow++;
    assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) );
    pPage->aOvfl[j].pCell = pCell;
    pPage->aOvfl[j].idx = i;
    pPage->nFree = 0;
  }else{
    int rc = sqlite3PagerWrite(pPage->pDbPage);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    assert( sqlite3PagerIswriteable(pPage->pDbPage) );
    data = pPage->aData;
    hdr = pPage->hdrOffset;
    top = get2byte(&data[hdr+5]);
    cellOffset = pPage->cellOffset;
    end = cellOffset + 2*pPage->nCell + 2;
    ins = cellOffset + 2*i;
    if( end > top - sz ){
      rc = defragmentPage(pPage);
      if( rc!=SQLITE_OK ) return rc;
      top = get2byte(&data[hdr+5]);
      assert( end + sz <= top );
    }
    idx = allocateSpace(pPage, sz);
    assert( idx>0 );
    assert( end <= get2byte(&data[hdr+5]) );
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
      ** the entry for the overflow page into the pointer map.
      */
      CellInfo info;
      sqlite3BtreeParseCellPtr(pPage, pCell, &info);
      assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload );
      if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
        Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
        int rc = ptrmapPut(pPage->pBt, pgnoOvfl, PTRMAP_OVERFLOW1, pPage->pgno);
        if( rc!=SQLITE_OK ) return rc;
      }
    }
#endif
  }

  return SQLITE_OK;







|







4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
      ** the entry for the overflow page into the pointer map.
      */
      CellInfo info;
      sqlite3BtreeParseCellPtr(pPage, pCell, &info);
      assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload );
      if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
        Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
        rc = ptrmapPut(pPage->pBt, pgnoOvfl, PTRMAP_OVERFLOW1, pPage->pgno);
        if( rc!=SQLITE_OK ) return rc;
      }
    }
#endif
  }

  return SQLITE_OK;
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  /* 
  ** Find the parent page.
  */
  assert( pPage->isInit );
  assert( sqlite3PagerIswriteable(pPage->pDbPage) );
  pBt = pPage->pBt;
  pParent = pPage->pParent;
  assert( pParent );
  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
    return rc;
  }
  TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));







|







4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  /* 
  ** Find the parent page.
  */
  assert( pPage->isInit );
  assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 );
  pBt = pPage->pBt;
  pParent = pPage->pParent;
  assert( pParent );
  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
    return rc;
  }
  TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));
4811
4812
4813
4814
4815
4816
4817




4818
4819
4820
4821
4822
4823
4824
    /*
    ** TODO: Check the siblings to the left of pPage. It may be that
    ** they are not full and no new page is required.
    */
    return balance_quick(pPage, pParent);
  }
#endif





  /*
  ** Find the cell in the parent page whose left child points back
  ** to pPage.  The "idx" variable is the index of that cell.  If pPage
  ** is the rightmost child of pParent then set idx to pParent->nCell 
  */
  if( pParent->idxShift ){







>
>
>
>







4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
    /*
    ** TODO: Check the siblings to the left of pPage. It may be that
    ** they are not full and no new page is required.
    */
    return balance_quick(pPage, pParent);
  }
#endif

  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pPage->pDbPage)) ){
    return rc;
  }

  /*
  ** Find the cell in the parent page whose left child points back
  ** to pPage.  The "idx" variable is the index of that cell.  If pPage
  ** is the rightmost child of pParent then set idx to pParent->nCell 
  */
  if( pParent->idxShift ){
5477
5478
5479
5480
5481
5482
5483

5484
5485
5486
5487
5488
5489
5490
5491
** Decide if the page pPage needs to be balanced.  If balancing is
** required, call the appropriate balancing routine.
*/
static int balance(MemPage *pPage, int insert){
  int rc = SQLITE_OK;
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  if( pPage->pParent==0 ){

    if( pPage->nOverflow>0 ){
      rc = balance_deeper(pPage);
    }
    if( rc==SQLITE_OK && pPage->nCell==0 ){
      rc = balance_shallower(pPage);
    }
  }else{
    if( pPage->nOverflow>0 || 







>
|







5485
5486
5487
5488
5489
5490
5491
5492
5493
5494
5495
5496
5497
5498
5499
5500
** Decide if the page pPage needs to be balanced.  If balancing is
** required, call the appropriate balancing routine.
*/
static int balance(MemPage *pPage, int insert){
  int rc = SQLITE_OK;
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  if( pPage->pParent==0 ){
    rc = sqlite3PagerWrite(pPage->pDbPage);
    if( rc==SQLITE_OK && pPage->nOverflow>0 ){
      rc = balance_deeper(pPage);
    }
    if( rc==SQLITE_OK && pPage->nCell==0 ){
      rc = balance_shallower(pPage);
    }
  }else{
    if( pPage->nOverflow>0 || 
5590
5591
5592
5593
5594
5595
5596
5597
5598
5599
5600
5601
5602
5603
5604
5605
5606
5607
5608
5609
5610




5611
5612
5613
5614
5615
5616
5617
  pPage = pCur->pPage;
  assert( pPage->intKey || nKey>=0 );
  assert( pPage->leaf || !pPage->leafData );
  TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
          pCur->pgnoRoot, nKey, nData, pPage->pgno,
          loc==0 ? "overwrite" : "new entry"));
  assert( pPage->isInit );
  rc = sqlite3PagerWrite(pPage->pDbPage);
  if( rc ){
    sqlite3BtreeLeave(p);
    return rc;
  }
  newCell = sqlite3_malloc( MX_CELL_SIZE(pBt) );
  if( newCell==0 ) return SQLITE_NOMEM;
  rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
  if( rc ) goto end_insert;
  assert( szNew==cellSizePtr(pPage, newCell) );
  assert( szNew<=MX_CELL_SIZE(pBt) );
  if( loc==0 && CURSOR_VALID==pCur->eState ){
    int szOld;
    assert( pCur->idx>=0 && pCur->idx<pPage->nCell );




    oldCell = findCell(pPage, pCur->idx);
    if( !pPage->leaf ){
      memcpy(newCell, oldCell, 4);
    }
    szOld = cellSizePtr(pPage, oldCell);
    rc = clearCell(pPage, oldCell);
    if( rc ) goto end_insert;







<
<
<
<
<









>
>
>
>







5599
5600
5601
5602
5603
5604
5605





5606
5607
5608
5609
5610
5611
5612
5613
5614
5615
5616
5617
5618
5619
5620
5621
5622
5623
5624
5625
  pPage = pCur->pPage;
  assert( pPage->intKey || nKey>=0 );
  assert( pPage->leaf || !pPage->leafData );
  TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
          pCur->pgnoRoot, nKey, nData, pPage->pgno,
          loc==0 ? "overwrite" : "new entry"));
  assert( pPage->isInit );





  newCell = sqlite3_malloc( MX_CELL_SIZE(pBt) );
  if( newCell==0 ) return SQLITE_NOMEM;
  rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
  if( rc ) goto end_insert;
  assert( szNew==cellSizePtr(pPage, newCell) );
  assert( szNew<=MX_CELL_SIZE(pBt) );
  if( loc==0 && CURSOR_VALID==pCur->eState ){
    int szOld;
    assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
    rc = sqlite3PagerWrite(pPage->pDbPage);
    if( rc ){
      goto end_insert;
    }
    oldCell = findCell(pPage, pCur->idx);
    if( !pPage->leaf ){
      memcpy(newCell, oldCell, 4);
    }
    szOld = cellSizePtr(pPage, oldCell);
    rc = clearCell(pPage, oldCell);
    if( rc ) goto end_insert;