SQLite

Check-in [c550d80c25]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Experimental patch to balance() (use -DSQLITE_BALANCE_QUICK). (CVS 2211)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c550d80c25ec88fceb20acabd00c21faa2d552f5
User & Date: danielk1977 2005-01-14 13:50:12.000
Context
2005-01-14
22:55
Add comments to the new balance_quick() routine. (CVS 2212) (check-in: 183c42eac8 user: drh tags: trunk)
13:50
Experimental patch to balance() (use -DSQLITE_BALANCE_QUICK). (CVS 2211) (check-in: c550d80c25 user: danielk1977 tags: trunk)
01:22
Improved test coverage on insert.c. (CVS 2210) (check-in: c772f75166 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btree.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.231 2005/01/12 07:15:05 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.











|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.232 2005/01/14 13:50:12 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
3552
3553
3554
3555
3556
3557
3558
3559


















































3560
3561
3562
3563
3564
3565
3566
** in exchange for a larger degradation in INSERT and UPDATE performance.
** The value of NN appears to give the best results overall.
*/
#define NN 1             /* Number of neighbors on either side of pPage */
#define NB (NN*2+1)      /* Total pages involved in the balance */

/* Forward reference */
static int balance(MemPage*);



















































/*
** This routine redistributes Cells on pPage and up to NN*2 siblings
** of pPage so that all pages have about the same amount of free space.
** Usually NN siblings on either side of pPage is used in the balancing,
** though more siblings might come from one side if pPage is the first
** or last child of its parent.  If pPage has fewer than 2*NN siblings







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
** in exchange for a larger degradation in INSERT and UPDATE performance.
** The value of NN appears to give the best results overall.
*/
#define NN 1             /* Number of neighbors on either side of pPage */
#define NB (NN*2+1)      /* Total pages involved in the balance */

/* Forward reference */
static int balance(MemPage*, int);

static int balance_quick(MemPage *pPage, MemPage *pParent){
  int rc;
  MemPage *pNew;
  Pgno pgnoNew;
  u8 *pCell;
  int szCell;
  CellInfo info;

  u8 parentCell[64];              /* How big should this be? */
  int parentIdx = pParent->nCell;
  int parentSize;

  /* Allocate a new page. Insert the overflow cell from pPage
  ** into it. Then remove the overflow cell from pPage.
  */
  rc = allocatePage(pPage->pBt, &pNew, &pgnoNew, 0, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pCell = pPage->aOvfl[0].pCell;
  szCell = cellSizePtr(pPage, pCell);
  zeroPage(pNew, pPage->aData[0]);
  assemblePage(pNew, 1, &pCell, &szCell);
  pPage->nOverflow = 0;

  /* pPage is currently the right-child of pParent. Change this
  ** so that the right-child is the new page allocated above and
  ** pPage is the next-to-right child. Then balance() the parent
  ** page, in case it is now overfull.
  */
  parseCellPtr(pPage, findCell(pPage, pPage->nCell-1), &info);
  rc = fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, &parentSize);
  if( rc!=SQLITE_OK ){
    return SQLITE_OK;
  }
  assert( parentSize<64 );
  rc = insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4);
  if( rc!=SQLITE_OK ){
    return SQLITE_OK;
  }
  put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno);
  put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);

  pNew->pParent = pParent;
  sqlite3pager_ref(pParent->aData);

  releasePage(pNew);
  return balance(pParent, 0);
}

/*
** This routine redistributes Cells on pPage and up to NN*2 siblings
** of pPage so that all pages have about the same amount of free space.
** Usually NN siblings on either side of pPage is used in the balancing,
** though more siblings might come from one side if pPage is the first
** or last child of its parent.  If pPage has fewer than 2*NN siblings
3625
3626
3627
3628
3629
3630
3631












3632
3633
3634
3635
3636
3637
3638
  assert( pPage->isInit );
  assert( sqlite3pager_iswriteable(pPage->aData) );
  pBt = pPage->pBt;
  pParent = pPage->pParent;
  sqlite3pager_write(pParent->aData);
  assert( pParent );
  TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));













  /*
  ** Allocate space for memory structures
  */
  mxCellPerPage = MX_CELL(pBt);
  apCell = sqliteMallocRaw( 
       (mxCellPerPage+2)*NB*(sizeof(u8*)+sizeof(int))







>
>
>
>
>
>
>
>
>
>
>
>







3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
  assert( pPage->isInit );
  assert( sqlite3pager_iswriteable(pPage->aData) );
  pBt = pPage->pBt;
  pParent = pPage->pParent;
  sqlite3pager_write(pParent->aData);
  assert( pParent );
  TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));

#ifdef SQLITE_BALANCE_QUICK
  if( pPage->leaf &&
      pPage->intKey &&
      pPage->leafData &&
      pPage->nOverflow==1 &&
      pPage->aOvfl[0].idx==pPage->nCell &&
      get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
  ){
    return balance_quick(pPage, pParent);
  }
#endif

  /*
  ** Allocate space for memory structures
  */
  mxCellPerPage = MX_CELL(pBt);
  apCell = sqliteMallocRaw( 
       (mxCellPerPage+2)*NB*(sizeof(u8*)+sizeof(int))
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
  ** Balance the parent page.  Note that the current page (pPage) might
  ** have been added to the freelist is it might no longer be initialized.
  ** But the parent page will always be initialized.
  */
  assert( pParent->isInit );
  /* assert( pPage->isInit ); // No! pPage might have been added to freelist */
  /* pageIntegrity(pPage);    // No! pPage might have been added to freelist */ 
  rc = balance(pParent);
  
  /*
  ** Cleanup before returning.
  */
balance_cleanup:
  sqliteFree(apCell);
  for(i=0; i<nOld; i++){







|







4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
  ** Balance the parent page.  Note that the current page (pPage) might
  ** have been added to the freelist is it might no longer be initialized.
  ** But the parent page will always be initialized.
  */
  assert( pParent->isInit );
  /* assert( pPage->isInit ); // No! pPage might have been added to freelist */
  /* pageIntegrity(pPage);    // No! pPage might have been added to freelist */ 
  rc = balance(pParent, 0);
  
  /*
  ** Cleanup before returning.
  */
balance_cleanup:
  sqliteFree(apCell);
  for(i=0; i<nOld; i++){
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167

4168
4169
4170
4171
4172
4173
4174
  return rc;
}

/*
** Decide if the page pPage needs to be balanced.  If balancing is
** required, call the appropriate balancing routine.
*/
static int balance(MemPage *pPage){
  int rc = SQLITE_OK;
  if( pPage->pParent==0 ){
    if( pPage->nOverflow>0 ){
      rc = balance_deeper(pPage);
    }
    if( rc==SQLITE_OK && pPage->nCell==0 ){
      rc = balance_shallower(pPage);
    }
  }else{
    if( pPage->nOverflow>0 || pPage->nFree>pPage->pBt->usableSize*2/3 ){

      rc = balance_nonroot(pPage);
    }
  }
  return rc;
}

/*







|









|
>







4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
  return rc;
}

/*
** Decide if the page pPage needs to be balanced.  If balancing is
** required, call the appropriate balancing routine.
*/
static int balance(MemPage *pPage, int insert){
  int rc = SQLITE_OK;
  if( pPage->pParent==0 ){
    if( pPage->nOverflow>0 ){
      rc = balance_deeper(pPage);
    }
    if( rc==SQLITE_OK && pPage->nCell==0 ){
      rc = balance_shallower(pPage);
    }
  }else{
    if( pPage->nOverflow>0 || 
        (!insert && pPage->nFree>pPage->pBt->usableSize*2/3) ){
      rc = balance_nonroot(pPage);
    }
  }
  return rc;
}

/*
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
    pCur->idx++;
    pCur->info.nSize = 0;
  }else{
    assert( pPage->leaf );
  }
  rc = insertCell(pPage, pCur->idx, newCell, szNew, 0, 0);
  if( rc!=SQLITE_OK ) goto end_insert;
  rc = balance(pPage);
  /* sqlite3BtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
  /* fflush(stdout); */
  if( rc==SQLITE_OK ){
    moveToRoot(pCur);
  }
end_insert:
  sqliteFree(newCell);







|







4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
    pCur->idx++;
    pCur->info.nSize = 0;
  }else{
    assert( pPage->leaf );
  }
  rc = insertCell(pPage, pCur->idx, newCell, szNew, 0, 0);
  if( rc!=SQLITE_OK ) goto end_insert;
  rc = balance(pPage, 1);
  /* sqlite3BtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
  /* fflush(stdout); */
  if( rc==SQLITE_OK ){
    moveToRoot(pCur);
  }
end_insert:
  sqliteFree(newCell);
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
    szNext = cellSizePtr(leafCur.pPage, pNext);
    assert( MX_CELL_SIZE(pBt)>=szNext+4 );
    tempCell = sqliteMallocRaw( MX_CELL_SIZE(pBt) );
    if( tempCell==0 ) return SQLITE_NOMEM;
    rc = insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell, 0);
    if( rc!=SQLITE_OK ) return rc;
    put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
    rc = balance(pPage);
    sqliteFree(tempCell);
    if( rc ) return rc;
    dropCell(leafCur.pPage, leafCur.idx, szNext);
    rc = balance(leafCur.pPage);
    releaseTempCursor(&leafCur);
  }else{
    TRACE(("DELETE: table=%d delete from leaf %d\n",
       pCur->pgnoRoot, pPage->pgno));
    dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
    rc = balance(pPage);
  }
  moveToRoot(pCur);
  return rc;
}

/*
** Create a new BTree table.  Write into *piTable the page







|



|





|







4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
    szNext = cellSizePtr(leafCur.pPage, pNext);
    assert( MX_CELL_SIZE(pBt)>=szNext+4 );
    tempCell = sqliteMallocRaw( MX_CELL_SIZE(pBt) );
    if( tempCell==0 ) return SQLITE_NOMEM;
    rc = insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell, 0);
    if( rc!=SQLITE_OK ) return rc;
    put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
    rc = balance(pPage, 0);
    sqliteFree(tempCell);
    if( rc ) return rc;
    dropCell(leafCur.pPage, leafCur.idx, szNext);
    rc = balance(leafCur.pPage, 0);
    releaseTempCursor(&leafCur);
  }else{
    TRACE(("DELETE: table=%d delete from leaf %d\n",
       pCur->pgnoRoot, pPage->pgno));
    dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
    rc = balance(pPage, 0);
  }
  moveToRoot(pCur);
  return rc;
}

/*
** Create a new BTree table.  Write into *piTable the page
5277
5278
5279
5280
5281
5282
5283





5284
5285
5286
5287
5288
5289
5290
  sCheck.pPager = pBt->pPager;
  sCheck.nPage = sqlite3pager_pagecount(sCheck.pPager);
  if( sCheck.nPage==0 ){
    unlockBtreeIfUnused(pBt);
    return 0;
  }
  sCheck.anRef = sqliteMallocRaw( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );





  for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
  i = PENDING_BYTE_PAGE(pBt);
  if( i<=sCheck.nPage ){
    sCheck.anRef[i] = 1;
  }
  sCheck.zErrMsg = 0;








>
>
>
>
>







5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356
5357
5358
  sCheck.pPager = pBt->pPager;
  sCheck.nPage = sqlite3pager_pagecount(sCheck.pPager);
  if( sCheck.nPage==0 ){
    unlockBtreeIfUnused(pBt);
    return 0;
  }
  sCheck.anRef = sqliteMallocRaw( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );
  if( !sCheck.anRef ){
    unlockBtreeIfUnused(pBt);
    return sqlite3MPrintf("Unable to malloc %d bytes", 
        (sCheck.nPage+1)*sizeof(sCheck.anRef[0]));
  }
  for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
  i = PENDING_BYTE_PAGE(pBt);
  if( i<=sCheck.nPage ){
    sCheck.anRef[i] = 1;
  }
  sCheck.zErrMsg = 0;

Changes to test/ioerr.test.
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# This file implements regression tests for SQLite library.  The
# focus of this file is testing for correct handling of I/O errors
# such as writes failing because the disk is full.
# 
# The tests in this file use special facilities that are only
# available in the SQLite test fixture.
#
# $Id: ioerr.test,v 1.12 2005/01/13 11:07:54 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

set ::AV [execsql {pragma auto_vacuum}]

# Usage: do_ioerr_test <test number> <options...>







|







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# This file implements regression tests for SQLite library.  The
# focus of this file is testing for correct handling of I/O errors
# such as writes failing because the disk is full.
# 
# The tests in this file use special facilities that are only
# available in the SQLite test fixture.
#
# $Id: ioerr.test,v 1.13 2005/01/14 13:50:13 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

set ::AV [execsql {pragma auto_vacuum}]

# Usage: do_ioerr_test <test number> <options...>
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
  CREATE TABLE t1(a,b,c);
  CREATE TABLE test2.t2(a,b,c);
  COMMIT;
} -exclude [expr [execsql {pragma auto_vacuum}] ? 8 : 0]

# Test IO errors when replaying two hot journals from a 2-file 
# transaction. This test only runs on UNIX.
if {$tcl_platform(platform)=="unix"} {
  do_ioerr_test 6 -tclprep {
    set rc [crashsql 2 test2.db-journal {
      ATTACH 'test2.db' as aux;
      PRAGMA cache_size = 10;
      BEGIN;
      CREATE TABLE aux.t2(a, b, c);
      CREATE TABLE t1(a, b, c);







|







228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
  CREATE TABLE t1(a,b,c);
  CREATE TABLE test2.t2(a,b,c);
  COMMIT;
} -exclude [expr [execsql {pragma auto_vacuum}] ? 8 : 0]

# Test IO errors when replaying two hot journals from a 2-file 
# transaction. This test only runs on UNIX.
if {$tcl_platform(platform)=="unix" && [file exists ./crashtest]} {
  do_ioerr_test 6 -tclprep {
    set rc [crashsql 2 test2.db-journal {
      ATTACH 'test2.db' as aux;
      PRAGMA cache_size = 10;
      BEGIN;
      CREATE TABLE aux.t2(a, b, c);
      CREATE TABLE t1(a, b, c);