SQLite

Check-in [29d3bfd7c9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Simplify the b-tree logic by taking advantage of the fact that all b-trees are either intkey+leafdata or zerodata. (CVS 5431)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 29d3bfd7c9a68078385354394052612bf812859b
User & Date: drh 2008-07-17 18:39:58.000
Context
2008-07-18
00:57
Optimization to sqltie3BtreeParseCellPtr. 0.3% performance increase. (CVS 5432) (check-in: 77e099ad7d user: drh tags: trunk)
2008-07-17
18:39
Simplify the b-tree logic by taking advantage of the fact that all b-trees are either intkey+leafdata or zerodata. (CVS 5431) (check-in: 29d3bfd7c9 user: drh tags: trunk)
17:34
Fix the test harness so that it does not try to link against sqlite3_mutex_alloc() if compiled with -DSQLITE_THREADSAFE=0. (CVS 5430) (check-in: 26a203d894 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btree.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.483 2008/07/16 18:17:56 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"












|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.484 2008/07/17 18:39:58 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"

867
868
869
870
871
872
873








874
875
876
877
878
879
880
881
882
883
884
885

886
887
888
889

890
891
892


893
894

895
896
897
898
899
900
901
    put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2]));
  }
}

/*
** Decode the flags byte (the first byte of the header) for a page
** and initialize fields of the MemPage structure accordingly.








*/
static void decodeFlags(MemPage *pPage, int flagByte){
  BtShared *pBt;     /* A copy of pPage->pBt */

  assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  pPage->intKey = (flagByte & (PTF_INTKEY|PTF_LEAFDATA))!=0;
  pPage->zeroData = (flagByte & PTF_ZERODATA)!=0;
  pPage->leaf = (flagByte & PTF_LEAF)!=0;
  pPage->childPtrSize = 4*(pPage->leaf==0);
  pBt = pPage->pBt;
  if( flagByte & PTF_LEAFDATA ){

    pPage->leafData = 1;
    pPage->maxLocal = pBt->maxLeaf;
    pPage->minLocal = pBt->minLeaf;
  }else{

    pPage->leafData = 0;
    pPage->maxLocal = pBt->maxLocal;
    pPage->minLocal = pBt->minLocal;


  }
  pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));

}

/*
** Initialize the auxiliary information for a disk block.
**
** The pParent parameter must be a pointer to the MemPage which
** is the parent of the page being initialized.  The root of a







>
>
>
>
>
>
>
>

|




<
|
|
|

|
>
|


|
>
|


>
>

<
>







867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887

888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904

905
906
907
908
909
910
911
912
    put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2]));
  }
}

/*
** Decode the flags byte (the first byte of the header) for a page
** and initialize fields of the MemPage structure accordingly.
**
** Only the following combinations are supported.  Anything different
** indicates a corrupt database files:
**
**         PTF_ZERODATA
**         PTF_ZERODATA | PTF_LEAF
**         PTF_LEAFDATA | PTF_INTKEY
**         PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF
*/
static int decodeFlags(MemPage *pPage, int flagByte){
  BtShared *pBt;     /* A copy of pPage->pBt */

  assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  pPage->leaf = flagByte>>3;  assert( PTF_LEAF == 1<<3 );
  flagByte &= ~PTF_LEAF;
  pPage->childPtrSize = 4-4*pPage->leaf;
  pBt = pPage->pBt;
  if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){
    pPage->intKey = 1;
    pPage->hasData = pPage->leaf;
    pPage->maxLocal = pBt->maxLeaf;
    pPage->minLocal = pBt->minLeaf;
  }else if( flagByte==PTF_ZERODATA ){
    pPage->intKey = 0;
    pPage->hasData = 0;
    pPage->maxLocal = pBt->maxLocal;
    pPage->minLocal = pBt->minLocal;
  }else{
    return SQLITE_CORRUPT_BKPT;
  }

  return SQLITE_OK;
}

/*
** Initialize the auxiliary information for a disk block.
**
** The pParent parameter must be a pointer to the MemPage which
** is the parent of the page being initialized.  The root of a
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
  if( pPage->isInit ) return SQLITE_OK;
  if( pPage->pParent==0 && pParent!=0 ){
    pPage->pParent = pParent;
    sqlite3PagerRef(pParent->pDbPage);
  }
  hdr = pPage->hdrOffset;
  data = pPage->aData;
  decodeFlags(pPage, data[hdr]);
  pPage->nOverflow = 0;
  pPage->idxShift = 0;
  usableSize = pBt->usableSize;
  pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
  top = get2byte(&data[hdr+5]);
  pPage->nCell = get2byte(&data[hdr+3]);
  if( pPage->nCell>MX_CELL(pBt) ){







|







948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
  if( pPage->isInit ) return SQLITE_OK;
  if( pPage->pParent==0 && pParent!=0 ){
    pPage->pParent = pParent;
    sqlite3PagerRef(pParent->pDbPage);
  }
  hdr = pPage->hdrOffset;
  data = pPage->aData;
  if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT;
  pPage->nOverflow = 0;
  pPage->idxShift = 0;
  usableSize = pBt->usableSize;
  pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
  top = get2byte(&data[hdr+5]);
  pPage->nCell = get2byte(&data[hdr+3]);
  if( pPage->nCell>MX_CELL(pBt) ){
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
          c = sqlite3VdbeRecordCompare(nCellKey, pCellKey, pUnKey);
          sqlite3_free(pCellKey);
          if( rc ) goto moveto_finish;
        }
      }
      if( c==0 ){
        pCur->info.nKey = nCellKey;
        if( pPage->leafData && !pPage->leaf ){
          lwr = pCur->idx;
          upr = lwr - 1;
          break;
        }else{
          if( pRes ) *pRes = 0;
          rc = SQLITE_OK;
          goto moveto_finish;







|







3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
          c = sqlite3VdbeRecordCompare(nCellKey, pCellKey, pUnKey);
          sqlite3_free(pCellKey);
          if( rc ) goto moveto_finish;
        }
      }
      if( c==0 ){
        pCur->info.nKey = nCellKey;
        if( pPage->intKey && !pPage->leaf ){
          lwr = pCur->idx;
          upr = lwr - 1;
          break;
        }else{
          if( pRes ) *pRes = 0;
          rc = SQLITE_OK;
          goto moveto_finish;
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
        pCur->eState = CURSOR_INVALID;
        return SQLITE_OK;
      }
      sqlite3BtreeMoveToParent(pCur);
      pPage = pCur->pPage;
    }while( pCur->idx>=pPage->nCell );
    *pRes = 0;
    if( pPage->leafData ){
      rc = sqlite3BtreeNext(pCur, pRes);
    }else{
      rc = SQLITE_OK;
    }
    return rc;
  }
  *pRes = 0;







|







3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
        pCur->eState = CURSOR_INVALID;
        return SQLITE_OK;
      }
      sqlite3BtreeMoveToParent(pCur);
      pPage = pCur->pPage;
    }while( pCur->idx>=pPage->nCell );
    *pRes = 0;
    if( pPage->intKey ){
      rc = sqlite3BtreeNext(pCur, pRes);
    }else{
      rc = SQLITE_OK;
    }
    return rc;
  }
  *pRes = 0;
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
      }
      sqlite3BtreeMoveToParent(pCur);
      pPage = pCur->pPage;
    }
    pCur->idx--;
    pCur->info.nSize = 0;
    pCur->validNKey = 0;
    if( pPage->leafData && !pPage->leaf ){
      rc = sqlite3BtreePrevious(pCur, pRes);
    }else{
      rc = SQLITE_OK;
    }
  }
  *pRes = 0;
  return rc;







|







3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
      }
      sqlite3BtreeMoveToParent(pCur);
      pPage = pCur->pPage;
    }
    pCur->idx--;
    pCur->info.nSize = 0;
    pCur->validNKey = 0;
    if( pPage->intKey && !pPage->leaf ){
      rc = sqlite3BtreePrevious(pCur, pRes);
    }else{
      rc = SQLITE_OK;
    }
  }
  *pRes = 0;
  return rc;
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954

4955
4956
4957
4958
4959
4960
4961
  ** and the new entry is the right-most entry in the tree (it has the
  ** largest key) then use the special balance_quick() routine for
  ** balancing.  balance_quick() is much faster and results in a tighter
  ** packing of data in the common case.
  */
  if( pPage->leaf &&
      pPage->intKey &&
      pPage->leafData &&
      pPage->nOverflow==1 &&
      pPage->aOvfl[0].idx==pPage->nCell &&
      pPage->pParent->pgno!=1 &&
      get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
  ){

    /*
    ** TODO: Check the siblings to the left of pPage. It may be that
    ** they are not full and no new page is required.
    */
    return balance_quick(pPage, pParent);
  }
#endif







<





>







4953
4954
4955
4956
4957
4958
4959

4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
  ** and the new entry is the right-most entry in the tree (it has the
  ** largest key) then use the special balance_quick() routine for
  ** balancing.  balance_quick() is much faster and results in a tighter
  ** packing of data in the common case.
  */
  if( pPage->leaf &&
      pPage->intKey &&

      pPage->nOverflow==1 &&
      pPage->aOvfl[0].idx==pPage->nCell &&
      pPage->pParent->pgno!=1 &&
      get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
  ){
    assert( pPage->intKey );
    /*
    ** TODO: Check the siblings to the left of pPage. It may be that
    ** they are not full and no new page is required.
    */
    return balance_quick(pPage, pParent);
  }
#endif
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
  ** are alike.
  **
  ** leafCorrection:  4 if pPage is a leaf.  0 if pPage is not a leaf.
  **       leafData:  1 if pPage holds key+data and pParent holds only keys.
  */
  nCell = 0;
  leafCorrection = pPage->leaf*4;
  leafData = pPage->leafData && pPage->leaf;
  for(i=0; i<nOld; i++){
    MemPage *pOld = apCopy[i];
    int limit = pOld->nCell+pOld->nOverflow;
    for(j=0; j<limit; j++){
      assert( nCell<nMaxCells );
      apCell[nCell] = findOverflowCell(pOld, j);
      szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);







|







5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
  ** are alike.
  **
  ** leafCorrection:  4 if pPage is a leaf.  0 if pPage is not a leaf.
  **       leafData:  1 if pPage holds key+data and pParent holds only keys.
  */
  nCell = 0;
  leafCorrection = pPage->leaf*4;
  leafData = pPage->hasData;
  for(i=0; i<nOld; i++){
    MemPage *pOld = apCopy[i];
    int limit = pOld->nCell+pOld->nOverflow;
    for(j=0; j<limit; j++){
      assert( nCell<nMaxCells );
      apCell[nCell] = findOverflowCell(pOld, j);
      szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
5768
5769
5770
5771
5772
5773
5774
5775
5776
5777
5778
5779
5780
5781
5782
    SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, 0, nKey, appendBias, &loc))
  ){
    return rc;
  }

  pPage = pCur->pPage;
  assert( pPage->intKey || nKey>=0 );
  assert( pPage->leaf || !pPage->leafData );
  TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
          pCur->pgnoRoot, nKey, nData, pPage->pgno,
          loc==0 ? "overwrite" : "new entry"));
  assert( pPage->isInit );
  allocateTempSpace(pBt);
  newCell = pBt->pTmpSpace;
  if( newCell==0 ) return SQLITE_NOMEM;







|







5779
5780
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792
5793
    SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, 0, nKey, appendBias, &loc))
  ){
    return rc;
  }

  pPage = pCur->pPage;
  assert( pPage->intKey || nKey>=0 );
  assert( pPage->leaf || !pPage->intKey );
  TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
          pCur->pgnoRoot, nKey, nData, pPage->pgno,
          loc==0 ? "overwrite" : "new entry"));
  assert( pPage->isInit );
  allocateTempSpace(pBt);
  newCell = pBt->pTmpSpace;
  if( newCell==0 ) return SQLITE_NOMEM;
5884
5885
5886
5887
5888
5889
5890
5891
5892
5893
5894
5895
5896
5897
5898
    ** next Cell after the one to be deleted is guaranteed to exist and
    ** to be a leaf so we can use it.
    */
    BtCursor leafCur;
    unsigned char *pNext;
    int notUsed;
    unsigned char *tempCell = 0;
    assert( !pPage->leafData );
    sqlite3BtreeGetTempCursor(pCur, &leafCur);
    rc = sqlite3BtreeNext(&leafCur, &notUsed);
    if( rc==SQLITE_OK ){
      rc = sqlite3PagerWrite(leafCur.pPage->pDbPage);
    }
    if( rc==SQLITE_OK ){
      u16 szNext;







|







5895
5896
5897
5898
5899
5900
5901
5902
5903
5904
5905
5906
5907
5908
5909
    ** next Cell after the one to be deleted is guaranteed to exist and
    ** to be a leaf so we can use it.
    */
    BtCursor leafCur;
    unsigned char *pNext;
    int notUsed;
    unsigned char *tempCell = 0;
    assert( !pPage->intKey );
    sqlite3BtreeGetTempCursor(pCur, &leafCur);
    rc = sqlite3BtreeNext(&leafCur, &notUsed);
    if( rc==SQLITE_OK ){
      rc = sqlite3PagerWrite(leafCur.pPage->pDbPage);
    }
    if( rc==SQLITE_OK ){
      u16 szNext;
Changes to src/btreeInt.h.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btreeInt.h,v 1.26 2008/07/12 14:52:20 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.











|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btreeInt.h,v 1.27 2008/07/17 18:39:58 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
*/
struct MemPage {
  u8 isInit;           /* True if previously initialized. MUST BE FIRST! */
  u8 idxShift;         /* True if Cell indices have changed */
  u8 nOverflow;        /* Number of overflow cell bodies in aCell[] */
  u8 intKey;           /* True if intkey flag is set */
  u8 leaf;             /* True if leaf flag is set */
  u8 zeroData;         /* True if table stores keys only */
  u8 leafData;         /* True if tables stores data on leaves only */
  u8 hasData;          /* True if this page stores data */
  u8 hdrOffset;        /* 100 for page 1.  0 otherwise */
  u8 childPtrSize;     /* 0 if leaf==1.  4 if leaf==0 */
  u16 maxLocal;        /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
  u16 minLocal;        /* Copy of BtShared.minLocal or BtShared.minLeaf */
  u16 cellOffset;      /* Index in aData of first cell pointer */
  u16 idxParent;       /* Index in parent of this node */







<
<







271
272
273
274
275
276
277


278
279
280
281
282
283
284
*/
struct MemPage {
  u8 isInit;           /* True if previously initialized. MUST BE FIRST! */
  u8 idxShift;         /* True if Cell indices have changed */
  u8 nOverflow;        /* Number of overflow cell bodies in aCell[] */
  u8 intKey;           /* True if intkey flag is set */
  u8 leaf;             /* True if leaf flag is set */


  u8 hasData;          /* True if this page stores data */
  u8 hdrOffset;        /* 100 for page 1.  0 otherwise */
  u8 childPtrSize;     /* 0 if leaf==1.  4 if leaf==0 */
  u16 maxLocal;        /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
  u16 minLocal;        /* Copy of BtShared.minLocal or BtShared.minLeaf */
  u16 cellOffset;      /* Index in aData of first cell pointer */
  u16 idxParent;       /* Index in parent of this node */