SQLite4
Check-in [449433ea21]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Use key-prefixes instead of full keys on interior nodes.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 449433ea21655ca6bc6a42d32d7ddf0b1cdc158f
User & Date: dan 2013-10-14 14:03:06
Context
2013-10-18
08:28
Trim overflow pages when the corresponding record is deleted from the database. check-in: 547b950db0 user: dan tags: trunk
2013-10-14
14:03
Use key-prefixes instead of full keys on interior nodes. check-in: 449433ea21 user: dan tags: trunk
2013-10-12
20:03
Speed up the assert() used to check that no page references are being leaked. check-in: d276ad3f9e user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btInt.h.

    65     65   */
    66     66   u32 sqlite4BtPagerRootpgno(BtPager*);
    67     67   
    68     68   /*
    69     69   ** Read, write and trim existing database pages.
    70     70   */
    71     71   int sqlite4BtPageGet(BtPager*, u32 pgno, BtPage **ppPage);
           72  +int sqlite4BtPageTrimPgno(BtPager*, u32 pgno);
    72     73   int sqlite4BtPageWrite(BtPage*);
    73     74   int sqlite4BtPageTrim(BtPage*);
    74     75   int sqlite4BtPageRelease(BtPage*);
    75     76   void sqlite4BtPageReference(BtPage*);
    76     77   
    77     78   /*
    78     79   ** Allocate a new database page and return a writable reference to it.

Changes to src/bt_main.c.

    24     24   ** b-tree pages. 
    25     25   */
    26     26   #define BT_PGFLAGS_INTERNAL 0x01  /* True for non-leaf nodes */
    27     27   
    28     28   #ifndef MIN
    29     29   # define MIN(a,b) (((a)<(b))?(a):(b))
    30     30   #endif
           31  +#ifndef MAX
           32  +# define MAX(a,b) (((a)>(b))?(a):(b))
           33  +#endif
    31     34   
    32     35   #define BT_EXPENSIVE_ASSERT 0
           36  +/* #define BT_STDERR_DEBUG 1 */
    33     37   
    34     38   struct bt_db {
    35     39     sqlite4_env *pEnv;              /* SQLite environment */
    36     40     BtPager *pPager;                /* Underlying page-based database */
    37     41     bt_cursor *pAllCsr;             /* List of all open cursors */
    38     42   };
    39     43   
................................................................................
   207    211       rc = btErrorBkpt(SQLITE4_NOMEM);
   208    212     }else{
   209    213       btCsrSetup(db, pCsr);
   210    214       pCsr->pNextCsr = db->pAllCsr;
   211    215       db->pAllCsr = pCsr;
   212    216     }
   213    217   
          218  +  btCheckPageRefs(db);
   214    219     return rc;
   215    220   }
   216    221   
   217    222   static void btCsrReset(bt_cursor *pCsr, int bFreeBuffer){
   218    223     int i;
   219    224     for(i=0; i<pCsr->nPg; i++){
   220    225       sqlite4BtPageRelease(pCsr->apPage[i]);
................................................................................
   221    226     }
   222    227     if( bFreeBuffer ) sqlite4_free(pCsr->pDb->pEnv, pCsr->ovfl.pBuf);
   223    228     pCsr->nPg = 0;
   224    229   }
   225    230   
   226    231   int sqlite4BtCsrClose(bt_cursor *pCsr){
   227    232     if( pCsr ){
          233  +    bt_db *pDb = pCsr->pDb;
   228    234       bt_cursor **pp;
          235  +    btCheckPageRefs(pDb);
   229    236       btCsrReset(pCsr, 1);
   230         -    for(pp=&pCsr->pDb->pAllCsr; *pp!=pCsr; pp=&(*pp)->pNextCsr);
          237  +    for(pp=&pDb->pAllCsr; *pp!=pCsr; pp=&(*pp)->pNextCsr);
   231    238       *pp = pCsr->pNextCsr;
   232         -    sqlite4_free(pCsr->pDb->pEnv, pCsr);
          239  +    sqlite4_free(pDb->pEnv, pCsr);
          240  +    btCheckPageRefs(pDb);
   233    241     }
   234    242     return SQLITE4_OK;
   235    243   }
   236    244   
   237    245   void *sqlite4BtCsrExtra(bt_cursor *pCsr){
   238    246     return (void*)&pCsr[1];
   239    247   }
................................................................................
   244    252   static int btCsrDescend(bt_cursor *pCsr, u32 pgno){
   245    253     int rc;
   246    254     if( pCsr->nPg>=BT_MAX_DEPTH ){
   247    255       rc = btErrorBkpt(SQLITE4_CORRUPT);
   248    256     }else{
   249    257       rc = sqlite4BtPageGet(pCsr->pDb->pPager, pgno, &pCsr->apPage[pCsr->nPg]);
   250    258       if( rc==SQLITE4_OK ){
          259  +      assert( pCsr->apPage[pCsr->nPg] );
   251    260         pCsr->nPg++;
   252    261       }
   253    262     }
   254    263     return rc;
   255    264   }
   256    265   
   257    266   /*
................................................................................
   260    269   ** or SQLITE4_OK otherwise.
   261    270   */
   262    271   static int btCsrAscend(bt_cursor *pCsr, int nLvl){
   263    272     int i;
   264    273     for(i=0; i<nLvl && ( pCsr->nPg>0 ); i++){
   265    274       pCsr->nPg--;
   266    275       sqlite4BtPageRelease(pCsr->apPage[pCsr->nPg]);
          276  +    pCsr->apPage[pCsr->nPg] = 0;
   267    277     }
   268    278     return (pCsr->nPg==0 ? SQLITE4_NOTFOUND : SQLITE4_OK);
   269    279   }
   270    280   
   271    281   /**************************************************************************
   272    282   ** The functions in this section are used to extract data from buffers
   273    283   ** containing formatted b-tree pages. They do not entirely encapsulate all
................................................................................
   377    387     return 0;
   378    388   }
   379    389   #endif
   380    390   
   381    391   
   382    392   /*
   383    393   ** This function compares the key passed via parameters pK and nK to the
   384         -** key stored as part of cell iCell on the database page stored in buffer
   385         -** aData (size nData bytes).
          394  +** key that cursor pCsr currently points to.
   386    395   **
   387         -** If the cell key is C, and the user key K, then this function sets:
          396  +** If the cursor key is C, and the user key K, then this function sets:
   388    397   **
   389    398   **     *piRes = (C - K).
   390    399   **
   391         -** In other workds, *piRes is +ve, zero or -ve if C is respectively larger, 
          400  +** In other words, *piRes is +ve, zero or -ve if C is respectively larger, 
   392    401   ** equal to or smaller than K.
   393    402   */
   394         -#if 0
   395    403   static int btCellKeyCompare(
   396    404     bt_cursor *pCsr,                /* Cursor handle */
   397         -  const u8 *aData, int nData,     /* Page data (nData is the page size) */
   398         -  int iCell,                      /* Cell to compare key from */
   399         -  const u8 *pK, int nK,           /* Key to compare to cell key */
          405  +  int bLeaf,                      /* True if cursor currently points to leaf */
          406  +  const void *pK, int nK,         /* Key to compare against cursor key */
   400    407     int *piRes                      /* OUT: Result of comparison */
   401    408   ){
   402         -  BtPage *pLeaf = 0;              /* Leaf page reference to release */
   403         -  int rc = SQLITE4_OK;
   404         -  int nCellKey;                   /* Number of bytes in cell key */
   405         -  int res;
   406         -  u8 *pCell = btCellFind((u8*)aData, nData, iCell);
   407         -  int nCmp;
   408         -  int nAscend = 0;
   409         -
   410         -  pCell += sqlite4BtVarintGet32(pCell, &nCellKey);
   411         -  if( nCellKey==0 ){
   412         -    if( (btFlags(aData, nData) & BT_PGFLAGS_INTERNAL)==0 ){
   413         -      rc = sqlite4BtCsrKey(pCsr, &pCell, &nCellKey);
   414         -    }else{
   415         -      rc = btOverflowBuffer(pCsr, 0);
   416         -      if( rc!=SQLITE4_OK ) return rc;
   417         -      pK = pCsr->ovfl.pBuf;
   418         -    }else{
   419         -      assert( 0 );
   420         -    }
   421         -  }
   422         -
   423         -  nCmp = MIN(nCellKey, nK);
   424         -  res = memcmp(pCell, pK, nCmp);
   425         -  if( res==0 ){
   426         -    res = nCellKey - nK;
   427         -  }
   428         -
   429         -  *piRes = res;
   430         -  return rc;
   431         -}
   432         -#endif
   433         -
   434         -static int btCellKeyCompare(
   435         -  bt_cursor *pCsr, 
   436         -  int bLeaf, 
   437         -  const void *pK, 
   438         -  int nK, 
   439         -  int *piRes
   440         -){
   441    409     const void *pCsrKey;
   442    410     int nCsrKey;
   443    411     int nCmp;
   444    412     int nAscend = 0;
   445    413     int rc = SQLITE4_OK;
   446    414     int res;
   447    415   
................................................................................
   451    419       const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);
   452    420   
   453    421       u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
   454    422       u8 *pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
   455    423   
   456    424       pCsrKey = pCell + sqlite4BtVarintGet32(pCell, &nCsrKey);
   457    425       if( nCsrKey==0 ){
          426  +      int iCell = pCsr->aiCell[pCsr->nPg-1]+1;
   458    427         while( 1 ){
   459    428           u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
   460         -        u32 pgno = btChildPgno(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
          429  +        u32 pgno = btChildPgno(aData, pgsz, iCell);
   461    430           nAscend++;
   462    431           rc = btCsrDescend(pCsr, pgno);
   463    432           if( rc!=SQLITE4_OK ) break;
   464    433           aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
   465         -        pCsr->aiCell[pCsr->nPg-1] = btCellCount(aData, pgsz);
          434  +        pCsr->aiCell[pCsr->nPg-1] = 0;
   466    435           if( (btFlags(aData) & BT_PGFLAGS_INTERNAL)==0 ) break;
          436  +        iCell = 0;
   467    437         }
   468         -      pCsr->aiCell[pCsr->nPg-1]--;
   469    438         rc = sqlite4BtCsrKey(pCsr, &pCsrKey, &nCsrKey);
   470    439       }
   471    440     }
   472    441   
   473    442     if( rc==SQLITE4_OK ){
   474    443       nCmp = MIN(nCsrKey, nK);
   475    444       res = memcmp(pCsrKey, pK, nCmp);
................................................................................
   532    501           }else{
   533    502             /* Cell iTst is LARGER than pK/nK */
   534    503             iHi = iTst;
   535    504           }
   536    505         }
   537    506         if( rc!=SQLITE4_OK ) break;
   538    507         assert( iHi==iLo );
          508  +
          509  +      iHi += (bLeaf==0 && res==0);
   539    510         pCsr->aiCell[pCsr->nPg-1] = iHi;
   540         -
   541         -#if 0
   542         -printPage(stderr, pgno, aData, pgsz);
   543         -#endif
   544         -
   545    511         if( bLeaf==0 ){
   546    512           pgno = btChildPgno(aData, pgsz, iHi);
   547    513         }else{
   548    514           pgno = 0;
   549    515   
   550    516           if( res!=0 ){
   551    517             assert( BT_SEEK_LEFAST<0 && BT_SEEK_LE<0 );
................................................................................
   573    539   
   574    540   int sqlite4BtCsrSeek(
   575    541     bt_cursor *pCsr, 
   576    542     const void *pK,                 /* Key to seek for */
   577    543     int nK,                         /* Size of key pK in bytes */
   578    544     int eSeek                       /* Seek mode (a BT_SEEK_XXX constant) */
   579    545   ){
   580         -  return btCsrSeek(pCsr, pK, nK, eSeek, 0);
          546  +  int rc;
          547  +  btCheckPageRefs(pCsr->pDb);
          548  +  rc = btCsrSeek(pCsr, pK, nK, eSeek, 0);
          549  +  btCheckPageRefs(pCsr->pDb);
          550  +  return rc;
   581    551   }
   582    552   
   583    553   /*
   584    554   ** This function seeks the cursor as required for either sqlite4BtCsrFirst()
   585    555   ** (if parameter bLast is false) or sqlite4BtCsrLast() (if bLast is true).
   586    556   */
   587    557   static int btCsrEnd(bt_cursor *pCsr, int bLast){
................................................................................
  1678   1648       }
  1679   1649     }
  1680   1650   
  1681   1651     assert( rc!=SQLITE4_OK || iCall==p->nCell );
  1682   1652     return rc;
  1683   1653   }
  1684   1654   
         1655  +/*
         1656  +** Extract a key-prefix from page pPg, which resides in a database with
         1657  +** page size pgsz. If parameter bLast is true, the key-prefix is extracted
         1658  +** from the right-most cell on the page. If bLast is false, the key-prefix
         1659  +** is extracted from the left-most cell.
         1660  +**
         1661  +** A pointer to the key-prefix is returned. Before returning, *pnByte is
         1662  +** set to the size of the prefix in bytes.
         1663  +*/
         1664  +static u8 *btKeyPrefix(const int pgsz, BtPage *pPg, int bLast, int *pnByte){
         1665  +  u8 *p;
         1666  +  int n;
         1667  +  u8 *aData;
         1668  +
         1669  +  aData = sqlite4BtPageData(pPg);
         1670  +  p = btCellFind(aData, pgsz, bLast ? btCellCount(aData, pgsz)-1 : 0);
         1671  +  p += sqlite4BtVarintGet32(p, &n);
         1672  +  if( n==0 ) p += sqlite4BtVarintGet32(p, &n);
         1673  +
         1674  +  *pnByte = n;
         1675  +  return p;
         1676  +}
         1677  +
         1678  +/*
         1679  +** Parameters pLeft and pRight point to a pair of adjacent leaf pages in
         1680  +** a database with page size pgsz. The keys in pRight are larger than those
         1681  +** in pLeft. This function populates pKV->pK and pKV->nK with a separator
         1682  +** key that is:
         1683  +**
         1684  +**   * larger than all keys on pLeft, and 
         1685  +**   * smaller than or equal to all keys on pRight.
         1686  +*/
         1687  +static void btPrefixKey(
         1688  +    const int pgsz, BtPage *pLeft, BtPage *pRight, KeyValue *pKV
         1689  +){
         1690  +  int nMax;
         1691  +  int nMaxPrefix = pgsz/4;
         1692  +
         1693  +  u8 *aLeft; int nLeft;
         1694  +  u8 *aRight; int nRight;
         1695  +  u8 *aData;
         1696  +  int i;
         1697  +
         1698  +  aLeft = btKeyPrefix(pgsz, pLeft, 1, &nLeft);
         1699  +  aRight = btKeyPrefix(pgsz, pRight, 0, &nRight);
         1700  +
         1701  +  nMax = MIN(nLeft, nMaxPrefix);
         1702  +  for(i=0; i<nMax && aLeft[i]==aRight[i]; i++);
         1703  +  if( i<nMaxPrefix ){
         1704  +    pKV->pK = aRight;
         1705  +    pKV->nK = i + 1;
         1706  +    assert( pKV->nK<=nRight );
         1707  +  }
         1708  +}
  1685   1709   
  1686   1710   int btBalance(
  1687   1711     bt_cursor *pCsr,                /* Cursor pointed to page to rebalance */
  1688   1712     int bLeaf,                      /* True if rebalancing leaf pages */
  1689   1713     int nKV,                        /* Number of entries in apKV[] array */
  1690   1714     KeyValue *apKV                  /* Extra entries to add while rebalancing */
  1691   1715   ){
................................................................................
  1853   1877   
  1854   1878     /* The leaves are written. Now gather the keys and page numbers to
  1855   1879     ** push up into the parent page. This is only required when rebalancing
  1856   1880     ** b-tree leaves. When internal nodes are balanced, the btBalanceOutput
  1857   1881     ** loop accumulates the cells destined for the parent page.  */
  1858   1882     for(iPg=0; iPg<(ctx.nOut-1); iPg++){
  1859   1883       ctx.aPCell[iPg].pgno = sqlite4BtPagePgno(ctx.apPg[iPg]);
  1860         -    assert( ctx.aPCell[iPg].nK==0 );
  1861         -
  1862   1884       if( bLeaf ){
  1863         -#if 0
  1864         -      u8 *aData = sqlite4BtPageData(ctx.apPg[iPg]);
  1865         -      u8 *pCell;
  1866         -      pCell = btCellFind(aData, pgsz, btCellCount(aData, pgsz)-1);
  1867         -      pCell += sqlite4BtVarintGet32(pCell, &ctx.aPCell[iPg].nK);
  1868         -      ctx.aPCell[iPg].pK = pCell;
  1869         -#endif
         1885  +      assert( ctx.aPCell[iPg].nK==0 );
         1886  +      btPrefixKey(pgsz, ctx.apPg[iPg], ctx.apPg[iPg+1], &ctx.aPCell[iPg]);
  1870   1887       }
  1871   1888     }
  1872   1889   
  1873   1890     rc = btSetChildPgno(
  1874   1891         pDb, pPar, iSib+ctx.nIn-1, sqlite4BtPagePgno(ctx.apPg[ctx.nOut-1])
  1875   1892     );
  1876   1893     if( rc==SQLITE4_OK ){
................................................................................
  2178   2195   
  2179   2196     btCheckPageRefs(db);
  2180   2197     return rc;
  2181   2198   }
  2182   2199   
  2183   2200   int sqlite4BtDelete(bt_cursor *pCsr){
  2184   2201     int rc;
  2185         -  btCheckPageRefs(pCsr->pDb);
  2186   2202     rc =  btDeleteFromPage(pCsr, 1);
  2187   2203     if( rc==SQLITE4_OK ){
  2188   2204       rc = btBalanceIfUnderfull(pCsr);
  2189   2205     }
  2190         -  btCheckPageRefs(pCsr->pDb);
  2191   2206     return rc;
  2192   2207   }
  2193   2208   
  2194   2209   int sqlite4BtSetCookie(bt_db *db, unsigned int iVal){
  2195   2210     return sqlite4BtPagerSetCookie(db->pPager, iVal);
  2196   2211   }
  2197   2212   
  2198   2213   int sqlite4BtGetCookie(bt_db *db, unsigned int *piVal){
  2199   2214     return sqlite4BtPagerGetCookie(db->pPager, piVal);
  2200   2215   }
  2201   2216   

Changes to src/bt_pager.c.

   497    497         if( rc!=SQLITE4_OK ){
   498    498           btFreePage(p, pRet);
   499    499           pRet = 0;
   500    500         }
   501    501       }
   502    502     }
   503    503   
          504  +  assert( (pRet!=0)==(rc==SQLITE4_OK) );
   504    505     if( rc==SQLITE4_OK ){
   505    506       p->nTotalRef++;
   506    507       pRet->nRef++;
   507         -    *ppPg = pRet;
   508    508     }
          509  +  *ppPg = pRet;
   509    510     return rc;
   510    511   }
   511    512   
   512    513   int sqlite4BtPageWrite(BtPage *pPg){
   513    514     if( (pPg->flags & BT_PAGE_DIRTY)==0 ){
   514    515       pPg->flags |= BT_PAGE_DIRTY;
   515    516       pPg->pNextDirty = pPg->pPager->pDirty;
................................................................................
   523    524   ** no longer in use.
   524    525   */
   525    526   int sqlite4BtPageTrim(BtPage *pPg){
   526    527     /* assert( !"todo" ); */
   527    528     sqlite4BtPageRelease(pPg);
   528    529     return SQLITE4_OK;
   529    530   }
          531  +
          532  +/*
          533  +** Page number pgno is no longer in use.
          534  +*/
          535  +int sqlite4BtPageTrimPgno(BtPager *pPager, u32 pgno){
          536  +  return SQLITE4_OK;
          537  +}
   530    538   
   531    539   int sqlite4BtPageRelease(BtPage *pPg){
   532    540     if( pPg ){
   533    541       assert( pPg->nRef>=1 );
   534    542       pPg->nRef--;
   535    543       pPg->pPager->nTotalRef--;
   536    544     }