/ Check-in [18500cdc]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Continued work on btree (CVS 219)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 18500cdcc1a42118cdf650681ebb1cbeac106aa7
User & Date: drh 2001-05-24 21:06:35
Context
2001-05-26
13:15
:-) (CVS 220) check-in: 45a0e0fc user: drh tags: trunk
2001-05-24
21:06
Continued work on btree (CVS 219) check-in: 18500cdc user: drh tags: trunk
2001-05-21
13:45
:-) (CVS 218) check-in: 523d52df user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btree.c.

    17     17   ** Boston, MA  02111-1307, USA.
    18     18   **
    19     19   ** Author contact information:
    20     20   **   drh@hwaci.com
    21     21   **   http://www.hwaci.com/drh/
    22     22   **
    23     23   *************************************************************************
    24         -** $Id: btree.c,v 1.6 2001/05/21 13:45:10 drh Exp $
           24  +** $Id: btree.c,v 1.7 2001/05/24 21:06:35 drh Exp $
    25     25   */
    26     26   #include "sqliteInt.h"
    27     27   #include "pager.h"
    28     28   #include "btree.h"
    29     29   #include <assert.h>
    30     30   
    31     31   /*
................................................................................
    93     93   };
    94     94   #define MAGIC_1  0x7264dc61
    95     95   #define MAGIC_2  0x54e55d9e
    96     96   
    97     97   /*
    98     98   ** Each database page has a header as follows:
    99     99   **
   100         -**      page1_header          Extra numbers found on page 1 only.
          100  +**      page1_header          Optional instance of Page1Header structure
   101    101   **      rightmost_pgno        Page number of the right-most child page
   102    102   **      first_cell            Index into MemPage.aPage of first cell
   103    103   **      first_free            Index of first free block
   104    104   **
   105    105   ** MemPage.pStart always points to the rightmost_pgno.  First_free is
   106    106   ** 0 if there is no free space on this page.  Otherwise, first_free is
   107    107   ** the index in MemPage.aPage[] of a FreeBlk structure that describes
................................................................................
   134    134     u32 nData;      /* Number of bytes of data */
   135    135     char aData[4];  /* Key and data */
   136    136   };
   137    137   
   138    138   /*
   139    139   ** Free space on a page is remembered using a linked list of the FreeBlk
   140    140   ** structures.  Space on a database page is allocated in increments of
   141         -** at least 4 bytes and is always aligned to a 4-byte boundry.
          141  +** at least 4 bytes and is always aligned to a 4-byte boundry.  The
          142  +** linked list of freeblocks is always kept in order by address.
   142    143   */
   143    144   struct FreeBlk {
   144         -  u16 iSize;      /* Number of u32-sized slots in the block of free space */
          145  +  u16 iSize;      /* Number of bytes in this block of free space */
   145    146     u16 iNext;      /* Index in MemPage.aPage[] of the next free block */
   146    147   };
   147    148   
   148    149   /*
   149    150   ** When the key and data for a single entry in the BTree will not fit in
   150    151   ** the MX_LOACAL_PAYLOAD bytes of space available on the database page,
   151    152   ** then all extra data is written to a linked list of overflow pages.
................................................................................
   154    155   ** Unused pages in the database are also represented by instances of
   155    156   ** the OverflowPage structure.  The Page1Header.freeList field is the
   156    157   ** page number of the first page in a linked list of unused database
   157    158   ** pages.
   158    159   */
   159    160   struct OverflowPage {
   160    161     Pgno next;
   161         -  char aData[SQLITE_PAGE_SIZE-sizeof(Pgno)];
          162  +  char aData[OVERFLOW_SIZE];
   162    163   };
   163    164   
   164    165   /*
   165    166   ** For every page in the database file, an instance of the following structure
   166    167   ** is stored in memory.  The aPage[] array contains the data obtained from
   167    168   ** the disk.  The rest is auxiliary data that held in memory only.  The
   168    169   ** auxiliary data is only valid for regular database pages - the auxiliary
   169    170   ** data is meaningless for overflow pages and pages on the freelist.
   170    171   **
   171    172   ** Of particular interest in the auxiliary data is the aCell[] entry.  Each
   172         -** aCell[] entry is a pointer to a Cell structure in aPage[].  The cells
          173  +** aCell[] entry is a pointer to a Cell structure in aPage[].  The cells are
   173    174   ** put in this array so that they can be accessed in constant time, rather
   174    175   ** than in linear time which would be needed if we walked the linked list.
          176  +**
          177  +** The pParent field points back to the parent page.  This allows us to
          178  +** walk up the BTree from any leaf to the root.  Care must be taken to
          179  +** unref() the parent page pointer when this page is no longer referenced.
          180  +** The pageDestructor() routine handles that.
   175    181   */
   176    182   struct MemPage {
   177    183     char aPage[SQLITE_PAGE_SIZE];  /* Page data stored on disk */
   178    184     unsigned char isInit;          /* True if auxiliary data is initialized */
   179         -  unsigned char validUp;         /* True if MemPage.up is valid */
   180    185     unsigned char validLeft;       /* True if MemPage.left is valid */
   181    186     unsigned char validRight;      /* True if MemPage.right is valid */
   182         -  Pgno up;                       /* The parent page. 0 means this is the root */
          187  +  MemPage *pParent;              /* The parent of this page.  NULL for root */
   183    188     Pgno left;                     /* Left sibling page.  0==none */
   184    189     Pgno right;                    /* Right sibling page.  0==none */
   185    190     int idxStart;                  /* Index in aPage[] of real data */
   186    191     PageHdr *pStart;               /* Points to aPage[idxStart] */
   187    192     int nFree;                     /* Number of free bytes in aPage[] */
   188    193     int nCell;                     /* Number of entries on this page */
   189    194     Cell *aCell[MX_CELL];          /* All data entires in sorted order */
................................................................................
   201    206   typedef Btree Bt;
   202    207   
   203    208   /*
   204    209   ** A cursor is a pointer to a particular entry in the BTree.
   205    210   ** The entry is identified by its MemPage and the index in
   206    211   ** MemPage.aCell[] of the entry.
   207    212   */
   208         -struct Cursor {
   209         -  Btree *pBt;            /* The pointer back to the BTree */
   210         -  Cursor *pPrev, *pNext; /* List of all cursors */
   211         -  MemPage *pPage;        /* Page that contains the entry */
   212         -  int idx;               /* Index of the entry in pPage->aCell[] */
   213         -  int skip_incr;         /* */
          213  +struct BtCursor {
          214  +  Btree *pBt;                     /* The pointer back to the BTree */
          215  +  BtCursor *pPrev, *pNext;        /* List of all cursors */
          216  +  MemPage *pPage;                 /* Page that contains the entry */
          217  +  int idx;                        /* Index of the entry in pPage->aCell[] */
          218  +  int skip_incr;                  /* */
   214    219   };
   215    220   
   216    221   /*
   217         -** Defragment the page given.  All of the free space
   218         -** is collected into one big block at the end of the
   219         -** page.
          222  +** Defragment the page given.  All Cells are moved to the
          223  +** beginning of the page and all free space is collected 
          224  +** into one big FreeBlk at the end of the page.
   220    225   */
   221    226   static void defragmentPage(MemPage *pPage){
   222    227     int pc;
   223    228     int i, n;
   224    229     FreeBlk *pFBlk;
   225    230     char newPage[SQLITE_PAGE_SIZE];
   226    231   
................................................................................
   234    239       n = ROUNDUP(n);
   235    240       n += sizeof(Cell) - sizeof(pCell->aData);
   236    241       pCell->iNext = i<pPage->nCell ? pc + n : 0;
   237    242       memcpy(&newPage[pc], pCell, n);
   238    243       pPage->aCell[i] = (Cell*)&pPage->aPage[pc];
   239    244       pc += n;
   240    245     }
   241         -  assert( pPage->nFree==pc );
          246  +  assert( pPage->nFree==SQLITE_PAGE_SIZE-pc );
   242    247     memcpy(pPage->aPage, newPage, pc);
   243    248     pFBlk = &pPage->aPage[pc];
   244    249     pFBlk->iSize = SQLITE_PAGE_SIZE - pc;
   245    250     pFBlk->iNext = 0;
   246    251     pPage->pStart->firstFree = pc;
   247    252     memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk));
   248    253   }
................................................................................
   251    256   ** Allocate space on a page.  The space needs to be at least
   252    257   ** nByte bytes in size.  (Actually, all allocations are rounded
   253    258   ** up to the next even multiple of 4.)  Return the index into
   254    259   ** pPage->aPage[] of the first byte of the new allocation.
   255    260   ** Or return 0 if there is not enough free space on the page to
   256    261   ** satisfy the allocation request.
   257    262   **
   258         -** This routine will call defragmentPage if necessary to consolidate
   259         -** free space.  
          263  +** If the page contains nBytes of free space but does not contain
          264  +** nBytes of contiguous free space, then defragementPage() is
          265  +** called to consolidate all free space before allocating the
          266  +** new chunk.
   260    267   */
   261    268   static int allocSpace(MemPage *pPage, int nByte){
   262    269     FreeBlk *p;
   263    270     u16 *pIdx;
   264    271     int start;
          272  +
   265    273     nByte = ROUNDUP(nByte);
   266    274     if( pPage->nFree<nByte ) return 0;
   267    275     pIdx = &pPage->pStart->firstFree;
   268    276     p = (FreeBlk*)&pPage->aPage[*pIdx];
   269    277     while( p->iSize<nByte ){
   270    278       if( p->iNext==0 ){
   271    279         defragmentPage(pPage);
................................................................................
   275    283       }
   276    284       p = (FreeBlk*)&pPage->aPage[*pIdx];
   277    285     }
   278    286     if( p->iSize==nByte ){
   279    287       start = *pIdx;
   280    288       *pIdx = p->iNext;
   281    289     }else{
   282         -    p->iSize -= nByte;
   283         -    start = *pIdx + p->iSize;
          290  +    start = *pIdx;
          291  +    FreeBlk *pNew = (FreeBlk*)&pPage->aPage[start + nByte];
          292  +    pNew->iNext = p->iNext;
          293  +    pNew->iSize = p->iSize - nByte;
          294  +    *pIdx = start + nByte;
   284    295     }
   285    296     pPage->nFree -= nByte;
   286    297     return start;
   287    298   }
   288    299   
   289    300   /*
   290    301   ** Return a section of the MemPage.aPage[] to the freelist.
................................................................................
   331    342     }
   332    343     *pIdx = start;
   333    344     pPage->nFree += size;
   334    345   }
   335    346   
   336    347   /*
   337    348   ** Initialize the auxiliary information for a disk block.
          349  +**
          350  +** Return SQLITE_OK on success.  If we see that the page does
          351  +** not contained a well-formed database page, then return 
          352  +** SQLITE_CORRUPT.  Note that a return of SQLITE_OK does not
          353  +** guarantee that the page is well-formed.  It only shows that
          354  +** we failed to detect any corruption.
   338    355   */
   339         -static int initPage(MemPage *pPage, Pgno pgnoThis, Pgno pgnoParent){
          356  +static int initPage(MemPage *pPage, Pgno pgnoThis, MemPage *pParent){
   340    357     int idx;
   341    358     Cell *pCell;
   342    359     FreeBlk *pFBlk;
   343    360   
   344    361     pPage->idxStart = (pgnoThis==1) ? sizeof(Page1Header) : 0;
   345    362     pPage->pStart = (PageHdr*)&pPage->aPage[pPage->idxStart];
   346    363     pPage->isInit = 1;
   347         -  pPage->validUp = 1;
   348         -  pPage->up = pgnoParent;
          364  +  assert( pPage->pParent==0 );
          365  +  pPage->pParent = pParent;
          366  +  if( pParent ) sqlitepager_ref(pParent);
   349    367     pPage->nCell = 0;
   350    368     idx = pPage->pStart->firstCell;
   351    369     while( idx!=0 ){
   352    370       if( idx>SQLITE_PAGE_SIZE-sizeof(Cell) ) goto page_format_error;
   353    371       if( idx<pPage->idxStart + sizeof(PageHeader) ) goto page_format_error;
   354    372       pCell = (Cell*)&pPage->aPage[idx];
   355    373       pPage->aCell[pPage->nCell++] = pCell;
................................................................................
   366    384       idx = pFBlk->iNext;
   367    385     }
   368    386     return SQLITE_OK;
   369    387   
   370    388   page_format_error:
   371    389     return SQLITE_CORRUPT;
   372    390   }
          391  +
          392  +/*
          393  +** This routine is called when the reference count for a page
          394  +** reaches zero.  We need to unref the pParent pointer when that
          395  +** happens.
          396  +*/
          397  +static void pageDestructor(void *pData){
          398  +  MemPage *pPage = (MemPage*)pData;
          399  +  if( pPage->pParent ){
          400  +    MemPage *pParent = pPage->pParent;
          401  +    pPage->pParent = 0;
          402  +    sqlitepager_unref(pParent);
          403  +  }
          404  +}
   373    405   
   374    406   /*
   375    407   ** Open a new database.
   376    408   **
   377    409   ** Actually, this routine just sets up the internal data structures
   378         -** for accessing the database.  We do not actually open the database
   379         -** file until the first page is loaded.
          410  +** for accessing the database.  We do not open the database file 
          411  +** until the first page is loaded.
   380    412   */
   381    413   int sqliteBtreeOpen(const char *zFilename, int mode, Btree **ppBtree){
   382    414     Btree *pBt;
   383    415   
   384    416     pBt = sqliteMalloc( sizeof(*pBt) );
   385    417     if( pBt==0 ){
   386    418       **ppBtree = 0;
................................................................................
   389    421     rc = sqlitepager_open(&pBt->pPager, zFilename, 100, EXTRA_SPACE);
   390    422     if( rc!=SQLITE_OK ){
   391    423       if( pBt->pPager ) sqlitepager_close(pBt->pPager);
   392    424       sqliteFree(pBt);
   393    425       *ppBtree = 0;
   394    426       return rc;
   395    427     }
          428  +  sqlitepager_set_destructor(pBt->pPager, pageDestructor);
   396    429     pBt->pCursor = 0;
   397    430     pBt->page1 = 0;
   398    431     *ppBtree = pBt;
   399    432     return SQLITE_OK;
   400    433   }
   401    434   
   402    435   /*
................................................................................
   423    456   */
   424    457   static int lockBtree(Btree *pBt){
   425    458     int rc;
   426    459     if( pBt->page1 ) return SQLITE_OK;
   427    460     rc = sqlitepager_get(pBt->pPager, 1, &pBt->page1);
   428    461     if( rc!=SQLITE_OK ) return rc;
   429    462     rc = initPage(pBt->page1, 1, 0);
   430         -  if( rc!=SQLITE_OK ) goto lock_failed;
          463  +  if( rc!=SQLITE_OK ) goto page1_init_failed;
   431    464   
   432    465     /* Do some checking to help insure the file we opened really is
   433    466     ** a valid database file. 
   434    467     */
   435    468     if( sqlitepager_pagecount(pBt->pPager)>0 ){
   436    469       Page1Header *pP1 = (Page1Header*)pBt->page1;
   437    470       if( pP1->magic1!=MAGIC_1 || pP1->magic2!=MAGIC_2 ){
   438    471         rc = SQLITE_CORRUPT;
   439         -      goto lock_failed;
          472  +      goto page1_init_failed;
   440    473       }
   441    474     }
   442    475     return rc;
   443    476   
   444         -lock_failed:
          477  +page1_init_failed:
   445    478     sqlitepager_unref(pBt->page1);
   446    479     pBt->page1 = 0;
          480  +  return rc;
   447    481   }
   448    482   
   449    483   /*
   450         -** Start a new transaction
          484  +** Attempt to start a new transaction.
   451    485   */
   452    486   int sqliteBtreeBeginTrans(Btree *pBt){
   453    487     int rc;
          488  +  Page1Header *pP1;
   454    489     if( pBt->inTrans ) return SQLITE_ERROR;
   455    490     if( pBt->page1==0 ){
   456    491       rc = lockBtree(pBt);
   457    492       if( rc!=SQLITE_OK ) return rc;
   458    493     }
   459    494     rc = sqlitepager_write(pBt->page1);
   460    495     if( rc==SQLITE_OK ){
   461    496       pBt->inTrans = 1;
          497  +  }
          498  +  pP1 = (Page1Header*)pBt->page1;
          499  +  if( pP1->magic1==0 ){
          500  +    pP1->magic1 = MAGIC_1;
          501  +    pP1->magic2 = MAGIC_2;
   462    502     }
   463    503     return rc;
   464    504   }
   465    505   
   466    506   /*
   467    507   ** Remove the last reference to the database file.  This will
   468    508   ** remove the read lock.
................................................................................
   477    517   
   478    518   /*
   479    519   ** Commit the transaction currently in progress.  All cursors
   480    520   ** must be closed before this routine is called.
   481    521   */
   482    522   int sqliteBtreeCommit(Btree *pBt){
   483    523     int rc;
   484         -  assert( pBt->pCursor==0 );
          524  +  if( pBt->pCursor!=0 ) return SQLITE_ERROR;
   485    525     rc = sqlitepager_commit(pBt->pPager);
   486    526     unlockBtree(pBt);
   487    527     return rc;
   488    528   }
   489    529   
   490    530   /*
   491    531   ** Rollback the transaction in progress.  All cursors must be
   492    532   ** closed before this routine is called.
   493    533   */
   494    534   int sqliteBtreeRollback(Btree *pBt){
   495    535     int rc;
   496         -  assert( pBt->pCursor==0 );
          536  +  if( pBt->pCursor!=0 ) return SQLITE_ERROR;
   497    537     rc = sqlitepager_rollback(pBt->pPager);
   498    538     unlockBtree(pBt);
   499    539     return rc;
   500    540   }
   501    541   
   502    542   /*
   503    543   ** Create a new cursor.  The act of acquiring a cursor
................................................................................
   529    569     rc = sqlitepager_get(pBt->pPager, 1, &pCur->pPage);
   530    570     if( rc!=SQLITE_OK ){
   531    571       sqliteFree(pCur);
   532    572       *ppCur = 0;
   533    573       return rc;
   534    574     }
   535    575     if( !pCur->pPage->isInit ){
   536         -    initPage(pCur->pPage);
          576  +    initPage(pCur->pPage, 1, 0);
   537    577     }
   538    578     pCur->idx = 0;
          579  +  pCur->depth = 0;
          580  +  pCur->aPage[0] = pCur->pPage;
   539    581     *ppCur = pCur;
   540    582     return SQLITE_OK;
   541    583   }
   542    584   
   543    585   /*
   544    586   ** Close a cursor. 
   545    587   */
................................................................................
   558    600     if( pBt->pCursor==0 && pBt->inTrans==0 ){
   559    601       unlockBtree(pBt);
   560    602     }
   561    603     sqliteFree(pCur);
   562    604   }
   563    605   
   564    606   /*
   565         -** Return the number of bytes in the key of the entry to which
   566         -** the cursor is currently point.  If the cursor has not been
   567         -** initialized or is pointed to a deleted entry, then return 0.
          607  +** Write the number of bytes of key for the entry the cursor is
          608  +** pointing to into *pSize.  Return SQLITE_OK.  Failure is not
          609  +** possible.
   568    610   */
   569         -int sqliteBtreeKeySize(BtCursor *pCur){
          611  +int sqliteBtreeKeySize(BtCursor *pCur, int *pSize){
   570    612     Cell *pCell;
   571    613     MemPage *pPage;
   572    614   
   573    615     pPage = pCur->pPage;
   574         -  if( pCur->idx >= pPage->nCell ) return 0;
   575         -  pCell = pPage->aCell[pCur->idx];
   576         -  return pCell->nKey;
          616  +  assert( pPage!=0 );
          617  +  if( pCur->idx >= pPage->nCell ){
          618  +    *pSize = 0;
          619  +  }else{
          620  +    pCell = pPage->aCell[pCur->idx];
          621  +    *psize = pCell->nKey;
          622  +  }
          623  +  return SQLITE_OK;
   577    624   }
   578    625   
          626  +/*
          627  +** Read payload information from the entry that the pCur cursor is
          628  +** pointing to.  Begin reading the payload at "offset" and read
          629  +** a total of "amt" bytes.  Put the result in zBuf.
          630  +**
          631  +** This routine does not make a distinction between key and data.
          632  +** It just reads bytes from the payload area.
          633  +*/
   579    634   static int getPayload(BtCursor *pCur, int offset, int amt, char *zBuf){
   580    635     char *aData;
   581    636     Pgno nextPage;
          637  +  assert( pCur!=0 && pCur->pPage!=0 );
          638  +  assert( pCur->idx>=0 && pCur->idx<pCur->nCell );
   582    639     aData = pCur->pPage->aCell[pCur->idx].aData;
   583    640     if( offset<MX_LOCAL_PAYLOAD ){
   584    641       int a = amt;
   585    642       if( a+offset>MX_LOCAL_PAYLOAD ){
   586    643         a = MX_LOCAL_PAYLOAD - offset;
   587    644       }
   588    645       memcpy(zBuf, &aData[offset], a);
................................................................................
   615    672         zBuf += a;
   616    673       }
   617    674       sqlitepager_unref(pOvfl);
   618    675     }
   619    676     return amt==0 ? SQLITE_OK : SQLITE_CORRUPT;
   620    677   }
   621    678   
   622         -int sqliteBtreeKey(BtCursor*, int offset, int amt, char *zBuf);
   623         -int sqliteBtreeDataSize(BtCursor*);
   624         -int sqliteBtreeData(BtCursor*, int offset, int amt, char *zBuf);
          679  +/*
          680  +** Read part of the key associated with cursor pCur.  A total
          681  +** of "amt" bytes will be transfered into zBuf[].  The transfer
          682  +** begins at "offset".  If the key does not contain enough data
          683  +** to satisfy the request, no data is fetched and this routine
          684  +** returns SQLITE_ERROR.
          685  +*/
          686  +int sqliteBtreeKey(BtCursor *pCur, int offset, int amt, char *zBuf){
          687  +  Cell *pCell;
          688  +  MemPage *pPage;
          689  +
          690  +  if( amt<0 ) return SQLITE_ERROR;
          691  +  if( offset<0 ) return SQLITE_ERROR;
          692  +  if( amt==0 ) return SQLITE_OK;
          693  +  pPage = pCur->pPage;
          694  +  assert( pPage!=0 );
          695  +  if( pCur->idx >= pPage->nCell ){
          696  +    return SQLITE_ERROR;
          697  +  }
          698  +  pCell = pPage->aCell[pCur->idx];
          699  +  if( amt+offset > pCell->nKey ){
          700  +  return getPayload(pCur, offset, amt, zBuf);
          701  +}
          702  +
          703  +/*
          704  +** Write the number of bytes of data on the entry that the cursor
          705  +** is pointing to into *pSize.  Return SQLITE_OK.  Failure is
          706  +** not possible.
          707  +*/
          708  +int sqliteBtreeDataSize(BtCursor *pCur, int *pSize){
          709  +  Cell *pCell;
          710  +  MemPage *pPage;
          711  +
          712  +  pPage = pCur->pPage;
          713  +  assert( pPage!=0 );
          714  +  if( pCur->idx >= pPage->nCell ){
          715  +    *pSize = 0;
          716  +  }else{
          717  +    pCell = pPage->aCell[pCur->idx];
          718  +    *pSize = pCell->nData;
          719  +  }
          720  +  return SQLITE_OK;
          721  +}
          722  +
          723  +/*
          724  +** Read part of the data associated with cursor pCur.  A total
          725  +** of "amt" bytes will be transfered into zBuf[].  The transfer
          726  +** begins at "offset".  If the size of the data in the record
          727  +** is insufficent to satisfy this request then no data is read
          728  +** and this routine returns SQLITE_ERROR.
          729  +*/
          730  +int sqliteBtreeData(BtCursor *pCur, int offset, int amt, char *zBuf){
          731  +  Cell *pCell;
          732  +  MemPage *pPage;
   625    733   
          734  +  if( amt<0 ) return SQLITE_ERROR;
          735  +  if( offset<0 ) return SQLITE_ERROR;
          736  +  if( amt==0 ) return SQLITE_OK;
          737  +  pPage = pCur->pPage;
          738  +  assert( pPage!=0 );
          739  +  if( pCur->idx >= pPage->nCell ){
          740  +    return SQLITE_ERROR;
          741  +  }
          742  +  pCell = pPage->aCell[pCur->idx];
          743  +  if( amt+offset > pCell->nKey ){
          744  +  return getPayload(pCur, offset + pCell->nKey, amt, zBuf);
          745  +}
   626    746   
   627    747   /*
   628    748   ** Compare the key for the entry that pCur points to against the 
   629    749   ** given key (pKey,nKeyOrig).  Put the comparison result in *pResult.
   630    750   ** The result is negative if pCur<pKey, zero if they are equal and
   631    751   ** positive if pCur>pKey.
   632    752   **
................................................................................
   661    781     nextPage = *(Pgno*)&pCell->aData[MX_LOCAL_PAYLOAD];
   662    782     while( nKey>0 ){
   663    783       OverflowPage *pOvfl;
   664    784       if( nextPage==0 ){
   665    785         return SQLITE_CORRUPT;
   666    786       }
   667    787       rc = sqlitepager_get(pCur->pBt->pPager, nextPage, &pOvfl);
   668         -    if( rc!=0 ){
          788  +    if( rc ){
   669    789         return rc;
   670    790       }
   671    791       nextPage = pOvfl->next;
   672    792       n = nKey;
   673    793       if( n>OVERFLOW_SIZE ){
   674    794         n = OVERFLOW_SIZE;
   675    795       }
................................................................................
   683    803       pKey += n;
   684    804     }
   685    805     c = pCell->nKey - nKeyOrig;
   686    806     *pResult = c;
   687    807     return SQLITE_OK;
   688    808   }
   689    809   
          810  +/*
          811  +** Move the cursor down to a new child page.
          812  +*/
          813  +static int childPage(BtCursor *pCur, int newPgno){
          814  +  int rc;
          815  +  MemPage *pNewPage;
          816  +
          817  +  rc = sqlitepager_get(pCur->pBt->pPager, newPgno, &pNewPage);
          818  +  if( rc ){
          819  +    return rc;
          820  +  }
          821  +  if( !pNewPage->isInit ){
          822  +    initPage(pNewPage, newPgno, pCur->pPage);
          823  +  }
          824  +  sqlitepager_unref(pCur->pPage);
          825  +  pCur->pPage = pNewPage;
          826  +  pCur->idx = 0;
          827  +  return SQLITE_OK;
          828  +}
          829  +
          830  +/*
          831  +** Move the cursor up to the parent page
          832  +*/
          833  +static int parentPage(BtCursor *pCur){
          834  +  Pgno oldPgno;
          835  +  MemPage *pParent;
          836  +
          837  +  pParent = pCur->pPage->pParent;
          838  +  oldPgno = sqlitepager_pagenumber(pCur->pPage);
          839  +  if( pParent==0 ){
          840  +    return SQLITE_INTERNAL;
          841  +  }
          842  +  sqlitepager_ref(pParent);
          843  +  sqlitepager_unref(pCur->pPage);
          844  +  pCur->pPage = pParent;
          845  +  pCur->idx = pPage->nCell;
          846  +  for(i=0; i<pPage->nCell; i++){
          847  +    if( pPage->aCell[i].pgno==oldPgno ){
          848  +      pCur->idx = i;
          849  +      break;
          850  +    }
          851  +  }
          852  +}
          853  +
          854  +/*
          855  +** Move the cursor to the root page
          856  +*/
          857  +static int rootPage(BtCursor *pCur){
          858  +  MemPage *pNew;
          859  +  pNew = pCur->pBt->page1;
          860  +  sqlitepager_ref(pNew);
          861  +  sqlitepager_unref(pCur->pPage);
          862  +  pCur->pPage = pNew;
          863  +  pCur->idx = 0;
          864  +  return SQLITE_OK;
          865  +}
   690    866   
   691    867   /* Move the cursor so that it points to an entry near pKey.
   692         -** Return 0 if the cursor is left pointing exactly at pKey.
   693         -** Return -1 if the cursor points to the largest entry less than pKey.
   694         -** Return 1 if the cursor points to the smallest entry greater than pKey.
          868  +** Return a success code.
          869  +**
          870  +** If pRes!=NULL, then *pRes is written with an integer code to
          871  +** describe the results.  *pRes is set to 0 if the cursor is left 
          872  +** pointing at an entry that exactly matches pKey.  *pRes is made
          873  +** negative if the cursor is on the largest entry less than pKey.
          874  +** *pRes is set positive if the cursor is on the smallest entry
          875  +** greater than pKey.  *pRes is not changed if the return value
          876  +** is something other than SQLITE_OK;
          877  +*/
          878  +int sqliteBtreeMoveto(BtCursor *pCur, void *pKey, int nKey, int *pRes){
          879  +  int rc;
          880  +  rc = rootPage(pCur);
          881  +  if( rc ) return rc;
          882  +  for(;;){
          883  +    int lwr, upr;
          884  +    Pgno chldPg;
          885  +    MemPage *pPage = pCur->pPage;
          886  +    lwr = 0;
          887  +    upr = pPage->nCell-1;
          888  +    while( lwr<=upr ){
          889  +      int c;
          890  +      pCur->idx = (lwr+upr)/2;
          891  +      rc = compareKey(pCur, pKey, nKey, &c);
          892  +      if( rc ) return rc;
          893  +      if( c==0 ){
          894  +        if( pRes ) *pRes = 0;
          895  +        return SQLITE_OK;
          896  +      }
          897  +      if( c<0 ){
          898  +        lwr = pCur->idx+1;
          899  +      }else{
          900  +        upr = pCur->idx-1;
          901  +      }
          902  +    }
          903  +    assert( lwr==upr+1 );
          904  +    if( lwr>=pPage->nCell ){
          905  +      chldPg = pPage->pStart->pgno;
          906  +    }else{
          907  +      chldPg = pPage->aCell[lwr].pgno;
          908  +    }
          909  +    if( chldPg==0 ){
          910  +      if( pRes ) *pRes = c;
          911  +      return SQLITE_OK;
          912  +    }
          913  +    rc = childPage(pCur, chldPg);
          914  +    if( rc ) return rc;
          915  +  }
          916  +}
          917  +
          918  +/*
          919  +** Advance the cursor to the next entry in the database.  If pRes!=NULL
          920  +** then set *pRes=0 on success and set *pRes=1 if the cursor was
          921  +** pointing to the last entry in the database.
   695    922   */
   696         -int sqliteBtreeMoveto(BtCursor*, void *pKey, int nKey);
   697         -int sqliteBtreeDelete(BtCursor*);
          923  +int sqliteBtreeNext(BtCursor *pCur, int *pRes){
          924  +  MemPage *pPage;
          925  +  int rc;
          926  +  int moved = 0;
          927  +  if( pCur->skip_next ){
          928  +    pCur->skip_next = 0;
          929  +    if( pRes ) *pRes = 0;
          930  +    return SQLITE_OK;
          931  +  }
          932  +  pPage = pCur->pPage;
          933  +  pCur->idx++;
          934  +  while( pCur->idx>=pPage->nCell ){
          935  +    if( pCur->depth==0 ){
          936  +      if( pRes ) *pRes = 1;
          937  +      return SQLITE_OK;
          938  +    }
          939  +    rc = parentPage(pCur);
          940  +    if( rc ) return rc;
          941  +    moved = 1;
          942  +    pPage = pCur->pPage;
          943  +  }
          944  +  if( moved ){
          945  +    if( pRes ) *pRes = 0;
          946  +    return SQLITE_OK;
          947  +  }
          948  +  while( pCur->idx<pPage->nCell && pPage->aCell[pCur->idx].pgno>0 ){
          949  +    rc = childPage(pCur, pPage->aCell[pCur->idx].pgno);
          950  +    if( rc ) return rc;
          951  +    pPage = pCur->pPage;
          952  +  }
          953  +  if( pRes ) *pRes = 0;
          954  +  return SQLITE_OK;
          955  +}
          956  +
   698    957   int sqliteBtreeInsert(BtCursor*, void *pKey, int nKey, void *pData, int nData);
   699         -int sqliteBtreeNext(BtCursor*);
          958  +int sqliteBtreeDelete(BtCursor*);

Changes to src/btree.h.

    20     20   **   drh@hwaci.com
    21     21   **   http://www.hwaci.com/drh/
    22     22   **
    23     23   *************************************************************************
    24     24   ** This header file defines the interface that the sqlite B-Tree file
    25     25   ** subsystem.
    26     26   **
    27         -** @(#) $Id: btree.h,v 1.1 2001/04/17 20:09:11 drh Exp $
           27  +** @(#) $Id: btree.h,v 1.2 2001/05/24 21:06:36 drh Exp $
    28     28   */
    29     29   
    30     30   typedef struct Btree Btree;
    31     31   typedef struct BtCursor BtCursor;
    32     32   
    33     33   int sqliteBtreeOpen(const char *zFilename, int mode, Btree **ppBtree);
    34     34   int sqliteBtreeClose(Btree*);
................................................................................
    35     35   
    36     36   int sqliteBtreeBeginTrans(Btree*);
    37     37   int sqliteBtreeCommit(Btree*);
    38     38   int sqliteBtreeRollback(Btree*);
    39     39   
    40     40   
    41     41   int sqliteBtreeCursor(Btree*, BtCursor **ppCur);
    42         -
    43         -/* Move the cursor so that it points to an entry near pKey.
    44         -** Return 0 if the cursor is left pointing exactly at pKey.
    45         -** Return -1 if the cursor points to the largest entry less than pKey.
    46         -** Return 1 if the cursor points to the smallest entry greater than pKey.
    47         -*/
    48         -int sqliteBtreeMoveto(BtCursor*, void *pKey, int nKey);
           42  +int sqliteBtreeMoveto(BtCursor*, void *pKey, int nKey, *pRes);
    49     43   int sqliteBtreeDelete(BtCursor*);
    50     44   int sqliteBtreeInsert(BtCursor*, void *pKey, int nKey, void *pData, int nData);
    51         -int sqliteBtreeNext(BtCursor*);
    52         -int sqliteBtreeKeySize(BtCursor*);
           45  +int sqliteBtreeNext(BtCursor*, int *pRes);
           46  +int sqliteBtreeKeySize(BtCursor*, int *pSize);
    53     47   int sqliteBtreeKey(BtCursor*, int offset, int amt, char *zBuf);
    54         -int sqliteBtreeDataSize(BtCursor*);
           48  +int sqliteBtreeDataSize(BtCursor*, int *pSize);
    55     49   int sqliteBtreeData(BtCursor*, int offset, int amt, char *zBuf);
    56     50   int sqliteBtreeCloseCursor(BtCursor*);

Changes to src/pager.c.

    23     23   *************************************************************************
    24     24   ** This is the implementation of the page cache subsystem.
    25     25   ** 
    26     26   ** The page cache is used to access a database file.  The pager journals
    27     27   ** all writes in order to support rollback.  Locking is used to limit
    28     28   ** access to one or more reader or one writer.
    29     29   **
    30         -** @(#) $Id: pager.c,v 1.6 2001/05/21 13:45:10 drh Exp $
           30  +** @(#) $Id: pager.c,v 1.7 2001/05/24 21:06:36 drh Exp $
    31     31   */
    32     32   #include "sqliteInt.h"
    33     33   #include "pager.h"
    34     34   #include <fcntl.h>
    35     35   #include <sys/stat.h>
    36     36   #include <unistd.h>
    37     37   #include <assert.h>
................................................................................
   109    109   struct Pager {
   110    110     char *zFilename;            /* Name of the database file */
   111    111     char *zJournal;             /* Name of the journal file */
   112    112     int fd, jfd;                /* File descriptors for database and journal */
   113    113     int dbSize;                 /* Number of pages in the file */
   114    114     int origDbSize;             /* dbSize before the current change */
   115    115     int nExtra;                 /* Add this many bytes to each in-memory page */
          116  +  void (*xDestructor)(void*); /* Call this routine when freeing pages */
   116    117     int nPage;                  /* Total number of in-memory pages */
   117    118     int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
   118    119     int mxPage;                 /* Maximum number of pages to hold in cache */
   119    120     int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
   120    121     unsigned char state;        /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
   121    122     unsigned char errMask;      /* One of several kinds of errors */
   122    123     PgHdr *pFirst, *pLast;      /* List of free pages */
................................................................................
   473    474     pPager->errMask = 0;
   474    475     pPager->pFirst = 0;
   475    476     pPager->pLast = 0;
   476    477     memset(pPager->aHash, 0, sizeof(pPager->aHash));
   477    478     *ppPager = pPager;
   478    479     return SQLITE_OK;
   479    480   }
          481  +
          482  +/*
          483  +** Set the destructor for this pager.  If not NULL, the destructor is called
          484  +** when the reference count on the page reaches zero.  
          485  +**
          486  +** The destructor is not called as a result sqlitepager_close().  
          487  +** Destructors are only called by sqlitepager_unref().
          488  +*/
          489  +void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
          490  +  pPager->xDestructor = xDesc;
          491  +}
   480    492   
   481    493   /*
   482    494   ** Return the total number of pages in the file opened by pPager.
   483    495   */
   484    496   int sqlitepager_pagecount(Pager *pPager){
   485    497     int n;
   486    498     struct stat statbuf;
................................................................................
   802    814     /* Decrement the reference count for this page
   803    815     */
   804    816     pPg = DATA_TO_PGHDR(pData);
   805    817     assert( pPg->nRef>0 );
   806    818     pPager = pPg->pPager;
   807    819     pPg->nRef--;
   808    820   
   809         -  /* When the number of references to a page reach 0, add the
   810         -  ** page to the freelist.
          821  +  /* When the number of references to a page reach 0, call the
          822  +  ** destructor and add the page to the freelist.
   811    823     */
   812    824     if( pPg->nRef==0 ){
   813    825       pPg->pNextFree = 0;
   814    826       pPg->pPrevFree = pPager->pLast;
   815    827       pPager->pLast = pPg;
   816    828       if( pPg->pPrevFree ){
   817    829         pPg->pPrevFree->pNextFree = pPg;
   818    830       }else{
   819    831         pPager->pFirst = pPg;
   820    832       }
          833  +    if( pPager->xDestructor ){
          834  +      pPager->xDestructor(pData);
          835  +    }
   821    836     
   822    837       /* When all pages reach the freelist, drop the read lock from
   823    838       ** the database file.
   824    839       */
   825    840       pPager->nRef--;
   826    841       assert( pPager->nRef>=0 );
   827    842       if( pPager->nRef==0 ){

Changes to src/pager.h.

    21     21   **   http://www.hwaci.com/drh/
    22     22   **
    23     23   *************************************************************************
    24     24   ** This header file defines the interface that the sqlite page cache
    25     25   ** subsystem.  The page cache subsystem reads and writes a file a page
    26     26   ** at a time and provides a journal for rollback.
    27     27   **
    28         -** @(#) $Id: pager.h,v 1.3 2001/04/28 16:52:42 drh Exp $
           28  +** @(#) $Id: pager.h,v 1.4 2001/05/24 21:06:36 drh Exp $
    29     29   */
    30     30   
    31     31   /*
    32     32   ** The size of one page
    33     33   */
    34     34   #define SQLITE_PAGE_SIZE 1024
    35     35   
................................................................................
    40     40   typedef unsigned int Pgno;
    41     41   
    42     42   /*
    43     43   ** Each open file is managed by a separate instance of the "Pager" structure.
    44     44   */
    45     45   typedef struct Pager Pager;
    46     46   
    47         -int sqlitepager_open(Pager **ppPager, const char *zFilename,int nPage,int nEx);
           47  +int sqlitepager_open(Pager **ppPager,const char *zFilename,int nPage,int nEx);
           48  +void sqiltepager_set_destructor(Pager*, void(*)(void*));
    48     49   int sqlitepager_close(Pager *pPager);
    49     50   int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage);
    50     51   void *sqlitepager_lookup(Pager *pPager, Pgno pgno);
    51     52   int sqlitepager_unref(void*);
    52     53   Pgno sqlitepager_pagenumber(void*);
    53     54   int sqlitepager_write(void*);
    54     55   int sqlitepager_pagecount(Pager*);
    55     56   int sqlitepager_commit(Pager*);
    56     57   int sqlitepager_rollback(Pager*);
    57     58   int *sqlitepager_stats(Pager*);

Changes to test/pager.test.

    19     19   #   drh@hwaci.com
    20     20   #   http://www.hwaci.com/drh/
    21     21   #
    22     22   #***********************************************************************
    23     23   # This file implements regression tests for SQLite library.  The
    24     24   # focus of this script is page cache subsystem.
    25     25   #
    26         -# $Id: pager.test,v 1.4 2001/05/21 13:45:10 drh Exp $
           26  +# $Id: pager.test,v 1.5 2001/05/24 21:06:36 drh Exp $
    27     27   
    28     28   
    29     29   set testdir [file dirname $argv0]
    30     30   source $testdir/tester.tcl
    31     31   
    32     32   if {$dbprefix!="mem:" && [info commands pager_open]!=""} {
    33     33   
................................................................................
    62     62   } {0}
    63     63   do_test pager-2.2 {
    64     64     set v [catch {
    65     65       set ::g1 [page_get $::p1 0]
    66     66     } msg]
    67     67     lappend v $msg
    68     68   } {1 SQLITE_ERROR}
    69         -do_test pager-2.3 {
           69  +do_test pager-2.3.1 {
           70  +  set ::gx [page_lookup $::p1 1]
           71  +} {}
           72  +do_test pager-2.3.2 {
           73  +  pager_stats $::p1
           74  +} {ref 0 page 0 max 10 size -1 state 0 err 0 hit 0 miss 0 ovfl 0}
           75  +do_test pager-2.3.3 {
    70     76     set v [catch {
    71     77       set ::g1 [page_get $::p1 1]
    72     78     } msg]
    73     79     if {$v} {lappend v $msg}
    74     80     set v
    75     81   } {0}
           82  +do_test pager-2.3.3 {
           83  +  pager_stats $::p1
           84  +} {ref 1 page 1 max 10 size 0 state 1 err 0 hit 0 miss 1 ovfl 0}
           85  +do_test pager-2.3.4 {
           86  +  set ::gx [page_lookup $::p1 1]
           87  +  expr {$::gx!=""}
           88  +} {1}
           89  +do_test pager-2.3.5 {
           90  +  pager_stats $::p1
           91  +} {ref 1 page 1 max 10 size 0 state 1 err 0 hit 0 miss 1 ovfl 0}
           92  +do_test pager-2.3.6 {
           93  +  expr $::g1==$::gx
           94  +} {1}
           95  +do_test pager-2.3.7 {
           96  +  page_unref $::gx
           97  +  pager_stats $::p1
           98  +} {ref 1 page 1 max 10 size 0 state 1 err 0 hit 0 miss 1 ovfl 0}
    76     99   do_test pager-2.4 {
    77    100     pager_stats $::p1
    78    101   } {ref 1 page 1 max 10 size 0 state 1 err 0 hit 0 miss 1 ovfl 0}
    79    102   do_test pager-2.5 {
    80    103     pager_pagecount $::p1
    81    104   } {0}
    82    105   do_test pager-2.6 {