/ Check-in [fdc629db]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Incremental btree.c changes. (CVS 1312)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:fdc629dbbf974024215969e0bd3def4597258812
User & Date: drh 2004-05-03 19:49:33
Context
2004-05-04
15:00
Added template for the utf.c file containing conversion routines. (CVS 1313) check-in: 89b42c46 user: drh tags: trunk
2004-05-03
19:49
Incremental btree.c changes. (CVS 1312) check-in: fdc629db user: drh tags: trunk
2004-05-02
21:12
Changes to btree for the new file format are mostly complete. Still need to test and debug. (CVS 1311) check-in: 0eee3b5c user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btree.c.

     5      5   ** a legal notice, here is a blessing:
     6      6   **
     7      7   **    May you do good and not evil.
     8      8   **    May you find forgiveness for yourself and forgive others.
     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12         -** $Id: btree.c,v 1.107 2004/05/02 21:12:19 drh Exp $
           12  +** $Id: btree.c,v 1.108 2004/05/03 19:49:33 drh Exp $
    13     13   **
    14     14   ** This file implements a external (disk-based) database using BTrees.
    15     15   ** For a detailed discussion of BTrees, refer to
    16     16   **
    17     17   **     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
    18     18   **     "Sorting And Searching", pages 473-480. Addison-Wesley
    19     19   **     Publishing Company, Reading, Massachusetts.
................................................................................
   463    463   
   464    464     data = pPage->aData;
   465    465     assert( sqlitepager_iswriteable(data->aData) );
   466    466     assert( pPage->pBt );
   467    467     if( nByte<4 ) nByte = 4;
   468    468     if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
   469    469     hdr = pPage->hdrOffset;
   470         -  if( data[hdr+5]>=252 ){
          470  +  if( data[hdr+5]>=60 ){
   471    471       defragmentPage(pPage);
   472    472     }
   473    473     addr = hdr+1;
   474    474     pc = get2byte(&data[addr]);
   475    475     assert( addr<pc );
   476    476     assert( pc<=pPage->pageSize-4 );
   477    477     while( (size = get2byte(&data[pc+2]))<nByte ){
................................................................................
  1094   1094   ** to start a new checkpoint if another checkpoint is already active.
  1095   1095   */
  1096   1096   int sqlite3BtreeBeginStmt(Btree *pBt){
  1097   1097     int rc;
  1098   1098     if( !pBt->inTrans || pBt->inStmt ){
  1099   1099       return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
  1100   1100     }
  1101         -  rc = pBt->readOnly ? SQLITE_OK : sqlitepager_ckpt_begin(pBt->pPager);
         1101  +  rc = pBt->readOnly ? SQLITE_OK : sqlitepager_stmt_begin(pBt->pPager);
  1102   1102     pBt->inStmt = 1;
  1103   1103     return rc;
  1104   1104   }
  1105   1105   
  1106   1106   
  1107   1107   /*
  1108   1108   ** Commit a checkpoint to transaction currently in progress.  If no
  1109   1109   ** checkpoint is active, this is a no-op.
  1110   1110   */
  1111   1111   int sqlite3BtreeCommitStmt(Btree *pBt){
  1112   1112     int rc;
  1113   1113     if( pBt->inStmt && !pBt->readOnly ){
  1114         -    rc = sqlitepager_ckpt_commit(pBt->pPager);
         1114  +    rc = sqlitepager_stmt_commit(pBt->pPager);
  1115   1115     }else{
  1116   1116       rc = SQLITE_OK;
  1117   1117     }
  1118   1118     pBt->inStmt = 0;
  1119   1119     return rc;
  1120   1120   }
  1121   1121   
................................................................................
  1127   1127   ** to use a cursor that was open at the beginning of this operation
  1128   1128   ** will result in an error.
  1129   1129   */
  1130   1130   int sqlite3BtreeRollbackStmt(Btree *pBt){
  1131   1131     int rc;
  1132   1132     BtCursor *pCur;
  1133   1133     if( pBt->inStmt==0 || pBt->readOnly ) return SQLITE_OK;
  1134         -  rc = sqlitepager_ckpt_rollback(pBt->pPager);
         1134  +  rc = sqlitepager_stmt_rollback(pBt->pPager);
  1135   1135     for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
  1136   1136       MemPage *pPage = pCur->pPage;
  1137   1137       if( pPage && !pPage->isInit ){
  1138   1138         releasePage(pPage);
  1139   1139         pCur->pPage = 0;
  1140   1140       }
  1141   1141     }
................................................................................
  1987   1987     int rc;
  1988   1988     int n;     /* Number of pages on the freelist */
  1989   1989     int k;     /* Number of leaves on the trunk of the freelist */
  1990   1990   
  1991   1991     pPage1 = pBt->pPage1;
  1992   1992     n = get4byte(&pPage1->aData[36]);
  1993   1993     if( n>0 ){
  1994         -    /* There exists pages on the freelist.  Reuse one of those pages. */
         1994  +    /* There are pages on the freelist.  Reuse one of those pages. */
  1995   1995       MemPage *pTrunk;
  1996   1996       rc = sqlitepager_write(pPage1->aData);
  1997   1997       if( rc ) return rc;
  1998   1998       put4byte(&pPage1->aData[36], n-1);
  1999   1999       rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk);
  2000   2000       if( rc ) return rc;
  2001   2001       rc = sqlitepager_write(pTrunk->aData);
................................................................................
  2129   2129       if( rc ) return rc;
  2130   2130       sqlitepager_unref(pOvfl->aData);
  2131   2131     }
  2132   2132     return SQLITE_OK;
  2133   2133   }
  2134   2134   
  2135   2135   /*
  2136         -** Compute the number of bytes required by a cell header.  Fill in
  2137         -** the nData and nKey values of the header that pHeader points to.
  2138         -*/
  2139         -static int makeCellHeader(
  2140         -  MemPage *pPage,          /* The page that will contain the cell */
  2141         -  u64 nKey,                /* Size of key, or the key value if intKey */
  2142         -  int nData,               /* Size of data.  Ignored for zerodata */
  2143         -  unsigned char *pHeader   /* Write header bytes here */
  2144         -){
  2145         -  int n = 2;
  2146         -  if( !pPage->leaf ) n += 4;
  2147         -  if( !pPage->zeroData ){
  2148         -    n += putVarint(&pHeader[n], nData);
  2149         -  }
  2150         -  n += putVarint(&pHeader[n], nKey);
  2151         -  return n;
  2152         -}
  2153         -
  2154         -/*
  2155         -** Fill in the payload section of a cell into the space provided.  If
  2156         -** the payload will not completely fit in the cell, allocate additional
  2157         -** overflow pages and fill them in.
         2136  +** Create the byte sequence used to represent a cell on page pPage
         2137  +** and write that byte sequence into pCell[].  Overflow pages are
         2138  +** allocated and filled in as necessary.  The calling procedure
         2139  +** is responsible for making sure sufficient space has been allocated
         2140  +** for pCell[].
         2141  +**
         2142  +** Note that pCell does not necessary need to point to the pPage->aData
         2143  +** area.  pCell might point to some temporary storage.  The cell will
         2144  +** be constructed in this temporary area then copied into pPage->aData
         2145  +** later.
  2158   2146   */
  2159   2147   static int fillInCell(
  2160   2148     MemPage *pPage,                /* The page that contains the cell */
  2161   2149     unsigned char *pCell,          /* Complete text of the cell */
  2162   2150     const void *pKey, u64 nKey,    /* The key */
  2163   2151     const void *pData,int nData,   /* The data */
  2164   2152     int *pnSize                    /* Write cell size here */
................................................................................
  2170   2158     MemPage *pOvfl = 0;
  2171   2159     unsigned char *pPrior;
  2172   2160     unsigned char *pPayload;
  2173   2161     Btree *pBt = pPage->pBt;
  2174   2162     Pgno pgnoOvfl = 0;
  2175   2163     int nHeader;
  2176   2164   
  2177         -  nHeader = makeCellHeader(pPage, pCell, nKey, nData);
         2165  +  /* Fill in the header. */
         2166  +  nHeader = 2;
         2167  +  if( !pPage->leaf ){
         2168  +    nHeader += 4;
         2169  +  }
         2170  +  if( !pPage->zeroData ){
         2171  +    nHeader += putVarint(&pCell[nHeader], nData);
         2172  +  }
         2173  +  nHeader += putVarint(&pCell[nHeader], nKey);
         2174  +  
         2175  +  /* Fill in the payload */
         2176  +  if( pPage->zeroData ){
         2177  +    nData = 0;
         2178  +  }
  2178   2179     nPayload = nData;
  2179   2180     if( pPage->intKey ){
  2180   2181       pSrc = pData;
  2181   2182       nSrc = nData;
  2182         -    nSrc2 = 0;
         2183  +    nData = 0;
  2183   2184     }else{
  2184   2185       nPayload += nKey;
  2185   2186       pSrc = pKey;
  2186   2187       nSrc = nKey;
  2187   2188     }
  2188   2189     if( nPayload>pBt->maxLocal ){
  2189   2190       *pnSize = nHeader + pBt->maxLocal + 4;
................................................................................
  2350   2351       put2byte(&pPage->aData[idxFrom], idx);
  2351   2352       idxFrom = idx;
  2352   2353     }
  2353   2354     put2byte(&pPage->aData[idxFrom], 0);
  2354   2355   }
  2355   2356   
  2356   2357   /*
  2357         -** Make a copy of the contents of pFrom into pTo.  The pFrom->aCell[]
         2358  +** Move the content of the page at pFrom over to pTo.  The pFrom->aCell[]
  2358   2359   ** pointers that point into pFrom->aData[] must be adjusted to point
  2359   2360   ** into pTo->aData[] instead.  But some pFrom->aCell[] entries might
  2360   2361   ** not point to pFrom->aData[].  Those are unchanged.
         2362  +**
         2363  +** Over this operation completes, the meta data for pFrom is zeroed.
  2361   2364   */
  2362   2365   static void copyPage(MemPage *pTo, MemPage *pFrom){
  2363   2366     uptr from, to;
  2364   2367     int i;
  2365   2368     int pageSize;
  2366   2369     int ofst;
  2367   2370   
  2368   2371     assert( pTo->hdrOffset==0 );
  2369   2372     ofst = pFrom->hdrOffset;
  2370   2373     pageSize = pTo->pBt->pageSize;
  2371         -  memcpy(pTo->aData, &pFrom->aData[ofst], pageSize - ofst);
  2372         -  pTo->pParent = 0;
  2373         -  pTo->isInit = 1;
  2374         -  resizeCellArray(pTo, pFrom->nCell);
  2375         -  pTo->nCell = pFrom->nCell;
  2376         -  pTo->nFree = pFrom->nFree + ofst;
         2374  +  sqliteFree(pTo->aCell);
         2375  +  memcpy(pTo->aData, &pFrom->aData[ofst], pageSize - ofst + sizeof(MemPage));
         2376  +  memset(pFrom, 0, sizeof(MemPage));
  2377   2377     assert( pTo->aData[5]<155 );
  2378   2378     pTo->aData[5] += ofst;
  2379   2379     pTo->isOverfull = pFrom->isOverfull;
  2380   2380     to = Addr(pTo->aData);
  2381         -  from = Addr(pFrom->aData);
         2381  +  from = Addr(&pFrom->aData[ofst]);
  2382   2382     for(i=0; i<pTo->nCell; i++){
  2383         -    uptr x = Addr(pFrom->aCell[i]);
  2384         -    if( x>from && x<from+pageSize ){
         2383  +    uptr x = Addr(pTo->aCell[i]);
         2384  +    if( x>from && x<from+pageSize-ofst ){
  2385   2385         *((uptr*)&pTo->aCell[i]) = x + to - from;
  2386         -    }else{
  2387         -      pTo->aCell[i] = pFrom->aCell[i];
  2388   2386       }
  2389   2387     }
  2390   2388   }
  2391   2389   
  2392   2390   /*
  2393   2391   ** The following parameters determine how many adjacent pages get involved
  2394   2392   ** in a balancing operation.  NN is the number of neighbors on either side
................................................................................
  2447   2445     int nNew;                    /* Number of pages in apNew[] */
  2448   2446     int nDiv;                    /* Number of cells in apDiv[] */
  2449   2447     int i, j, k;                 /* Loop counters */
  2450   2448     int idx;                     /* Index of pPage in pParent->apCell[] */
  2451   2449     int nxDiv;                   /* Next divider slot in pParent->apCell[] */
  2452   2450     int rc;                      /* The return code */
  2453   2451     int iCur;                    /* apCell[iCur] is the cell of the cursor */
         2452  +  int leafCorrection;          /* 4 if pPage is a leaf.  0 if not */
         2453  +  int usableSpace;             /* Bytes in pPage beyond the header */
         2454  +  int pageFlags;               /* Value of pPage->aData[0] */
  2454   2455     MemPage *pOldCurPage;        /* The cursor originally points to this page */
  2455   2456     int subtotal;                /* Subtotal of bytes in cells on one page */
  2456   2457     MemPage *apOld[NB];          /* pPage and up to two siblings */
  2457   2458     Pgno pgnoOld[NB];            /* Page numbers for each page in apOld[] */
  2458   2459     MemPage *apCopy[NB];         /* Private copies of apOld[] pages */
  2459   2460     MemPage *apNew[NB+1];        /* pPage and up to NB siblings after balancing */
  2460   2461     Pgno pgnoNew[NB+1];          /* Page numbers for each page in apNew[] */
................................................................................
  2626   2627         break;
  2627   2628       }
  2628   2629       rc = getPage(pBt, pgnoOld[i], &apOld[i]);
  2629   2630       if( rc ) goto balance_cleanup;
  2630   2631       rc = initPage(apOld[i], pParent);
  2631   2632       if( rc ) goto balance_cleanup;
  2632   2633       apOld[i]->idxParent = k;
         2634  +    apCopy[i] = 0;
         2635  +    assert( i==nOld );
  2633   2636       nOld++;
  2634   2637     }
  2635   2638   
  2636   2639     /*
  2637   2640     ** Make copies of the content of pPage and its siblings into aOld[].
  2638   2641     ** The rest of this function will use data from the copies rather
  2639   2642     ** that the original pages since the original pages will be in the
................................................................................
  2840   2843     rc = balance(pParent);
  2841   2844   
  2842   2845     /*
  2843   2846     ** Cleanup before returning.
  2844   2847     */
  2845   2848   balance_cleanup:
  2846   2849     for(i=0; i<nOld; i++){
  2847         -    if( apOld[i]!=0 ) sqlitepager_unref(apOld[i]->aData);
         2850  +    releasePage(apOld[i]);
         2851  +    if( apCopy[i] ){
         2852  +      releasePage(apCopy[i]->pParent);
         2853  +      sqliteFree(apCopy[i]->aCell);
         2854  +    }
  2848   2855     }
  2849   2856     for(i=0; i<nNew; i++){
  2850         -    sqlitepager_unref(apNew[i]->aData);
         2857  +    releasePage(apNew[i]);
  2851   2858     }
  2852         -  sqlitepager_unref(pParent->aData);
         2859  +  releasePage(pParent);
  2853   2860     return rc;
  2854   2861   }
  2855   2862   
  2856   2863   /*
  2857   2864   ** This routine checks all cursors that point to the same table
  2858   2865   ** as pCur points to.  If any of those cursors were opened with
  2859   2866   ** wrFlag==0 then this routine returns SQLITE_LOCKED.  If all
................................................................................
  2870   2877   */
  2871   2878   static int checkReadLocks(BtCursor *pCur){
  2872   2879     BtCursor *p;
  2873   2880     assert( pCur->wrFlag );
  2874   2881     for(p=pCur->pShared; p!=pCur; p=p->pShared){
  2875   2882       assert( p );
  2876   2883       assert( p->pgnoRoot==pCur->pgnoRoot );
         2884  +    assert( p->pPage->pgno==sqlitepager_pagenumber(p->pPage->aData);
  2877   2885       if( p->wrFlag==0 ) return SQLITE_LOCKED;
  2878         -    if( sqlitepager_pagenumber(p->pPage)!=p->pgnoRoot ){
         2886  +    if( p->pPage->pgno!=p->pgnoRoot ){
  2879   2887         moveToRoot(p);
  2880   2888       }
  2881   2889     }
  2882   2890     return SQLITE_OK;
  2883   2891   }
  2884   2892   
  2885   2893   /*
  2886   2894   ** Insert a new record into the BTree.  The key is given by (pKey,nKey)
  2887   2895   ** and the data is given by (pData,nData).  The cursor is used only to
  2888         -** define what database the record should be inserted into.  The cursor
         2896  +** define what table the record should be inserted into.  The cursor
  2889   2897   ** is left pointing at a random location.
  2890   2898   **
  2891   2899   ** For an INTKEY table, only the nKey value of the key is used.  pKey is
  2892   2900   ** ignored.  For a ZERODATA table, the pData and nData are both ignored.
  2893   2901   */
  2894   2902   int sqlite3BtreeInsert(
  2895   2903     BtCursor *pCur,                /* Insert data into the table of this cursor */
................................................................................
  2916   2924     }
  2917   2925     if( checkReadLocks(pCur) ){
  2918   2926       return SQLITE_LOCKED; /* The table pCur points to has a read lock */
  2919   2927     }
  2920   2928     rc = sqlite3BtreeMoveto(pCur, pKey, nKey, &loc);
  2921   2929     if( rc ) return rc;
  2922   2930     pPage = pCur->pPage;
  2923         -  assert( nData==0 || pPage->zeroData!=0 );
  2924   2931     assert( pPage->isInit );
  2925         -  rc = sqlitepager_write(pPage);
         2932  +  rc = sqlitepager_write(pPage->aData);
  2926   2933     if( rc ) return rc;
  2927   2934     rc = fillInCell(pPage, &newCell, pKey, nKey, pData, nData, &szNew);
  2928   2935     if( rc ) return rc;
  2929   2936     assert( szNew==cellSize(pPage, newCell) );
  2930   2937     if( loc==0 ){
  2931   2938       int szOld
  2932   2939       assert( pCur->idx>=0 && pCur->idx<pPage->nPage );
................................................................................
  2978   2985     }
  2979   2986     if( !pCur->wrFlag ){
  2980   2987       return SQLITE_PERM;   /* Did not open this cursor for writing */
  2981   2988     }
  2982   2989     if( checkReadLocks(pCur) ){
  2983   2990       return SQLITE_LOCKED; /* The table pCur points to has a read lock */
  2984   2991     }
  2985         -  rc = sqlitepager_write(pPage);
         2992  +  rc = sqlitepager_write(pPage->aData);
  2986   2993     if( rc ) return rc;
  2987   2994     pCell = pPage->aCell[pCur->idx];
  2988   2995     if( !pPage->leaf ){
  2989   2996       pgnoChild = get4byte(&pCell[2]);
  2990   2997     }
  2991   2998     clearCell(pPage, pCell);
  2992   2999     if( !pPage->leaf ){
................................................................................
  3003   3010       int notUsed;
  3004   3011       getTempCursor(pCur, &leafCur);
  3005   3012       rc = sqlite3BtreeNext(&leafCur, &notUsed);
  3006   3013       if( rc!=SQLITE_OK ){
  3007   3014         if( rc!=SQLITE_NOMEM ) rc = SQLITE_CORRUPT;
  3008   3015         return rc;
  3009   3016       }
  3010         -    rc = sqlitepager_write(leafCur.pPage);
         3017  +    rc = sqlitepager_write(leafCur.pPage->aData);
  3011   3018       if( rc ) return rc;
  3012   3019       dropCell(pPage, pCur->idx, cellSize(pPage, pCell));
  3013   3020       pNext = leafCur.pPage->aCell[leafCur.idx];
  3014   3021       szNext = cellSize(leafCur.pPage, pNext);
  3015   3022       insertCell(pPage, pCur->idx, &pNext[-4], szNext+4);
  3016   3023       put4byte(&pNext[-2], pgnoChild);
  3017   3024       rc = balance(pPage);
................................................................................
  3048   3055     if( pBt->readOnly ){
  3049   3056       return SQLITE_READONLY;
  3050   3057     }
  3051   3058     rc = allocatePage(pBt, &pRoot, &pgnoRoot, 0);
  3052   3059     if( rc ) return rc;
  3053   3060     assert( sqlitepager_iswriteable(pRoot->aData) );
  3054   3061     zeroPage(pBt, pRoot);
  3055         -  sqlitepager_unref(pRoot);
         3062  +  sqlitepager_unref(pRoot->aData);
  3056   3063     *piTable = (int)pgnoRoot;
  3057   3064     return SQLITE_OK;
  3058   3065   }
  3059   3066   
  3060   3067   /*
  3061   3068   ** Erase the given database page and all its children.  Return
  3062   3069   ** the page to the freelist.
................................................................................
  3272   3279       while( idx>0 && idx<SQLITE_USABLE_SIZE-MIN_CELL_SIZE ){
  3273   3280         unsigned char *pCell = &pPage->aData[idx];
  3274   3281         fileBtreePageDump(pBt, get4byte(&pPage->aData[idx+2]), 1);
  3275   3282         idx = get2byte(&pPage->aData[idx]);
  3276   3283       }
  3277   3284       fileBtreePageDump(pBt, get4byte(&pPage->aData[hdrOffset+6]), 1);
  3278   3285     }
  3279         -  sqlitepager_unref(pPage);
         3286  +  sqlitepager_unref(pPage->aData);
  3280   3287     return SQLITE_OK;
  3281   3288   }
  3282   3289   #endif
  3283   3290   
  3284   3291   #ifdef SQLITE_TEST
  3285   3292   /*
  3286   3293   ** Fill aResult[] with information about the entry and page that the
................................................................................
  3298   3305   ** This routine is used for testing and debugging only.
  3299   3306   */
  3300   3307   static int fileBtreeCursorDump(BtCursor *pCur, int *aResult){
  3301   3308     int cnt, idx;
  3302   3309     MemPage *pPage = pCur->pPage;
  3303   3310     Btree *pBt = pCur->pBt;
  3304   3311     assert( pPage->isInit );
  3305         -  aResult[0] = sqlitepager_pagenumber(pPage);
         3312  +  aResult[0] = sqlitepager_pagenumber(pPage->aData);
         3313  +  assert( aResult[0]==pPage->pgno );
  3306   3314     aResult[1] = pCur->idx;
  3307   3315     aResult[2] = pPage->nCell;
  3308   3316     if( pCur->idx>=0 && pCur->idx<pPage->nCell ){
  3309   3317       aResult[3] = cellSize(pPage, pPage->aCell[pCur->idx]);
  3310   3318       aResult[6] = pPage->leaf ? 0 : get4byte(&pPage->aCell[pCur->idx][2]);
  3311   3319     }else{
  3312   3320       aResult[3] = 0;
................................................................................
  3488   3496       sprintf(zMsg, "unable to get the page. error code=%d", rc);
  3489   3497       checkAppendMsg(pCheck, zContext, zMsg);
  3490   3498       return 0;
  3491   3499     }
  3492   3500     if( (rc = initPage(pPage, pParent))!=0 ){
  3493   3501       sprintf(zMsg, "initPage() returns error code %d", rc);
  3494   3502       checkAppendMsg(pCheck, zContext, zMsg);
  3495         -    sqlitepager_unref(pPage);
         3503  +    releasePage(pPage);
  3496   3504       return 0;
  3497   3505     }
  3498   3506   
  3499   3507   #if 0
  3500   3508   
  3501   3509     /* Check out all the cells.
  3502   3510     */