Index: src/bt_main.c ================================================================== --- src/bt_main.c +++ src/bt_main.c @@ -35,12 +35,11 @@ typedef struct BtOvfl BtOvfl; struct BtOvfl { int nKey; int nVal; - u8 *pBuf; - int nBuf; + sqlite4_buffer buf; }; /* ** Database cursor handle. */ @@ -49,10 +48,14 @@ int nPg; /* Number of valid entries in apPage[] */ int aiCell[BT_MAX_DEPTH]; /* Current cell of each apPage[] entry */ BtPage *apPage[BT_MAX_DEPTH]; /* All pages from root to current leaf */ BtOvfl ovfl; /* Overflow cache (see above) */ bt_cursor *pNextCsr; /* Next cursor opened by same db handle */ + + int bRequireReseek; + int bSkipNext; + int bSkipPrev; }; #ifndef btErrorBkpt int btErrorBkpt(int rc){ static int error_cnt = 0; @@ -188,10 +191,11 @@ return sqlite4BtPagerTransactionLevel(db->pPager); } static void btCsrSetup(bt_db *db, bt_cursor *pCsr){ memset(pCsr, 0, sizeof(bt_cursor)); + sqlite4_env_config(db->pEnv, SQLITE4_ENVCONFIG_GETMM, &pCsr->ovfl.buf.pMM); pCsr->pDb = db; } int sqlite4BtCsrOpen(bt_db *db, int nExtra, bt_cursor **ppCsr){ int rc = SQLITE4_OK; /* Return Code */ @@ -210,19 +214,29 @@ btCheckPageRefs(db); return rc; } -static void btCsrReset(bt_cursor *pCsr, int bFreeBuffer){ +static void btCsrReleaseAll(bt_cursor *pCsr){ int i; for(i=0; inPg; i++){ sqlite4BtPageRelease(pCsr->apPage[i]); } - if( bFreeBuffer ) sqlite4_free(pCsr->pDb->pEnv, pCsr->ovfl.pBuf); pCsr->nPg = 0; } + +static void btCsrReset(bt_cursor *pCsr, int bFreeBuffer){ + btCsrReleaseAll(pCsr); + if( bFreeBuffer ){ + sqlite4_buffer_clear(&pCsr->ovfl.buf); + } + pCsr->bSkipNext = 0; + pCsr->bSkipPrev = 0; + pCsr->bRequireReseek = 0; +} + int sqlite4BtCsrClose(bt_cursor *pCsr){ if( pCsr ){ bt_db *pDb = pCsr->pDb; bt_cursor **pp; btCheckPageRefs(pDb); @@ -478,20 +492,27 @@ btCsrAscend(pCsr, nAscend); *piRes = res; return rc; } +#define BT_CSRSEEK_SEEK 0 +#define BT_CSRSEEK_UPDATE 1 +#define BT_CSRSEEK_RESEEK 2 + static int btCsrSeek( bt_cursor *pCsr, const void *pK, /* Key to seek for */ int nK, /* Size of key pK in bytes */ int eSeek, /* Seek mode (a BT_SEEK_XXX constant) */ - int bUpdate + int eCsrseek ){ const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager); u32 pgno; /* Page number for next page to load */ int rc = SQLITE4_OK; /* Return Code */ + + assert( eSeek==BT_SEEK_EQ || eCsrseek!=BT_CSRSEEK_RESEEK ); + assert( eSeek==BT_SEEK_GE || eCsrseek!=BT_CSRSEEK_UPDATE ); /* Reset the cursor */ btCsrReset(pCsr, 0); /* Figure out the root page number */ @@ -540,25 +561,38 @@ pgno = btChildPgno(aData, pgsz, iHi); }else{ pgno = 0; if( res!=0 ){ - assert( BT_SEEK_LEFAST<0 && BT_SEEK_LE<0 ); - if( eSeek<0 ){ - rc = sqlite4BtCsrPrev(pCsr); - }else if( eSeek==BT_SEEK_EQ ){ - rc = SQLITE4_NOTFOUND; - }else if( iHi==nCell ){ - if( bUpdate ){ - rc = SQLITE4_NOTFOUND; - }else{ - rc = sqlite4BtCsrNext(pCsr); - } - }else{ - rc = SQLITE4_INEXACT; - } - if( rc==SQLITE4_OK ) rc = SQLITE4_INEXACT; + if( eSeek==BT_SEEK_EQ ){ + if( eCsrseek==BT_CSRSEEK_RESEEK ){ + rc = SQLITE4_OK; + if( iHi==nCell ){ + assert( pCsr->aiCell[pCsr->nPg-1]>0 ); + pCsr->aiCell[pCsr->nPg-1]--; + pCsr->bSkipPrev = 1; + }else{ + pCsr->bSkipNext = 1; + } + }else{ + rc = SQLITE4_NOTFOUND; + } + }else{ + assert( BT_SEEK_LEFAST<0 && BT_SEEK_LE<0 ); + if( eSeek<0 ){ + rc = sqlite4BtCsrPrev(pCsr); + }else{ + if( iHi==nCell ){ + if( eCsrseek==BT_CSRSEEK_UPDATE ){ + rc = SQLITE4_NOTFOUND; + }else{ + rc = sqlite4BtCsrNext(pCsr); + } + } + } + if( rc==SQLITE4_OK ) rc = SQLITE4_INEXACT; + } } } } } @@ -571,11 +605,11 @@ int nK, /* Size of key pK in bytes */ int eSeek /* Seek mode (a BT_SEEK_XXX constant) */ ){ int rc; btCheckPageRefs(pCsr->pDb); - rc = btCsrSeek(pCsr, pK, nK, eSeek, 0); + rc = btCsrSeek(pCsr, pK, nK, eSeek, BT_CSRSEEK_SEEK); btCheckPageRefs(pCsr->pDb); return rc; } /* @@ -635,10 +669,32 @@ */ int sqlite4BtCsrLast(bt_cursor *pCsr){ return btCsrEnd(pCsr, 1); } +static int btCsrReseek(bt_cursor *pCsr){ + int rc = SQLITE4_OK; + if( pCsr->bRequireReseek ){ + BtOvfl ovfl; + memcpy(&ovfl, &pCsr->ovfl, sizeof(BtOvfl)); + + pCsr->ovfl.buf.n = 0; + pCsr->ovfl.buf.p = 0; + pCsr->bSkipNext = 0; + pCsr->bRequireReseek = 0; + + rc = btCsrSeek(pCsr, ovfl.buf.p, ovfl.nKey, BT_SEEK_EQ, BT_CSRSEEK_RESEEK); + assert( rc!=SQLITE4_INEXACT ); + if( pCsr->ovfl.buf.p==0 ){ + pCsr->ovfl.buf.p = ovfl.buf.p; + }else{ + sqlite4_buffer_clear(&ovfl.buf); + } + } + return rc; +} + /* ** This function does the work of both sqlite4BtCsrNext() (if parameter ** bNext is true) and Pref() (if bNext is false). */ @@ -645,10 +701,17 @@ static int btCsrStep(bt_cursor *pCsr, int bNext){ const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager); int rc = SQLITE4_OK; int bRequireDescent = 0; + rc = btCsrReseek(pCsr); + if( (pCsr->bSkipNext && bNext) || (pCsr->bSkipPrev && bNext==0) ){ + pCsr->bSkipPrev = pCsr->bSkipNext = 0; + return rc; + } + pCsr->bSkipPrev = pCsr->bSkipNext = 0; + while( rc==SQLITE4_OK ){ int iPg = pCsr->nPg-1; int iCell = pCsr->aiCell[iPg]; if( bNext ){ @@ -711,21 +774,10 @@ */ int sqlite4BtCsrPrev(bt_cursor *pCsr){ return btCsrStep(pCsr, 0); } -static int btGrowBuffer(bt_db *db, int nReq, u8 **ppVal, int *pnVal){ - if( nReq>*pnVal ){ - u8 *pNew = sqlite4_malloc(db->pEnv, nReq*2); - if( pNew==0 ) return btErrorBkpt(SQLITE4_NOMEM); - sqlite4_free(db->pEnv, *ppVal); - *ppVal = pNew; - *pnVal = nReq*2; - } - return SQLITE4_OK; -} - static int btOverflowArrayRead( bt_db *db, u8 *pOvfl, u8 *aOut, int nOut @@ -826,70 +878,76 @@ return rc; } /* -** If the current cell is a type (c) leaf cell, load the entire key -** into the pCsr->ovfl buffer. If bVal is true, then also load the -** entries value into the buffer. +** Buffer the key and value belonging to the current cursor position +** in pCsr->ovfl. */ -static int btOverflowBuffer(bt_cursor *pCsr, int bVal){ +static int btCsrBuffer(bt_cursor *pCsr, int bVal){ const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager); - int rc = SQLITE4_OK; - u8 *aData; - u8 *pCell; - int iCell = pCsr->aiCell[pCsr->nPg-1]; - - int nK; - int nReq; - u8 *pLocal; /* Pointer to local data within leaf page */ - u8 *aOut; /* Output buffer for overflow data */ - int nLocal = 0; - int nOvfl1 = 0; - int nOvfl2 = 0; + int rc = SQLITE4_OK; /* Return code */ + u8 *aData; /* Page data */ + u8 *pCell; /* Pointer to cell within aData[] */ + int nReq; /* Total required space */ + u8 *aOut; /* Output buffer */ + u8 *pKLocal = 0; /* Pointer to local part of key */ + u8 *pVLocal = 0; /* Pointer to local part of value (if any) */ + int nKLocal = 0; /* Bytes of key on page */ + int nVLocal = 0; /* Bytes of value on page */ + int nKOvfl = 0; /* Bytes of key on overflow pages */ + int nVOvfl = 0; /* Bytes of value on overflow pages */ aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]); - pCell = btCellFind(aData, pgsz, iCell); - pCell += sqlite4BtVarintGet32(pCell, &nK); - if( nK==0 ){ - /* type (c) leaf cell */ - pCell += sqlite4BtVarintGet32(pCell, &nLocal); - pLocal = pCell; - pCell += nLocal; - pCell += sqlite4BtVarintGet32(pCell, &nOvfl1); - pCell += sqlite4BtVarintGet32(pCell, &nOvfl2); - - pCsr->ovfl.nKey = nLocal + nOvfl1; - pCsr->ovfl.nVal = nOvfl2; + pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]); + pCell += sqlite4BtVarintGet32(pCell, &nKLocal); + if( nKLocal==0 ){ + /* Type (c) leaf cell. */ + pCell += sqlite4BtVarintGet32(pCell, &nKLocal); + pKLocal = pCell; + pCell += nKLocal; + pCell += sqlite4BtVarintGet32(pCell, &nKOvfl); + pCell += sqlite4BtVarintGet32(pCell, &nVOvfl); + }else{ - /* type (b) leaf cell */ - pCell += nK; - assert( pCell[0]==0x00 ); - pCell++; - - pCell += sqlite4BtVarintGet32(pCell, &nLocal); - pLocal = pCell; - pCell += nLocal; - pCell += sqlite4BtVarintGet32(pCell, &nOvfl1); - - pCsr->ovfl.nKey = 0; - pCsr->ovfl.nVal = nLocal + nOvfl1; - } - - nReq = nLocal + nOvfl1 + nOvfl2; - rc = btGrowBuffer(pCsr->pDb, nReq, &pCsr->ovfl.pBuf, &pCsr->ovfl.nBuf); + pKLocal = pCell; + pCell += nKLocal; + pCell += sqlite4BtVarintGet32(pCell, &nVLocal); + if( nVLocal==0 ){ + /* Type (b) */ + pCell += sqlite4BtVarintGet32(pCell, &nVLocal); + pVLocal = pCell; + pCell += nVLocal; + pCell += sqlite4BtVarintGet32(pCell, &nVOvfl); + }else{ + /* Type (a) */ + pVLocal = pCell; + } + } + + pCsr->ovfl.nKey = nKLocal + nKOvfl; + pCsr->ovfl.nVal = nVLocal + nVOvfl; + + nReq = pCsr->ovfl.nKey + pCsr->ovfl.nVal; + rc = sqlite4_buffer_resize(&pCsr->ovfl.buf, nReq); if( rc!=SQLITE4_OK ) return rc; /* Copy in local data */ - memcpy(pCsr->ovfl.pBuf, pLocal, nLocal); + aOut = (u8*)pCsr->ovfl.buf.p; + memcpy(aOut, pKLocal, nKLocal); + memcpy(&aOut[nKLocal], pVLocal, nVLocal); /* Load in overflow data */ - aOut = &pCsr->ovfl.pBuf[nLocal]; - rc = btOverflowArrayRead(pCsr->pDb, pCell, aOut, nOvfl1 + nOvfl2); + if( nKOvfl || nVOvfl ){ + rc = btOverflowArrayRead( + pCsr->pDb, pCell, &aOut[nKLocal + nVLocal], nKOvfl + nVOvfl + ); + } return rc; } + /* ** Helper function for btOverflowDelete(). ** ** TODO: This uses recursion. Which is almost certainly not a problem @@ -1001,13 +1059,13 @@ pCell = btCellFind(aData, pgsz, iCell); pCell += sqlite4BtVarintGet32(pCell, &nK); if( nK==0 ){ /* type (c) leaf cell */ - rc = btOverflowBuffer(pCsr, 0); + rc = btCsrBuffer(pCsr, 0); if( rc==SQLITE4_OK ){ - *ppK = pCsr->ovfl.pBuf; + *ppK = pCsr->ovfl.buf.p; *pnK = pCsr->ovfl.nKey; } }else{ *ppK = pCell; *pnK = nK; @@ -1038,13 +1096,14 @@ pCell += nK; pCell += sqlite4BtVarintGet32(pCell, &nV); } if( nV==0 ){ - rc = btOverflowBuffer(pCsr, 1); + rc = btCsrBuffer(pCsr, 1); if( rc==SQLITE4_OK ){ - *ppV = &pCsr->ovfl.pBuf[pCsr->ovfl.nKey]; + u8 *aBuf = (u8*)pCsr->ovfl.buf.p; + *ppV = &aBuf[pCsr->ovfl.nKey]; *pnV = pCsr->ovfl.nVal; } }else{ *ppV = pCell; *pnV = (nV-1); @@ -2299,19 +2358,19 @@ sqlite4BtDebugKV((BtLock*)db->pPager, "replace", (u8*)pK, nK, (u8*)pV, nV); btCheckPageRefs(db); btCsrSetup(db, &csr); - rc = btCsrSeek(&csr, pK, nK, BT_SEEK_GE, 1); + rc = btCsrSeek(&csr, pK, nK, BT_SEEK_GE, BT_CSRSEEK_UPDATE); if( rc==SQLITE4_OK ){ /* The cursor currently points to an entry with key pK/nK. This call ** should therefore replace that entry. So delete it and then re-seek ** the cursor. */ rc = sqlite4BtDelete(&csr); if( rc==SQLITE4_OK && nV>=0 ){ - rc = btCsrSeek(&csr, pK, nK, BT_SEEK_GE, 1); + rc = btCsrSeek(&csr, pK, nK, BT_SEEK_GE, BT_CSRSEEK_UPDATE); if( rc==SQLITE4_OK ) rc = btErrorBkpt(SQLITE4_CORRUPT); } } if( nV>=0 && (rc==SQLITE4_NOTFOUND || rc==SQLITE4_INEXACT) ){ @@ -2334,19 +2393,45 @@ btCheckPageRefs(db); return rc; } +static int btSaveAllCursor(bt_cursor *pCsr){ + int rc = SQLITE4_OK; /* Return code */ + bt_db *pDb = pCsr->pDb; /* Database handle */ + bt_cursor *p; /* Used to iterate through cursors */ + + for(p=pDb->pAllCsr; rc==SQLITE4_OK && p; p=p->pNextCsr){ + rc = btCsrBuffer(p, 0); + if( rc==SQLITE4_OK ){ + assert( p->ovfl.buf.p ); + p->bRequireReseek = 1; + if( p!=pCsr ) btCsrReleaseAll(p); + } + } + + return rc; +} + + +/* +** Delete the entry that the cursor currently points to. +*/ int sqlite4BtDelete(bt_cursor *pCsr){ int rc; - rc = btOverflowDelete(pCsr); + rc = btSaveAllCursor(pCsr); + if( rc==SQLITE4_OK ){ + rc = btOverflowDelete(pCsr); + } if( rc==SQLITE4_OK ){ rc = btDeleteFromPage(pCsr, 1); } if( rc==SQLITE4_OK ){ rc = btBalanceIfUnderfull(pCsr); } + + btCsrReleaseAll(pCsr); return rc; } int sqlite4BtSetCookie(bt_db *db, unsigned int iVal){ return sqlite4BtPagerSetCookie(db->pPager, iVal); Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -123,15 +123,23 @@ #------------------------------------------------------------------------- # Define the generic test suites: # # src4 +# bt # veryquick # quick # full # lappend ::testsuitelist xxx + +test_suite "bt" -prefix "" -description { +} -files { + select1.test select2.test +} -initialize { + kv_default bt +} test_suite "src4" -prefix "" -description { } -files { simple.test simple2.test lsm1.test lsm2.test lsm3.test lsm4.test lsm5.test Index: test/test_kv2.c ================================================================== --- test/test_kv2.c +++ test/test_kv2.c @@ -399,16 +399,46 @@ rc = aSub[iSub].xCmd(interp, objc, (Tcl_Obj **)objv); } return rc; } + +/* +** TCLCMD: kv_default DEFAULT +*/ +static int kv_default_cmd( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + sqlite4_kvfactory factory = 0; + const char *zDflt; + + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DEFAULT"); + return TCL_ERROR; + } + + zDflt = Tcl_GetString(objv[1]); + + sqlite4_env_config(0, SQLITE4_ENVCONFIG_KVSTORE_GET, zDflt, &factory); + if( factory==0 ){ + Tcl_ResetResult(interp); + Tcl_AppendResult(interp, "no such factory: ", zDflt, 0); + return TCL_ERROR; + } + sqlite4_env_config(0, SQLITE4_ENVCONFIG_KVSTORE_PUSH, "main", factory); + return TCL_OK; +} /* ** Register the TCL commands defined above with the TCL interpreter. ** ** This routine should be the only externally visible symbol in this ** source code file. */ int Sqliteteststorage2_Init(Tcl_Interp *interp){ Tcl_CreateObjCommand(interp, "kvwrap", kvwrap_command, 0, 0); + Tcl_CreateObjCommand(interp, "kv_default", kv_default_cmd, 0, 0); return TCL_OK; } Index: test/tester.tcl ================================================================== --- test/tester.tcl +++ test/tester.tcl @@ -868,10 +868,84 @@ puts [format {%-4d %-12.12s %-6d %-6d %-6d % -17s %s %s} \ $addr $opcode $p1 $p2 $p3 $p4 $p5 $comment ] } } + +proc explain_i {sql {db db}} { + puts "" + puts "addr opcode p1 p2 p3 p4 p5 #" + puts "---- ------------ ------ ------ ------ ---------------- -- -" + + + # Set up colors for the different opcodes. Scheme is as follows: + # + # Red: Opcodes that write to a b-tree. + # Blue: Opcodes that reposition or seek a cursor. + # Green: The ResultRow opcode. + # + set R "\033\[31;1m" ;# Red fg + set G "\033\[32;1m" ;# Green fg + set B "\033\[34;1m" ;# Red fg + set D "\033\[39;0m" ;# Default fg + foreach opcode { + Seek SeekGe SeekGt SeekLe SeekLt NotFound Last Rewind + NoConflict Next Prev VNext VPrev VFilter + } { + set color($opcode) $B + } + foreach opcode {ResultRow} { + set color($opcode) $G + } + foreach opcode {IdxInsert Insert Delete IdxDelete} { + set color($opcode) $R + } + + set bSeenGoto 0 + $db eval "explain $sql" {} { + set x($addr) 0 + set op($addr) $opcode + + if {$opcode == "Goto" && ($bSeenGoto==0 || ($p2 > $addr+10))} { + set linebreak($p2) 1 + set bSeenGoto 1 + } + + if {$opcode=="Next" || $opcode=="Prev" + || $opcode=="VNext" || $opcode=="VPrev" + } { + for {set i $p2} {$i<$addr} {incr i} { + incr x($i) 2 + } + } + + if {$opcode == "Goto" && $p2<$addr && $op($p2)=="Yield"} { + for {set i [expr $p2+1]} {$i<$addr} {incr i} { + incr x($i) 2 + } + } + + if {$opcode == "Halt" && $comment == "End of coroutine"} { + set linebreak([expr $addr+1]) 1 + } + } + + $db eval "explain $sql" {} { + if {[info exists linebreak($addr)]} { + puts "" + } + set I [string repeat " " $x($addr)] + + set col "" + catch { set col $color($opcode) } + + puts [format {%-4d %s%s%-12.12s%s %-6d %-6d %-6d % -17s %s %s} \ + $addr $I $col $opcode $D $p1 $p2 $p3 $p4 $p5 $comment + ] + } + puts "---- ------------ ------ ------ ------ ---------------- -- -" +} # Show the VDBE program for an SQL statement but omit the Trace # opcode at the beginning. This procedure can be used to prove # that different SQL statements generate exactly the same VDBE code. #