Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Begin adding code for blind-writes.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: fc9cdc6ca37e53836d1b5c5a8434d8eae450b2b0
User & Date: dan 2013-11-25 20:50:35.123
Context
2013-11-26
20:35
Have the low-level b-tree insert routine return BT_BLOCKFULL if a level-0 tree is full. check-in: 65642c32ba user: dan tags: trunk
2013-11-25
20:50
Begin adding code for blind-writes. check-in: fc9cdc6ca3 user: dan tags: trunk
2013-11-23
18:41
Add a BT_CONTROL_INFO option to query for database header values. Add a command line interface to lsmtest to access this and other options. "lsmtest bt <filename> <option> ....". check-in: e09d4c6aa8 user: dan tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to lsm-test/lsmtest.h.
104
105
106
107
108
109
110

111
112
113
114
115
116
117
u32  testPrngValue(u32 iVal);
void testPrngArray(u32 iVal, u32 *aOut, int nOut);
void testPrngString(u32 iVal, char *aOut, int nOut);

void testErrorInit(int argc, char **);
void testPrintError(const char *zFormat, ...);
void testPrintUsage(const char *zArgs);

void testTimeInit(void);
int  testTimeGet(void);

/* Functions in testmem.c. */
void testMallocInstall(lsm_env *pEnv);
void testMallocUninstall(lsm_env *pEnv);
void testMallocCheck(lsm_env *pEnv, int *, int *, FILE *);







>







104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
u32  testPrngValue(u32 iVal);
void testPrngArray(u32 iVal, u32 *aOut, int nOut);
void testPrngString(u32 iVal, char *aOut, int nOut);

void testErrorInit(int argc, char **);
void testPrintError(const char *zFormat, ...);
void testPrintUsage(const char *zArgs);
void testPrintFUsage(const char *zFormat, ...);
void testTimeInit(void);
int  testTimeGet(void);

/* Functions in testmem.c. */
void testMallocInstall(lsm_env *pEnv);
void testMallocUninstall(lsm_env *pEnv);
void testMallocCheck(lsm_env *pEnv, int *, int *, FILE *);
Changes to lsm-test/lsmtest_tdb4.c.
38
39
40
41
42
43
44




45
46
47
48
49
50

51
52
53
54
55
56
57
**   call to the xSync() VFS method (on either the db or log file).
**   If nCrashSync==2, the following call to xSync(), and so on.
**
** bCrash:
**   After a crash is simulated, this variable is set. Any subsequent
**   attempts to write to a file or modify the file system in any way 
**   fail once this is set. All the caller can do is close the connection.




*/
struct BtDb {
  TestDb base;                    /* Base class */
  bt_db *pBt;                     /* bt database handle */
  sqlite4_env *pEnv;              /* SQLite environment (for malloc/free) */
  bt_env *pVfs;                   /* Underlying VFS */


  /* Space for bt_fetch() results */
  u8 *aBuffer;                    /* Space to store results */
  int nBuffer;                    /* Allocated size of aBuffer[] in bytes */
  int nRef;

  /* Background checkpointer used by mt connections */







>
>
>
>






>







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
**   call to the xSync() VFS method (on either the db or log file).
**   If nCrashSync==2, the following call to xSync(), and so on.
**
** bCrash:
**   After a crash is simulated, this variable is set. Any subsequent
**   attempts to write to a file or modify the file system in any way 
**   fail once this is set. All the caller can do is close the connection.
**
** bFastInsert:
**   If this variable is set to true, then a BT_CONTROL_FAST_INSERT_OP
**   control is issued before each call to BtReplace() or BtCsrOpen().
*/
struct BtDb {
  TestDb base;                    /* Base class */
  bt_db *pBt;                     /* bt database handle */
  sqlite4_env *pEnv;              /* SQLite environment (for malloc/free) */
  bt_env *pVfs;                   /* Underlying VFS */
  int bFastInsert;                /* True to use fast-insert */

  /* Space for bt_fetch() results */
  u8 *aBuffer;                    /* Space to store results */
  int nBuffer;                    /* Allocated size of aBuffer[] in bytes */
  int nRef;

  /* Background checkpointer used by mt connections */
360
361
362
363
364
365
366

367
368
369
370
371
372
373
static int bt_write(TestDb *pTestDb, void *pK, int nK, void *pV, int nV){
  BtDb *p = (BtDb*)pTestDb;
  int iLevel;
  int rc;

  rc = btMinTransaction(p, 2, &iLevel);
  if( rc==SQLITE4_OK ){

    rc = sqlite4BtReplace(p->pBt, pK, nK, pV, nV);
    rc = btRestoreTransaction(p, iLevel, rc);
  }
  return rc;
}

static int bt_delete(TestDb *pTestDb, void *pK, int nK){







>







365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
/*
** TestDb write method for the bt backend: store the value (pV/nV) under
** the key (pK/nK).
**
** btMinTransaction() is called with level 2 before the write and
** btRestoreTransaction() afterwards with the level it reported
** (presumably to open, and then close again, a write transaction if one
** was not already open - TODO confirm against those helpers). If the
** connection was configured with bFastInsert, a BT_CONTROL_FAST_INSERT_OP
** control is issued immediately before the call to sqlite4BtReplace().
**
** Returns the result of btMinTransaction() if that fails, otherwise the
** value returned by btRestoreTransaction().
*/
static int bt_write(TestDb *pTestDb, void *pK, int nK, void *pV, int nV){
  BtDb *pDb = (BtDb*)pTestDb;
  int iSaved;                     /* Transaction level to restore */
  int rc = btMinTransaction(pDb, 2, &iSaved);

  if( rc!=SQLITE4_OK ) return rc;

  if( pDb->bFastInsert ){
    sqlite4BtControl(pDb->pBt, BT_CONTROL_FAST_INSERT_OP, 0);
  }
  rc = sqlite4BtReplace(pDb->pBt, pK, nK, pV, nV);
  return btRestoreTransaction(pDb, iSaved, rc);
}

static int bt_delete(TestDb *pTestDb, void *pK, int nK){
382
383
384
385
386
387
388

389
390
391
392
393
394
395
  BtDb *p = (BtDb*)pTestDb;
  bt_cursor *pCsr = 0;
  int rc = SQLITE4_OK;
  int iLevel;

  rc = btMinTransaction(p, 2, &iLevel);
  if( rc==SQLITE4_OK ){

    rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
  }
  while( rc==SQLITE4_OK ){
    const void *pK;
    int n;
    int nCmp;
    int res;







>







388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
  BtDb *p = (BtDb*)pTestDb;
  bt_cursor *pCsr = 0;
  int rc = SQLITE4_OK;
  int iLevel;

  rc = btMinTransaction(p, 2, &iLevel);
  if( rc==SQLITE4_OK ){
    if( p->bFastInsert ) sqlite4BtControl(p->pBt, BT_CONTROL_FAST_INSERT_OP, 0);
    rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
  }
  while( rc==SQLITE4_OK ){
    const void *pK;
    int n;
    int nCmp;
    int res;
437
438
439
440
441
442
443

444
445
446
447
448
449
450

  iLevel = sqlite4BtTransactionLevel(p->pBt);
  if( iLevel==0 ){ 
    rc = sqlite4BtBegin(p->pBt, 1); 
    if( rc!=SQLITE4_OK ) return rc;
  }


  rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
  if( rc==SQLITE4_OK ){
    rc = sqlite4BtCsrSeek(pCsr, pK, nK, BT_SEEK_EQ);
    if( rc==SQLITE4_OK ){
      const void *pV = 0;
      int nV = 0;
      rc = sqlite4BtCsrData(pCsr, 0, -1, &pV, &nV);







>







444
445
446
447
448
449
450
451
452
453
454
455
456
457
458

  iLevel = sqlite4BtTransactionLevel(p->pBt);
  if( iLevel==0 ){ 
    rc = sqlite4BtBegin(p->pBt, 1); 
    if( rc!=SQLITE4_OK ) return rc;
  }

  if( p->bFastInsert ) sqlite4BtControl(p->pBt, BT_CONTROL_FAST_INSERT_OP, 0);
  rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
  if( rc==SQLITE4_OK ){
    rc = sqlite4BtCsrSeek(pCsr, pK, nK, BT_SEEK_EQ);
    if( rc==SQLITE4_OK ){
      const void *pV = 0;
      int nV = 0;
      rc = sqlite4BtCsrData(pCsr, 0, -1, &pV, &nV);
483
484
485
486
487
488
489

490
491
492
493
494
495
496
  bt_cursor *pCsr = 0;
  int rc;
  int iLevel;

  rc = btMinTransaction(p, 1, &iLevel);

  if( rc==SQLITE4_OK ){

    rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
  }
  if( rc==SQLITE4_OK ){
    if( bReverse ){
      if( pLast ){
        rc = sqlite4BtCsrSeek(pCsr, pLast, nLast, BT_SEEK_LE);
      }else{







>







491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
  bt_cursor *pCsr = 0;
  int rc;
  int iLevel;

  rc = btMinTransaction(p, 1, &iLevel);

  if( rc==SQLITE4_OK ){
    if( p->bFastInsert ) sqlite4BtControl(p->pBt, BT_CONTROL_FAST_INSERT_OP, 0);
    rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
  }
  if( rc==SQLITE4_OK ){
    if( bReverse ){
      if( pLast ){
        rc = sqlite4BtCsrSeek(pCsr, pLast, nLast, BT_SEEK_LE);
      }else{
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633


634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651

652

653



654
655
656
657
658
659
660
661
662
  }

  if( *p ) return SQLITE4_ERROR;
  *piVal = i;
  return SQLITE4_OK;
}

static int testBtConfigure(bt_db *db, const char *zCfg, int *pbMt){
  int rc = SQLITE4_OK;

  if( zCfg ){
    struct CfgParam {
      const char *zParam;
      int eParam;
    } aParam[] = {
      { "safety",         BT_CONTROL_SAFETY },
      { "autockpt",       BT_CONTROL_AUTOCKPT },
      { "multiproc",      BT_CONTROL_MULTIPROC },
      { "mt",             -1 }


    };
    const char *z = zCfg;
    int n = strlen(z);
    char *aSpace;
    const char *zOpt;
    const char *zArg;

    aSpace = (char*)testMalloc(n+2);
    while( 0==testParseOption(&z, &zOpt, &zArg, aSpace) ){
      int i;
      int iVal;
      rc = testArgSelect(aParam, "param", zOpt, &i);
      if( rc!=SQLITE4_OK ) break;

      rc = testParseInt(zArg, &iVal);
      if( rc!=SQLITE4_OK ) break;

      if( aParam[i].eParam<0 ){

        *pbMt = iVal;

      }else{



        rc = sqlite4BtControl(db, aParam[i].eParam, (void*)&iVal);
        if( rc!=SQLITE4_OK ) break;
      }
    }
    testFree(aSpace);
  }

  return rc;
}







|










|
>
>








|








|
>
|
>
|
>
>
>
|
|







624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
  }

  if( *p ) return SQLITE4_ERROR;
  *piVal = i;
  return SQLITE4_OK;
}

/*
** Apply the options in configuration string zCfg (may be NULL, in which
** case this is a no-op) to test database handle pDb.
**
** Each option parsed from zCfg by testParseOption() must name one of the
** entries in aParam[] and carry an integer argument:
**
**   safety, autockpt, multiproc
**       Passed through to sqlite4BtControl() on pDb->pBt.
**
**   mt
**       The integer value is written to *pbMt. *pbMt is left untouched
**       if the option does not appear.
**
**   fastinsert
**       pDb->bFastInsert is set to true if the value is non-zero, or to
**       false if it is zero (so "fastinsert=0" disables the feature
**       instead of silently enabling it).
**
** Returns SQLITE4_OK if successful, or the first error code returned by
** testArgSelect(), testParseInt() or sqlite4BtControl() otherwise.
*/
static int testBtConfigure(BtDb *pDb, const char *zCfg, int *pbMt){
  int rc = SQLITE4_OK;

  if( zCfg ){
    struct CfgParam {
      const char *zParam;
      int eParam;
    } aParam[] = {
      { "safety",         BT_CONTROL_SAFETY },
      { "autockpt",       BT_CONTROL_AUTOCKPT },
      { "multiproc",      BT_CONTROL_MULTIPROC },
      { "mt",             -1 },
      { "fastinsert",     -2 },
      { 0, 0 }
    };
    const char *z = zCfg;
    int n = strlen(z);
    char *aSpace;
    const char *zOpt;
    const char *zArg;

    aSpace = (char*)testMalloc(n+2);
    while( rc==SQLITE4_OK && 0==testParseOption(&z, &zOpt, &zArg, aSpace) ){
      int i;
      int iVal;
      rc = testArgSelect(aParam, "param", zOpt, &i);
      if( rc!=SQLITE4_OK ) break;

      rc = testParseInt(zArg, &iVal);
      if( rc!=SQLITE4_OK ) break;

      /* Negative eParam values are handled by the test harness itself.
      ** Anything else is a BT_CONTROL_XXX code for the bt module. */
      switch( aParam[i].eParam ){
        case -1:
          *pbMt = iVal;
          break;
        case -2:
          pDb->bFastInsert = (iVal!=0);
          break;
        default:
          rc = sqlite4BtControl(pDb->pBt, aParam[i].eParam, (void*)&iVal);
          break;
      }
    }
    testFree(aSpace);
  }

  return rc;
}
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
    p->env.xShmMap = btVfsShmMap;
    p->env.xShmBarrier = btVfsShmBarrier;
    p->env.xShmUnmap = btVfsShmUnmap;

    sqlite4BtControl(pBt, BT_CONTROL_GETVFS, (void*)&p->pVfs);
    sqlite4BtControl(pBt, BT_CONTROL_SETVFS, (void*)&p->env);

    rc = testBtConfigure(pBt, zSpec, &mt);
    if( rc==SQLITE4_OK ){
      rc = sqlite4BtOpen(pBt, zFilename);
    }

    if( rc==SQLITE4_OK && mt ){
      int nAuto = 0;
      rc = bgc_attach(p, zSpec);







|







734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
    p->env.xShmMap = btVfsShmMap;
    p->env.xShmBarrier = btVfsShmBarrier;
    p->env.xShmUnmap = btVfsShmUnmap;

    sqlite4BtControl(pBt, BT_CONTROL_GETVFS, (void*)&p->pVfs);
    sqlite4BtControl(pBt, BT_CONTROL_SETVFS, (void*)&p->env);

    rc = testBtConfigure(p, zSpec, &mt);
    if( rc==SQLITE4_OK ){
      rc = sqlite4BtOpen(pBt, zFilename);
    }

    if( rc==SQLITE4_OK && mt ){
      int nAuto = 0;
      rc = bgc_attach(p, zSpec);
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
  BtDb *pDb = 0;
  int rc;
  int mt;
  bt_ckpter *pCkpter = (bt_ckpter*)pArg;

  rc = test_bt_open("", (char*)pCkpter->file.p, 0, (TestDb**)&pDb);
  assert( rc==SQLITE4_OK );
  rc = testBtConfigure(pDb->pBt, (char*)pCkpter->spec.p, &mt);

  while( pCkpter->nRef>0 ){
    bt_db *db = pDb->pBt;
    int nLog = 0;

    sqlite4BtBegin(db, 1);
    sqlite4BtCommit(db, 0);







|







791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
  BtDb *pDb = 0;
  int rc;
  int mt;
  bt_ckpter *pCkpter = (bt_ckpter*)pArg;

  rc = test_bt_open("", (char*)pCkpter->file.p, 0, (TestDb**)&pDb);
  assert( rc==SQLITE4_OK );
  rc = testBtConfigure(pDb, (char*)pCkpter->spec.p, &mt);

  while( pCkpter->nRef>0 ){
    bt_db *db = pDb->pBt;
    int nLog = 0;

    sqlite4BtBegin(db, 1);
    sqlite4BtCommit(db, 0);
Changes to lsm-test/lsmtest_util.c.
131
132
133
134
135
136
137









138
139
140
141

142
143
144
145
146
147
148

void testPrintError(const char *zFormat, ...){
  va_list ap;
  va_start(ap, zFormat);
  vfprintf(stderr, zFormat, ap);
  va_end(ap);
}










void testPrintUsage(const char *zArgs){
  testPrintError("Usage: %s %s %s\n", g.argv[0], g.argv[1], zArgs);
}


static void argError(void *aData, const char *zType, int sz, const char *zArg){
  struct Entry { const char *zName; };
  struct Entry *pEntry;
  const char *zPrev = 0;

  testPrintError("unrecognized %s \"%s\": must be ", zType, zArg);







>
>
>
>
>
>
>
>
>




>







131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158

/*
** printf() style function that writes the formatted message to stderr.
** No prefix or trailing newline is added.
*/
void testPrintError(const char *zFormat, ...){
  va_list args;

  va_start(args, zFormat);
  vfprintf(stderr, zFormat, args);
  va_end(args);
}

/*
** Write a usage message to stderr. The message consists of the text
** "Usage: <argv[0]> <argv[1]> ", followed by the printf() style
** formatted arguments, followed by a newline character.
*/
void testPrintFUsage(const char *zFormat, ...){
  va_list args;

  fprintf(stderr, "Usage: %s %s ", g.argv[0], g.argv[1]);

  va_start(args, zFormat);
  vfprintf(stderr, zFormat, args);
  va_end(args);

  fprintf(stderr, "\n");
}

/*
** Write the message "Usage: <argv[0]> <argv[1]> <zArgs>" followed by a
** newline to stderr (via testPrintError()).
*/
void testPrintUsage(const char *zArgs){
  const char *zProg = g.argv[0];  /* Program name */
  const char *zCmd = g.argv[1];   /* Sub-command name */
  testPrintError("Usage: %s %s %s\n", zProg, zCmd, zArgs);
}


static void argError(void *aData, const char *zType, int sz, const char *zArg){
  struct Entry { const char *zName; };
  struct Entry *pEntry;
  const char *zPrev = 0;

  testPrintError("unrecognized %s \"%s\": must be ", zType, zArg);
Changes to src/btInt.h.
35
36
37
38
39
40
41



42
43
44
45

46






47
48
49
50
51
52
53
#ifndef MAX
# define MAX(a,b) (((a)>(b))?(a):(b))
#endif

/* By default pages are 1024 bytes in size. */
#define BT_DEFAULT_PGSZ 1024




typedef struct BtDbHdr BtDbHdr;
struct BtDbHdr {
  u32 pgsz;                       /* Page size in bytes */
  u32 nPg;                        /* Size of database file in pages */

  u32 iRoot;                      /* B-tree root page */






  u32 iCookie;                    /* Current value of schema cookie */
  u32 iFreePg;                    /* First page in free-page list trunk */
  u32 iFreeBlk;                   /* First page in free-block list trunk */
};

/*************************************************************************
** Interface to bt_pager.c functionality.







>
>
>




>

>
>
>
>
>
>







35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#ifndef MAX
# define MAX(a,b) (((a)>(b))?(a):(b))
#endif

/* By default pages are 1024 bytes in size. */
#define BT_DEFAULT_PGSZ 1024

/* By default blocks are 512K bytes in size. */
#define BT_DEFAULT_BLKSZ (512*1024)

/*
** Database header values. The first group of fields describes the file
** and the main b-tree, the middle groups hold root pages of the other
** trees, and the last group holds the schema cookie and the heads of
** the free-page and free-block lists.
*/
typedef struct BtDbHdr BtDbHdr;
struct BtDbHdr {
  u32 pgsz;                       /* Page size in bytes */
  u32 nPg;                        /* Size of database file in pages */

  u32 iRoot;                      /* B-tree root page */
  u32 iMRoot;                     /* Root page of meta-tree */
  u32 iSRoot;                     /* Root page of schedule-tree */

  u32 iSubRoot;                   /* Root of current sub-tree */
  u32 nSubPg;                     /* Number of non-overflow pages in sub-tree */

  u32 iCookie;                    /* Current value of schema cookie */
  u32 iFreePg;                    /* First page in free-page list trunk */
  u32 iFreeBlk;                   /* First page in free-block list trunk */
};

/*************************************************************************
** Interface to bt_pager.c functionality.
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

/*
** Query for the database page size. Requires an open read transaction.
*/
int sqlite4BtPagerPagesize(BtPager*);

/* 
** Query for the root page number. Requires an open read transaction.
*/
u32 sqlite4BtPagerRootpgno(BtPager*);

/*
** Read, write and trim existing database pages.
*/
int sqlite4BtPageGet(BtPager*, u32 pgno, BtPage **ppPage);
int sqlite4BtPageTrimPgno(BtPager*, u32 pgno);
int sqlite4BtPageWrite(BtPage*);







|

|







89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

/*
** Query for the database page size. Requires an open read transaction.
*/
int sqlite4BtPagerPagesize(BtPager*);

/* 
** Query for the db header values. Requires an open read transaction.
*/
BtDbHdr *sqlite4BtPagerDbhdr(BtPager*);

/*
** Read, write and trim existing database pages.
*/
int sqlite4BtPageGet(BtPager*, u32 pgno, BtPage **ppPage);
int sqlite4BtPageTrimPgno(BtPager*, u32 pgno);
int sqlite4BtPageWrite(BtPage*);
Changes to src/bt_log.c.
243
244
245
246
247
248
249

250
251
252



253
254
255
256
257
258
259
static void btLogChecksum32(
  int nativeCksum,                /* True for native byte-order, else false */
  u8 *a,                          /* Content to be checksummed */
  int nByte,                      /* Bytes of content in a[]. */
  const u32 *aIn,                 /* Initial checksum value input */
  u32 *aOut                       /* OUT: Final checksum value output */
){

  assert( (nByte&0x00000007)==4 && nByte>=8 );
  btLogChecksum(nativeCksum, a, 8, aIn, aOut);
  btLogChecksum(nativeCksum, &a[4], nByte-4, aOut, aOut);



}

#define BT_PAGE_DEBUG 0
#define BT_VAL_DEBUG  0
#define BT_HDR_DEBUG  0

static void btDebugTopology(BtLock *pLock, char *zStr, int iSide, u32 *aLog){







>
|
|
|
>
>
>







243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
/*
** Wrapper around btLogChecksum() for buffers whose size is a multiple of
** 4 bytes but not necessarily a multiple of 8.
**
** If nByte is a multiple of 8 the buffer is passed straight through to
** btLogChecksum(). Otherwise nByte must be 4 more than a multiple of 8.
** In that case the first 8 bytes are checksummed, then the (nByte-4)
** bytes starting at offset 4 are folded into the same checksum. Both
** calls therefore cover a multiple of 8 bytes, and bytes 4 to 7 are
** covered twice.
*/
static void btLogChecksum32(
  int nativeCksum,                /* True for native byte-order, else false */
  u8 *a,                          /* Content to be checksummed */
  int nByte,                      /* Bytes of content in a[]. */
  const u32 *aIn,                 /* Initial checksum value input */
  u32 *aOut                       /* OUT: Final checksum value output */
){
  assert( nByte>=8 );
  if( nByte&0x00000007 ){
    /* The two-step scheme below only covers the whole buffer if exactly
    ** 4 bytes are left over after the last complete 8-byte unit. */
    assert( (nByte&0x00000007)==4 );
    btLogChecksum(nativeCksum, a, 8, aIn, aOut);
    btLogChecksum(nativeCksum, &a[4], nByte-4, aOut, aOut);
  }else{
    btLogChecksum(nativeCksum, a, nByte, aIn, aOut);
  }
}

#define BT_PAGE_DEBUG 0
#define BT_VAL_DEBUG  0
#define BT_HDR_DEBUG  0

static void btDebugTopology(BtLock *pLock, char *zStr, int iSide, u32 *aLog){
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521

static int btLogUpdateSharedHdr(BtLog *pLog){
  bt_env *pVfs = pLog->pLock->pVfs;
  BtShmHdr *p = &pLog->snapshot;
  BtShm *pShm = btLogShm(pLog);

  /* Calculate a checksum for the private snapshot object. */
  btLogChecksum(1, (u8*)p, offsetof(BtShmHdr, aCksum), 0, p->aCksum);

  /* Update the shared object. */
  pVfs->xShmBarrier(pLog->pFd);
  memcpy(&pShm->hdr1, p, sizeof(BtShmHdr));
  pVfs->xShmBarrier(pLog->pFd);
  memcpy(&pShm->hdr2, p, sizeof(BtShmHdr));








|







511
512
513
514
515
516
517
518
519
520
521
522
523
524
525

static int btLogUpdateSharedHdr(BtLog *pLog){
  bt_env *pVfs = pLog->pLock->pVfs;
  BtShmHdr *p = &pLog->snapshot;
  BtShm *pShm = btLogShm(pLog);

  /* Calculate a checksum for the private snapshot object. */
  btLogChecksum32(1, (u8*)p, offsetof(BtShmHdr, aCksum), 0, p->aCksum);

  /* Update the shared object. */
  pVfs->xShmBarrier(pLog->pFd);
  memcpy(&pShm->hdr1, p, sizeof(BtShmHdr));
  pVfs->xShmBarrier(pLog->pFd);
  memcpy(&pShm->hdr2, p, sizeof(BtShmHdr));

849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867

  aLog[5] = iLast;
  return btLogHashRollback(pLog, btLogFrameHash(pLog, iLast), iLast);
}

static void btLogDecodeDbhdr(BtLog *pLog, u8 *aData, BtDbHdr *pHdr){
  BtDbHdrCksum hdr;
  u32 aCksum[2];

  if( aData ){
    memcpy(&hdr, aData, sizeof(BtDbHdrCksum));
    btLogChecksum(1, (u8*)&hdr, offsetof(BtDbHdrCksum, aCksum), 0, aCksum);
  }

  if( aData==0 || aCksum[0]!=hdr.aCksum[0] || aCksum[1]!=hdr.aCksum[1] ){
    memset(&hdr, 0, sizeof(BtDbHdrCksum));
    hdr.hdr.pgsz = BT_DEFAULT_PGSZ;
    hdr.hdr.nPg = 2;
    hdr.hdr.iRoot = 2;







|



|







853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871

  aLog[5] = iLast;
  return btLogHashRollback(pLog, btLogFrameHash(pLog, iLast), iLast);
}

static void btLogDecodeDbhdr(BtLog *pLog, u8 *aData, BtDbHdr *pHdr){
  BtDbHdrCksum hdr;
  u32 aCksum[2] = {0,0};

  if( aData ){
    memcpy(&hdr, aData, sizeof(BtDbHdrCksum));
    btLogChecksum32(1, (u8*)&hdr, offsetof(BtDbHdrCksum, aCksum), 0, aCksum);
  }

  if( aData==0 || aCksum[0]!=hdr.aCksum[0] || aCksum[1]!=hdr.aCksum[1] ){
    memset(&hdr, 0, sizeof(BtDbHdrCksum));
    hdr.hdr.pgsz = BT_DEFAULT_PGSZ;
    hdr.hdr.nPg = 2;
    hdr.hdr.iRoot = 2;
896
897
898
899
900
901
902
903
904
905
906
907
908








909
910
911
912
913
914
915
  return rc;
}

static int btLogUpdateDbhdr(BtLog *pLog, u8 *aData){
  BtDbHdrCksum hdr;

  memcpy(&hdr.hdr, &pLog->snapshot.dbhdr, sizeof(BtDbHdr));
  btLogChecksum(1, (u8*)&hdr, offsetof(BtDbHdrCksum, aCksum), 0, hdr.aCksum);
  btDebugDbhdr(pLog->pLock, "update", &pLog->snapshot.dbhdr);

  assert( hdr.hdr.iRoot==2 );
  assert( hdr.hdr.pgsz>0 );
  memcpy(aData, &hdr, sizeof(BtDbHdrCksum));









  return SQLITE4_OK;
}


/*
** Run log recovery. In other words, read the log file from disk and 







|





>
>
>
>
>
>
>
>







900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
  return rc;
}

/*
** Serialize the database header stored in the current snapshot
** (pLog->snapshot.dbhdr) into buffer aData. The serialized form is a
** BtDbHdrCksum structure: a copy of the header followed by a checksum
** calculated by btLogChecksum32().
**
** In builds with assert() enabled, the serialized header is decoded
** again to check that it round-trips to the snapshot value.
**
** Always returns SQLITE4_OK.
*/
static int btLogUpdateDbhdr(BtLog *pLog, u8 *aData){
  BtDbHdr *pSnap = &pLog->snapshot.dbhdr;
  BtDbHdrCksum hdr;

  memcpy(&hdr.hdr, pSnap, sizeof(BtDbHdr));
  btLogChecksum32(1, (u8*)&hdr, offsetof(BtDbHdrCksum, aCksum), 0, hdr.aCksum);
  btDebugDbhdr(pLog->pLock, "update", pSnap);

  assert( hdr.hdr.iRoot==2 );
  assert( hdr.hdr.pgsz>0 );
  memcpy(aData, &hdr, sizeof(BtDbHdrCksum));

#ifndef NDEBUG
  {
    BtDbHdr decoded;
    btLogDecodeDbhdr(pLog, aData, &decoded);
    assert( 0==memcmp(&decoded, pSnap, sizeof(decoded)) );
  }
#endif

  return SQLITE4_OK;
}


/*
** Run log recovery. In other words, read the log file from disk and 
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408

/*
** Return true if the checksum in BtShmHdr.aCksum[] matches the rest
** of the object.
*/
static int btLogChecksumOk(BtShmHdr *pHdr){
  u32 aCksum[2];
  btLogChecksum(1, (u8*)pHdr, offsetof(BtShmHdr, aCksum), 0, aCksum);
  return (aCksum[0]==pHdr->aCksum[0] && aCksum[1]==pHdr->aCksum[1]);
}

static int btLogSnapshot(BtLog *pLog, BtShmHdr *pHdr){
  int rc;

  rc = btLogMapShm(pLog, 0);







|







1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420

/*
** Recalculate the checksum of the fields of *pHdr that precede
** BtShmHdr.aCksum[] and compare it with the stored value. Return 1 if
** both checksum words match, or 0 otherwise.
*/
static int btLogChecksumOk(BtShmHdr *pHdr){
  u32 aCalc[2];                   /* Freshly calculated checksum */

  btLogChecksum32(1, (u8*)pHdr, offsetof(BtShmHdr, aCksum), 0, aCalc);
  if( aCalc[0]!=pHdr->aCksum[0] ) return 0;
  if( aCalc[1]!=pHdr->aCksum[1] ) return 0;
  return 1;
}

static int btLogSnapshot(BtLog *pLog, BtShmHdr *pHdr){
  int rc;

  rc = btLogMapShm(pLog, 0);
Changes to src/bt_main.c.
23
24
25
26
27
28
29


30
31
32
33
34
35
36
37
38
39
40
41
42
43
44




45


46
47


48









49
50
51
52
53
54
55
56
57


58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
** Values that make up the single byte flags field at the start of
** b-tree pages. 
*/
#define BT_PGFLAGS_INTERNAL 0x01  /* True for non-leaf nodes */

/* #define BT_STDERR_DEBUG 1 */



struct bt_db {
  sqlite4_env *pEnv;              /* SQLite environment */
  BtPager *pPager;                /* Underlying page-based database */
  bt_cursor *pAllCsr;             /* List of all open cursors */
  int bFastInsertOp;              /* Set by CONTROL_FAST_INSERT_OP */
};

typedef struct BtOvfl BtOvfl;
struct BtOvfl {
  int nKey;
  int nVal;
  sqlite4_buffer buf;
};

/*




** Database cursor handle.


*/
struct bt_cursor {


  bt_db *pDb;                     /* Database that owns this cursor */









  int nPg;                        /* Number of valid entries in apPage[] */
  int aiCell[BT_MAX_DEPTH];       /* Current cell of each apPage[] entry */
  BtPage *apPage[BT_MAX_DEPTH];   /* All pages from root to current leaf */
  BtOvfl ovfl;                    /* Overflow cache (see above) */
  bt_cursor *pNextCsr;            /* Next cursor opened by same db handle */

  int bRequireReseek;
  int bSkipNext;
  int bSkipPrev;


};

#ifndef btErrorBkpt
int btErrorBkpt(int rc){
  static int error_cnt = 0;
  error_cnt++;
  return rc;
}
#endif

#if !defined(NDEBUG) 
static void btCheckPageRefs(bt_db *pDb){
  int nActual = 0;                /* Outstanding refs according to pager */
  int nExpect = 0;                /* According to the set of open cursors */
  bt_cursor *pCsr;                /* Iterator variable */

  for(pCsr=pDb->pAllCsr; pCsr; pCsr=pCsr->pNextCsr){
    if( pCsr->nPg>0 ) nExpect += pCsr->nPg;
  }
  nActual = sqlite4BtPagerRefcount(pDb->pPager);
  assert( nActual==nExpect );
}







>
>



|











>
>
>
>
|
>
>


>
>
|
>
>
>
>
>
>
>
>
>




<

|
|
|
>
>














|







23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71

72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
** Values that make up the single byte flags field at the start of
** b-tree pages. 
*/
#define BT_PGFLAGS_INTERNAL 0x01  /* True for non-leaf nodes */

/* #define BT_STDERR_DEBUG 1 */

typedef struct BtCursor BtCursor;

/*
** Database handle. pAllCsr is the head of a linked list (connected by
** BtCursor.pNextCsr) of the b-tree cursors opened by sqlite4BtCsrOpen()
** and not yet closed. bFastInsertOp is cleared at the end of each call
** to sqlite4BtCsrOpen().
*/
struct bt_db {
  sqlite4_env *pEnv;              /* SQLite environment */
  BtPager *pPager;                /* Underlying page-based database */
  BtCursor *pAllCsr;              /* List of all open cursors */
  int bFastInsertOp;              /* Set by CONTROL_FAST_INSERT_OP */
};

/*
** Overflow cache object embedded in each BtCursor (BtCursor.ovfl). The
** memory manager of the buffer is configured by btCsrSetup() and the
** buffer is released by btCsrReset() when bFreeBuffer is set.
*/
typedef struct BtOvfl BtOvfl;
struct BtOvfl {
  int nKey;                       /* NOTE(review): presumably key size in bytes */
  int nVal;                       /* NOTE(review): presumably value size in bytes */
  sqlite4_buffer buf;             /* Buffer holding the cached content */
};

/*
** Candidate values for bt_cursor.eType.
*/
#define CSR_TYPE_BT    0          /* Regular b-tree cursor */
#define CSR_TYPE_FAST  1          /* Fast-insert-tree cursor */

/*
** Base class for both cursor types (BtCursor and FiCursor). This is the
** type handed out to users of the cursor API. A BtCursor embeds it as
** its first member, so the two pointer types may be cast to one another.
*/
struct bt_cursor {
  int eType;                      /* Cursor type (a CSR_TYPE_XXX value) */
  void *pExtra;                   /* Extra allocated space */
  bt_db *pDb;                     /* Database this cursor belongs to */
};

/*
** Database b-tree cursor handle (cursor type CSR_TYPE_BT).
**
** apPage[0..nPg-1] hold page references from the root of the tree down
** to the current leaf, with aiCell[] giving the current cell on each of
** those pages. All such cursors opened on a database handle are linked
** together via pNextCsr, starting at bt_db.pAllCsr.
*/
struct BtCursor {
  bt_cursor base;                 /* Base cursor class */

  u32 iRoot;                      /* Root page of b-tree this cursor queries */
  int nPg;                        /* Number of valid entries in apPage[] */
  int aiCell[BT_MAX_DEPTH];       /* Current cell of each apPage[] entry */
  BtPage *apPage[BT_MAX_DEPTH];   /* All pages from root to current leaf */
  BtOvfl ovfl;                    /* Overflow cache (see above) */


  int bRequireReseek;             /* True if a btCsrReseek() is required */
  int bSkipNext;                  /* True if next CsrNext() is a no-op */
  int bSkipPrev;                  /* True if next CsrPrev() is a no-op */

  BtCursor *pNextCsr;             /* Next cursor opened by same db handle */
};

#ifndef btErrorBkpt
/*
** Error codes are passed through this function at the point they are
** first generated. It returns its argument unchanged. The only side
** effect is to increment a static counter, which makes the function a
** convenient place to set a debugger breakpoint.
*/
int btErrorBkpt(int rc){
  static int nError = 0;          /* Number of calls made so far */
  nError += 1;
  return rc;
}
#endif

#if !defined(NDEBUG) 
/*
** Debugging check: assert() that the number of outstanding page
** references reported by the pager is equal to the total number of
** pages held by the open b-tree cursors of database handle pDb.
*/
static void btCheckPageRefs(bt_db *pDb){
  BtCursor *p = pDb->pAllCsr;     /* Used to iterate through cursor list */
  int nCsrRef = 0;                /* References held according to cursors */
  int nPagerRef;                  /* References held according to pager */

  while( p ){
    if( p->nPg>0 ) nCsrRef += p->nPg;
    p = p->pNextCsr;
  }
  nPagerRef = sqlite4BtPagerRefcount(pDb->pPager);
  assert( nPagerRef==nCsrRef );
}
189
190
191
192
193
194
195
196
197



198
199
200
201
202
203
204
205

206




207
208

209
210
211

212
213
214

215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245




246
247
248
249




250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266

267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
  return rc;
}

int sqlite4BtTransactionLevel(bt_db *db){
  return sqlite4BtPagerTransactionLevel(db->pPager);
}

static void btCsrSetup(bt_db *db, bt_cursor *pCsr){
  memset(pCsr, 0, sizeof(bt_cursor));



  sqlite4_env_config(db->pEnv, SQLITE4_ENVCONFIG_GETMM, &pCsr->ovfl.buf.pMM);
  pCsr->pDb = db;
}

int sqlite4BtCsrOpen(bt_db *db, int nExtra, bt_cursor **ppCsr){
  int rc = SQLITE4_OK;            /* Return Code */
  int nByte;                      /* Total bytes of space to allocate */
  bt_cursor *pCsr;                /* New cursor object */






  nByte = sizeof(bt_cursor) + nExtra;
  *ppCsr = pCsr = (bt_cursor*)sqlite4_malloc(db->pEnv, nByte);

  if( pCsr==0 ){
    rc = btErrorBkpt(SQLITE4_NOMEM);
  }else{

    btCsrSetup(db, pCsr);
    pCsr->pNextCsr = db->pAllCsr;
    db->pAllCsr = pCsr;

  }

  btCheckPageRefs(db);
  db->bFastInsertOp = 0;
  return rc;
}

static void btCsrReleaseAll(bt_cursor *pCsr){
  int i;
  for(i=0; i<pCsr->nPg; i++){
    sqlite4BtPageRelease(pCsr->apPage[i]);
  }
  pCsr->nPg = 0;
}


static void btCsrReset(bt_cursor *pCsr, int bFreeBuffer){
  btCsrReleaseAll(pCsr);
  if( bFreeBuffer ){
    sqlite4_buffer_clear(&pCsr->ovfl.buf);
  }
  pCsr->bSkipNext = 0;
  pCsr->bSkipPrev = 0;
  pCsr->bRequireReseek = 0;
}

int sqlite4BtCsrClose(bt_cursor *pCsr){
  if( pCsr ){
    bt_db *pDb = pCsr->pDb;
    bt_cursor **pp;
    btCheckPageRefs(pDb);




    btCsrReset(pCsr, 1);
    for(pp=&pDb->pAllCsr; *pp!=pCsr; pp=&(*pp)->pNextCsr);
    *pp = pCsr->pNextCsr;
    sqlite4_free(pDb->pEnv, pCsr);




    btCheckPageRefs(pDb);
  }
  return SQLITE4_OK;
}

void *sqlite4BtCsrExtra(bt_cursor *pCsr){
  return (void*)&pCsr[1];
}

/*
** Set pCsr->apPage[pCsr->nPg] to a reference to database page pgno.
*/
static int btCsrDescend(bt_cursor *pCsr, u32 pgno){
  int rc;
  if( pCsr->nPg>=BT_MAX_DEPTH ){
    rc = btErrorBkpt(SQLITE4_CORRUPT);
  }else{

    rc = sqlite4BtPageGet(pCsr->pDb->pPager, pgno, &pCsr->apPage[pCsr->nPg]);
    if( rc==SQLITE4_OK ){
      assert( pCsr->apPage[pCsr->nPg] );
      pCsr->nPg++;
    }
  }
  return rc;
}

/*
** Move the cursor from the current page to the parent. Return 
** SQLITE4_NOTFOUND if the cursor already points to the root page,
** or SQLITE4_OK otherwise.
*/
static int btCsrAscend(bt_cursor *pCsr, int nLvl){
  int i;
  for(i=0; i<nLvl && ( pCsr->nPg>0 ); i++){
    pCsr->nPg--;
    sqlite4BtPageRelease(pCsr->apPage[pCsr->nPg]);
    pCsr->apPage[pCsr->nPg] = 0;
  }
  return (pCsr->nPg==0 ? SQLITE4_NOTFOUND : SQLITE4_OK);







|
|
>
>
>

<





|
>

>
>
>
>
|
|
>
|
|
|
>
|
|
|
>







|








|












<

>
>
>
>
|
|
|
|
>
>
>
>






|





|




>
|













|







209
210
211
212
213
214
215
216
217
218
219
220
221

222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273

274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
  return rc;
}

/*
** Return the current transaction level of database handle db, as
** reported by the underlying pager.
*/
int sqlite4BtTransactionLevel(bt_db *db){
  BtPager *pPager = db->pPager;
  return sqlite4BtPagerTransactionLevel(pPager);
}

/*
** Initialize the newly allocated b-tree cursor pCsr. The structure is
** zeroed (which also leaves base.eType set to CSR_TYPE_BT), then linked
** to database handle db and root page iRoot. base.pExtra is pointed at
** the memory immediately following the BtCursor structure. The overflow
** buffer is configured with the memory manager of the db environment.
*/
static void btCsrSetup(bt_db *db, u32 iRoot, BtCursor *pCsr){
  bt_cursor *pBase = &pCsr->base;

  memset(pCsr, 0, sizeof(*pCsr));
  pCsr->iRoot = iRoot;
  pBase->pDb = db;
  pBase->pExtra = (void*)(pCsr + 1);
  sqlite4_env_config(db->pEnv, SQLITE4_ENVCONFIG_GETMM, &pCsr->ovfl.buf.pMM);
}

/*
** Open a new cursor on database db and store it in *ppCsr. nExtra bytes are
** allocated immediately after the BtCursor structure. A transaction must be
** open (asserted).
**
** For a regular operation the cursor is set up on the root page recorded in
** the database header and linked at the head of the db->pAllCsr list. If the
** allocation fails, *ppCsr is set to NULL and SQLITE4_NOMEM is returned.
**
** When db->bFastInsertOp is set the function currently does nothing except
** trip assert(0); *ppCsr is not written on that path. In all cases the
** bFastInsertOp flag is cleared before returning.
*/
int sqlite4BtCsrOpen(bt_db *db, int nExtra, bt_cursor **ppCsr){
  int rc = SQLITE4_OK;            /* Return Code */

  assert( sqlite4BtPagerTransactionLevel(db->pPager)>0 );

  if( !db->bFastInsertOp ){
    int nAlloc = sizeof(BtCursor) + nExtra;   /* Bytes of space to allocate */
    BtCursor *pNew = (BtCursor*)sqlite4_malloc(db->pEnv, nAlloc);

    *ppCsr = (bt_cursor*)pNew;
    if( pNew ){
      btCsrSetup(db, sqlite4BtPagerDbhdr(db->pPager)->iRoot, pNew);
      pNew->pNextCsr = db->pAllCsr;
      db->pAllCsr = pNew;
    }else{
      rc = btErrorBkpt(SQLITE4_NOMEM);
    }
  }else{
    /* Fast-insert cursors are not implemented yet */
    assert( 0 );
  }

  btCheckPageRefs(db);
  db->bFastInsertOp = 0;
  return rc;
}

/*
** Release every page reference currently held in pCsr->apPage[] (entries
** 0 through nPg-1) and set the stack depth back to zero. The apPage[]
** slots themselves are not cleared.
*/
static void btCsrReleaseAll(BtCursor *pCsr){
  int iPg = 0;
  while( iPg<pCsr->nPg ){
    sqlite4BtPageRelease(pCsr->apPage[iPg]);
    iPg++;
  }
  pCsr->nPg = 0;
}


/*
** Reset cursor pCsr: drop all page references and clear the bSkipNext,
** bSkipPrev and bRequireReseek flags. If bFreeBuffer is non-zero the
** overflow buffer (pCsr->ovfl.buf) is cleared as well.
*/
static void btCsrReset(BtCursor *pCsr, int bFreeBuffer){
  btCsrReleaseAll(pCsr);

  if( bFreeBuffer!=0 ){
    sqlite4_buffer_clear(&pCsr->ovfl.buf);
  }

  pCsr->bSkipNext = 0;
  pCsr->bSkipPrev = 0;
  pCsr->bRequireReseek = 0;
}

/*
** Close cursor pCsr. Passing NULL is a harmless no-op. Always returns
** SQLITE4_OK.
**
** A regular b-tree cursor (eType==CSR_TYPE_BT) is reset (including its
** overflow buffer), unlinked from the pDb->pAllCsr list and freed. Any other
** cursor type is not handled yet and only trips assert(0).
*/
int sqlite4BtCsrClose(bt_cursor *pCsr){
  bt_db *pDb;

  if( pCsr==0 ) return SQLITE4_OK;

  pDb = pCsr->pDb;
  btCheckPageRefs(pDb);

  if( pCsr->eType!=CSR_TYPE_BT ){
    /* A fast-insert-tree cursor */
    assert( 0 );
  }else{
    /* A regular b-tree cursor */
    BtCursor *pBt = (BtCursor*)pCsr;
    BtCursor **ppLink = &pDb->pAllCsr;

    btCsrReset(pBt, 1);

    /* Locate the link that points at this cursor and splice it out */
    while( *ppLink!=pBt ){
      ppLink = &(*ppLink)->pNextCsr;
    }
    *ppLink = pBt->pNextCsr;

    sqlite4_free(pDb->pEnv, pBt);
  }

  btCheckPageRefs(pDb);
  return SQLITE4_OK;
}

/*
** Return the pExtra pointer stored in the cursor.
*/
void *sqlite4BtCsrExtra(bt_cursor *pCsr){
  void *pExtra = pCsr->pExtra;
  return pExtra;
}

/*
** Obtain a reference to database page pgno and push it onto the cursor's
** page stack (slot pCsr->apPage[pCsr->nPg]), incrementing pCsr->nPg on
** success.
**
** If the stack already holds BT_MAX_DEPTH pages, SQLITE4_CORRUPT is returned
** (via btErrorBkpt()) and the cursor is left untouched. Otherwise the return
** code of sqlite4BtPageGet() is passed back.
*/
static int btCsrDescend(BtCursor *pCsr, u32 pgno){
  const int iSlot = pCsr->nPg;    /* Stack slot the new page will occupy */
  int rc;                         /* Return code */

  if( iSlot>=BT_MAX_DEPTH ){
    rc = btErrorBkpt(SQLITE4_CORRUPT);
  }else{
    bt_db *pDb = pCsr->base.pDb;
    rc = sqlite4BtPageGet(pDb->pPager, pgno, &pCsr->apPage[iSlot]);
    if( rc==SQLITE4_OK ){
      assert( pCsr->apPage[iSlot] );
      pCsr->nPg = iSlot + 1;
    }
  }
  return rc;
}

/*
** Move the cursor from the current page to the parent. Return 
** SQLITE4_NOTFOUND if the cursor already points to the root page,
** or SQLITE4_OK otherwise.
*/
static int btCsrAscend(BtCursor *pCsr, int nLvl){
  int i;
  for(i=0; i<nLvl && ( pCsr->nPg>0 ); i++){
    pCsr->nPg--;
    sqlite4BtPageRelease(pCsr->apPage[pCsr->nPg]);
    pCsr->apPage[pCsr->nPg] = 0;
  }
  return (pCsr->nPg==0 ? SQLITE4_NOTFOUND : SQLITE4_OK);
387
388
389
390
391
392
393
394

395
396
397
398

399
400








401
402
403
404
405
406
407
  }
  sqlite4BtBufAppendf(pBuf, ")\n");

  for(i=0; i<nCell; i++){
    int nKey;
    int j;
    u8 *pCell = btCellFind(aData, nData, i);
    sqlite4BtBufAppendf(pBuf, "  Key %d: ", i);

    pCell += sqlite4BtVarintGet32(pCell, &nKey);
    for(j=0; j<nKey; j++){
      sqlite4BtBufAppendf(pBuf, "%02X", (int)pCell[j]);
    }

    if( btFlags(aData) & BT_PGFLAGS_INTERNAL ){
      sqlite4BtBufAppendf(pBuf, "  child=%d ", (int)btGetU32(&pCell[j]));








    }
    sqlite4BtBufAppendf(pBuf, "\n");
  }
}

int sqlite4BtDebugPage(sqlite4_buffer *pBuf, u32 pgno, char *aData, int nData){
  btPageToAscii(pgno, (u8*)aData, nData, pBuf);







|
>




>


>
>
>
>
>
>
>
>







425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
  }
  sqlite4BtBufAppendf(pBuf, ")\n");

  for(i=0; i<nCell; i++){
    int nKey;
    int j;
    u8 *pCell = btCellFind(aData, nData, i);
    sqlite4BtBufAppendf(pBuf, "  Cell %d: ", i);

    pCell += sqlite4BtVarintGet32(pCell, &nKey);
    for(j=0; j<nKey; j++){
      sqlite4BtBufAppendf(pBuf, "%02X", (int)pCell[j]);
    }

    if( btFlags(aData) & BT_PGFLAGS_INTERNAL ){
      sqlite4BtBufAppendf(pBuf, "  child=%d ", (int)btGetU32(&pCell[j]));
    }else{
      int nVal;
      pCell += nKey;
      sqlite4BtBufAppendf(pBuf, "  ");
      pCell += sqlite4BtVarintGet32(pCell, &nVal);
      for(j=0; j<(nVal-1); j++){
        sqlite4BtBufAppendf(pBuf, "%02X", (int)pCell[j]);
      }
    }
    sqlite4BtBufAppendf(pBuf, "\n");
  }
}

int sqlite4BtDebugPage(sqlite4_buffer *pBuf, u32 pgno, char *aData, int nData){
  btPageToAscii(pgno, (u8*)aData, nData, pBuf);
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
**
**     *piRes = (C - K).
**
** In other words, *piRes is +ve, zero or -ve if C is respectively larger, 
** equal to or smaller than K.
*/
static int btCellKeyCompare(
  bt_cursor *pCsr,                /* Cursor handle */
  int bLeaf,                      /* True if cursor currently points to leaf */
  const void *pK, int nK,         /* Key to compare against cursor key */
  int *piRes                      /* OUT: Result of comparison */
){
  const void *pCsrKey;
  int nCsrKey;
  int nCmp;
  int nAscend = 0;
  int rc = SQLITE4_OK;
  int res;

  if( bLeaf ){
    rc = sqlite4BtCsrKey(pCsr, &pCsrKey, &nCsrKey);
  }else{
    const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);

    u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
    u8 *pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

    pCsrKey = pCell + sqlite4BtVarintGet32(pCell, &nCsrKey);
    if( nCsrKey==0 ){
      int iCell = pCsr->aiCell[pCsr->nPg-1]+1;
      while( 1 ){
        u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
        u32 pgno = btChildPgno(aData, pgsz, iCell);
        nAscend++;
        rc = btCsrDescend(pCsr, pgno);
        if( rc!=SQLITE4_OK ) break;
        aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
        pCsr->aiCell[pCsr->nPg-1] = 0;
        if( (btFlags(aData) & BT_PGFLAGS_INTERNAL)==0 ) break;
        iCell = 0;
      }
      rc = sqlite4BtCsrKey(pCsr, &pCsrKey, &nCsrKey);
    }
  }

  if( rc==SQLITE4_OK ){
    nCmp = MIN(nCsrKey, nK);
    res = memcmp(pCsrKey, pK, nCmp);
    if( res==0 ){







|












|

|


















|







490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
**
**     *piRes = (C - K).
**
** In other words, *piRes is +ve, zero or -ve if C is respectively larger, 
** equal to or smaller than K.
*/
static int btCellKeyCompare(
  BtCursor *pCsr,                 /* Cursor handle */
  int bLeaf,                      /* True if cursor currently points to leaf */
  const void *pK, int nK,         /* Key to compare against cursor key */
  int *piRes                      /* OUT: Result of comparison */
){
  const void *pCsrKey;
  int nCsrKey;
  int nCmp;
  int nAscend = 0;
  int rc = SQLITE4_OK;
  int res;

  if( bLeaf ){
    rc = sqlite4BtCsrKey((bt_cursor*)pCsr, &pCsrKey, &nCsrKey);
  }else{
    const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);

    u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
    u8 *pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

    pCsrKey = pCell + sqlite4BtVarintGet32(pCell, &nCsrKey);
    if( nCsrKey==0 ){
      int iCell = pCsr->aiCell[pCsr->nPg-1]+1;
      while( 1 ){
        u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
        u32 pgno = btChildPgno(aData, pgsz, iCell);
        nAscend++;
        rc = btCsrDescend(pCsr, pgno);
        if( rc!=SQLITE4_OK ) break;
        aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
        pCsr->aiCell[pCsr->nPg-1] = 0;
        if( (btFlags(aData) & BT_PGFLAGS_INTERNAL)==0 ) break;
        iCell = 0;
      }
      rc = sqlite4BtCsrKey((bt_cursor*)pCsr, &pCsrKey, &nCsrKey);
    }
  }

  if( rc==SQLITE4_OK ){
    nCmp = MIN(nCsrKey, nK);
    res = memcmp(pCsrKey, pK, nCmp);
    if( res==0 ){
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
}

/*
** Values for the eCsrseek parameter of btCsrSeek():
**
**   BT_CSRSEEK_SEEK    Mode passed by sqlite4BtCsrSeek().
**   BT_CSRSEEK_UPDATE  Only used together with BT_SEEK_GE (asserted). In
**                      this mode btCsrSeek() does not reset the cursor when
**                      it returns an error.
**   BT_CSRSEEK_RESEEK  Only used together with BT_SEEK_EQ (asserted).
*/
#define BT_CSRSEEK_SEEK   0
#define BT_CSRSEEK_UPDATE 1
#define BT_CSRSEEK_RESEEK 2

static int btCsrSeek(
  bt_cursor *pCsr, 
  const void *pK,                 /* Key to seek for */
  int nK,                         /* Size of key pK in bytes */
  int eSeek,                      /* Seek mode (a BT_SEEK_XXX constant) */
  int eCsrseek
){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);
  u32 pgno;                       /* Page number for next page to load */
  int rc = SQLITE4_OK;            /* Return Code */

  assert( eSeek==BT_SEEK_EQ || eCsrseek!=BT_CSRSEEK_RESEEK );
  assert( eSeek==BT_SEEK_GE || eCsrseek!=BT_CSRSEEK_UPDATE );

  /* Reset the cursor */
  btCsrReset(pCsr, 0);

  /* Figure out the root page number */
  assert( pCsr->nPg==0 );
  pgno = sqlite4BtPagerRootpgno(pCsr->pDb->pPager);

  while( rc==SQLITE4_OK && pgno ){
    /* Load page number pgno into the b-tree */
    rc = btCsrDescend(pCsr, pgno);
    if( rc==SQLITE4_OK ){
      int nCell;                  /* Number of cells on this page */
      int iHi;                    /* pK/nK is <= than cell iHi */







|





|










|
|







546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
}

/*
** Values for the eCsrseek parameter of btCsrSeek():
**
**   BT_CSRSEEK_SEEK    Mode passed by sqlite4BtCsrSeek().
**   BT_CSRSEEK_UPDATE  Only used together with BT_SEEK_GE (asserted). In
**                      this mode btCsrSeek() does not reset the cursor when
**                      it returns an error.
**   BT_CSRSEEK_RESEEK  Only used together with BT_SEEK_EQ (asserted).
*/
#define BT_CSRSEEK_SEEK   0
#define BT_CSRSEEK_UPDATE 1
#define BT_CSRSEEK_RESEEK 2

static int btCsrSeek(
  BtCursor *pCsr, 
  const void *pK,                 /* Key to seek for */
  int nK,                         /* Size of key pK in bytes */
  int eSeek,                      /* Seek mode (a BT_SEEK_XXX constant) */
  int eCsrseek
){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  u32 pgno;                       /* Page number for next page to load */
  int rc = SQLITE4_OK;            /* Return Code */

  assert( eSeek==BT_SEEK_EQ || eCsrseek!=BT_CSRSEEK_RESEEK );
  assert( eSeek==BT_SEEK_GE || eCsrseek!=BT_CSRSEEK_UPDATE );

  /* Reset the cursor */
  btCsrReset(pCsr, 0);

  /* Figure out the root page number */
  assert( pCsr->iRoot>1 && pCsr->nPg==0 );
  pgno = pCsr->iRoot;

  while( rc==SQLITE4_OK && pgno ){
    /* Load page number pgno into the b-tree */
    rc = btCsrDescend(pCsr, pgno);
    if( rc==SQLITE4_OK ){
      int nCell;                  /* Number of cells on this page */
      int iHi;                    /* pK/nK is <= than cell iHi */
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615


616




617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640

641
642
643
644






645
646
647
648
649
650
651
652
653
              }
            }else{
              rc = SQLITE4_NOTFOUND;
            }
          }else{
            assert( BT_SEEK_LEFAST<0 && BT_SEEK_LE<0 );
            if( eSeek<0 ){
              rc = sqlite4BtCsrPrev(pCsr);
            }else{
              if( iHi==nCell ){
                if( eCsrseek==BT_CSRSEEK_UPDATE ){
                  rc = SQLITE4_NOTFOUND;
                }else{
                  rc = sqlite4BtCsrNext(pCsr);
                }
              }
            }
            if( rc==SQLITE4_OK ) rc = SQLITE4_INEXACT;
          }
        }
      }
    }
  }

  if( rc!=SQLITE4_OK && rc!=SQLITE4_INEXACT && eCsrseek!=BT_CSRSEEK_UPDATE ){
    btCsrReset(pCsr, 0);
  }
  return rc;
}

/*
** Public seek entry point. Seek cursor pCsr to key (pK/nK) using seek mode
** eSeek (a BT_SEEK_XXX constant). This is btCsrSeek() in BT_CSRSEEK_SEEK
** mode, bracketed by calls to btCheckPageRefs(). The btCsrSeek() return code
** is passed back unchanged.
*/
int sqlite4BtCsrSeek(
  bt_cursor *pCsr,                /* Cursor to reposition */
  const void *pK,                 /* Key to seek for */
  int nK,                         /* Size of key pK in bytes */
  int eSeek                       /* Seek mode (a BT_SEEK_XXX constant) */
){
  int rcSeek;

  btCheckPageRefs(pCsr->pDb);
  rcSeek = btCsrSeek(pCsr, pK, nK, eSeek, BT_CSRSEEK_SEEK);
  btCheckPageRefs(pCsr->pDb);

  return rcSeek;
}

/*
** This function seeks the cursor as required for either sqlite4BtCsrFirst()
** (if parameter bLast is false) or sqlite4BtCsrLast() (if bLast is true).
*/
static int btCsrEnd(bt_cursor *pCsr, int bLast){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);
  int rc;                         /* Return Code */
  u32 pgno;                       /* Page number for next page to load */

  /* Reset the cursor */
  btCsrReset(pCsr, 0);

  /* Figure out the root page number */
  assert( pCsr->nPg==0 );
  pgno = sqlite4BtPagerRootpgno(pCsr->pDb->pPager);

  while( rc==SQLITE4_OK ){
    /* Load page number pgno into the b-tree */
    rc = btCsrDescend(pCsr, pgno);
    if( rc==SQLITE4_OK ){

      int nByte;
      u8 *pCell;
      u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);







      /* If the cursor has descended to a leaf break out of the loop. */
      pCsr->aiCell[pCsr->nPg-1] = (bLast ? btCellCount(aData, pgsz) : 0);
      if( (aData[0] & BT_PGFLAGS_INTERNAL)==0 ) break;
      
      /* Otherwise, set pgno to the left or rightmost child of the page
      ** just loaded, depending on whether the cursor is seeking to the
      ** start or end of the tree.  */
      if( bLast==0 ){
        pCell = btCellFind(aData, pgsz, 0);







|





|

















|





|
>
>
|
>
>
>
>
|







|
|
|






|
|





>




>
>
>
>
>
>

|







626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
              }
            }else{
              rc = SQLITE4_NOTFOUND;
            }
          }else{
            assert( BT_SEEK_LEFAST<0 && BT_SEEK_LE<0 );
            if( eSeek<0 ){
              rc = sqlite4BtCsrPrev((bt_cursor*)pCsr);
            }else{
              if( iHi==nCell ){
                if( eCsrseek==BT_CSRSEEK_UPDATE ){
                  rc = SQLITE4_NOTFOUND;
                }else{
                  rc = sqlite4BtCsrNext((bt_cursor*)pCsr);
                }
              }
            }
            if( rc==SQLITE4_OK ) rc = SQLITE4_INEXACT;
          }
        }
      }
    }
  }

  if( rc!=SQLITE4_OK && rc!=SQLITE4_INEXACT && eCsrseek!=BT_CSRSEEK_UPDATE ){
    btCsrReset(pCsr, 0);
  }
  return rc;
}

/*
** Public seek entry point. Seek cursor pBase to key (pK/nK) using seek mode
** eSeek (a BT_SEEK_XXX constant). For a regular b-tree cursor this is
** btCsrSeek() in BT_CSRSEEK_SEEK mode, bracketed by btCheckPageRefs() calls.
**
** Fast-insert-tree cursors are not handled yet: that path only trips
** assert(0) and leaves the return code unset, so it must not be reached in
** builds that compile assert() out.
*/
int sqlite4BtCsrSeek(
  bt_cursor *pBase,               /* Cursor to reposition */
  const void *pK,                 /* Key to seek for */
  int nK,                         /* Size of key pK in bytes */
  int eSeek                       /* Seek mode (a BT_SEEK_XXX constant) */
){
  int rc;

  btCheckPageRefs(pBase->pDb);
  if( pBase->eType!=CSR_TYPE_BT ){
    /* fast-insert-tree cursor */
    assert( 0 );
  }else{
    rc = btCsrSeek((BtCursor*)pBase, pK, nK, eSeek, BT_CSRSEEK_SEEK);
  }
  btCheckPageRefs(pBase->pDb);

  return rc;
}

/*
** This function seeks the cursor as required for either sqlite4BtCsrFirst()
** (if parameter bLast is false) or sqlite4BtCsrLast() (if bLast is true).
*/
static int btCsrEnd(BtCursor *pCsr, int bLast){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  int rc = SQLITE4_OK;            /* Return Code */
  u32 pgno;                       /* Page number for next page to load */

  /* Reset the cursor */
  btCsrReset(pCsr, 0);

  /* Figure out the root page number */
  assert( pCsr->iRoot>1 && pCsr->nPg==0 );
  pgno = pCsr->iRoot;

  while( rc==SQLITE4_OK ){
    /* Load page number pgno into the b-tree */
    rc = btCsrDescend(pCsr, pgno);
    if( rc==SQLITE4_OK ){
      int nCell;                  /* Number of cells on this page */
      int nByte;
      u8 *pCell;
      u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);

      nCell = btCellCount(aData, pgsz);
      if( nCell==0 ){
        btCsrReset(pCsr, 0);
        return SQLITE4_NOTFOUND;
      }

      /* If the cursor has descended to a leaf break out of the loop. */
      pCsr->aiCell[pCsr->nPg-1] = (bLast ? nCell : 0);
      if( (aData[0] & BT_PGFLAGS_INTERNAL)==0 ) break;
      
      /* Otherwise, set pgno to the left or rightmost child of the page
      ** just loaded, depending on whether the cursor is seeking to the
      ** start or end of the tree.  */
      if( bLast==0 ){
        pCell = btCellFind(aData, pgsz, 0);
663
664
665
666
667
668
669






670
671
672
673
674
675
676






677
678
679
680
681
682
683
684
685
686
687
  return rc;
}

/*
** Move cursor pCsr to the smallest key in the database. This is btCsrEnd()
** with bLast==0; its return code is passed back unchanged.
*/
int sqlite4BtCsrFirst(bt_cursor *pCsr){
  int rc = btCsrEnd(pCsr, 0);
  return rc;
}

/*
** Move cursor pCsr to the largest key in the database. This is btCsrEnd()
** with bLast==1; its return code is passed back unchanged.
*/
int sqlite4BtCsrLast(bt_cursor *pCsr){
  int rc = btCsrEnd(pCsr, 1);
  return rc;
}

static int btCsrReseek(bt_cursor *pCsr){
  int rc = SQLITE4_OK;
  if( pCsr->bRequireReseek ){
    BtOvfl ovfl;
    memcpy(&ovfl, &pCsr->ovfl, sizeof(BtOvfl));

    pCsr->ovfl.buf.n = 0;
    pCsr->ovfl.buf.p = 0;







>
>
>
>
>
>
|






>
>
>
>
>
>
|


|







724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
  return rc;
}

/*
** Move cursor pCsr to the smallest key in the database. For a regular
** b-tree cursor this is btCsrEnd() with bLast==0.
**
** Other cursor types are not handled yet: that path only trips assert(0)
** and leaves the return code unset, so it must not be reached in builds
** that compile assert() out.
*/
int sqlite4BtCsrFirst(bt_cursor *pCsr){
  int rc;
  if( pCsr->eType!=CSR_TYPE_BT ){
    assert( 0 );
  }else{
    rc = btCsrEnd((BtCursor*)pCsr, 0);
  }
  return rc;
}

/*
** Move cursor pCsr to the largest key in the database. For a regular
** b-tree cursor this is btCsrEnd() with bLast==1.
**
** Other cursor types are not handled yet: that path only trips assert(0)
** and leaves the return code unset, so it must not be reached in builds
** that compile assert() out.
*/
int sqlite4BtCsrLast(bt_cursor *pCsr){
  int rc;
  if( pCsr->eType!=CSR_TYPE_BT ){
    assert( 0 );
  }else{
    rc = btCsrEnd((BtCursor*)pCsr, 1);
  }
  return rc;
}

static int btCsrReseek(BtCursor *pCsr){
  int rc = SQLITE4_OK;
  if( pCsr->bRequireReseek ){
    BtOvfl ovfl;
    memcpy(&ovfl, &pCsr->ovfl, sizeof(BtOvfl));

    pCsr->ovfl.buf.n = 0;
    pCsr->ovfl.buf.p = 0;
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
}


/*
** This function does the work of both sqlite4BtCsrNext() (if parameter
** bNext is true) and Pref() (if bNext is false).
*/
static int btCsrStep(bt_cursor *pCsr, int bNext){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);
  int rc = SQLITE4_OK;
  int bRequireDescent = 0;

  rc = btCsrReseek(pCsr);
  if( rc==SQLITE4_OK && pCsr->nPg==0 ) rc = SQLITE4_NOTFOUND;

  if( (pCsr->bSkipNext && bNext) || (pCsr->bSkipPrev && bNext==0) ){







|
|







773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
}


/*
** This function does the work of both sqlite4BtCsrNext() (if parameter
** bNext is true) and Pref() (if bNext is false).
*/
static int btCsrStep(BtCursor *pCsr, int bNext){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  int rc = SQLITE4_OK;
  int bRequireDescent = 0;

  rc = btCsrReseek(pCsr);
  if( rc==SQLITE4_OK && pCsr->nPg==0 ) rc = SQLITE4_NOTFOUND;

  if( (pCsr->bSkipNext && bNext) || (pCsr->bSkipPrev && bNext==0) ){
770
771
772
773
774
775
776






777
778
779
780
781
782
783






784
785
786
787
788
789
790
791
}


/*
** Step cursor pCsr forward to the next entry in the tree. This is
** btCsrStep() with bNext==1; its return code is passed back unchanged.
*/
int sqlite4BtCsrNext(bt_cursor *pCsr){
  int rc = btCsrStep(pCsr, 1);
  return rc;
}

/*
** Step cursor pCsr back to the previous entry in the tree. This is
** btCsrStep() with bNext==0; its return code is passed back unchanged.
*/
int sqlite4BtCsrPrev(bt_cursor *pCsr){
  int rc = btCsrStep(pCsr, 0);
  return rc;
}

static int btOverflowArrayRead(
  bt_db *db,
  u8 *pOvfl,
  u8 *aOut,
  int nOut







>
>
>
>
>
>
|






>
>
>
>
>
>
|







843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
}


/*
** Step cursor pCsr forward to the next entry in the tree. For a regular
** b-tree cursor this is btCsrStep() with bNext==1.
**
** Other cursor types are not handled yet: that path only trips assert(0)
** and leaves the return code unset, so it must not be reached in builds
** that compile assert() out.
*/
int sqlite4BtCsrNext(bt_cursor *pCsr){
  int rc;
  if( pCsr->eType!=CSR_TYPE_BT ){
    assert( 0 );
  }else{
    rc = btCsrStep((BtCursor*)pCsr, 1);
  }
  return rc;
}

/*
** Step cursor pCsr back to the previous entry in the tree. For a regular
** b-tree cursor this is btCsrStep() with bNext==0.
**
** Other cursor types are not handled yet: that path only trips assert(0)
** and leaves the return code unset, so it must not be reached in builds
** that compile assert() out.
*/
int sqlite4BtCsrPrev(bt_cursor *pCsr){
  int rc;
  if( pCsr->eType!=CSR_TYPE_BT ){
    assert( 0 );
  }else{
    rc = btCsrStep((BtCursor*)pCsr, 0);
  }
  return rc;
}

static int btOverflowArrayRead(
  bt_db *db,
  u8 *pOvfl,
  u8 *aOut,
  int nOut
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
  return rc;
}

/*
** Buffer the key and value belonging to the current cursor position
** in pCsr->ovfl.
*/
static int btCsrBuffer(bt_cursor *pCsr, int bVal){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);
  int rc = SQLITE4_OK;            /* Return code */
  u8 *aData;                      /* Page data */
  u8 *pCell;                      /* Pointer to cell within aData[] */
  int nReq;                       /* Total required space */
  u8 *aOut;                       /* Output buffer */
  u8 *pKLocal = 0;                /* Pointer to local part of key */
  u8 *pVLocal = 0;                /* Pointer to local part of value (if any) */







|
|







972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
  return rc;
}

/*
** Buffer the key and value belonging to the current cursor position
** in pCsr->ovfl.
*/
static int btCsrBuffer(BtCursor *pCsr, int bVal){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  int rc = SQLITE4_OK;            /* Return code */
  u8 *aData;                      /* Page data */
  u8 *pCell;                      /* Pointer to cell within aData[] */
  int nReq;                       /* Total required space */
  u8 *aOut;                       /* Output buffer */
  u8 *pKLocal = 0;                /* Pointer to local part of key */
  u8 *pVLocal = 0;                /* Pointer to local part of value (if any) */
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
  aOut = (u8*)pCsr->ovfl.buf.p;
  memcpy(aOut, pKLocal, nKLocal);
  memcpy(&aOut[nKLocal], pVLocal, nVLocal);

  /* Load in overflow data */
  if( nKOvfl || nVOvfl ){
    rc = btOverflowArrayRead(
        pCsr->pDb, pCell, &aOut[nKLocal + nVLocal], nKOvfl + nVOvfl
    );
  }

  return rc;
}









|







1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
  aOut = (u8*)pCsr->ovfl.buf.p;
  memcpy(aOut, pKLocal, nKLocal);
  memcpy(&aOut[nKLocal], pVLocal, nVLocal);

  /* Load in overflow data */
  if( nKOvfl || nVOvfl ){
    rc = btOverflowArrayRead(
        pCsr->base.pDb, pCell, &aOut[nKLocal + nVLocal], nKOvfl + nVOvfl
    );
  }

  return rc;
}


997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
/*
** Cursor pCsr currently points to a leaf page cell. If the leaf page
** cell contains an overflow array, all overflow pages are trimmed here.
**
** SQLITE4_OK is returned if no error occurs, or an SQLite4 error code
** otherwise.
*/
static int btOverflowDelete(bt_cursor *pCsr){
  BtPager *pPager = pCsr->pDb->pPager;
  const int pgsz = sqlite4BtPagerPagesize(pPager);
  u8 *aData;
  u8 *pCell;
  u8 *pOvfl = 0;
  int iCell = pCsr->aiCell[pCsr->nPg-1];
  int n;
  int rc = SQLITE4_OK;







|
|







1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
/*
** Cursor pCsr currently points to a leaf page cell. If the leaf page
** cell contains an overflow array, all overflow pages are trimmed here.
**
** SQLITE4_OK is returned if no error occurs, or an SQLite4 error code
** otherwise.
*/
static int btOverflowDelete(BtCursor *pCsr){
  BtPager *pPager = pCsr->base.pDb->pPager;
  const int pgsz = sqlite4BtPagerPagesize(pPager);
  u8 *aData;
  u8 *pCell;
  u8 *pOvfl = 0;
  int iCell = pCsr->aiCell[pCsr->nPg-1];
  int n;
  int rc = SQLITE4_OK;
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059


1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085



1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105




1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145



1146
1147
1148
1149
1150
1151
1152
      rc = btOverflowTrimtree(pgsz, pPager, rootpgno, nDepth);
    }
  }

  return rc;
}

/*
** Set (*ppK, *pnK) to point at the key of the entry the cursor refers to.
**
** If the cursor is flagged bRequireReseek, the key is taken from the
** cursor's overflow buffer (pCsr->ovfl). Otherwise the current cell is
** examined: a non-zero leading varint is the key size and the key bytes
** follow it in the page; a zero varint (a "type (c)" leaf cell) means the
** key has to be assembled in pCsr->ovfl by btCsrBuffer().
**
** Returns SQLITE4_OK, or the error code from btCsrBuffer(), in which case
** the output parameters are not written.
*/
int sqlite4BtCsrKey(bt_cursor *pCsr, const void **ppK, int *pnK){
  int rc = SQLITE4_OK;            /* Return code */

  if( pCsr->bRequireReseek==0 ){
    const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);
    const int iLeaf = pCsr->nPg - 1;          /* Index of current page */
    int iCell = pCsr->aiCell[iLeaf];          /* Current cell on that page */
    u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[iLeaf]);
    u8 *pCell;                                /* Read position within cell */
    int nLocal;                               /* Key-size field of the cell */

    assert( btCellCount(aData, pgsz)>iCell );
    pCell = btCellFind(aData, pgsz, iCell);
    pCell += sqlite4BtVarintGet32(pCell, &nLocal);

    if( nLocal!=0 ){
      *ppK = pCell;
      *pnK = nLocal;
    }else{
      /* type (c) leaf cell */
      rc = btCsrBuffer(pCsr, 0);
      if( rc==SQLITE4_OK ){
        *ppK = pCsr->ovfl.buf.p;
        *pnK = pCsr->ovfl.nKey;
      }
    }
  }else{
    *ppK = (const void*)pCsr->ovfl.buf.p;
    *pnK = pCsr->ovfl.nKey;
  }

  return rc;
}

/*
** Set (*ppV, *pnV) to the value of the entry the cursor points to.
**
** The cursor is first re-sought with btCsrReseek(). If the row has been
** deleted out from under the cursor (bSkipNext or bSkipPrev is set), NULL
** and 0 are returned for the data. Otherwise the current cell is decoded:
** when the value-size field is non-zero the value is stored inline and its
** length is (field value - 1); when it is zero (this includes cells whose
** key-size field is zero) the value is assembled in pCsr->ovfl by
** btCsrBuffer().
**
** Parameters iOffset and nByte are accepted but not used by this
** implementation.
**
** The cell index is read only AFTER btCsrReseek() has run. Reading it
** beforehand could use a stale index, or index aiCell[] with -1 if the
** cursor holds no pages at that point.
**
** Returns SQLITE4_OK or an error code from btCsrReseek(), btCsrBuffer() or
** (debug builds only) sqlite4BtCsrKey().
*/
int sqlite4BtCsrData(
  bt_cursor *pCsr,                /* Cursor handle */
  int iOffset,                    /* Offset of requested data */
  int nByte,                      /* Bytes requested (or -ve for all avail.) */
  const void **ppV,               /* OUT: Pointer to data buffer */
  int *pnV                        /* OUT: Size of data buffer in bytes */
){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);
  int rc;

  rc = btCsrReseek(pCsr);
  if( rc==SQLITE4_OK ){
    if( pCsr->bSkipNext || pCsr->bSkipPrev ){
      /* The row has been deleted out from under this cursor. So return
      ** NULL for data.  */
      *ppV = 0;
      *pnV = 0;
    }else{
      int iCell = pCsr->aiCell[pCsr->nPg-1];  /* Read after the reseek */
      u8 *aData;                              /* Page data */
      u8 *pCell;                              /* Read position within cell */
      int nK = 0;                             /* Key-size field */
      int nV = 0;                             /* Value-size field */

      aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
      pCell = btCellFind(aData, pgsz, iCell);
      pCell += sqlite4BtVarintGet32(pCell, &nK);
      if( nK>0 ){
        pCell += nK;
        pCell += sqlite4BtVarintGet32(pCell, &nV);
      }

      if( nV==0 ){
        rc = btCsrBuffer(pCsr, 1);
        if( rc==SQLITE4_OK ){
          u8 *aBuf = (u8*)pCsr->ovfl.buf.p;
          *ppV = &aBuf[pCsr->ovfl.nKey];
          *pnV = pCsr->ovfl.nVal;
        }
      }else{
        *ppV = pCell;
        *pnV = (nV-1);
      }

#ifndef NDEBUG
      if( rc==SQLITE4_OK ){
        const void *pDbgK;
        int nDbgK;
        rc = sqlite4BtCsrKey(pCsr, &pDbgK, &nDbgK);
        if( rc==SQLITE4_OK ){
          BtLock *pLock = (BtLock*)pCsr->pDb->pPager;
          sqlite4BtDebugKV(pLock, "select", (u8*)pDbgK, nDbgK, (u8*)*ppV, *pnV);
        }
      }
#endif
    }
  }

  return rc;
}

/*
** The argument points to a buffer containing an overflow array. Return







|

|
>
>
|
|
|
|
|
|
|
|
|

|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
>
>
>






|





|



<



>
>
>
>
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|


|
|
|
|
|
|
|
|

|
>
>
>







1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191

1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
      rc = btOverflowTrimtree(pgsz, pPager, rootpgno, nDepth);
    }
  }

  return rc;
}

/*
** Set (*ppK, *pnK) to point at the key of the entry the cursor refers to.
**
** For a regular b-tree cursor: if the cursor is flagged bRequireReseek, the
** key is taken from the cursor's overflow buffer (pCsr->ovfl). Otherwise the
** current cell is examined: a non-zero leading varint is the key size and
** the key bytes follow it in the page; a zero varint (a "type (c)" leaf
** cell) means the key has to be assembled in pCsr->ovfl by btCsrBuffer().
**
** Other cursor types are not handled yet and only trip assert(0).
**
** Returns SQLITE4_OK, or the error code from btCsrBuffer(), in which case
** the output parameters are not written.
*/
int sqlite4BtCsrKey(bt_cursor *pBase, const void **ppK, int *pnK){
  int rc = SQLITE4_OK;            /* Return code */

  if( pBase->eType!=CSR_TYPE_BT ){
    assert( 0 );
  }else{
    BtCursor *pCsr = (BtCursor*)pBase;

    if( pCsr->bRequireReseek==0 ){
      const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
      const int iLeaf = pCsr->nPg - 1;        /* Index of current page */
      int iCell = pCsr->aiCell[iLeaf];        /* Current cell on that page */
      u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[iLeaf]);
      u8 *pCell;                              /* Read position within cell */
      int nLocal;                             /* Key-size field of the cell */

      assert( btCellCount(aData, pgsz)>iCell );
      pCell = btCellFind(aData, pgsz, iCell);
      pCell += sqlite4BtVarintGet32(pCell, &nLocal);

      if( nLocal!=0 ){
        *ppK = pCell;
        *pnK = nLocal;
      }else{
        /* type (c) leaf cell */
        rc = btCsrBuffer(pCsr, 0);
        if( rc==SQLITE4_OK ){
          *ppK = pCsr->ovfl.buf.p;
          *pnK = pCsr->ovfl.nKey;
        }
      }
    }else{
      *ppK = (const void*)pCsr->ovfl.buf.p;
      *pnK = pCsr->ovfl.nKey;
    }
  }

  return rc;
}

/*
** Set (*ppV, *pnV) to the value of the entry the cursor points to.
**
** For a regular b-tree cursor, the cursor is first re-sought with
** btCsrReseek(). If the row has been deleted out from under the cursor
** (bSkipNext or bSkipPrev is set), NULL and 0 are returned for the data.
** Otherwise the current cell is decoded: when the value-size field is
** non-zero the value is stored inline and its length is (field value - 1);
** when it is zero (this includes cells whose key-size field is zero) the
** value is assembled in pCsr->ovfl by btCsrBuffer().
**
** Parameters iOffset and nByte are accepted but not used by this
** implementation.
**
** Two details differ from a naive implementation:
**
**   * The cell index is read only AFTER btCsrReseek() has run. Reading it
**     beforehand could use a stale index, or index aiCell[] with -1 if the
**     cursor holds no pages at that point.
**
**   * rc starts out as SQLITE4_OK (as in sqlite4BtCsrKey()) so that the
**     not-yet-implemented branch for other cursor types never returns an
**     indeterminate value when assert() is compiled out. The output
**     parameters are not written on that branch.
**
** Returns SQLITE4_OK or an error code from btCsrReseek(), btCsrBuffer() or
** (debug builds only) sqlite4BtCsrKey().
*/
int sqlite4BtCsrData(
  bt_cursor *pBase,               /* Cursor handle */
  int iOffset,                    /* Offset of requested data */
  int nByte,                      /* Bytes requested (or -ve for all avail.) */
  const void **ppV,               /* OUT: Pointer to data buffer */
  int *pnV                        /* OUT: Size of data buffer in bytes */
){
  const int pgsz = sqlite4BtPagerPagesize(pBase->pDb->pPager);
  int rc = SQLITE4_OK;

  if( pBase->eType==CSR_TYPE_BT ){
    BtCursor *pCsr = (BtCursor*)pBase;

    rc = btCsrReseek(pCsr);
    if( rc==SQLITE4_OK ){
      if( pCsr->bSkipNext || pCsr->bSkipPrev ){
        /* The row has been deleted out from under this cursor. So return
        ** NULL for data.  */
        *ppV = 0;
        *pnV = 0;
      }else{
        int iCell = pCsr->aiCell[pCsr->nPg-1];  /* Read after the reseek */
        u8 *aData;                              /* Page data */
        u8 *pCell;                              /* Read position within cell */
        int nK = 0;                             /* Key-size field */
        int nV = 0;                             /* Value-size field */

        aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
        pCell = btCellFind(aData, pgsz, iCell);
        pCell += sqlite4BtVarintGet32(pCell, &nK);
        if( nK>0 ){
          pCell += nK;
          pCell += sqlite4BtVarintGet32(pCell, &nV);
        }

        if( nV==0 ){
          rc = btCsrBuffer(pCsr, 1);
          if( rc==SQLITE4_OK ){
            u8 *aBuf = (u8*)pCsr->ovfl.buf.p;
            *ppV = &aBuf[pCsr->ovfl.nKey];
            *pnV = pCsr->ovfl.nVal;
          }
        }else{
          *ppV = pCell;
          *pnV = (nV-1);
        }

#ifndef NDEBUG
        if( rc==SQLITE4_OK ){
          const void *pDbgK;
          int nDbgK;
          rc = sqlite4BtCsrKey((bt_cursor*)pCsr, &pDbgK, &nDbgK);
          if( rc==SQLITE4_OK ){
            BtLock *pLock = (BtLock*)pCsr->base.pDb->pPager;
            sqlite4BtDebugKV(
                pLock, "select", (u8*)pDbgK, nDbgK, (u8*)*ppV, *pnV
            );
          }
        }
#endif
      }
    }
  }else{
    /* Other cursor types are not implemented yet */
    assert( 0 );
  }

  return rc;
}

/*
** The argument points to a buffer containing an overflow array. Return
1365
1366
1367
1368
1369
1370
1371




































1372
1373
1374
1375
1376
1377
1378
  nPg = (nContent + pgsz - 1) / pgsz;
  if( nPg<=BT_MAX_DIRECT_OVERFLOW ){
    return 1 + nPg*4;
  }
  return 1 + (BT_MAX_DIRECT_OVERFLOW+1) * 4;
}





































static int btAllocateAndZero(bt_db *db, BtPage **ppPg){
  BtPage *pPg = 0;                /* Allocated page handle */
  int rc;                         /* Return code */

  rc = sqlite4BtPageAllocate(db->pPager, &pPg);
  if( rc==SQLITE4_OK ){
    const int pgsz = sqlite4BtPagerPagesize(db->pPager);







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
  nPg = (nContent + pgsz - 1) / pgsz;
  if( nPg<=BT_MAX_DIRECT_OVERFLOW ){
    return 1 + nPg*4;
  }
  return 1 + (BT_MAX_DIRECT_OVERFLOW+1) * 4;
}


/*
** Allocate a non-overflow page.
**
** This is a thin wrapper around sqlite4BtPageAllocate(). In addition, if
** the allocation succeeds while the database handle is currently in
** fast-insert mode (db->bFastInsertOp), the BtDbHdr.nSubPg counter in the
** database header is incremented. The sqlite4BtPageAllocate() return code
** is passed back.
*/
static int btAllocateNonOverflow(bt_db *db, BtPage **ppPg){
  int rc = sqlite4BtPageAllocate(db->pPager, ppPg);
  if( rc!=SQLITE4_OK ) return rc;

  if( db->bFastInsertOp ){
    sqlite4BtPagerDbhdr(db->pPager)->nSubPg += 1;
  }
  return SQLITE4_OK;
}

/*
** Trim a non-overflow page.
**
** This is a thin wrapper around sqlite4BtPageTrim(). In addition, if the
** trim succeeds while the database handle is currently in fast-insert mode
** (db->bFastInsertOp), the BtDbHdr.nSubPg counter in the database header is
** decremented. The sqlite4BtPageTrim() return code is passed back.
*/
static int btTrimNonOverflow(bt_db *db, BtPage *pPg){
  int rc = sqlite4BtPageTrim(pPg);
  if( rc!=SQLITE4_OK ) return rc;

  if( db->bFastInsertOp ){
    sqlite4BtPagerDbhdr(db->pPager)->nSubPg -= 1;
  }
  return SQLITE4_OK;
}

/*
** Allocate and zero an overflow page.
*/
static int btAllocateAndZero(bt_db *db, BtPage **ppPg){
  BtPage *pPg = 0;                /* Allocated page handle */
  int rc;                         /* Return code */

  rc = sqlite4BtPageAllocate(db->pPager, &pPg);
  if( rc==SQLITE4_OK ){
    const int pgsz = sqlite4BtPagerPagesize(db->pPager);
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
  return rc;
}

typedef struct BalanceCtx BalanceCtx;
struct BalanceCtx {
  int pgsz;                       /* Database page size */
  int bLeaf;                      /* True if we are rebalancing leaf data */
  bt_cursor *pCsr;                /* Cursor identifying where to insert pKV */
  int nKV;                        /* Number of KV pairs */
  KeyValue *apKV;                 /* New KV pairs being inserted */

  /* Populated by btGatherSiblings */
  int nIn;                        /* Number of sibling pages */
  BtPage *apPg[5];                /* Array of sibling pages */








|







1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
  return rc;
}

typedef struct BalanceCtx BalanceCtx;
struct BalanceCtx {
  int pgsz;                       /* Database page size */
  int bLeaf;                      /* True if we are rebalancing leaf data */
  BtCursor *pCsr;                 /* Cursor identifying where to insert pKV */
  int nKV;                        /* Number of KV pairs */
  KeyValue *apKV;                 /* New KV pairs being inserted */

  /* Populated by btGatherSiblings */
  int nIn;                        /* Number of sibling pages */
  BtPage *apPg[5];                /* Array of sibling pages */

1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
  u8 *apOut[5];                   /* Buffers to assemble output in */
  KeyValue aPCell[5];             /* Cells to push into the parent page */
  u8 *pTmp;                       /* Space for apCell[x].pKey if required */
  int iTmp;                       /* Offset to free space within pTmp */
};

static int btGatherSiblings(BalanceCtx *p){
  bt_cursor *pCsr = p->pCsr;
  bt_db * const pDb = pCsr->pDb; 
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);

  int rc = SQLITE4_OK;
  int nCell;                      /* Number of cells in parent page */
  u8 *aParent;                    /* Buffer of parent page */
  int iChild;                     /* Index of child page within parent */
  int nSib;                       /* Number of siblings */







|
|







1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
  u8 *apOut[5];                   /* Buffers to assemble output in */
  KeyValue aPCell[5];             /* Cells to push into the parent page */
  u8 *pTmp;                       /* Space for apCell[x].pKey if required */
  int iTmp;                       /* Offset to free space within pTmp */
};

static int btGatherSiblings(BalanceCtx *p){
  BtCursor *pCsr = p->pCsr;
  bt_db * const pDb = pCsr->base.pDb; 
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);

  int rc = SQLITE4_OK;
  int nCell;                      /* Number of cells in parent page */
  u8 *aParent;                    /* Buffer of parent page */
  int iChild;                     /* Index of child page within parent */
  int nSib;                       /* Number of siblings */
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
    }
  }

  return rc;
}

/* Called recursively by btBalance(). todo: Fix this! */
static int btInsertAndBalance(bt_cursor *, int, KeyValue *);
static int btDeleteFromPage(bt_cursor *, int);
static int btBalanceIfUnderfull(bt_cursor *pCsr);

static int btBalanceMeasure(
  BalanceCtx *p,                  /* Description of balance operation */
  int iCell,                      /* Cell number in this iteration */
  u8 *pCell, int nByte,           /* Binary cell */
  KeyValue *pKV                   /* Key-value cell */
){







|
|
|







1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
    }
  }

  return rc;
}

/* Called recursively by btBalance(). todo: Fix this! */
static int btInsertAndBalance(BtCursor *, int, KeyValue *);
static int btDeleteFromPage(BtCursor *, int);
static int btBalanceIfUnderfull(BtCursor *pCsr);

static int btBalanceMeasure(
  BalanceCtx *p,                  /* Description of balance operation */
  int iCell,                      /* Cell number in this iteration */
  u8 *pCell, int nByte,           /* Binary cell */
  KeyValue *pKV                   /* Key-value cell */
){
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
  return SQLITE4_OK;
}

static int btBalanceVisitCells(
  BalanceCtx *p,
  int (*xVisit)(BalanceCtx*, int, u8*, int, KeyValue*)
){
  const int pgsz = sqlite4BtPagerPagesize(p->pCsr->pDb->pPager);
  int rc = SQLITE4_OK;            /* Return code */
  int iPg;                        /* Current page in apPg[] */
  int iCall = 0;
  int i;                          /* Used to iterate through KV pairs */

  BtPage *pIns = p->pCsr->apPage[p->pCsr->nPg-1];
  int iIns = p->pCsr->aiCell[p->pCsr->nPg-1];







|







1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
  return SQLITE4_OK;
}

static int btBalanceVisitCells(
  BalanceCtx *p,
  int (*xVisit)(BalanceCtx*, int, u8*, int, KeyValue*)
){
  const int pgsz = sqlite4BtPagerPagesize(p->pCsr->base.pDb->pPager);
  int rc = SQLITE4_OK;            /* Return code */
  int iPg;                        /* Current page in apPg[] */
  int iCall = 0;
  int i;                          /* Used to iterate through KV pairs */

  BtPage *pIns = p->pCsr->apPage[p->pCsr->nPg-1];
  int iIns = p->pCsr->aiCell[p->pCsr->nPg-1];
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
    pKV->pK = aRight;
    pKV->nK = i + 1;
    assert( pKV->nK<=nRight );
  }
}

int btBalance(
  bt_cursor *pCsr,                /* Cursor pointed to page to rebalance */
  int bLeaf,                      /* True if rebalancing leaf pages */
  int nKV,                        /* Number of entries in apKV[] array */
  KeyValue *apKV                  /* Extra entries to add while rebalancing */
){
  bt_db * const pDb = pCsr->pDb; 
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  const int nSpacePerPage = (pgsz - 1 - 6 - (!bLeaf)*4);

  int iPg;                        /* Used to iterate through pages */
  int iCell;                      /* Used to iterate through cells */

  int anByteOut[5];               /* Bytes of content on each output page */







|




|







2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
    pKV->pK = aRight;
    pKV->nK = i + 1;
    assert( pKV->nK<=nRight );
  }
}

int btBalance(
  BtCursor *pCsr,                 /* Cursor pointed to page to rebalance */
  int bLeaf,                      /* True if rebalancing leaf pages */
  int nKV,                        /* Number of entries in apKV[] array */
  KeyValue *apKV                  /* Extra entries to add while rebalancing */
){
  bt_db * const pDb = pCsr->base.pDb; 
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  const int nSpacePerPage = (pgsz - 1 - 6 - (!bLeaf)*4);

  int iPg;                        /* Used to iterate through pages */
  int iCell;                      /* Used to iterate through cells */

  int anByteOut[5];               /* Bytes of content on each output page */
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
    u8 *aRightSibling = sqlite4BtPageData(ctx.apPg[ctx.nIn-1]);
    memcpy(&(ctx.apOut[ctx.nOut-1])[1], &aRightSibling[1], 4);
  }

  /* Clobber the old pages with the new buffers */
  for(iPg=0; iPg<ctx.nOut; iPg++){
    if( iPg>=ctx.nIn ){
      rc = sqlite4BtPageAllocate(pDb->pPager, &ctx.apPg[iPg]);
      if( rc!=SQLITE4_OK ) goto rebalance_out;
    }
    btSetBuffer(pDb, ctx.apPg[iPg], ctx.apOut[iPg]);
    ctx.apOut[iPg] = 0;
  }
  for(iPg=ctx.nOut; iPg<ctx.nIn; iPg++){
    rc = sqlite4BtPageTrim(ctx.apPg[iPg]);
    ctx.apPg[iPg] = 0;
    if( rc!=SQLITE4_OK ) goto rebalance_out;
  }

#ifdef BT_STDERR_DEBUG
  {
    int iDbg;







|






|







2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
    u8 *aRightSibling = sqlite4BtPageData(ctx.apPg[ctx.nIn-1]);
    memcpy(&(ctx.apOut[ctx.nOut-1])[1], &aRightSibling[1], 4);
  }

  /* Clobber the old pages with the new buffers */
  for(iPg=0; iPg<ctx.nOut; iPg++){
    if( iPg>=ctx.nIn ){
      rc = btAllocateNonOverflow(pDb, &ctx.apPg[iPg]);
      if( rc!=SQLITE4_OK ) goto rebalance_out;
    }
    btSetBuffer(pDb, ctx.apPg[iPg], ctx.apOut[iPg]);
    ctx.apOut[iPg] = 0;
  }
  for(iPg=ctx.nOut; iPg<ctx.nIn; iPg++){
    rc = btTrimNonOverflow(pDb, ctx.apPg[iPg]);
    ctx.apPg[iPg] = 0;
    if( rc!=SQLITE4_OK ) goto rebalance_out;
  }

#ifdef BT_STDERR_DEBUG
  {
    int iDbg;
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
    sqlite4BtPageRelease(ctx.apPg[iPg]);
  }
  btFreeBuffer(pDb, ctx.pTmp);
  sqlite4_free(pDb->pEnv, ctx.anCellSz);
  return rc;
}

static int btExtendTree(bt_cursor *pCsr){
  bt_db * const pDb = pCsr->pDb;
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  int rc;                         /* Return code */
  BtPage *pNew;                   /* New (and only) child of root page */
  BtPage *pRoot = pCsr->apPage[0];

  assert( pCsr->nPg==1 );

  rc = sqlite4BtPageWrite(pRoot);
  if( rc==SQLITE4_OK ){
    rc = sqlite4BtPageAllocate(pDb->pPager, &pNew);
  }
  if( rc==SQLITE4_OK ){
    u8 *aRoot = sqlite4BtPageData(pRoot);
    u8 *aData = sqlite4BtPageData(pNew);

    memcpy(aData, aRoot, pgsz);
    aRoot[0] = BT_PGFLAGS_INTERNAL;







|
|









|







2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
    sqlite4BtPageRelease(ctx.apPg[iPg]);
  }
  btFreeBuffer(pDb, ctx.pTmp);
  sqlite4_free(pDb->pEnv, ctx.anCellSz);
  return rc;
}

static int btExtendTree(BtCursor *pCsr){
  bt_db * const pDb = pCsr->base.pDb;
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  int rc;                         /* Return code */
  BtPage *pNew;                   /* New (and only) child of root page */
  BtPage *pRoot = pCsr->apPage[0];

  assert( pCsr->nPg==1 );

  rc = sqlite4BtPageWrite(pRoot);
  if( rc==SQLITE4_OK ){
    rc = btAllocateNonOverflow(pDb, &pNew);
  }
  if( rc==SQLITE4_OK ){
    u8 *aRoot = sqlite4BtPageData(pRoot);
    u8 *aData = sqlite4BtPageData(pNew);

    memcpy(aData, aRoot, pgsz);
    aRoot[0] = BT_PGFLAGS_INTERNAL;
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
**
**     * nKV entries are inserted in their place.
**
** The tree balancing routine is called if this causes the page to
** become either overfull or to contain no entries at all.
*/
static int btInsertAndBalance(
  bt_cursor *pCsr,                /* Cursor identifying page to modify */
  int nKV,                        /* Number of entries in apKV */
  KeyValue *apKV                  /* New cells to insert into the page */
){
  int rc = SQLITE4_OK;
  const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);
  u8 *aData;                      /* Page buffer */
  int nCell;                      /* Number of cells on this page already */
  int nFree;                      /* Contiguous free space on this page */
  int nReq = 0;                   /* Space required for type (a) cells */
  int iCell;                      /* Position to insert new key */
  int iWrite;                     /* Byte offset at which to write new cell */
  int i;







|




|







2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
**
**     * nKV entries are inserted in their place.
**
** The tree balancing routine is called if this causes the page to
** become either overfull or to contain no entries at all.
*/
static int btInsertAndBalance(
  BtCursor *pCsr,                 /* Cursor identifying page to modify */
  int nKV,                        /* Number of entries in apKV */
  KeyValue *apKV                  /* New cells to insert into the page */
){
  int rc = SQLITE4_OK;
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  u8 *aData;                      /* Page buffer */
  int nCell;                      /* Number of cells on this page already */
  int nFree;                      /* Contiguous free space on this page */
  int nReq = 0;                   /* Space required for type (a) cells */
  int iCell;                      /* Position to insert new key */
  int iWrite;                     /* Byte offset at which to write new cell */
  int i;
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
    iWrite = (bLeaf ? 1 : 5);
    nFree = pgsz - iWrite - 6;
  }else{
    if( btFreeContiguous(aData, pgsz)<nReq && btFreeSpace(aData, pgsz)>=nReq ){
      /* Special case - the new entry will not fit on the page at present
      ** but would if the page were defragmented. So defragment it before
      ** continuing.  */
      rc = btDefragmentPage(pCsr->pDb, pLeaf);
      aData = sqlite4BtPageData(pLeaf);
    }

    iWrite = btFreeOffset(aData, pgsz);
    nFree = btFreeContiguous(aData, pgsz);
  }








|







2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
    iWrite = (bLeaf ? 1 : 5);
    nFree = pgsz - iWrite - 6;
  }else{
    if( btFreeContiguous(aData, pgsz)<nReq && btFreeSpace(aData, pgsz)>=nReq ){
      /* Special case - the new entry will not fit on the page at present
      ** but would if the page were defragmented. So defragment it before
      ** continuing.  */
      rc = btDefragmentPage(pCsr->base.pDb, pLeaf);
      aData = sqlite4BtPageData(pLeaf);
    }

    iWrite = btFreeOffset(aData, pgsz);
    nFree = btFreeContiguous(aData, pgsz);
  }

2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
      rc = btBalance(pCsr, bLeaf, nKV, apKV);
    }
  }

  return rc;
}

static int btDeleteFromPage(bt_cursor *pCsr, int nDel){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);
  int rc = SQLITE4_OK;            /* Return code */
  BtPage *pPg;                    /* Page to delete entries from */

  pPg = pCsr->apPage[pCsr->nPg-1];
  rc = sqlite4BtPageWrite(pPg);
  if( rc==SQLITE4_OK ){
    int i;                        /* Used to iterate through cells to delete */







|
|







2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
      rc = btBalance(pCsr, bLeaf, nKV, apKV);
    }
  }

  return rc;
}

static int btDeleteFromPage(BtCursor *pCsr, int nDel){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  int rc = SQLITE4_OK;            /* Return code */
  BtPage *pPg;                    /* Page to delete entries from */

  pPg = pCsr->apPage[pCsr->nPg-1];
  rc = sqlite4BtPageWrite(pPg);
  if( rc==SQLITE4_OK ){
    int i;                        /* Used to iterate through cells to delete */
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411

2412
2413


2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
    /* Increase total free space */
    btPutU16(&aData[pgsz-4], btFreeSpace(aData, pgsz) + nFreed);
  }
  
  return rc;
}

/*
** Check whether the page at the end of the cursor's page array
** (pCsr->apPage[pCsr->nPg-1]) has become underfull and, if so, take
** corrective action:
**
**   * If the page is the root (index 0 in apPage[]), is an internal page
**     and contains no cells, the contents of its only child are copied
**     into the root and the child page is trimmed.
**
**   * If the page is not the root, btBalance() is invoked when the page
**     contains no cells at all, or when it is an internal page with more
**     than two-thirds of the page size reported as free space.
**
** Returns SQLITE4_OK if nothing needed doing or the operation succeeded,
** otherwise the error code returned by the failing call.
*/
static int btBalanceIfUnderfull(bt_cursor *pCsr){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager);
  int rc = SQLITE4_OK;
  int iPg = pCsr->nPg-1;                /* Index of current page in apPage[] */
  BtPage *pPg = pCsr->apPage[iPg];      /* Page to inspect */
  u8 *aData = sqlite4BtPageData(pPg);   /* Buffer of page pPg */
  int nCell = btCellCount(aData, pgsz); /* Number of cells on the page */
  int nFree = btFreeSpace(aData, pgsz); /* Total free space on the page */
  int bLeaf = (0==(btFlags(aData) & BT_PGFLAGS_INTERNAL));

  if( iPg==0 ){
    /* Root page. If it contains no cells at all and is not already
    ** a leaf, shorten the tree by one here by copying the contents 
    ** of the only child into the root. */
    if( nCell==0 && bLeaf==0 ){
      BtPager *pPager = pCsr->pDb->pPager;
      u32 pgno = btChildPgno(aData, pgsz, 0);   /* Child at index 0 */
      BtPage *pChild;

      /* The root must be made writable before it is overwritten. */
      rc = sqlite4BtPageWrite(pPg);
      if( rc==SQLITE4_OK ){
        rc = sqlite4BtPageGet(pPager, pgno, &pChild);
      }
      if( rc==SQLITE4_OK ){
        u8 *a = sqlite4BtPageData(pChild);
        memcpy(aData, a, pgsz);
        /* NOTE(review): pChild is not released here - presumably
        ** sqlite4BtPageTrim() consumes the reference. Confirm. */
        rc = sqlite4BtPageTrim(pChild);
      }
    }
  }else if( nCell==0 || (nFree>(2*pgsz/3) && bLeaf==0) ){
    /* No new entries are supplied; this is a pure rebalance. */
    rc = btBalance(pCsr, bLeaf, 0, 0);
  }
  return rc;
}

/*
** Visit every cursor in the pDb->pAllCsr list. For each one that currently
** holds at least one page reference (nPg>0), call btCsrBuffer() on it,
** set its bRequireReseek flag and - unless it is the cursor passed as
** pCsr - release all of its page references.
**
** Iteration stops at the first error, which is returned. SQLITE4_OK is
** returned if all cursors were processed successfully.
*/
static int btSaveAllCursor(bt_db *pDb, bt_cursor *pCsr){
  int rc = SQLITE4_OK;                  /* Return code */
  bt_cursor *pIter = pDb->pAllCsr;      /* Current cursor in the list */

  while( rc==SQLITE4_OK && pIter ){
    if( pIter->nPg>0 ){
      assert( pIter->bRequireReseek==0 );
      rc = btCsrBuffer(pIter, 0);
      if( rc==SQLITE4_OK ){
        assert( pIter->ovfl.buf.p );
        pIter->bRequireReseek = 1;
        if( pIter!=pCsr ) btCsrReleaseAll(pIter);
      }
    }
    pIter = pIter->pNextCsr;
  }

  return rc;
}

/*
** Insert a new key/value pair or replace an existing one.

*/
int sqlite4BtReplace(bt_db *db, const void *pK, int nK, const void *pV, int nV){


  int rc = SQLITE4_OK;
  bt_cursor csr;

  rc = btSaveAllCursor(db, 0);
  assert( rc!=SQLITE4_NOTFOUND && rc!=SQLITE4_INEXACT );
  if( rc==SQLITE4_OK ){
    sqlite4BtDebugKV((BtLock*)db->pPager, "replace", (u8*)pK, nK, (u8*)pV, nV);
    btCheckPageRefs(db);
    btCsrSetup(db, &csr);
    rc = btCsrSeek(&csr, pK, nK, BT_SEEK_GE, BT_CSRSEEK_UPDATE);
  }

  if( rc==SQLITE4_OK ){
    /* The cursor currently points to an entry with key pK/nK. This call
    ** should therefore replace that entry. So delete it and then re-seek
    ** the cursor.  */
    rc = sqlite4BtDelete(&csr);

    if( rc==SQLITE4_OK && nV>=0 ){
      rc = btCsrSeek(&csr, pK, nK, BT_SEEK_GE, BT_CSRSEEK_UPDATE);
      if( rc==SQLITE4_OK ) rc = btErrorBkpt(SQLITE4_CORRUPT);
    }
  }








|
|













|










|








|

|
















<
|
>
|
|
>
>
|
|

|
<
<
|
<
|
|
<





|







2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541

2542
2543
2544
2545
2546
2547
2548
2549
2550
2551


2552

2553
2554

2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
    /* Increase total free space */
    btPutU16(&aData[pgsz-4], btFreeSpace(aData, pgsz) + nFreed);
  }
  
  return rc;
}

/*
** Check whether the page at the end of the cursor's page array
** (pCsr->apPage[pCsr->nPg-1]) has become underfull and, if so, take
** corrective action:
**
**   * If the page is the root (index 0 in apPage[]), is an internal page
**     and contains no cells, the contents of its only child are copied
**     into the root and the child page is trimmed using
**     btTrimNonOverflow().
**
**   * If the page is not the root, btBalance() is invoked when the page
**     contains no cells at all, or when it is an internal page with more
**     than two-thirds of the page size reported as free space.
**
** Returns SQLITE4_OK if nothing needed doing or the operation succeeded,
** otherwise the error code returned by the failing call.
*/
static int btBalanceIfUnderfull(BtCursor *pCsr){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  int rc = SQLITE4_OK;
  int iPg = pCsr->nPg-1;                /* Index of current page in apPage[] */
  BtPage *pPg = pCsr->apPage[iPg];      /* Page to inspect */
  u8 *aData = sqlite4BtPageData(pPg);   /* Buffer of page pPg */
  int nCell = btCellCount(aData, pgsz); /* Number of cells on the page */
  int nFree = btFreeSpace(aData, pgsz); /* Total free space on the page */
  int bLeaf = (0==(btFlags(aData) & BT_PGFLAGS_INTERNAL));

  if( iPg==0 ){
    /* Root page. If it contains no cells at all and is not already
    ** a leaf, shorten the tree by one here by copying the contents 
    ** of the only child into the root. */
    if( nCell==0 && bLeaf==0 ){
      BtPager *pPager = pCsr->base.pDb->pPager;
      u32 pgno = btChildPgno(aData, pgsz, 0);   /* Child at index 0 */
      BtPage *pChild;

      /* The root must be made writable before it is overwritten. */
      rc = sqlite4BtPageWrite(pPg);
      if( rc==SQLITE4_OK ){
        rc = sqlite4BtPageGet(pPager, pgno, &pChild);
      }
      if( rc==SQLITE4_OK ){
        u8 *a = sqlite4BtPageData(pChild);
        memcpy(aData, a, pgsz);
        /* NOTE(review): pChild is not released here - presumably the
        ** trim operation consumes the reference. Confirm. */
        rc = btTrimNonOverflow(pCsr->base.pDb, pChild);
      }
    }
  }else if( nCell==0 || (nFree>(2*pgsz/3) && bLeaf==0) ){
    /* No new entries are supplied; this is a pure rebalance. */
    rc = btBalance(pCsr, bLeaf, 0, 0);
  }
  return rc;
}

/*
** Visit every cursor in the pDb->pAllCsr list. For each one that currently
** holds at least one page reference (nPg>0), call btCsrBuffer() on it,
** set its bRequireReseek flag and - unless it is the cursor passed as
** pCsr - release all of its page references.
**
** Iteration stops at the first error, which is returned. SQLITE4_OK is
** returned if all cursors were processed successfully.
*/
static int btSaveAllCursor(bt_db *pDb, BtCursor *pCsr){
  int rc = SQLITE4_OK;                  /* Return code */
  BtCursor *pIter = pDb->pAllCsr;       /* Current cursor in the list */

  while( rc==SQLITE4_OK && pIter ){
    if( pIter->nPg>0 ){
      assert( pIter->bRequireReseek==0 );
      rc = btCsrBuffer(pIter, 0);
      if( rc==SQLITE4_OK ){
        assert( pIter->ovfl.buf.p );
        pIter->bRequireReseek = 1;
        if( pIter!=pCsr ) btCsrReleaseAll(pIter);
      }
    }
    pIter = pIter->pNextCsr;
  }

  return rc;
}


static int btReplace(
  bt_db *db,                      /* Database handle */
  u32 iRoot,                      /* Root page of b-tree to update */
  const void *pK, int nK,         /* Key to insert */
  const void *pV, int nV          /* Value to insert. (nV<0) -> delete */
){
  int rc;                         /* Return code */
  BtCursor csr;                  /* Cursor object to seek to insert point */

  /* Seek stack cursor csr to the b-tree page that key pK/nK is/would be


  ** stored on.  */

  btCsrSetup(db, iRoot, &csr);
  rc = btCsrSeek(&csr, pK, nK, BT_SEEK_GE, BT_CSRSEEK_UPDATE);


  if( rc==SQLITE4_OK ){
    /* The cursor currently points to an entry with key pK/nK. This call
    ** should therefore replace that entry. So delete it and then re-seek
    ** the cursor.  */
    rc = sqlite4BtDelete(&csr.base);

    if( rc==SQLITE4_OK && nV>=0 ){
      rc = btCsrSeek(&csr, pK, nK, BT_SEEK_GE, BT_CSRSEEK_UPDATE);
      if( rc==SQLITE4_OK ) rc = btErrorBkpt(SQLITE4_CORRUPT);
    }
  }

2448
2449
2450
2451
2452
2453
2454


























































































































2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467



2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482



2483
2484
2485
2486
2487
2488
2489
      rc = btInsertAndBalance(&csr, 1, &kv);
    }
    if( kv.eType==KV_CELL ){
      sqlite4_free(db->pEnv, (void*)kv.pV);
    }
  }
  btCsrReset(&csr, 1);



























































































































  btCheckPageRefs(db);
  db->bFastInsertOp = 0;
  return rc;
}


/*
** Delete the entry that the cursor currently points to.
**
** The steps below are executed in order, each one only if all previous
** steps returned SQLITE4_OK:
**
**   1. btCsrReseek() is called on the cursor.
**   2. All open cursors are saved with btSaveAllCursor() (pCsr keeps its
**      page references).
**   3. btOverflowDelete() is called for the current entry.
**   4. One cell is removed from the current page.
**   5. The tree is rebalanced if the page is now underfull.
**
** The cursor's page references are released before returning, whether
** or not an error occurred. Returns SQLITE4_OK or the first error code
** encountered.
*/
int sqlite4BtDelete(bt_cursor *pCsr){
  int rc;




  rc = btCsrReseek(pCsr);
  if( rc==SQLITE4_OK ){
    rc = btSaveAllCursor(pCsr->pDb, pCsr);
  }
  if( rc==SQLITE4_OK ){
    rc = btOverflowDelete(pCsr);
  }
  if( rc==SQLITE4_OK ){
    rc =  btDeleteFromPage(pCsr, 1);
  }
  if( rc==SQLITE4_OK ){
    rc = btBalanceIfUnderfull(pCsr);
  }

  btCsrReleaseAll(pCsr);



  return rc;
}

/*
** Set the database cookie to iVal. This is a thin wrapper that forwards
** the request to the pager via sqlite4BtPagerSetCookie() and returns
** its result unchanged.
*/
int sqlite4BtSetCookie(bt_db *db, unsigned int iVal){
  return sqlite4BtPagerSetCookie(db->pPager, iVal);
}








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










|


>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|

|
>
>
>







2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
      rc = btInsertAndBalance(&csr, 1, &kv);
    }
    if( kv.eType==KV_CELL ){
      sqlite4_free(db->pEnv, (void*)kv.pV);
    }
  }
  btCsrReset(&csr, 1);

  return rc;
}

/*
** Allocate a new non-overflow page using btAllocateNonOverflow() and
** write its page number to *piNew. The page reference is released
** before returning.
**
** If the allocation fails, *piNew is set to 0 and the error code is
** returned. Otherwise SQLITE4_OK is returned.
*/
static int btAllocateNewRoot(bt_db *db, u32 *piNew){
  BtPage *pNewPg;                 /* Handle of the freshly allocated page */
  u32 pgno;                       /* Page number of pNewPg */
  int rc;                         /* Return code */

  rc = btAllocateNonOverflow(db, &pNewPg);
  if( rc!=SQLITE4_OK ){
    *piNew = 0;
    return rc;
  }

  pgno = sqlite4BtPagePgno(pNewPg);
  sqlite4BtPageRelease(pNewPg);
  *piNew = pgno;
  return SQLITE4_OK;
}

/*
** Determine the largest level number currently stored in the tree rooted
** at pHdr->iMRoot and write it to *piLevel.
**
** A cursor is opened on that tree and moved with btCsrEnd(&csr, 1). The
** first four bytes of the key it lands on are decoded with btGetU32()
** and used as the level. If the tree contains no entries (btCsrEnd()
** returns SQLITE4_NOTFOUND), *piLevel is set to 0 and SQLITE4_OK is
** returned.
**
** If the key found is shorter than four bytes it cannot hold a level
** number; SQLITE4_CORRUPT is returned rather than reading past the end
** of the key. On any error *piLevel is left unmodified.
*/
static int btFastInsertMaxLevel(
  bt_db *db,
  BtDbHdr *pHdr,
  u32 *piLevel
){
  int rc;                         /* Return code */
  BtCursor csr;                   /* Cursor open on the tree at iMRoot */

  btCsrSetup(db, pHdr->iMRoot, &csr);
  rc = btCsrEnd(&csr, 1);
  assert( rc!=SQLITE4_INEXACT );
  if( rc==SQLITE4_OK ){
    u8 *aK = 0;                   /* Key the cursor points to */
    int nK = 0;                   /* Size of aK[] in bytes */

    rc = sqlite4BtCsrKey(&csr.base, (const void**)&aK, &nK);
    if( rc==SQLITE4_OK ){
      if( aK==0 || nK<4 ){
        /* Too short to contain a 32-bit level number. */
        rc = btErrorBkpt(SQLITE4_CORRUPT);
      }else{
        *piLevel = btGetU32(&aK[0]);
      }
    }
  }else if( rc==SQLITE4_NOTFOUND ){
    /* Empty tree - not an error. */
    rc = SQLITE4_OK;
    *piLevel = 0;
  }
  btCsrReset(&csr, 1);

  return rc;
}

/*
** Determine the root page of the sub-tree that a fast-insert operation
** should write to and store it in *piRoot.
**
** The function proceeds as follows:
**
**   1. If the database header has no iMRoot page (iMRoot==0), one is
**      allocated with btAllocateNewRoot().
**
**   2. The current sub-tree root is read from pHdr->iSubRoot. It is
**      discarded (treated as 0) if pHdr->nSubPg has reached
**      (BT_DEFAULT_BLKSZ / pHdr->pgsz) pages.
**
**   3. If there is no usable sub-tree root, a new root page is allocated,
**      recorded in pHdr->iSubRoot, pHdr->nSubPg is reset to 0, and an
**      entry describing the new sub-tree is inserted into the tree rooted
**      at pHdr->iMRoot. The entry's key is the 4-byte value
**      (max-existing-level + 1).
**
** *piRoot is always written, even when an error code is returned.
** Returns SQLITE4_OK on success or an error code from one of the helper
** calls.
*/
static int btFastInsertRoot(
  bt_db *db, 
  BtDbHdr *pHdr, 
  u32 *piRoot
){
  int rc = SQLITE4_OK;
  u32 iSubRoot = 0;

  if( pHdr->iMRoot==0 ){
    rc = btAllocateNewRoot(db, &pHdr->iMRoot);
  }
  iSubRoot = pHdr->iSubRoot;

  /* If the current writable sub-tree is full, start a new one. */
  if( pHdr->nSubPg >= (BT_DEFAULT_BLKSZ / pHdr->pgsz) ){
    iSubRoot = 0;
  }

  /* If no writable sub-tree has been discovered, create one now. */
  if( iSubRoot==0 ){
    u32 iMaxLevel = 0;            /* Largest level found by the helper */

    u8 aKey[4];                   /* Key for the new iMRoot-tree entry */
    u8 aVal[8];                   /* Value for the new iMRoot-tree entry */

    /* Each step is skipped if an earlier one (including the iMRoot
    ** allocation above) failed. */
    if( rc==SQLITE4_OK ){
      rc = btFastInsertMaxLevel(db, pHdr, &iMaxLevel);
    }
    if( rc==SQLITE4_OK ){
      rc = btAllocateNewRoot(db, &iSubRoot);
    }
    if( rc==SQLITE4_OK ){
      pHdr->iSubRoot = iSubRoot;
      pHdr->nSubPg = 0;

      /* Key: new level number. Value: root page number followed by the
      ** constant 1. NOTE(review): the meaning of the second value field
      ** is not visible here - confirm against the format description. */
      btPutU32(aKey, iMaxLevel+1);
      btPutU32(&aVal[0], iSubRoot);
      btPutU32(&aVal[4], 1);
      rc = btReplace(db, pHdr->iMRoot, aKey, 4, aVal, 8);
    }
  }

  *piRoot = iSubRoot;
  return rc;
}

/*
** Insert a new key/value pair or replace an existing one.
**
** This function may modify either the b-tree or fast-insert-tree, depending
** on whether or not the db->bFastInsertOp flag is set. If the flag is set,
** the root page to write to is obtained from btFastInsertRoot(); otherwise
** the iRoot field of the database header is used. The actual update is
** performed by btReplace().
**
** The db->bFastInsertOp flag is always cleared before returning, whether
** or not the operation succeeded. Returns SQLITE4_OK on success or an
** error code otherwise.
*/
int sqlite4BtReplace(bt_db *db, const void *pK, int nK, const void *pV, int nV){
  int rc = SQLITE4_OK;

  /* Debugging output. */
  sqlite4BtDebugKV((BtLock*)db->pPager, "replace", (u8*)pK, nK, (u8*)pV, nV);

  /* Save the position of any open cursors */
  rc = btSaveAllCursor(db, 0);
  assert( rc!=SQLITE4_NOTFOUND && rc!=SQLITE4_INEXACT );
  btCheckPageRefs(db);

  /* Select the root page of the tree to update, then hand the key and
  ** value to btReplace() to carry out the write.  */
  if( rc==SQLITE4_OK ){
    BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
    u32 iRoot;
    if( db->bFastInsertOp ){
      rc = btFastInsertRoot(db, pHdr, &iRoot);
    }else{
      iRoot = pHdr->iRoot;
    }
    if( rc==SQLITE4_OK ){
      rc = btReplace(db, iRoot, pK, nK, pV, nV);
    }
  }

  btCheckPageRefs(db);
  /* The fast-insert flag applies to a single operation only. */
  db->bFastInsertOp = 0;
  return rc;
}


/*
** Delete the entry that the cursor currently points to.
**
** Only cursors whose eType is CSR_TYPE_BT are supported. For any other
** cursor type nothing is done and the result of
** btErrorBkpt(SQLITE4_MISUSE) is returned.
**
** For a supported cursor the following steps run in order, each only if
** all earlier ones returned SQLITE4_OK: reseek the cursor, save all open
** cursors, delete any overflow data, remove one cell from the current
** page, and rebalance if the page is now underfull. The cursor's page
** references are released before returning in all cases.
*/
int sqlite4BtDelete(bt_cursor *pBase){
  BtCursor *pCsr;                 /* pBase cast to its concrete type */
  int rc;                         /* Return code */

  if( pBase->eType!=CSR_TYPE_BT ){
    return btErrorBkpt(SQLITE4_MISUSE);
  }
  pCsr = (BtCursor*)pBase;

  rc = btCsrReseek(pCsr);
  if( rc==SQLITE4_OK ) rc = btSaveAllCursor(pBase->pDb, pCsr);
  if( rc==SQLITE4_OK ) rc = btOverflowDelete(pCsr);
  if( rc==SQLITE4_OK ) rc = btDeleteFromPage(pCsr, 1);
  if( rc==SQLITE4_OK ) rc = btBalanceIfUnderfull(pCsr);

  btCsrReleaseAll(pCsr);
  return rc;
}

/*
** Set the database cookie to iVal. The request is forwarded to the pager
** via sqlite4BtPagerSetCookie() and its return code is passed back to
** the caller unchanged.
*/
int sqlite4BtSetCookie(bt_db *db, unsigned int iVal){
  BtPager *pPager = db->pPager;
  return sqlite4BtPagerSetCookie(pPager, iVal);
}

Changes to src/bt_pager.c.
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
  /* assert( p->iTransactionLevel>=1 && p->btl.pFd ); */
  return (int)p->pHdr->pgsz;
}

/*
** Return the page number of the root page. In this version the value is
** the fixed constant 2. The caller is required to hold an open read
** transaction (checked by assert() only).
*/
u32 sqlite4BtPagerRootpgno(BtPager *p){
  const u32 iRootPgno = 2;        /* Root page number is hard-coded */
  assert( p->iTransactionLevel>=1 && p->btl.pFd );
  return iRootPgno;
}

/*
** Request a reference to page pgno of the database.
*/
int sqlite4BtPageGet(BtPager *p, u32 pgno, BtPage **ppPg){
  int rc = SQLITE4_OK;            /* Return code */







|
<
|







757
758
759
760
761
762
763
764

765
766
767
768
769
770
771
772
  /* assert( p->iTransactionLevel>=1 && p->btl.pFd ); */
  return (int)p->pHdr->pgsz;
}

/* 
** Return a pointer to the database header object (BtDbHdr) held by the
** pager. The pointer refers to the pager's own copy (p->pHdr), so fields
** modified through it are modified in place.
*/
BtDbHdr *sqlite4BtPagerDbhdr(BtPager *p){

  return p->pHdr;
}

/*
** Request a reference to page pgno of the database.
*/
int sqlite4BtPageGet(BtPager *p, u32 pgno, BtPage **ppPg){
  int rc = SQLITE4_OK;            /* Return code */
1089
1090
1091
1092
1093
1094
1095
1096

1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
** or SQLITE4_OK otherwise.
*/
int sqlite4BtPagerHdrdump(BtPager *pPager, sqlite4_buffer *pBuf){
  BtDbHdr *pHdr = pPager->pHdr;   /* Database header to describe */
  int rc = SQLITE4_OK;            /* Never modified in this function */

  /* Append a single run of space-separated "name=value" pairs to pBuf,
  ** one for each of the six header fields listed below. */
  sqlite4BtBufAppendf(pBuf, 
      "pgsz=%d nPg=%d iRoot=%d"

      " iCookie=%d iFreePg=%d iFreeBlk=%d",
      pHdr->pgsz, pHdr->nPg, pHdr->iRoot,
      pHdr->iCookie, pHdr->iFreePg, pHdr->iFreeBlk
  );

  return rc;
}

#ifndef NDEBUG
/*
** Return the current value of the pager's nTotalRef counter. This
** function is only compiled when NDEBUG is not defined.
*/
int sqlite4BtPagerRefcount(BtPager *p){
  return p->nTotalRef;
}
#endif








|
>

|












1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
** or SQLITE4_OK otherwise.
*/
int sqlite4BtPagerHdrdump(BtPager *pPager, sqlite4_buffer *pBuf){
  const BtDbHdr *pHdr = pPager->pHdr;   /* Database header to describe */

  /* Append a single run of space-separated "name=value" pairs to pBuf,
  ** one for each header field passed below. */
  sqlite4BtBufAppendf(pBuf, 
      "pgsz=%d nPg=%d"
      " iRoot=%d iMRoot=%d iSRoot=%d"
      " iCookie=%d iFreePg=%d iFreeBlk=%d",
      pHdr->pgsz, pHdr->nPg, 
      pHdr->iRoot, pHdr->iMRoot, pHdr->iSRoot,
      pHdr->iCookie, pHdr->iFreePg, pHdr->iFreeBlk
  );

  /* This function has no failure path of its own. */
  return SQLITE4_OK;
}

#ifndef NDEBUG
/*
** Return the current value of the pager's nTotalRef counter. This
** function is only compiled when NDEBUG is not defined.
*/
int sqlite4BtPagerRefcount(BtPager *p){
  return p->nTotalRef;
}
#endif