Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Performance tweaks for seek operations.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 18ae7f98554bc11041ded8879d62fc7dba4cdab8
User & Date: dan 2014-02-21 17:36:30.025
Original Comment: :)
Context
2014-02-22
19:54
Add other bt optimizations. Fix a problem in mutex_noop.c. check-in: 1ecbf355e3 user: dan tags: trunk
2014-02-21
17:36
Performance tweaks for seek operations. check-in: 18ae7f9855 user: dan tags: trunk
2014-02-19
11:28
Fix a couple of problems related to log recovery and checkpointing. check-in: e64f3ba5f0 user: dan tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to lsm-test/sqltest.c.
242
243
244
245
246
247
248








249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274

  return iRet;
}
/*
** End of integer query implementations.
*************************************************************************/









static int do_insert1_test4(
  const char *zFile,
  int nRow,                       /* Number of rows to insert in total */
  int nRowPerTrans,               /* Number of rows per transaction */
  int nIdx,                       /* Number of aux indexes (aside from PK) */
  int iSync                       /* PRAGMA synchronous value (0, 1 or 2) */
){
  char *zCreateTbl;               /* Create table statement */
  char *zInsert;                  /* INSERT statement */
  sqlite4_stmt *pInsert;          /* Compiled INSERT statement */
  sqlite4 *db = 0;                /* Database handle */
  int i;                          /* Counter to count nRow rows */
  int nMs;                        /* Test time in ms */

  lsm_db *pLsm;

  if( zFile==0 ) zFile = SQLITE4_DB_FILE;
  unlink_db(zFile);
  EXPLODE(  sqlite4_open(0, zFile, &db)  );
  sqlite4_kvstore_control(db, "main", SQLITE4_KVCTRL_LSM_HANDLE, &pLsm);
  i = iSync;
  lsm_config(pLsm, LSM_CONFIG_SAFETY, &i);
  assert( i==iSync );

  install_rblob_function4(db);








>
>
>
>
>
>
>
>


















|







242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282

  return iRet;
}
/*
** End of integer query implementations.
*************************************************************************/


static int bt_open(sqlite4_env *pEnv, const char *zFile, sqlite4 **pDb){
  char *zUri = sqlite3_mprintf("file:%s?kv=bt", zFile);
  int rc = sqlite4_open(pEnv, zUri, pDb);
  sqlite3_free(zUri);
  return rc;
}

static int do_insert1_test4(
  const char *zFile,
  int nRow,                       /* Number of rows to insert in total */
  int nRowPerTrans,               /* Number of rows per transaction */
  int nIdx,                       /* Number of aux indexes (aside from PK) */
  int iSync                       /* PRAGMA synchronous value (0, 1 or 2) */
){
  char *zCreateTbl;               /* Create table statement */
  char *zInsert;                  /* INSERT statement */
  sqlite4_stmt *pInsert;          /* Compiled INSERT statement */
  sqlite4 *db = 0;                /* Database handle */
  int i;                          /* Counter to count nRow rows */
  int nMs;                        /* Test time in ms */

  lsm_db *pLsm;

  if( zFile==0 ) zFile = SQLITE4_DB_FILE;
  unlink_db(zFile);
  EXPLODE(  bt_open(0, zFile, &db)  );
  sqlite4_kvstore_control(db, "main", SQLITE4_KVCTRL_LSM_HANDLE, &pLsm);
  i = iSync;
  lsm_config(pLsm, LSM_CONFIG_SAFETY, &i);
  assert( i==iSync );

  install_rblob_function4(db);

440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
  sqlite4_stmt *pSelect = 0;
  char *zSelect;
  sqlite4 *db;
  int i;
  int nTblRow;

  if( zFile==0 ) zFile = SQLITE4_DB_FILE;
  EXPLODE( sqlite4_open(0, zFile, &db) );
  install_rblob_function4(db);

  nTblRow = integer_query4(db, "SELECT count(*) FROM t1");

  /* Create the db schema and prepare the INSERT statement */
  zSelect = create_select_sql(iIdx);
  EXPLODE(  sqlite4_prepare(db, zSelect, -1, &pSelect, 0)  );







|







448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
  sqlite4_stmt *pSelect = 0;
  char *zSelect;
  sqlite4 *db;
  int i;
  int nTblRow;

  if( zFile==0 ) zFile = SQLITE4_DB_FILE;
  EXPLODE( bt_open(0, zFile, &db) );
  install_rblob_function4(db);

  nTblRow = integer_query4(db, "SELECT count(*) FROM t1");

  /* Create the db schema and prepare the INSERT statement */
  zSelect = create_select_sql(iIdx);
  EXPLODE(  sqlite4_prepare(db, zSelect, -1, &pSelect, 0)  );
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
      }
    }
    *pp = (SqlDatabase *)p;
  }else{
    SqlDatabase4 *p = sqlite4_malloc(0, sizeof(SqlDatabase4));
    memset(p, 0, sizeof(SqlDatabase4));
    p->x.iDb = 4;
    rc = sqlite4_open(0, zFile, &p->db);
    if( rc!=SQLITE4_OK ){
      sqlite4_free(0, p);
      p = 0;
    }else{
      install_rint_function4(p->db);
      if( zConfig ) {
        rc = sqlite4_exec(p->db, zConfig, 0, 0);







|







650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
      }
    }
    *pp = (SqlDatabase *)p;
  }else{
    SqlDatabase4 *p = sqlite4_malloc(0, sizeof(SqlDatabase4));
    memset(p, 0, sizeof(SqlDatabase4));
    p->x.iDb = 4;
    rc = bt_open(0, zFile, &p->db);
    if( rc!=SQLITE4_OK ){
      sqlite4_free(0, p);
      p = 0;
    }else{
      install_rint_function4(p->db);
      if( zConfig ) {
        rc = sqlite4_exec(p->db, zConfig, 0, 0);
Changes to src/btInt.h.
46
47
48
49
50
51
52



53
54
55
56
57
58
59
#endif

/* By default pages are 1024 bytes in size. */
#define BT_DEFAULT_PGSZ 1024

/* By default blocks are 512K bytes in size. */
#define BT_DEFAULT_BLKSZ (512*1024)




/*
** This structure is the in-memory representation of all data stored in
** the database header at the start of the db file.
**
** pgsz, blksz:
**   Byte offset 0 of the database file is the first byte of both page 1







>
>
>







46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#endif

/* By default pages are 1024 bytes in size. */
#define BT_DEFAULT_PGSZ 1024

/* By default blocks are 512K bytes in size. */
#define BT_DEFAULT_BLKSZ (512*1024)

/* Default cache size in pages */
#define BT_DEFAULT_CACHESZ 1000

/*
** This structure is the in-memory representation of all data stored in
** the database header at the start of the db file.
**
** pgsz, blksz:
**   Byte offset 0 of the database file is the first byte of both page 1
Changes to src/bt_log.c.
1208
1209
1210
1211
1212
1213
1214

1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232

1233
1234
1235
1236
1237
1238
1239

  u32 *aLog = pLog->snapshot.aLog;
  int iSafeIdx = sqlite4BtLogFrameToIdx(aLog, iSafe);

  /* Loop through regions (c), (b) and (a) of the log file. In that order. */
  for(i=2; i>=0 && rc==SQLITE4_NOTFOUND; i--){
    u32 iLo = pLog->snapshot.aLog[i*2+0];

    u32 iHi = pLog->snapshot.aLog[i*2+1];
    int iSide;
    int iHash;
    int iHashLast;

    iHash = btLogFrameHash(pLog, iHi);
    iHashLast = btLogFrameHash(pLog, iLo);
    iSide = (pLog->snapshot.iHashSide + (i==0)) % 2;

    for( ; rc==SQLITE4_NOTFOUND && iHash>=iHashLast; iHash--){
      rc = btLogHashSearch(pLog, iSide, iHash, iHi, pgno, &iFrame);
      if( rc==SQLITE4_OK ){
        if( iFrame<iLo || iFrame>iHi ){
          rc = SQLITE4_NOTFOUND;
        }else{
          assert( sqlite4BtLogFrameToIdx(aLog, iFrame)>=0 );
          if( iSafeIdx>=0 && sqlite4BtLogFrameToIdx(aLog, iFrame)>iSafeIdx ){
            return SQLITE4_NOTFOUND;

          }
        }
      }
    }
  }

  btDebugLogSearch(pLog->pLock, pgno, iSafe, (rc==SQLITE4_OK ? iFrame : 0));







>
|
|
|
|

|
|
|

|
|
|
|
|
|
|
|
|
>







1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241

  u32 *aLog = pLog->snapshot.aLog;
  int iSafeIdx = sqlite4BtLogFrameToIdx(aLog, iSafe);

  /* Loop through regions (c), (b) and (a) of the log file. In that order. */
  for(i=2; i>=0 && rc==SQLITE4_NOTFOUND; i--){
    u32 iLo = pLog->snapshot.aLog[i*2+0];
    if( iLo ){
      u32 iHi = pLog->snapshot.aLog[i*2+1];
      int iSide;
      int iHash;
      int iHashLast;

      iHash = btLogFrameHash(pLog, iHi);
      iHashLast = btLogFrameHash(pLog, iLo);
      iSide = (pLog->snapshot.iHashSide + (i==0)) % 2;

      for( ; rc==SQLITE4_NOTFOUND && iHash>=iHashLast; iHash--){
        rc = btLogHashSearch(pLog, iSide, iHash, iHi, pgno, &iFrame);
        if( rc==SQLITE4_OK ){
          if( iFrame<iLo || iFrame>iHi ){
            rc = SQLITE4_NOTFOUND;
          }else{
            assert( sqlite4BtLogFrameToIdx(aLog, iFrame)>=0 );
            if( iSafeIdx>=0 && sqlite4BtLogFrameToIdx(aLog, iFrame)>iSafeIdx ){
              return SQLITE4_NOTFOUND;
            }
          }
        }
      }
    }
  }

  btDebugLogSearch(pLog->pLock, pgno, iSafe, (rc==SQLITE4_OK ? iFrame : 0));
1262
1263
1264
1265
1266
1267
1268




1269
1270
1271
1272
1273
1274
1275
** If the log does not contain any version of page pgno, SQLITE4_NOTFOUND
** is returned and the contents of buffer aData[] are not modified.
**
** If any other error occurs, an SQLite4 error code is returned. The final
** state of buffer aData[] is undefined in this case.
*/
int sqlite4BtLogRead(BtLog *pLog, u32 pgno, u8 *aData){




  return btLogRead(pLog, pgno, aData, 0);
}

static int btLogZeroHash(BtLog *pLog, int iHash){
  int iSide = pLog->snapshot.iHashSide;
  ht_slot *aHash;
  u32 *aPgno;







>
>
>
>







1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
** If the log does not contain any version of page pgno, SQLITE4_NOTFOUND
** is returned and the contents of buffer aData[] are not modified.
**
** If any other error occurs, an SQLite4 error code is returned. The final
** state of buffer aData[] is undefined in this case.
*/
int sqlite4BtLogRead(BtLog *pLog, u32 pgno, u8 *aData){
  if( pLog->snapshot.aLog[4]==0 ){
    assert( pLog->snapshot.aLog[0]==0 && pLog->snapshot.aLog[2]==0 );
    return SQLITE4_NOTFOUND;
  }
  return btLogRead(pLog, pgno, aData, 0);
}

static int btLogZeroHash(BtLog *pLog, int iHash){
  int iSide = pLog->snapshot.iHashSide;
  ht_slot *aHash;
  u32 *aPgno;
Changes to src/bt_main.c.
11
12
13
14
15
16
17

18
19
20
21





22
23
24
25
26
27
28

29
30
31
32
33
34
35
36
37
38
39
40
41
42

43
44
45
46
47


48
49
50
51
52
53
54
*************************************************************************
**
*/

#include "btInt.h"
#include <string.h>
#include <assert.h>


#define BT_MAX_DEPTH 32           /* Maximum possible depth of tree */
#define BT_MAX_DIRECT_OVERFLOW 8  /* Maximum direct overflow pages per cell */






/*
** Values that make up the single byte flags field at the start of
** b-tree pages. 
*/
#define BT_PGFLAGS_INTERNAL 0x01  /* True for non-leaf nodes */
#define BT_PGFLAGS_METATREE 0x02  /* True for a meta-tree page */
#define BT_PGFLAGS_SCHEDULE 0x04  /* True for a schedule-tree page */


/*
** Maximum depth of fast-insert sub-trees.
*/
#define MAX_SUBTREE_DEPTH 8

/* #define BT_STDERR_DEBUG 1 */

typedef struct BtCursor BtCursor;
typedef struct FiCursor FiCursor;
typedef struct FiSubCursor FiSubCursor;

struct bt_db {
  sqlite4_env *pEnv;              /* SQLite environment */

  BtPager *pPager;                /* Underlying page-based database */
  bt_cursor *pAllCsr;             /* List of all open cursors */
  int nMinMerge;
  int nScheduleAlloc;
  int bFastInsertOp;              /* Set by CONTROL_FAST_INSERT_OP */


};

/*
** Overflow buffer is valid if nKey!=0.
*/
typedef struct BtOvfl BtOvfl;
struct BtOvfl {







>




>
>
>
>
>




|
|
|
>














>





>
>







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
*************************************************************************
**
*/

#include "btInt.h"
#include <string.h>
#include <assert.h>
#include <stddef.h>

#define BT_MAX_DEPTH 32           /* Maximum possible depth of tree */
#define BT_MAX_DIRECT_OVERFLOW 8  /* Maximum direct overflow pages per cell */

/* Maximum size of a key-prefix stored on an internal node. Parts of the
** code in this file assume that this value can be encoded as a single
** byte SQLite4 varint.  */
#define BT_MAX_INTERNAL_KEY 200   /* Maximum bytes of key on internal node */

/*
** Values that make up the single byte flags field at the start of
** b-tree pages. 
*/
#define BT_PGFLAGS_INTERNAL  0x01  /* True for non-leaf nodes */
#define BT_PGFLAGS_METATREE  0x02  /* True for a meta-tree page */
#define BT_PGFLAGS_SCHEDULE  0x04  /* True for a schedule-tree page */
#define BT_PGFLAGS_LARGEKEYS 0x08  /* True if keys larger than 200 bytes */

/*
** Maximum depth of fast-insert sub-trees.
*/
#define MAX_SUBTREE_DEPTH 8

/* #define BT_STDERR_DEBUG 1 */

typedef struct BtCursor BtCursor;
typedef struct FiCursor FiCursor;
typedef struct FiSubCursor FiSubCursor;

struct bt_db {
  sqlite4_env *pEnv;              /* SQLite environment */
  sqlite4_mm *pMM;                /* Memory allocator for pEnv */
  BtPager *pPager;                /* Underlying page-based database */
  bt_cursor *pAllCsr;             /* List of all open cursors */
  int nMinMerge;
  int nScheduleAlloc;
  int bFastInsertOp;              /* Set by CONTROL_FAST_INSERT_OP */

  BtCursor *pFreeCsr;
};

/*
** Overflow buffer is valid if nKey!=0.
*/
typedef struct BtOvfl BtOvfl;
struct BtOvfl {
91
92
93
94
95
96
97
98
99
100
101
102
103
104





105
106
107
108
109
110
111
** Database b-tree cursor handle.
*/
struct BtCursor {
  bt_cursor base;                 /* Base cursor class */

  u32 iRoot;                      /* Root page of b-tree this cursor queries */
  int nPg;                        /* Number of valid entries in apPage[] */
  int aiCell[BT_MAX_DEPTH];       /* Current cell of each apPage[] entry */
  BtPage *apPage[BT_MAX_DEPTH];   /* All pages from root to current leaf */
  BtOvfl ovfl;                    /* Overflow cache (see above) */

  int bRequireReseek;             /* True if a btCsrReseek() is required */
  int bSkipNext;                  /* True if next CsrNext() is a no-op */
  int bSkipPrev;                  /* True if next CsrPrev() is a no-op */





};

/*
** Database f-tree cursor handle.
*/
struct FiSubCursor {
  u8 aPrefix[8];                  /* Meta-tree key prefix for this age/level */







<
<





>
>
>
>
>







101
102
103
104
105
106
107


108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
** Database b-tree cursor handle.
*/
struct BtCursor {
  bt_cursor base;                 /* Base cursor class */

  u32 iRoot;                      /* Root page of b-tree this cursor queries */
  int nPg;                        /* Number of valid entries in apPage[] */


  BtOvfl ovfl;                    /* Overflow cache (see above) */

  int bRequireReseek;             /* True if a btCsrReseek() is required */
  int bSkipNext;                  /* True if next CsrNext() is a no-op */
  int bSkipPrev;                  /* True if next CsrPrev() is a no-op */

  BtCursor *pNextFree;            /* Next in list of free BtCursor structures */

  int aiCell[BT_MAX_DEPTH];       /* Current cell of each apPage[] entry */
  BtPage *apPage[BT_MAX_DEPTH];   /* All pages from root to current leaf */
};

/*
** Database f-tree cursor handle.
*/
struct FiSubCursor {
  u8 aPrefix[8];                  /* Meta-tree key prefix for this age/level */
205
206
207
208
209
210
211



212
213
214
215
216
217
218
  a[0] = (u8)((i>>24) & 0xFF);
  a[1] = (u8)((i>>16) & 0xFF);
  a[2] = (u8)((i>>8) & 0xFF);
  a[3] = (u8)((i>>0) & 0xFF);
}
#define btPutU32(x,y) sqlite4BtPutU32(x,y)




/*
** Allocate a new database handle.
*/
int sqlite4BtNew(sqlite4_env *pEnv, int nExtra, bt_db **ppDb){
  static const int MIN_MERGE = 2;
  static const int SCHEDULE_ALLOC = 4;








>
>
>







218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
  a[0] = (u8)((i>>24) & 0xFF);
  a[1] = (u8)((i>>16) & 0xFF);
  a[2] = (u8)((i>>8) & 0xFF);
  a[3] = (u8)((i>>0) & 0xFF);
}
#define btPutU32(x,y) sqlite4BtPutU32(x,y)

struct FakePage { u8 *aData; };
#define btPageData(pPg) (((struct FakePage*)(pPg))->aData)

/*
** Allocate a new database handle.
*/
int sqlite4BtNew(sqlite4_env *pEnv, int nExtra, bt_db **ppDb){
  static const int MIN_MERGE = 2;
  static const int SCHEDULE_ALLOC = 4;

238
239
240
241
242
243
244






245
246
247
248
249
250
251
252
253
254
255
256
257
258
259

260
261
262
263
264
265
266

/*
** Close an existing database handle. Once this function has been 
** called, the handle may not be used for any purpose.
*/
int sqlite4BtClose(bt_db *db){
  if( db ){






    sqlite4BtPagerClose(db->pPager);
  }
  return SQLITE4_OK;
}

/*
** Return a pointer to the nExtra bytes of space allocated along with 
** the database handle. 
*/
void *sqlite4BtExtra(bt_db *db){
  return (void*)&db[1];
}

int sqlite4BtOpen(bt_db *db, const char *zFilename){
  int rc;

  rc = sqlite4BtPagerOpen(db->pPager, zFilename);
  return rc;
}

int sqlite4BtBegin(bt_db *db, int iLevel){
  int rc;
  rc = sqlite4BtPagerBegin(db->pPager, iLevel);







>
>
>
>
>
>















>







254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289

/*
** Close an existing database handle. Once this function has been 
** called, the handle may not be used for any purpose.
*/
int sqlite4BtClose(bt_db *db){
  if( db ){
    BtCursor *pCsr;
    BtCursor *pNext;
    for(pCsr=db->pFreeCsr; pCsr; pCsr=pNext){
      pNext = pCsr->pNextFree;
      sqlite4_free(db->pEnv, pCsr);
    }
    sqlite4BtPagerClose(db->pPager);
  }
  return SQLITE4_OK;
}

/*
** Return a pointer to the nExtra bytes of space allocated along with 
** the database handle. 
*/
void *sqlite4BtExtra(bt_db *db){
  return (void*)&db[1];
}

int sqlite4BtOpen(bt_db *db, const char *zFilename){
  int rc;
  sqlite4_env_config(db->pEnv, SQLITE4_ENVCONFIG_GETMM, &db->pMM);
  rc = sqlite4BtPagerOpen(db->pPager, zFilename);
  return rc;
}

int sqlite4BtBegin(bt_db *db, int iLevel){
  int rc;
  rc = sqlite4BtPagerBegin(db->pPager, iLevel);
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
}

int sqlite4BtTransactionLevel(bt_db *db){
  return sqlite4BtPagerTransactionLevel(db->pPager);
}

static void btCsrSetup(bt_db *db, u32 iRoot, BtCursor *pCsr){
  memset(pCsr, 0, sizeof(BtCursor));
  pCsr->base.flags = CSR_TYPE_BT;
  pCsr->base.pExtra = (void*)&pCsr[1];
  pCsr->base.pDb = db;
  pCsr->iRoot = iRoot;
  sqlite4_env_config(db->pEnv, SQLITE4_ENVCONFIG_GETMM, &pCsr->ovfl.buf.pMM);
}

int sqlite4BtCsrOpen(bt_db *db, int nExtra, bt_cursor **ppCsr){
  int rc = SQLITE4_OK;            /* Return Code */
  bt_cursor *pRet = 0;

  assert( sqlite4BtPagerTransactionLevel(db->pPager)>0 );







|




|







309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
}

int sqlite4BtTransactionLevel(bt_db *db){
  return sqlite4BtPagerTransactionLevel(db->pPager);
}

static void btCsrSetup(bt_db *db, u32 iRoot, BtCursor *pCsr){
  memset(pCsr, 0, offsetof(BtCursor, aiCell));
  pCsr->base.flags = CSR_TYPE_BT;
  pCsr->base.pExtra = (void*)&pCsr[1];
  pCsr->base.pDb = db;
  pCsr->iRoot = iRoot;
  pCsr->ovfl.buf.pMM = db->pMM;
}

int sqlite4BtCsrOpen(bt_db *db, int nExtra, bt_cursor **ppCsr){
  int rc = SQLITE4_OK;            /* Return Code */
  bt_cursor *pRet = 0;

  assert( sqlite4BtPagerTransactionLevel(db->pPager)>0 );
317
318
319
320
321
322
323






324
325
326
327



328
329
330
331
332
333
334
335
336
337
338
339
340
341


342
343
344
345
346
347
348
      pCsr->base.pExtra = (void*)&pCsr[1];
      pCsr->base.pDb = db;
      pRet = (bt_cursor*)pCsr;
    }

  }else{
    BtCursor *pCsr;                /* New cursor object */






    int nByte = sizeof(BtCursor) + nExtra;
    pCsr = (BtCursor*)sqlite4_malloc(db->pEnv, nByte);
    if( pCsr==0 ){
      rc = btErrorBkpt(SQLITE4_NOMEM);



    }else{
      u32 iRoot = sqlite4BtPagerDbhdr(db->pPager)->iRoot;
      btCsrSetup(db, iRoot, pCsr);
      pRet = (bt_cursor*)pCsr;
    }
  }

  assert( (pRet==0)==(rc!=SQLITE4_OK) );
  if( rc==SQLITE4_OK ){
    pRet->pNextCsr = db->pAllCsr;
    db->pAllCsr = pRet;
  }
  *ppCsr = pRet;



  btCheckPageRefs(db);
  db->bFastInsertOp = 0;
  return rc;
}

static void btCsrReleaseAll(BtCursor *pCsr){
  int i;







>
>
>
>
>
>
|
|
|
|
>
>
>
|
<
|
|
<







<

>
>







340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360

361
362

363
364
365
366
367
368
369

370
371
372
373
374
375
376
377
378
379
      pCsr->base.pExtra = (void*)&pCsr[1];
      pCsr->base.pDb = db;
      pRet = (bt_cursor*)pCsr;
    }

  }else{
    BtCursor *pCsr;                /* New cursor object */
    u32 iRoot = sqlite4BtPagerDbhdr(db->pPager)->iRoot;
    
    if( db->pFreeCsr ){
      pCsr = db->pFreeCsr;
      db->pFreeCsr = pCsr->pNextFree;
    }else{
      int nByte = sizeof(BtCursor) + nExtra;
      pCsr = (BtCursor*)sqlite4_malloc(db->pEnv, nByte);
      if( pCsr==0 ){
        rc = btErrorBkpt(SQLITE4_NOMEM);
        goto csr_open_out;
      }
    }


    btCsrSetup(db, iRoot, pCsr);
    pRet = (bt_cursor*)pCsr;

  }

  assert( (pRet==0)==(rc!=SQLITE4_OK) );
  if( rc==SQLITE4_OK ){
    pRet->pNextCsr = db->pAllCsr;
    db->pAllCsr = pRet;
  }


 csr_open_out:
  *ppCsr = pRet;
  btCheckPageRefs(db);
  db->bFastInsertOp = 0;
  return rc;
}

static void btCsrReleaseAll(BtCursor *pCsr){
  int i;
388
389
390
391
392
393
394


395
396
397
398
399

400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420

421
422
423
424
425
426
427
428
    for(pp=&pDb->pAllCsr; *pp!=pCsr; pp=&(*pp)->pNextCsr);
    *pp = pCsr->pNextCsr;

    if( IsBtCsr(pCsr) ){
      /* A regular b-tree cursor */
      BtCursor *p = (BtCursor*)pCsr;
      btCsrReset(p, 1);


    }else{
      /* A fast-insert-tree cursor */
      fiCsrReset((FiCursor*)pCsr);
    }
    sqlite4_free(pDb->pEnv, pCsr);

    btCheckPageRefs(pDb);
  }
  return SQLITE4_OK;
}

void *sqlite4BtCsrExtra(bt_cursor *pCsr){
  return pCsr->pExtra;
}

/*
** Set pCsr->apPage[pCsr->nPg] to a reference to database page pgno.
*/
static int btCsrDescend(BtCursor *pCsr, u32 pgno){
  int rc;
  if( pCsr->nPg>=BT_MAX_DEPTH ){
    rc = btErrorBkpt(SQLITE4_CORRUPT);
  }else{
    bt_db *pDb = pCsr->base.pDb;
    assert( pCsr->nPg>=0 );
    rc = sqlite4BtPageGet(pDb->pPager, pgno, &pCsr->apPage[pCsr->nPg]);
    if( rc==SQLITE4_OK ){

      assert( pCsr->apPage[pCsr->nPg] );
      pCsr->nPg++;
    }
  }
  return rc;
}

/*







>
>



<
|
>












|




<

|

>
|







419
420
421
422
423
424
425
426
427
428
429
430

431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449

450
451
452
453
454
455
456
457
458
459
460
461
    for(pp=&pDb->pAllCsr; *pp!=pCsr; pp=&(*pp)->pNextCsr);
    *pp = pCsr->pNextCsr;

    if( IsBtCsr(pCsr) ){
      /* A regular b-tree cursor */
      BtCursor *p = (BtCursor*)pCsr;
      btCsrReset(p, 1);
      p->pNextFree = pDb->pFreeCsr;
      pDb->pFreeCsr = p;
    }else{
      /* A fast-insert-tree cursor */
      fiCsrReset((FiCursor*)pCsr);

      sqlite4_free(pDb->pEnv, pCsr);
    }
    btCheckPageRefs(pDb);
  }
  return SQLITE4_OK;
}

void *sqlite4BtCsrExtra(bt_cursor *pCsr){
  return pCsr->pExtra;
}

/*
** Set pCsr->apPage[pCsr->nPg] to a reference to database page pgno.
*/
static int btCsrDescend(BtCursor *pCsr, u32 pgno, BtPage **ppPg){
  int rc;
  if( pCsr->nPg>=BT_MAX_DEPTH ){
    rc = btErrorBkpt(SQLITE4_CORRUPT);
  }else{

    assert( pCsr->nPg>=0 );
    rc = sqlite4BtPageGet(pCsr->base.pDb->pPager, pgno, ppPg);
    if( rc==SQLITE4_OK ){
      assert( *ppPg );
      pCsr->apPage[pCsr->nPg] = *ppPg;
      pCsr->nPg++;
    }
  }
  return rc;
}

/*
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
      for(i=0; i<=btCellCount(aData, nData); i++){
        BtPage *pChild;
        u8 *aChild;
        u32 child;

        child = btChildPgno(aData, nData, i);
        sqlite4BtPageGet(pPager, child, &pChild);
        aChild = sqlite4BtPageData(pChild);
        btPageToAscii(child, bAscii, pPager, aChild, nData, pBuf);
        sqlite4BtPageRelease(pChild);
      }
    }
  }
  sqlite4BtBufAppendf(pBuf, "\n");
}

static int btFreelistToAscii(bt_db *db, u32 iFirst, sqlite4_buffer *pBuf){
  int rc = SQLITE4_OK;
  u32 iTrunk = iFirst;
  while( iTrunk && rc==SQLITE4_OK ){
    BtPage *pPg = 0;
    rc = sqlite4BtPageGet(db->pPager, iTrunk, &pPg);
    if( rc==SQLITE4_OK ){
      u8 *aData = sqlite4BtPageData(pPg);
      u32 nFree = btGetU32(aData);
      u32 iNext = btGetU32(&aData[4]);
      int i;

      sqlite4BtBufAppendf(pBuf, "iTrunk=%d ", (int)iTrunk);
      sqlite4BtBufAppendf(pBuf, "nFree=%d iNext=%d (", (int)nFree, (int)iNext);
      for(i=0; i<(int)nFree; i++){







|















|







735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
      for(i=0; i<=btCellCount(aData, nData); i++){
        BtPage *pChild;
        u8 *aChild;
        u32 child;

        child = btChildPgno(aData, nData, i);
        sqlite4BtPageGet(pPager, child, &pChild);
        aChild = btPageData(pChild);
        btPageToAscii(child, bAscii, pPager, aChild, nData, pBuf);
        sqlite4BtPageRelease(pChild);
      }
    }
  }
  sqlite4BtBufAppendf(pBuf, "\n");
}

static int btFreelistToAscii(bt_db *db, u32 iFirst, sqlite4_buffer *pBuf){
  int rc = SQLITE4_OK;
  u32 iTrunk = iFirst;
  while( iTrunk && rc==SQLITE4_OK ){
    BtPage *pPg = 0;
    rc = sqlite4BtPageGet(db->pPager, iTrunk, &pPg);
    if( rc==SQLITE4_OK ){
      u8 *aData = btPageData(pPg);
      u32 nFree = btGetU32(aData);
      u32 iNext = btGetU32(&aData[4]);
      int i;

      sqlite4BtBufAppendf(pBuf, "iTrunk=%d ", (int)iTrunk);
      sqlite4BtBufAppendf(pBuf, "nFree=%d iNext=%d (", (int)nFree, (int)iNext);
      for(i=0; i<(int)nFree; i++){
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783

int printPgdataToStderr(u32 pgno, u8 *aData, int nData){
  printPage(stderr, pgno, aData, nData);
  return 0;
}

int printPgToStderr(BtPage *pPg){
  printPage(stderr, sqlite4BtPagePgno(pPg), sqlite4BtPageData(pPg), 1024);
  return 0;
}

static void btPrintMetaTree(BtPager *pPager, int bAscii, BtDbHdr *pHdr){
  u8 *aData;
  int nData;
  sqlite4_buffer buf;
  BtPage *pPg = 0;

  sqlite4BtPageGet(pPager, pHdr->iMRoot, &pPg);
  aData = sqlite4BtPageData(pPg);
  nData = pHdr->pgsz;
  sqlite4_buffer_init(&buf, 0);
  btPageToAscii(pHdr->iMRoot, bAscii, pPager, aData, nData, &buf);
  sqlite4_buffer_append(&buf, "", 1);

  fprintf(stderr, "%s", (char*)buf.p);
  sqlite4_buffer_clear(&buf);







|










|







791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816

int printPgdataToStderr(u32 pgno, u8 *aData, int nData){
  printPage(stderr, pgno, aData, nData);
  return 0;
}

int printPgToStderr(BtPage *pPg){
  printPage(stderr, sqlite4BtPagePgno(pPg), btPageData(pPg), 1024);
  return 0;
}

static void btPrintMetaTree(BtPager *pPager, int bAscii, BtDbHdr *pHdr){
  u8 *aData;
  int nData;
  sqlite4_buffer buf;
  BtPage *pPg = 0;

  sqlite4BtPageGet(pPager, pHdr->iMRoot, &pPg);
  aData = btPageData(pPg);
  nData = pHdr->pgsz;
  sqlite4_buffer_init(&buf, 0);
  btPageToAscii(pHdr->iMRoot, bAscii, pPager, aData, nData, &buf);
  sqlite4_buffer_append(&buf, "", 1);

  fprintf(stderr, "%s", (char*)buf.p);
  sqlite4_buffer_clear(&buf);
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972

973
974
975
976
977
978
979
980
981
982
983
984
985
  int nCsrKey;
  int nCmp;
  int nAscend = 0;
  int rc = SQLITE4_OK;
  int res;

  if( bLeaf ){
    rc = sqlite4BtCsrKey((bt_cursor*)pCsr, &pCsrKey, &nCsrKey);
  }else{
    const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);

    u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
    u8 *pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

    pCsrKey = pCell + sqlite4BtVarintGet32(pCell, &nCsrKey);
    if( nCsrKey==0 ){
      int iCell = pCsr->aiCell[pCsr->nPg-1]+1;
      while( 1 ){

        u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
        u32 pgno = btChildPgno(aData, pgsz, iCell);
        nAscend++;
        rc = btCsrDescend(pCsr, pgno);
        if( rc!=SQLITE4_OK ) break;
        aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
        pCsr->aiCell[pCsr->nPg-1] = 0;
        if( (btFlags(aData) & BT_PGFLAGS_INTERNAL)==0 ) break;
        iCell = 0;
      }
      rc = sqlite4BtCsrKey((bt_cursor*)pCsr, &pCsrKey, &nCsrKey);
    }
  }







|



|






>
|


|

|







988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
  int nCsrKey;
  int nCmp;
  int nAscend = 0;
  int rc = SQLITE4_OK;
  int res;

  if( bLeaf ){
    rc = btCsrKey(pCsr, &pCsrKey, &nCsrKey);
  }else{
    const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);

    u8 *aData = btPageData(pCsr->apPage[pCsr->nPg-1]);
    u8 *pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

    pCsrKey = pCell + sqlite4BtVarintGet32(pCell, &nCsrKey);
    if( nCsrKey==0 ){
      int iCell = pCsr->aiCell[pCsr->nPg-1]+1;
      while( 1 ){
        BtPage *pPg;
        u8 *aData = btPageData(pCsr->apPage[pCsr->nPg-1]);
        u32 pgno = btChildPgno(aData, pgsz, iCell);
        nAscend++;
        rc = btCsrDescend(pCsr, pgno, &pPg);
        if( rc!=SQLITE4_OK ) break;
        aData = btPageData(pPg);
        pCsr->aiCell[pCsr->nPg-1] = 0;
        if( (btFlags(aData) & BT_PGFLAGS_INTERNAL)==0 ) break;
        iCell = 0;
      }
      rc = sqlite4BtCsrKey((bt_cursor*)pCsr, &pCsrKey, &nCsrKey);
    }
  }
1005
1006
1007
1008
1009
1010
1011


















1012
1013
1014
1015
1016
1017
1018
  }

 keycompare_done:
  btCsrAscend(pCsr, nAscend);
  *piRes = res;
  return rc;
}



















#define BT_CSRSEEK_SEEK   0
#define BT_CSRSEEK_UPDATE 1
#define BT_CSRSEEK_RESEEK 2

static int btCsrSeek(
  BtCursor *pCsr,                 /* Cursor object to seek */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
  }

 keycompare_done:
  btCsrAscend(pCsr, nAscend);
  *piRes = res;
  return rc;
}

/*
** Return an integer representing the result of (K1 - K2).
*/
static int btKeyCompare(
  const void *pKey1, int nKey1, 
  const void *pKey2, int nKey2
){
  int nCmp = MIN(nKey1, nKey2);
  int res;

  res = memcmp(pKey1, pKey2, nCmp);
  if( res==0 ){
    res = nKey1 - nKey2;
  }
  return res;
}


#define BT_CSRSEEK_SEEK   0
#define BT_CSRSEEK_UPDATE 1
#define BT_CSRSEEK_RESEEK 2

static int btCsrSeek(
  BtCursor *pCsr,                 /* Cursor object to seek */
1034
1035
1036
1037
1038
1039
1040

1041
1042
1043
1044
1045
1046
1047


1048
1049
1050
1051
1052

1053
1054



1055
1056
1057


1058



1059



1060







1061
1062
1063
1064
1065

1066
1067

1068
1069
1070
1071
1072
1073


1074


1075
1076
1077
1078
1079
1080
1081

  /* Figure out the root page number */
  assert( pCsr->iRoot>1 && pCsr->nPg==0 );
  pgno = pCsr->iRoot;

  while( rc==SQLITE4_OK && pgno ){
    /* Load page number pgno into the b-tree */

    rc = btCsrDescend(pCsr, pgno);
    if( rc==SQLITE4_OK ){
      int nCell;                  /* Number of cells on this page */
      int iHi;                    /* pK/nK is <= than cell iHi */
      int iLo;                    /* pK/nK is > than cell (iLo-1) */
      int res;                    /* Result of comparison */
      u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);


      int bLeaf = ((btFlags(aData) & BT_PGFLAGS_INTERNAL)==0);

      iLo = 0;
      iHi = nCell = btCellCount(aData, pgsz);


      while( iHi>iLo ){
        int iTst = (iHi+iLo)/2;   /* Cell to compare to pK/nK */



        pCsr->aiCell[pCsr->nPg-1] = iTst;
        rc = btCellKeyCompare(pCsr, bLeaf, aPrefix, pK, nK, &res);
        if( rc!=SQLITE4_OK || res==0 ){


          /* Cell iTst is EQUAL to pK/nK */



          iHi = iLo = iTst;



        }else if( res<0 ){







          /* Cell iTst is SMALLER than pK/nK */
          iLo = iTst+1;
        }else{
          /* Cell iTst is LARGER than pK/nK */
          iHi = iTst;

        }
      }

      if( rc!=SQLITE4_OK ) break;
      assert( iHi==iLo );

      iHi += (nCell>0 && bLeaf==0 && res==0);
      pCsr->aiCell[pCsr->nPg-1] = iHi;
      if( bLeaf==0 ){


        pgno = btChildPgno(aData, pgsz, iHi);


      }else{
        pgno = 0;

        if( nCell==0 ){
          rc = SQLITE4_NOTFOUND;
        }else if( res!=0 ){
          if( eSeek==BT_SEEK_EQ ){







>
|





|
>
>





>
|
|
>
>
>
|
|
|
>
>
|
>
>
>
|
>
>
>
|
>
>
>
>
>
>
>
|
|
|
|
|
>
|
|
>

<




>
>
|
>
>







1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144

1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160

  /* Figure out the root page number */
  assert( pCsr->iRoot>1 && pCsr->nPg==0 );
  pgno = pCsr->iRoot;

  while( rc==SQLITE4_OK && pgno ){
    /* Load page number pgno into the b-tree */
    BtPage *pPg;
    rc = btCsrDescend(pCsr, pgno, &pPg);
    if( rc==SQLITE4_OK ){
      int nCell;                  /* Number of cells on this page */
      int iHi;                    /* pK/nK is <= than cell iHi */
      int iLo;                    /* pK/nK is > than cell (iLo-1) */
      int res;                    /* Result of comparison */

      u8 *aData = btPageData(pPg);
      u16 *aCellPtr = btCellPtrFind(aData, pgsz, 0);
      int bLeaf = ((btFlags(aData) & BT_PGFLAGS_INTERNAL)==0);

      iLo = 0;
      iHi = nCell = btCellCount(aData, pgsz);

      if( btFlags(aData) & BT_PGFLAGS_LARGEKEYS ){
        while( iHi>iLo ){
          int iTst = (iHi+iLo)/2;   /* Cell to compare to pK/nK */
          u8 *pCell = &aData[btGetU16(aCellPtr - iTst)];
          int n = *pCell;

          pCsr->aiCell[pCsr->nPg-1] = iTst;
          rc = btCellKeyCompare(pCsr, bLeaf, 0, pK, nK, &res);
          if( rc!=SQLITE4_OK ) break;

          if( res<0 ){
            /* Cell iTst is SMALLER than pK/nK */
            iLo = iTst+1;
          }else{
            /* Cell iTst is LARGER than (or equal to) pK/nK */
            iHi = iTst;
            if( res==0 ) break;
          }
        }
      }else{
        while( iHi>iLo ){
          int iTst = (iHi+iLo)/2;   /* Cell to compare to pK/nK */
          u8 *pCell = &aData[btGetU16(aCellPtr - iTst)];
          int n = *pCell;
          res = memcmp(&pCell[1], pK, MIN(nK, n));

          if( res<0 || (res==0 && (res = n - nK)<0) ){
            /* Cell iTst is SMALLER than pK/nK */
            iLo = iTst+1;
          }else{
            /* Cell iTst is LARGER than (or equal to) pK/nK */
            iHi = iTst;
            if( res==0 ) break;
          }
        }
      }
      if( rc!=SQLITE4_OK ) break;


      iHi += (nCell>0 && bLeaf==0 && res==0);
      pCsr->aiCell[pCsr->nPg-1] = iHi;
      if( bLeaf==0 ){
        if( iHi==nCell ) pgno = btGetU32(&aData[1]);
        else{
          u8 *pCell = btCellFind(aData, pgsz, iHi);
          pgno = btGetU32(&pCell[1 + (int)*pCell]);
        }
      }else{
        pgno = 0;

        if( nCell==0 ){
          rc = SQLITE4_NOTFOUND;
        }else if( res!=0 ){
          if( eSeek==BT_SEEK_EQ ){
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192

1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
  pCsr->bSkipPrev = pCsr->bSkipNext = 0;

  while( rc==SQLITE4_OK ){
    int iPg = pCsr->nPg-1;
    int iCell = pCsr->aiCell[iPg];

    if( bNext ){
      u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[iPg]);
      int nCell = btCellCount(aData, pgsz);
      assert( bRequireDescent==0 || bRequireDescent==1 );
      if( iCell<(nCell+bRequireDescent-1) ){
        pCsr->aiCell[iPg]++;
        break;
      }
    }else{
      if( pCsr->aiCell[iPg]>0 ){
        pCsr->aiCell[iPg]--;
        break;
      }
    }

    rc = btCsrAscend(pCsr, 1);
    bRequireDescent = 1;
  }

  if( bRequireDescent && rc==SQLITE4_OK ){
    u32 pgno;                   /* Child page number */
    u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);

    pgno = btChildPgno(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

    while( 1 ){

      rc = btCsrDescend(pCsr, pgno);
      if( rc!=SQLITE4_OK ){
        break;
      }else{
        int nCell;
        aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
        nCell = btCellCount(aData, pgsz);
        if( btFlags(aData) & BT_PGFLAGS_INTERNAL ){
          pCsr->aiCell[pCsr->nPg-1] = (bNext ? 0 : nCell);
          pgno = btChildPgno(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
        }else{
          pCsr->aiCell[pCsr->nPg-1] = (bNext ? 0 : nCell-1);
          break;







|



















|




>
|




|







1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
  pCsr->bSkipPrev = pCsr->bSkipNext = 0;

  while( rc==SQLITE4_OK ){
    int iPg = pCsr->nPg-1;
    int iCell = pCsr->aiCell[iPg];

    if( bNext ){
      u8 *aData = (u8*)btPageData(pCsr->apPage[iPg]);
      int nCell = btCellCount(aData, pgsz);
      assert( bRequireDescent==0 || bRequireDescent==1 );
      if( iCell<(nCell+bRequireDescent-1) ){
        pCsr->aiCell[iPg]++;
        break;
      }
    }else{
      if( pCsr->aiCell[iPg]>0 ){
        pCsr->aiCell[iPg]--;
        break;
      }
    }

    rc = btCsrAscend(pCsr, 1);
    bRequireDescent = 1;
  }

  if( bRequireDescent && rc==SQLITE4_OK ){
    u32 pgno;                   /* Child page number */
    u8 *aData = (u8*)btPageData(pCsr->apPage[pCsr->nPg-1]);

    pgno = btChildPgno(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

    while( 1 ){
      BtPage *pPg;
      rc = btCsrDescend(pCsr, pgno, &pPg);
      if( rc!=SQLITE4_OK ){
        break;
      }else{
        int nCell;
        aData = (u8*)btPageData(pPg);
        nCell = btCellCount(aData, pgsz);
        if( btFlags(aData) & BT_PGFLAGS_INTERNAL ){
          pCsr->aiCell[pCsr->nPg-1] = (bNext ? 0 : nCell);
          pgno = btChildPgno(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
        }else{
          pCsr->aiCell[pCsr->nPg-1] = (bNext ? 0 : nCell-1);
          break;
1226
1227
1228
1229
1230
1231
1232

1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245

  /* Figure out the root page number */
  assert( pCsr->iRoot>1 && pCsr->nPg==0 );
  pgno = pCsr->iRoot;

  while( rc==SQLITE4_OK ){
    /* Load page number pgno into the b-tree */

    rc = btCsrDescend(pCsr, pgno);
    if( rc==SQLITE4_OK ){
      int nCell;                  /* Number of cells on this page */
      int nByte;
      u8 *pCell;
      u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);

      nCell = btCellCount(aData, pgsz);
      pCsr->aiCell[pCsr->nPg-1] = (bLast ? nCell : 0);

      /* If the cursor has descended to a leaf break out of the loop. */
      if( (aData[0] & BT_PGFLAGS_INTERNAL)==0 ){
        if( nCell==0 ){







>
|




|







1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326

  /* Figure out the root page number */
  assert( pCsr->iRoot>1 && pCsr->nPg==0 );
  pgno = pCsr->iRoot;

  while( rc==SQLITE4_OK ){
    /* Load page number pgno into the b-tree */
    BtPage *pPg;
    rc = btCsrDescend(pCsr, pgno, &pPg);
    if( rc==SQLITE4_OK ){
      int nCell;                  /* Number of cells on this page */
      int nByte;
      u8 *pCell;
      u8 *aData = (u8*)btPageData(pPg);

      nCell = btCellCount(aData, pgsz);
      pCsr->aiCell[pCsr->nPg-1] = (bLast ? nCell : 0);

      /* If the cursor has descended to a leaf break out of the loop. */
      if( (aData[0] & BT_PGFLAGS_INTERNAL)==0 ){
        if( nCell==0 ){
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
static int btCsrIsDelete(BtCursor *pCsr){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  int bRet;                       /* Return value */
  u8 *aData;
  u8 *pCell;
  int n;

  aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
  pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

  pCell += sqlite4BtVarintGet32(pCell, &n);
  if( n==0 ){
    /* Type (c) cell */
    pCell += sqlite4BtVarintGet32(pCell, &n);
    pCell += n;







|







1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
static int btCsrIsDelete(BtCursor *pCsr){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  int bRet;                       /* Return value */
  u8 *aData;
  u8 *pCell;
  int n;

  aData = btPageData(pCsr->apPage[pCsr->nPg-1]);
  pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

  pCell += sqlite4BtVarintGet32(pCell, &n);
  if( n==0 ){
    /* Type (c) cell */
    pCell += sqlite4BtVarintGet32(pCell, &n);
    pCell += n;
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357

static int fiCsrIsDelete(FiCursor *pCsr){
  int res = 0;
  if( (pCsr->base.flags & CSR_VISIT_DEL)==0 ){
    BtCursor *p = &pCsr->aSub[pCsr->iBt].csr;
    res = btCsrIsDelete(p);
  }
  return res;
}

/*
** Return an integer representing the result of (K1 - K2).
*/
static int btKeyCompare(
  const void *pKey1, int nKey1, 
  const void *pKey2, int nKey2
){
  int nCmp = MIN(nKey1, nKey2);
  int res;

  res = memcmp(pKey1, pKey2, nCmp);
  if( res==0 ){
    res = nKey1 - nKey2;
  }
  return res;
}

static int btOverflowArrayRead(
  bt_db *db,
  u8 *pOvfl,
  u8 *aOut,







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1408
1409
1410
1411
1412
1413
1414

















1415
1416
1417
1418
1419
1420
1421

static int fiCsrIsDelete(FiCursor *pCsr){
  int res = 0;
  if( (pCsr->base.flags & CSR_VISIT_DEL)==0 ){
    BtCursor *p = &pCsr->aSub[pCsr->iBt].csr;
    res = btCsrIsDelete(p);
  }

















  return res;
}

static int btOverflowArrayRead(
  bt_db *db,
  u8 *pOvfl,
  u8 *aOut,
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
  ** it has a depth of zero.  */
  for(iPg=0; rc==SQLITE4_OK && iPg<(nDirect+(nDepth==0)) && iOut<nOut; iPg++){
    u32 pgno = btGetU32(&pOvfl[1+iPg*4]);
    BtPage *pPg = 0;
    rc = sqlite4BtPageGet(db->pPager, pgno, &pPg);
    if( rc==SQLITE4_OK ){
      int nCopy = MIN(nOut-iOut, pgsz);
      u8 *a = sqlite4BtPageData(pPg);
      memcpy(&aOut[iOut], a, nCopy);
      sqlite4BtPageRelease(pPg);
      iOut += nCopy;
    }
  }

  /* Read from the overflow tree, if it was not read by the block above. */







|







1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
  ** it has a depth of zero.  */
  for(iPg=0; rc==SQLITE4_OK && iPg<(nDirect+(nDepth==0)) && iOut<nOut; iPg++){
    u32 pgno = btGetU32(&pOvfl[1+iPg*4]);
    BtPage *pPg = 0;
    rc = sqlite4BtPageGet(db->pPager, pgno, &pPg);
    if( rc==SQLITE4_OK ){
      int nCopy = MIN(nOut-iOut, pgsz);
      u8 *a = btPageData(pPg);
      memcpy(&aOut[iOut], a, nCopy);
      sqlite4BtPageRelease(pPg);
      iOut += nCopy;
    }
  }

  /* Read from the overflow tree, if it was not read by the block above. */
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445

    /* Initialize the apHier[] array. */
    pgno = btGetU32(&pOvfl[1+nDirect*4]);
    for(i=0; i<nDepth && rc==SQLITE4_OK; i++){
      u8 *a;
      rc = sqlite4BtPageGet(db->pPager, pgno, &apHier[i].pPg);
      if( rc==SQLITE4_OK ){
        a = sqlite4BtPageData(apHier[i].pPg);
        pgno = btGetU32(a);
      }
    }

    /* Loop runs once for each leaf page we read from. */
    while( iOut<nOut ){
      u8 *a;                      /* Data associated with some page */
      BtPage *pLeaf;              /* Leaf page */
      int nCopy;                  /* Bytes of data to read from leaf page */

      int iLvl;

      nCopy =  MIN(nOut-iOut, pgsz);
      assert( nCopy>0 );

      /* Read data from the current leaf page */
      rc = sqlite4BtPageGet(db->pPager, pgno, &pLeaf);
      if( rc!=SQLITE4_OK ) break;
      a = sqlite4BtPageData(pLeaf);
      memcpy(&aOut[iOut], a, nCopy);
      sqlite4BtPageRelease(pLeaf);
      iOut += nCopy;

      /* If all required data has been read, break out of the loop */
      if( iOut>=nOut ) break;

      for(iLvl=nDepth-1; iLvl>=0; iLvl--){
        if( apHier[iLvl].iCell<(nPgPtr-1) ) break;
      }
      if( iLvl<0 ) break; /* SQLITE4_CORRUPT? */
      apHier[iLvl].iCell++;

      for(; iLvl<nDepth && rc==SQLITE4_OK; iLvl++){
        a = sqlite4BtPageData(apHier[iLvl].pPg);
        pgno = btGetU32(&a[apHier[iLvl].iCell * 4]);
        if( iLvl<(nDepth-1) ){
          apHier[iLvl+1].iCell = 0;
          sqlite4BtPageRelease(apHier[iLvl+1].pPg);
          apHier[iLvl+1].pPg = 0;
          rc = sqlite4BtPageGet(db->pPager, pgno, &apHier[iLvl+1].pPg);
        }







|


















|














|







1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509

    /* Initialize the apHier[] array. */
    pgno = btGetU32(&pOvfl[1+nDirect*4]);
    for(i=0; i<nDepth && rc==SQLITE4_OK; i++){
      u8 *a;
      rc = sqlite4BtPageGet(db->pPager, pgno, &apHier[i].pPg);
      if( rc==SQLITE4_OK ){
        a = btPageData(apHier[i].pPg);
        pgno = btGetU32(a);
      }
    }

    /* Loop runs once for each leaf page we read from. */
    while( iOut<nOut ){
      u8 *a;                      /* Data associated with some page */
      BtPage *pLeaf;              /* Leaf page */
      int nCopy;                  /* Bytes of data to read from leaf page */

      int iLvl;

      nCopy =  MIN(nOut-iOut, pgsz);
      assert( nCopy>0 );

      /* Read data from the current leaf page */
      rc = sqlite4BtPageGet(db->pPager, pgno, &pLeaf);
      if( rc!=SQLITE4_OK ) break;
      a = btPageData(pLeaf);
      memcpy(&aOut[iOut], a, nCopy);
      sqlite4BtPageRelease(pLeaf);
      iOut += nCopy;

      /* If all required data has been read, break out of the loop */
      if( iOut>=nOut ) break;

      for(iLvl=nDepth-1; iLvl>=0; iLvl--){
        if( apHier[iLvl].iCell<(nPgPtr-1) ) break;
      }
      if( iLvl<0 ) break; /* SQLITE4_CORRUPT? */
      apHier[iLvl].iCell++;

      for(; iLvl<nDepth && rc==SQLITE4_OK; iLvl++){
        a = btPageData(apHier[iLvl].pPg);
        pgno = btGetU32(&a[apHier[iLvl].iCell * 4]);
        if( iLvl<(nDepth-1) ){
          apHier[iLvl+1].iCell = 0;
          sqlite4BtPageRelease(apHier[iLvl+1].pPg);
          apHier[iLvl+1].pPg = 0;
          rc = sqlite4BtPageGet(db->pPager, pgno, &apHier[iLvl+1].pPg);
        }
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
    u8 *pKLocal = 0;                /* Pointer to local part of key */
    u8 *pVLocal = 0;                /* Pointer to local part of value, if any */
    int nKLocal = 0;                /* Bytes of key on page */
    int nVLocal = 0;                /* Bytes of value on page */
    int nKOvfl = 0;                 /* Bytes of key on overflow pages */
    int nVOvfl = 0;                 /* Bytes of value on overflow pages */

    aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
    pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
    pCell += sqlite4BtVarintGet32(pCell, &nKLocal);
    if( nKLocal==0 ){
      /* Type (c) leaf cell. */
      pCell += sqlite4BtVarintGet32(pCell, &nKLocal);
      pKLocal = pCell;
      pCell += nKLocal;







|







1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
    u8 *pKLocal = 0;                /* Pointer to local part of key */
    u8 *pVLocal = 0;                /* Pointer to local part of value, if any */
    int nKLocal = 0;                /* Bytes of key on page */
    int nVLocal = 0;                /* Bytes of value on page */
    int nKOvfl = 0;                 /* Bytes of key on overflow pages */
    int nVOvfl = 0;                 /* Bytes of value on overflow pages */

    aData = (u8*)btPageData(pCsr->apPage[pCsr->nPg-1]);
    pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
    pCell += sqlite4BtVarintGet32(pCell, &nKLocal);
    if( nKLocal==0 ){
      /* Type (c) leaf cell. */
      pCell += sqlite4BtVarintGet32(pCell, &nKLocal);
      pKLocal = pCell;
      pCell += nKLocal;
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
  }else{
    const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
    u8 *aData;
    u8 *pCell;
    int nK;
    int iCell = pCsr->aiCell[pCsr->nPg-1];

    aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
    assert( btCellCount(aData, pgsz)>iCell );
    pCell = btCellFind(aData, pgsz, iCell);
    pCell += sqlite4BtVarintGet32(pCell, &nK);

    if( nK==0 ){
      /* type (c) leaf cell */
      rc = btCsrBuffer(pCsr, 0);







|







1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
  }else{
    const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
    u8 *aData;
    u8 *pCell;
    int nK;
    int iCell = pCsr->aiCell[pCsr->nPg-1];

    aData = btPageData(pCsr->apPage[pCsr->nPg-1]);
    assert( btCellCount(aData, pgsz)>iCell );
    pCell = btCellFind(aData, pgsz, iCell);
    pCell += sqlite4BtVarintGet32(pCell, &nK);

    if( nK==0 ){
      /* type (c) leaf cell */
      rc = btCsrBuffer(pCsr, 0);
1817
1818
1819
1820
1821
1822
1823

1824
1825
1826
1827
1828


1829
1830
1831
1832
1833
1834
1835
  BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);

  assert( eSeek==BT_SEEK_LE || eSeek==BT_SEEK_EQ || eSeek==BT_SEEK_GE );
  assert( (pCsr->base.flags & CSR_VISIT_DEL)==0 || eSeek==BT_SEEK_GE );
  fiCsrReset(pCsr);

  if( pHdr->iMRoot ){

    FiLevelIter iter;

    /* Initialize the iterator used to skip through database levels */
    rc = fiLevelIterInit(db, &iter);
    if( rc!=SQLITE4_OK ) return rc;



    if( eSeek==BT_SEEK_EQ ){
      FiSubCursor *pSub;
      BtCursor *pM;

      pCsr->base.flags &= ~(CSR_NEXT_OK | CSR_PREV_OK);








>





>
>







1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
  BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);

  assert( eSeek==BT_SEEK_LE || eSeek==BT_SEEK_EQ || eSeek==BT_SEEK_GE );
  assert( (pCsr->base.flags & CSR_VISIT_DEL)==0 || eSeek==BT_SEEK_GE );
  fiCsrReset(pCsr);

  if( pHdr->iMRoot ){
    u8 *pKey;
    FiLevelIter iter;

    /* Initialize the iterator used to skip through database levels */
    rc = fiLevelIterInit(db, &iter);
    if( rc!=SQLITE4_OK ) return rc;
    pKey = sqlite4_malloc(db->pEnv, nK+8);
    if( pKey==0 ) return SQLITE4_NOMEM;

    if( eSeek==BT_SEEK_EQ ){
      FiSubCursor *pSub;
      BtCursor *pM;

      pCsr->base.flags &= ~(CSR_NEXT_OK | CSR_PREV_OK);

1843
1844
1845
1846
1847
1848
1849

1850
1851
1852
1853
1854
1855
1856
1857
      pSub = pCsr->aSub;
      pM = &pSub->mcsr;

      btCsrSetup(db, pHdr->iMRoot, pM);
      while( 0==fiLevelIterNext(&iter) ){

        fiFormatPrefix(pSub->aPrefix, iter.iAge, iter.iLvl);

        rc = btCsrSeek(pM, pSub->aPrefix, pK, nK, BT_SEEK_LE, BT_CSRSEEK_SEEK);

        if( rc==SQLITE4_INEXACT ){
          rc = fiSubCsrCheckPrefix(pSub);
        }

        if( rc==SQLITE4_NOTFOUND ){
          /* All keys in this level are greater than pK/nK. */







>
|







1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
      pSub = pCsr->aSub;
      pM = &pSub->mcsr;

      btCsrSetup(db, pHdr->iMRoot, pM);
      while( 0==fiLevelIterNext(&iter) ){

        fiFormatPrefix(pSub->aPrefix, iter.iAge, iter.iLvl);
        memcpy(pKey, pSub->aPrefix, sizeof(pSub->aPrefix));
        rc = btCsrSeek(pM, 0, pKey, nK+8, BT_SEEK_LE, BT_CSRSEEK_SEEK);

        if( rc==SQLITE4_INEXACT ){
          rc = fiSubCsrCheckPrefix(pSub);
        }

        if( rc==SQLITE4_NOTFOUND ){
          /* All keys in this level are greater than pK/nK. */
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909

1910

1911
1912
1913
1914
1915
1916
1917

      /* This loop runs once for each sub-cursor */
      while( rc==SQLITE4_OK && 0==fiLevelIterNext(&iter) ){
        FiSubCursor *pSub = &pCsr->aSub[iter.iSub];
        BtCursor *pM = &pSub->mcsr;
        btCsrSetup(db, pHdr->iMRoot, pM);

        btPutU32(&pSub->aPrefix[0], (u32)iter.iAge);
        btPutU32(&pSub->aPrefix[4], ~(u32)iter.iLvl);

        rc = btCsrSeek(pM, pSub->aPrefix, pK, nK, BT_SEEK_LE, BT_CSRSEEK_SEEK);
        if( rc==SQLITE4_INEXACT ) rc = fiSubCsrCheckPrefix(pSub);
        if( rc==SQLITE4_NOTFOUND && eSeek==BT_SEEK_GE ){

          rc = btCsrSeek(pM, pSub->aPrefix, 0, 0, BT_SEEK_GE, BT_CSRSEEK_SEEK);

          if( rc==SQLITE4_INEXACT ) rc = fiSubCsrCheckPrefix(pSub);
        }

        if( rc==SQLITE4_NOTFOUND ){
          /* No keys to visit in this level */
          assert( pSub->mcsr.nPg==0 );
          assert( pSub->csr.nPg==0 );







|
|

|


>
|
>







1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987

      /* This loop runs once for each sub-cursor */
      while( rc==SQLITE4_OK && 0==fiLevelIterNext(&iter) ){
        FiSubCursor *pSub = &pCsr->aSub[iter.iSub];
        BtCursor *pM = &pSub->mcsr;
        btCsrSetup(db, pHdr->iMRoot, pM);

        fiFormatPrefix(pSub->aPrefix, iter.iAge, iter.iLvl);
        memcpy(pKey, pSub->aPrefix, sizeof(pSub->aPrefix));

        rc = btCsrSeek(pM, 0, pKey, nK+8, BT_SEEK_LE, BT_CSRSEEK_SEEK);
        if( rc==SQLITE4_INEXACT ) rc = fiSubCsrCheckPrefix(pSub);
        if( rc==SQLITE4_NOTFOUND && eSeek==BT_SEEK_GE ){
          rc = btCsrSeek(pM, 0, pSub->aPrefix, sizeof(pSub->aPrefix), 
              BT_SEEK_GE, BT_CSRSEEK_SEEK
          );
          if( rc==SQLITE4_INEXACT ) rc = fiSubCsrCheckPrefix(pSub);
        }

        if( rc==SQLITE4_NOTFOUND ){
          /* No keys to visit in this level */
          assert( pSub->mcsr.nPg==0 );
          assert( pSub->csr.nPg==0 );
1963
1964
1965
1966
1967
1968
1969

1970
1971
1972
1973
1974
1975
1976
          }else if( bMatch==0 ){
            rc = (bHit ? SQLITE4_INEXACT : SQLITE4_NOTFOUND);
          }
        }
      }
    }


    fiLevelIterCleanup(&iter);
  }

  return rc;
}

static int fiCsrEnd(FiCursor *pCsr, int bLast){







>







2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
          }else if( bMatch==0 ){
            rc = (bHit ? SQLITE4_INEXACT : SQLITE4_NOTFOUND);
          }
        }
      }
    }

    sqlite4_free(db->pEnv, pKey);
    fiLevelIterCleanup(&iter);
  }

  return rc;
}

static int fiCsrEnd(FiCursor *pCsr, int bLast){
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
    const int nPgPtr = pgsz / 4;
    BtPage *pPg;
    u8 *aData;
    int i;

    rc = sqlite4BtPageGet(pPager, pgno, &pPg);
    if( rc!=SQLITE4_OK ) return rc;
    aData = sqlite4BtPageData(pPg);

    for(i=0; rc==SQLITE4_OK && i<nPgPtr; i++){
      u32 child = btGetU32(&aData[i*4]);
      if( child==0 ) break;
      rc = btOverflowTrimtree(pgsz, pPager, child, nDepth-1);
    }








|







2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
    const int nPgPtr = pgsz / 4;
    BtPage *pPg;
    u8 *aData;
    int i;

    rc = sqlite4BtPageGet(pPager, pgno, &pPg);
    if( rc!=SQLITE4_OK ) return rc;
    aData = btPageData(pPg);

    for(i=0; rc==SQLITE4_OK && i<nPgPtr; i++){
      u32 child = btGetU32(&aData[i*4]);
      if( child==0 ) break;
      rc = btOverflowTrimtree(pgsz, pPager, child, nDepth-1);
    }

2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
  u8 *aData;
  u8 *pCell;
  u8 *pOvfl = 0;
  int iCell = pCsr->aiCell[pCsr->nPg-1];
  int n;
  int rc = SQLITE4_OK;
  
  aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
  assert( btCellCount(aData, pgsz)>iCell );
  pCell = btCellFind(aData, pgsz, iCell);
  pCell += sqlite4BtVarintGet32(pCell, &n);

  if( n==0 ){
    /* Type (c) cell */
    pCell += sqlite4BtVarintGet32(pCell, &n);







|







2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
  u8 *aData;
  u8 *pCell;
  u8 *pOvfl = 0;
  int iCell = pCsr->aiCell[pCsr->nPg-1];
  int n;
  int rc = SQLITE4_OK;
  
  aData = (u8*)btPageData(pCsr->apPage[pCsr->nPg-1]);
  assert( btCellCount(aData, pgsz)>iCell );
  pCell = btCellFind(aData, pgsz, iCell);
  pCell += sqlite4BtVarintGet32(pCell, &n);

  if( n==0 ){
    /* Type (c) cell */
    pCell += sqlite4BtVarintGet32(pCell, &n);
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
      /* The row has been deleted out from under this cursor. So return
       ** NULL for data.  */
      *ppV = 0;
      *pnV = 0;
    }else{
      int iCell = pCsr->aiCell[pCsr->nPg-1];

      aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
      pCell = btCellFind(aData, pgsz, iCell);
      pCell += sqlite4BtVarintGet32(pCell, &nK);
      if( nK>0 ){
        pCell += nK;
        pCell += sqlite4BtVarintGet32(pCell, &nV);
      }








|







2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
      /* The row has been deleted out from under this cursor. So return
       ** NULL for data.  */
      *ppV = 0;
      *pnV = 0;
    }else{
      int iCell = pCsr->aiCell[pCsr->nPg-1];

      aData = (u8*)btPageData(pCsr->apPage[pCsr->nPg-1]);
      pCell = btCellFind(aData, pgsz, iCell);
      pCell += sqlite4BtVarintGet32(pCell, &nK);
      if( nK>0 ){
        pCell += nK;
        pCell += sqlite4BtVarintGet32(pCell, &nV);
      }

2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  FiSubCursor *pSub;              /* Current sub-cursor */
  u8 *aData;                      /* Current page data */
  int iCell;

  assert( pCsr->iBt>=0 );
  pSub = &pCsr->aSub[pCsr->iBt];
  aData = sqlite4BtPageData(pSub->csr.apPage[pSub->csr.nPg-1]);
  iCell = pSub->csr.aiCell[pSub->csr.nPg-1];

  *ppCell = btCellFindSize(aData, pgsz, iCell, pnCell);
}

/*
** Return true if the cell that the cursor currently points to contains 
** pointers to one or more overflow pages. Or false otherwise.
*/
static int btCsrOverflow(BtCursor *pCsr){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  u8 *aData;                      /* Current page data */
  int nKey;                       /* First varint in cell */
  int res;                       /* First varint in cell */

  aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
  aData = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

  aData += sqlite4BtVarintGet32(aData, &nKey);
  res = (nKey==0 || aData[nKey]==0);
  return res;
}








|















|







2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  FiSubCursor *pSub;              /* Current sub-cursor */
  u8 *aData;                      /* Current page data */
  int iCell;

  assert( pCsr->iBt>=0 );
  pSub = &pCsr->aSub[pCsr->iBt];
  aData = btPageData(pSub->csr.apPage[pSub->csr.nPg-1]);
  iCell = pSub->csr.aiCell[pSub->csr.nPg-1];

  *ppCell = btCellFindSize(aData, pgsz, iCell, pnCell);
}

/*
** Return true if the cell that the cursor currently points to contains 
** pointers to one or more overflow pages. Or false otherwise.
*/
static int btCsrOverflow(BtCursor *pCsr){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  u8 *aData;                      /* Current page data */
  int nKey;                       /* First varint in cell */
  int res;                       /* First varint in cell */

  aData = btPageData(pCsr->apPage[pCsr->nPg-1]);
  aData = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

  aData += sqlite4BtVarintGet32(aData, &nKey);
  res = (nKey==0 || aData[nKey]==0);
  return res;
}

2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
** Attach a buffer to an existing page object.
*/
static int btSetBuffer(bt_db *pDb, BtPage *pPg, u8 *aBuf){
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  int rc;
  rc = sqlite4BtPageWrite(pPg);
  if( rc==SQLITE4_OK ){
    u8 *aData = sqlite4BtPageData(pPg);
    memcpy(aData, aBuf, pgsz);
    sqlite4_free(pDb->pEnv, aBuf);
  }
  return rc;
}

/*







|







2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
** Attach a buffer to an existing page object.
*/
static int btSetBuffer(bt_db *pDb, BtPage *pPg, u8 *aBuf){
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  int rc;
  rc = sqlite4BtPageWrite(pPg);
  if( rc==SQLITE4_OK ){
    u8 *aData = btPageData(pPg);
    memcpy(aData, aBuf, pgsz);
    sqlite4_free(pDb->pEnv, aBuf);
  }
  return rc;
}

/*
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
  int iWrite;                     /* Write next cell at this offset in aTmp[] */
  int i;                          /* Used to iterate through cells */
  int bLeaf;                      /* True if pPg is a leaf page */
  int nHdr;                       /* Bytes in header of this page */

  if( btNewBuffer(pDb, &aTmp) ) return SQLITE4_NOMEM;

  aData = sqlite4BtPageData(pPg);
  nCell = btCellCount(aData, pgsz);

  bLeaf = 0==(btFlags(aData) & BT_PGFLAGS_INTERNAL);
  nHdr = bLeaf ? 1 : 5;

  /* Set header bytes of new page */
  memcpy(aTmp, aData, nHdr);







|







2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
  int iWrite;                     /* Write next cell at this offset in aTmp[] */
  int i;                          /* Used to iterate through cells */
  int bLeaf;                      /* True if pPg is a leaf page */
  int nHdr;                       /* Bytes in header of this page */

  if( btNewBuffer(pDb, &aTmp) ) return SQLITE4_NOMEM;

  aData = btPageData(pPg);
  nCell = btCellCount(aData, pgsz);

  bLeaf = 0==(btFlags(aData) & BT_PGFLAGS_INTERNAL);
  nHdr = bLeaf ? 1 : 5;

  /* Set header bytes of new page */
  memcpy(aTmp, aData, nHdr);
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
static int btAllocateAndZero(bt_db *db, BtPage **ppPg){
  BtPage *pPg = 0;                /* Allocated page handle */
  int rc;                         /* Return code */

  rc = sqlite4BtPageAllocate(db->pPager, &pPg);
  if( rc==SQLITE4_OK ){
    const int pgsz = sqlite4BtPagerPagesize(db->pPager);
    memset(sqlite4BtPageData(pPg), 0, pgsz);
  }

  *ppPg = pPg;
  return rc;
}

static int btOverflowArrayPopulate(







|







2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
static int btAllocateAndZero(bt_db *db, BtPage **ppPg){
  BtPage *pPg = 0;                /* Allocated page handle */
  int rc;                         /* Return code */

  rc = sqlite4BtPageAllocate(db->pPager, &pPg);
  if( rc==SQLITE4_OK ){
    const int pgsz = sqlite4BtPagerPagesize(db->pPager);
    memset(btPageData(pPg), 0, pgsz);
  }

  *ppPg = pPg;
  return rc;
}

static int btOverflowArrayPopulate(
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
  for(i=0; rc==SQLITE4_OK && i<nDepth; i++){
    u32 pgno;
    rc = btAllocateAndZero(db, &apHier[i].pPg);
    pgno = sqlite4BtPagePgno(apHier[i].pPg);
    if( i==0 ){
      btPutU32(&aOut[1 + BT_MAX_DIRECT_OVERFLOW*4], pgno);
    }else{
      u8 *a = sqlite4BtPageData(apHier[i-1].pPg);
      btPutU32(a, pgno);
      apHier[i-1].iCell++;
    }
  }

  for(iOvfl=0; rc==SQLITE4_OK && (n1<nBuf1 || n2<nBuf2); iOvfl++){
    int nCopy1, nCopy2;           /* Bytes to copy from pBuf1 and pBuf2 */
    u8 *aData;
    BtPage *pPg;
    u32 pgno;

    rc = sqlite4BtPageAllocate(db->pPager, &pPg);
    if( rc!=SQLITE4_OK ) break;
    aData = sqlite4BtPageData(pPg);
    pgno = sqlite4BtPagePgno(pPg);

    nCopy1 = MIN(pgsz, nBuf1 - n1);
    nCopy2 = MIN(pgsz - nCopy1, nBuf2 - n2);

    memcpy(aData, &pBuf1[n1], nCopy1); n1 += nCopy1;
    memcpy(&aData[nCopy1], &pBuf2[n2], nCopy2); n2 += nCopy2;
    rc = sqlite4BtPageRelease(pPg);
    if( rc!=SQLITE4_OK ) break;

    if( iOvfl<(BT_MAX_DIRECT_OVERFLOW+(nDepth==0)) ){
      btPutU32(&aOut[1 + iOvfl*4], pgno);
      nDirect++;
    }else{
      assert( nDepth>0 );
      for(i=nDepth-1; pgno && i>=0; i--){
        u8 *a = sqlite4BtPageData(apHier[i].pPg);
        if( apHier[i].iCell==nPgPtr ){
          BtPage *pNew = 0;
          rc = sqlite4BtPageRelease(apHier[i].pPg);
          if( rc==SQLITE4_OK ){
            rc = btAllocateAndZero(db, &pNew);
            if( rc==SQLITE4_OK ){
              u8 *a = sqlite4BtPageData(pNew);
              btPutU32(a, pgno);
              pgno = sqlite4BtPagePgno(pNew);
            }
          }

          if( rc!=SQLITE4_OK ){
            pgno = 0;







|













|
















|






|







2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
  for(i=0; rc==SQLITE4_OK && i<nDepth; i++){
    u32 pgno;
    rc = btAllocateAndZero(db, &apHier[i].pPg);
    pgno = sqlite4BtPagePgno(apHier[i].pPg);
    if( i==0 ){
      btPutU32(&aOut[1 + BT_MAX_DIRECT_OVERFLOW*4], pgno);
    }else{
      u8 *a = btPageData(apHier[i-1].pPg);
      btPutU32(a, pgno);
      apHier[i-1].iCell++;
    }
  }

  for(iOvfl=0; rc==SQLITE4_OK && (n1<nBuf1 || n2<nBuf2); iOvfl++){
    int nCopy1, nCopy2;           /* Bytes to copy from pBuf1 and pBuf2 */
    u8 *aData;
    BtPage *pPg;
    u32 pgno;

    rc = sqlite4BtPageAllocate(db->pPager, &pPg);
    if( rc!=SQLITE4_OK ) break;
    aData = btPageData(pPg);
    pgno = sqlite4BtPagePgno(pPg);

    nCopy1 = MIN(pgsz, nBuf1 - n1);
    nCopy2 = MIN(pgsz - nCopy1, nBuf2 - n2);

    memcpy(aData, &pBuf1[n1], nCopy1); n1 += nCopy1;
    memcpy(&aData[nCopy1], &pBuf2[n2], nCopy2); n2 += nCopy2;
    rc = sqlite4BtPageRelease(pPg);
    if( rc!=SQLITE4_OK ) break;

    if( iOvfl<(BT_MAX_DIRECT_OVERFLOW+(nDepth==0)) ){
      btPutU32(&aOut[1 + iOvfl*4], pgno);
      nDirect++;
    }else{
      assert( nDepth>0 );
      for(i=nDepth-1; pgno && i>=0; i--){
        u8 *a = btPageData(apHier[i].pPg);
        if( apHier[i].iCell==nPgPtr ){
          BtPage *pNew = 0;
          rc = sqlite4BtPageRelease(apHier[i].pPg);
          if( rc==SQLITE4_OK ){
            rc = btAllocateAndZero(db, &pNew);
            if( rc==SQLITE4_OK ){
              u8 *a = btPageData(pNew);
              btPutU32(a, pgno);
              pgno = sqlite4BtPagePgno(pNew);
            }
          }

          if( rc!=SQLITE4_OK ){
            pgno = 0;
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
  u8 *aParent;                    /* Buffer of parent page */
  int iChild;                     /* Index of child page within parent */
  int nSib;                       /* Number of siblings */
  int iSib;                       /* Index of left-most sibling page */

  int i;

  aParent = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-2]);
  iChild = pCsr->aiCell[pCsr->nPg-2];
  nCell = btCellCount(aParent, pgsz);

  if( nCell<2 ){
    nSib = nCell+1;
  }else{
    nSib = 3;







|







2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
  u8 *aParent;                    /* Buffer of parent page */
  int iChild;                     /* Index of child page within parent */
  int nSib;                       /* Number of siblings */
  int iSib;                       /* Index of left-most sibling page */

  int i;

  aParent = btPageData(pCsr->apPage[pCsr->nPg-2]);
  iChild = pCsr->aiCell[pCsr->nPg-2];
  nCell = btCellCount(aParent, pgsz);

  if( nCell<2 ){
    nSib = nCell+1;
  }else{
    nSib = 3;
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978

static int btSetChildPgno(bt_db *pDb, BtPage *pPg, int iChild, u32 pgno){
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  int rc;

  rc = sqlite4BtPageWrite(pPg);
  if( rc==SQLITE4_OK ){
    u8 *aData = sqlite4BtPageData(pPg);
    int nCell = btCellCount(aData, pgsz);
    if( iChild>=nCell ){
      btPutU32(&aData[1], pgno);
    }else{
      int nKey;
      u8 *pCell = btCellFind(aData, pgsz, iChild);
      pCell += sqlite4BtVarintGet32(pCell, &nKey);







|







3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049

static int btSetChildPgno(bt_db *pDb, BtPage *pPg, int iChild, u32 pgno){
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  int rc;

  rc = sqlite4BtPageWrite(pPg);
  if( rc==SQLITE4_OK ){
    u8 *aData = btPageData(pPg);
    int nCell = btCellCount(aData, pgsz);
    if( iChild>=nCell ){
      btPutU32(&aData[1], pgno);
    }else{
      int nKey;
      u8 *pCell = btCellFind(aData, pgsz, iChild);
      pCell += sqlite4BtVarintGet32(pCell, &nKey);
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
  for(iPg=0; iPg<p->nIn && rc==SQLITE4_OK; iPg++){
    BtPage *pPg;                  /* Current page */
    u8 *aData;                    /* Page data */
    int nCell;                    /* Number of cells on page pPg */
    int iCell;                    /* Current cell in pPg */

    pPg = p->apPg[iPg];
    aData = sqlite4BtPageData(pPg);
    nCell = btCellCount(aData, pgsz);

    for(iCell=0; iCell<nCell && rc==SQLITE4_OK; iCell++){
      int nByte;
      u8 *pCell;

      if( pPg==pIns && iCell==iIns ){







|







3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
  for(iPg=0; iPg<p->nIn && rc==SQLITE4_OK; iPg++){
    BtPage *pPg;                  /* Current page */
    u8 *aData;                    /* Page data */
    int nCell;                    /* Number of cells on page pPg */
    int iCell;                    /* Current cell in pPg */

    pPg = p->apPg[iPg];
    aData = btPageData(pPg);
    nCell = btCellCount(aData, pgsz);

    for(iCell=0; iCell<nCell && rc==SQLITE4_OK; iCell++){
      int nByte;
      u8 *pCell;

      if( pPg==pIns && iCell==iIns ){
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
    }

    /* If the siblings being balanced are not leaves, and the page just
    ** processed was not the right-most sibling, visit a cell from the
    ** parent page.  */
    if( p->bLeaf==0 && iPg<(p->nIn-1) && rc==SQLITE4_OK ){
      int iPar = p->pCsr->nPg-2;
      u8 *aParent = sqlite4BtPageData(p->pCsr->apPage[iPar]);
      u8 *pCell = btCellFind(aParent, pgsz, p->pCsr->aiCell[iPar] + iPg);
      KeyValue kv;
      btInternalCellToKeyValue(pCell, &kv);
      kv.pgno = btGetU32(&aData[1]);
      rc = xVisit(p, iCall++, 0, 0, &kv);
    }
  }







|







3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
    }

    /* If the siblings being balanced are not leaves, and the page just
    ** processed was not the right-most sibling, visit a cell from the
    ** parent page.  */
    if( p->bLeaf==0 && iPg<(p->nIn-1) && rc==SQLITE4_OK ){
      int iPar = p->pCsr->nPg-2;
      u8 *aParent = btPageData(p->pCsr->apPage[iPar]);
      u8 *pCell = btCellFind(aParent, pgsz, p->pCsr->aiCell[iPar] + iPg);
      KeyValue kv;
      btInternalCellToKeyValue(pCell, &kv);
      kv.pgno = btGetU32(&aData[1]);
      rc = xVisit(p, iCall++, 0, 0, &kv);
    }
  }
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
** set to the size of the prefix in bytes.
*/
static u8 *btKeyPrefix(const int pgsz, BtPage *pPg, int bLast, int *pnByte){
  u8 *p;
  int n;
  u8 *aData;

  aData = sqlite4BtPageData(pPg);
  p = btCellFind(aData, pgsz, bLast ? btCellCount(aData, pgsz)-1 : 0);
  p += sqlite4BtVarintGet32(p, &n);
  if( n==0 ) p += sqlite4BtVarintGet32(p, &n);

  *pnByte = n;
  return p;
}







|







3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
** set to the size of the prefix in bytes.
*/
static u8 *btKeyPrefix(const int pgsz, BtPage *pPg, int bLast, int *pnByte){
  u8 *p;
  int n;
  u8 *aData;

  aData = btPageData(pPg);
  p = btCellFind(aData, pgsz, bLast ? btCellCount(aData, pgsz)-1 : 0);
  p += sqlite4BtVarintGet32(p, &n);
  if( n==0 ) p += sqlite4BtVarintGet32(p, &n);

  *pnByte = n;
  return p;
}
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
**   * larger than all keys on pLeft, and 
**   * smaller than or equal to all keys on pRight.
*/
static void btPrefixKey(
    const int pgsz, BtPage *pLeft, BtPage *pRight, KeyValue *pKV
){
  int nMax;
  int nMaxPrefix = pgsz/4;

  u8 *aLeft; int nLeft;
  u8 *aRight; int nRight;
  int i;

  aLeft = btKeyPrefix(pgsz, pLeft, 1, &nLeft);
  aRight = btKeyPrefix(pgsz, pRight, 0, &nRight);







|







3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
**   * larger than all keys on pLeft, and 
**   * smaller than or equal to all keys on pRight.
*/
static void btPrefixKey(
    const int pgsz, BtPage *pLeft, BtPage *pRight, KeyValue *pKV
){
  int nMax;
  int nMaxPrefix = BT_MAX_INTERNAL_KEY;

  u8 *aLeft; int nLeft;
  u8 *aRight; int nRight;
  int i;

  aLeft = btKeyPrefix(pgsz, pLeft, 1, &nLeft);
  aRight = btKeyPrefix(pgsz, pRight, 0, &nRight);
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
  BalanceCtx ctx;
  memset(&ctx, 0, sizeof(ctx));
  ctx.pCsr = pCsr;
  ctx.nKV = nKV;
  ctx.apKV = apKV;
  ctx.pgsz = pgsz;
  ctx.bLeaf = bLeaf;
  ctx.flags = *(u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);

  memset(anByteOut, 0, sizeof(anByteOut));

  /* Gather the sibling pages from which cells will be redistributed into
  ** the ctx.apPg[] array.  */
  assert( bLeaf==0 || bLeaf==1 );
  assert( pCsr->nPg>1 );
  rc = btGatherSiblings(&ctx);
  if( rc!=SQLITE4_OK ) goto rebalance_out;
  pPar = pCsr->apPage[pCsr->nPg-2];
  iSib = pCsr->aiCell[pCsr->nPg-2];

  /* Count the number of input cells. */
  ctx.nCell = nKV;
  for(iPg=0; iPg<ctx.nIn; iPg++){
    u8 *aData = sqlite4BtPageData(ctx.apPg[iPg]);
    ctx.nCell += btCellCount(aData, pgsz);
  }
  if( bLeaf==0 ) ctx.nCell += (ctx.nIn-1);
  assert( ctx.nCell>0 );

  /* Allocate and populate the anCellSz[] array */
  ctx.anCellSz = (int*)sqlite4_malloc(pDb->pEnv, sizeof(int)*ctx.nCell);







|















|







3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
  BalanceCtx ctx;
  memset(&ctx, 0, sizeof(ctx));
  ctx.pCsr = pCsr;
  ctx.nKV = nKV;
  ctx.apKV = apKV;
  ctx.pgsz = pgsz;
  ctx.bLeaf = bLeaf;
  ctx.flags = *(u8*)btPageData(pCsr->apPage[pCsr->nPg-1]);

  memset(anByteOut, 0, sizeof(anByteOut));

  /* Gather the sibling pages from which cells will be redistributed into
  ** the ctx.apPg[] array.  */
  assert( bLeaf==0 || bLeaf==1 );
  assert( pCsr->nPg>1 );
  rc = btGatherSiblings(&ctx);
  if( rc!=SQLITE4_OK ) goto rebalance_out;
  pPar = pCsr->apPage[pCsr->nPg-2];
  iSib = pCsr->aiCell[pCsr->nPg-2];

  /* Count the number of input cells. */
  ctx.nCell = nKV;
  for(iPg=0; iPg<ctx.nIn; iPg++){
    u8 *aData = btPageData(ctx.apPg[iPg]);
    ctx.nCell += btCellCount(aData, pgsz);
  }
  if( bLeaf==0 ) ctx.nCell += (ctx.nIn-1);
  assert( ctx.nCell>0 );

  /* Allocate and populate the anCellSz[] array */
  ctx.anCellSz = (int*)sqlite4_malloc(pDb->pEnv, sizeof(int)*ctx.nCell);
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
#ifdef BT_STDERR_DEBUG
  {
    int iDbg;
    fprintf(stderr, 
        "\nbtBalance(): bLeaf=%d nIn=%d anIn[] = ", ctx.bLeaf, ctx.nIn
    );
    for(iDbg=0; iDbg<ctx.nIn; iDbg++){
      u8 *aData = sqlite4BtPageData(ctx.apPg[iDbg]);
      fprintf(stderr, "%d ", btCellCount(aData, pgsz));
    }
    fprintf(stderr, " ->  nOut=%d anOut[] = ", ctx.nOut);
    for(iDbg=0; iDbg<ctx.nOut; iDbg++){
      fprintf(stderr, "%d ", ctx.anOut[iDbg]);
    }
    fprintf(stderr, "\n");







|







3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
#ifdef BT_STDERR_DEBUG
  {
    int iDbg;
    fprintf(stderr, 
        "\nbtBalance(): bLeaf=%d nIn=%d anIn[] = ", ctx.bLeaf, ctx.nIn
    );
    for(iDbg=0; iDbg<ctx.nIn; iDbg++){
      u8 *aData = btPageData(ctx.apPg[iDbg]);
      fprintf(stderr, "%d ", btCellCount(aData, pgsz));
    }
    fprintf(stderr, " ->  nOut=%d anOut[] = ", ctx.nOut);
    for(iDbg=0; iDbg<ctx.nOut; iDbg++){
      fprintf(stderr, "%d ", ctx.anOut[iDbg]);
    }
    fprintf(stderr, "\n");
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
  /* Populate the new buffers with the new page images. */
  rc = btBalanceVisitCells(&ctx, btBalanceOutput);
  if( rc!=SQLITE4_OK ) goto rebalance_out;

  if( ctx.bLeaf==0 ){
    /* Set the right-child pointer of the rightmost new sibling to a copy
    ** of the same pointer from the rightmost original sibling.  */
    u8 *aRightSibling = sqlite4BtPageData(ctx.apPg[ctx.nIn-1]);
    memcpy(&(ctx.apOut[ctx.nOut-1])[1], &aRightSibling[1], 4);
  }

  /* Clobber the old pages with the new buffers */
  for(iPg=0; iPg<ctx.nOut; iPg++){
    if( iPg>=ctx.nIn ){
      rc = btAllocateNonOverflow(pDb, &ctx.apPg[iPg]);







|







3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
  /* Populate the new buffers with the new page images. */
  rc = btBalanceVisitCells(&ctx, btBalanceOutput);
  if( rc!=SQLITE4_OK ) goto rebalance_out;

  if( ctx.bLeaf==0 ){
    /* Set the right-child pointer of the rightmost new sibling to a copy
    ** of the same pointer from the rightmost original sibling.  */
    u8 *aRightSibling = btPageData(ctx.apPg[ctx.nIn-1]);
    memcpy(&(ctx.apOut[ctx.nOut-1])[1], &aRightSibling[1], 4);
  }

  /* Clobber the old pages with the new buffers */
  for(iPg=0; iPg<ctx.nOut; iPg++){
    if( iPg>=ctx.nIn ){
      rc = btAllocateNonOverflow(pDb, &ctx.apPg[iPg]);
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
    if( rc!=SQLITE4_OK ) goto rebalance_out;
  }

#ifdef BT_STDERR_DEBUG
  {
    int iDbg;
    for(iDbg=0; iDbg<ctx.nOut; iDbg++){
      u8 *aData = sqlite4BtPageData(ctx.apPg[iDbg]);
      printPage(stderr, sqlite4BtPagePgno(ctx.apPg[iDbg]), aData, pgsz);
    }
  }
#endif

  /* The leaves are written. Now gather the keys and page numbers to
  ** push up into the parent page. This is only required when rebalancing







|







3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
    if( rc!=SQLITE4_OK ) goto rebalance_out;
  }

#ifdef BT_STDERR_DEBUG
  {
    int iDbg;
    for(iDbg=0; iDbg<ctx.nOut; iDbg++){
      u8 *aData = btPageData(ctx.apPg[iDbg]);
      printPage(stderr, sqlite4BtPagePgno(ctx.apPg[iDbg]), aData, pgsz);
    }
  }
#endif

  /* The leaves are written. Now gather the keys and page numbers to
  ** push up into the parent page. This is only required when rebalancing
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
  }
  if( rc==SQLITE4_OK && iPg==pCsr->nPg ){
    rc = btBalanceIfUnderfull(pCsr);
  }

#ifdef BT_STDERR_DEBUG
  {
    u8 *aData = sqlite4BtPageData(pPar);
    printPage(stderr, sqlite4BtPagePgno(pPar), aData, pgsz);
  }
#endif

 rebalance_out:
  for(iPg=0; iPg<array_size(ctx.apPg); iPg++){
    sqlite4BtPageRelease(ctx.apPg[iPg]);







|







3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
  }
  if( rc==SQLITE4_OK && iPg==pCsr->nPg ){
    rc = btBalanceIfUnderfull(pCsr);
  }

#ifdef BT_STDERR_DEBUG
  {
    u8 *aData = btPageData(pPar);
    printPage(stderr, sqlite4BtPagePgno(pPar), aData, pgsz);
  }
#endif

 rebalance_out:
  for(iPg=0; iPg<array_size(ctx.apPg); iPg++){
    sqlite4BtPageRelease(ctx.apPg[iPg]);
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
  assert( pCsr->nPg==1 );

  rc = sqlite4BtPageWrite(pRoot);
  if( rc==SQLITE4_OK ){
    rc = btAllocateNonOverflow(pDb, &pNew);
  }
  if( rc==SQLITE4_OK ){
    u8 *aRoot = sqlite4BtPageData(pRoot);
    u8 *aData = sqlite4BtPageData(pNew);

    memcpy(aData, aRoot, pgsz);
    aRoot[0] = BT_PGFLAGS_INTERNAL;
    if( pHdr->iMRoot==pCsr->iRoot ) aRoot[0] |= BT_PGFLAGS_METATREE;
    btPutU32(&aRoot[1], sqlite4BtPagePgno(pNew));
    btPutU16(&aRoot[pgsz-2], 0);
    btPutU16(&aRoot[pgsz-4], 5);







|
|







3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
  assert( pCsr->nPg==1 );

  rc = sqlite4BtPageWrite(pRoot);
  if( rc==SQLITE4_OK ){
    rc = btAllocateNonOverflow(pDb, &pNew);
  }
  if( rc==SQLITE4_OK ){
    u8 *aRoot = btPageData(pRoot);
    u8 *aData = btPageData(pNew);

    memcpy(aData, aRoot, pgsz);
    aRoot[0] = BT_PGFLAGS_INTERNAL;
    if( pHdr->iMRoot==pCsr->iRoot ) aRoot[0] |= BT_PGFLAGS_METATREE;
    btPutU32(&aRoot[1], sqlite4BtPagePgno(pNew));
    btPutU16(&aRoot[pgsz-2], 0);
    btPutU16(&aRoot[pgsz-4], 5);
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
  for(i=0; i<nKV; i++){
    nReq += btKVCellSize(&apKV[i]) + 2;
  }

  iCell = pCsr->aiCell[pCsr->nPg-1];
  assert( pCsr->nPg>0 );
  pLeaf = pCsr->apPage[pCsr->nPg-1];
  aData = (u8*)sqlite4BtPageData(pLeaf);

  /* Set the bLeaf variable to true if inserting into a leaf page, or
  ** false otherwise. Return SQLITE4_CORRUPT if the page is a leaf but
  ** the KeyValue pairs being inserted are suitable for internal nodes,
  ** or vice-versa.  */
  assert( nKV>0 );
  if( (0==(btFlags(aData) & BT_PGFLAGS_INTERNAL))!=bLeaf ){







|







3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
  for(i=0; i<nKV; i++){
    nReq += btKVCellSize(&apKV[i]) + 2;
  }

  iCell = pCsr->aiCell[pCsr->nPg-1];
  assert( pCsr->nPg>0 );
  pLeaf = pCsr->apPage[pCsr->nPg-1];
  aData = (u8*)btPageData(pLeaf);

  /* Set the bLeaf variable to true if inserting into a leaf page, or
  ** false otherwise. Return SQLITE4_CORRUPT if the page is a leaf but
  ** the KeyValue pairs being inserted are suitable for internal nodes,
  ** or vice-versa.  */
  assert( nKV>0 );
  if( (0==(btFlags(aData) & BT_PGFLAGS_INTERNAL))!=bLeaf ){
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
    nFree = pgsz - iWrite - 6;
  }else{
    if( btFreeContiguous(aData, pgsz)<nReq && btFreeSpace(aData, pgsz)>=nReq ){
      /* Special case - the new entry will not fit on the page at present
      ** but would if the page were defragmented. So defragment it before
      ** continuing.  */
      rc = btDefragmentPage(pCsr->base.pDb, pLeaf);
      aData = sqlite4BtPageData(pLeaf);
    }

    iWrite = btFreeOffset(aData, pgsz);
    nFree = btFreeContiguous(aData, pgsz);
  }

  if( nFree>=nReq ){
    /* The new entry will fit on the page. So in this case all there
    ** is to do is update this single page. The easy case. */
    rc = sqlite4BtPageWrite(pLeaf);
    if( rc==SQLITE4_OK ){
      aData = sqlite4BtPageData(pLeaf);

      /* Make space within the cell pointer array */
      if( iCell!=nCell ){
        u8 *aFrom = btCellPtrFind(aData, pgsz, nCell-1);
        u8 *aTo = btCellPtrFind(aData, pgsz, nCell-1+nKV);
        memmove(aTo, aFrom, (nCell-iCell) * 2);
      }







|











|







3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
    nFree = pgsz - iWrite - 6;
  }else{
    if( btFreeContiguous(aData, pgsz)<nReq && btFreeSpace(aData, pgsz)>=nReq ){
      /* Special case - the new entry will not fit on the page at present
      ** but would if the page were defragmented. So defragment it before
      ** continuing.  */
      rc = btDefragmentPage(pCsr->base.pDb, pLeaf);
      aData = btPageData(pLeaf);
    }

    iWrite = btFreeOffset(aData, pgsz);
    nFree = btFreeContiguous(aData, pgsz);
  }

  if( nFree>=nReq ){
    /* The new entry will fit on the page. So in this case all there
    ** is to do is update this single page. The easy case. */
    rc = sqlite4BtPageWrite(pLeaf);
    if( rc==SQLITE4_OK ){
      aData = btPageData(pLeaf);

      /* Make space within the cell pointer array */
      if( iCell!=nCell ){
        u8 *aFrom = btCellPtrFind(aData, pgsz, nCell-1);
        u8 *aTo = btCellPtrFind(aData, pgsz, nCell-1+nKV);
        memmove(aTo, aFrom, (nCell-iCell) * 2);
      }
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
    int i;                        /* Used to iterate through cells to delete */
    u8 *aData;                    /* Page buffer */
    int nCell;                    /* Number of cells initially on this page */
    int iDel;                     /* Index of cell to delete */
    int nFreed = 0;               /* Total bytes of space freed */

    iDel = pCsr->aiCell[pCsr->nPg-1];
    aData = (u8*)sqlite4BtPageData(pPg);
    nCell = btCellCount(aData, pgsz);

    for(i=iDel; i<(iDel+nDel); i++){
      int nByte;
      btCellFindSize(aData, pgsz, i, &nByte);
      nFreed += nByte + 2;
    }







|







3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
    int i;                        /* Used to iterate through cells to delete */
    u8 *aData;                    /* Page buffer */
    int nCell;                    /* Number of cells initially on this page */
    int iDel;                     /* Index of cell to delete */
    int nFreed = 0;               /* Total bytes of space freed */

    iDel = pCsr->aiCell[pCsr->nPg-1];
    aData = (u8*)btPageData(pPg);
    nCell = btCellCount(aData, pgsz);

    for(i=iDel; i<(iDel+nDel); i++){
      int nByte;
      btCellFindSize(aData, pgsz, i, &nByte);
      nFreed += nByte + 2;
    }
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
}

static int btBalanceIfUnderfull(BtCursor *pCsr){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  int rc = SQLITE4_OK;
  int iPg = pCsr->nPg-1;
  BtPage *pPg = pCsr->apPage[iPg];
  u8 *aData = sqlite4BtPageData(pPg);
  int nCell = btCellCount(aData, pgsz);
  int nFree = btFreeSpace(aData, pgsz);
  int bLeaf = (0==(btFlags(aData) & BT_PGFLAGS_INTERNAL));

  if( iPg==0 ){
    /* Root page. If it contains no cells at all and is not already
    ** a leaf, shorten the tree by one here by copying the contents 
    ** of the only child into the root. */
    if( nCell==0 && bLeaf==0 ){
      BtPager *pPager = pCsr->base.pDb->pPager;
      u32 pgno = btChildPgno(aData, pgsz, 0);
      BtPage *pChild;

      rc = sqlite4BtPageWrite(pPg);
      if( rc==SQLITE4_OK ){
        rc = sqlite4BtPageGet(pPager, pgno, &pChild);
      }
      if( rc==SQLITE4_OK ){
        u8 *a = sqlite4BtPageData(pChild);
        memcpy(aData, a, pgsz);
        rc = btTrimNonOverflow(pCsr->base.pDb, pChild);
      }
    }
  }else if( nCell==0 || (nFree>(2*pgsz/3) && bLeaf==0) ){
    rc = btBalance(pCsr, bLeaf, 0, 0);
  }







|


















|







3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
}

static int btBalanceIfUnderfull(BtCursor *pCsr){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  int rc = SQLITE4_OK;
  int iPg = pCsr->nPg-1;
  BtPage *pPg = pCsr->apPage[iPg];
  u8 *aData = btPageData(pPg);
  int nCell = btCellCount(aData, pgsz);
  int nFree = btFreeSpace(aData, pgsz);
  int bLeaf = (0==(btFlags(aData) & BT_PGFLAGS_INTERNAL));

  if( iPg==0 ){
    /* Root page. If it contains no cells at all and is not already
    ** a leaf, shorten the tree by one here by copying the contents 
    ** of the only child into the root. */
    if( nCell==0 && bLeaf==0 ){
      BtPager *pPager = pCsr->base.pDb->pPager;
      u32 pgno = btChildPgno(aData, pgsz, 0);
      BtPage *pChild;

      rc = sqlite4BtPageWrite(pPg);
      if( rc==SQLITE4_OK ){
        rc = sqlite4BtPageGet(pPager, pgno, &pChild);
      }
      if( rc==SQLITE4_OK ){
        u8 *a = btPageData(pChild);
        memcpy(aData, a, pgsz);
        rc = btTrimNonOverflow(pCsr->base.pDb, pChild);
      }
    }
  }else if( nCell==0 || (nFree>(2*pgsz/3) && bLeaf==0) ){
    rc = btBalance(pCsr, bLeaf, 0, 0);
  }
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
  u32 iNew = 0;
  BtPage *pPg;
  int rc;

  assert( flag==BT_PGFLAGS_METATREE || flag==BT_PGFLAGS_SCHEDULE || flag==0 );
  rc = sqlite4BtPageAllocate(db->pPager, &pPg);
  if( rc==SQLITE4_OK ){
    u8 *aData = sqlite4BtPageData(pPg);
    aData[0] = (flag & 0xFF);
    iNew = sqlite4BtPagePgno(pPg);
    sqlite4BtPageRelease(pPg);
  }

  *piNew = iNew;
  return rc;







|







3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
  u32 iNew = 0;
  BtPage *pPg;
  int rc;

  assert( flag==BT_PGFLAGS_METATREE || flag==BT_PGFLAGS_SCHEDULE || flag==0 );
  rc = sqlite4BtPageAllocate(db->pPager, &pPg);
  if( rc==SQLITE4_OK ){
    u8 *aData = btPageData(pPg);
    aData[0] = (flag & 0xFF);
    iNew = sqlite4BtPagePgno(pPg);
    sqlite4BtPageRelease(pPg);
  }

  *piNew = iNew;
  return rc;
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
  return SQLITE4_OK;
}

static void btWriteSchedulePage(BtPage *pPg, BtSchedule *p, int *pRc){
  if( *pRc==SQLITE4_OK ){
    int rc = sqlite4BtPageWrite(pPg);
    if( rc==SQLITE4_OK ){
      u8 *aData = sqlite4BtPageData(pPg);
      btWriteSchedule(aData, p, &rc);
    }
    *pRc = rc;
  }
}

static int btAllocateBlock(







|







4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
  return SQLITE4_OK;
}

static void btWriteSchedulePage(BtPage *pPg, BtSchedule *p, int *pRc){
  if( *pRc==SQLITE4_OK ){
    int rc = sqlite4BtPageWrite(pPg);
    if( rc==SQLITE4_OK ){
      u8 *aData = btPageData(pPg);
      btWriteSchedule(aData, p, &rc);
    }
    *pRc = rc;
  }
}

static int btAllocateBlock(
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
    delcsr.nPg = 1;
    delcsr.base.pDb = db;

    while( rc==SQLITE4_OK && iTrunk!=0 ){
      BtPage *pTrunk = 0;
      rc = sqlite4BtPageGet(db->pPager, iTrunk, &pTrunk);
      if( rc==SQLITE4_OK ){
        u8 *aTData = sqlite4BtPageData(pTrunk);
        int nOvfl = btGetU32(aTData);
        int i;

        for(i=0; i<nOvfl; i++){
          u32 lpgno = btGetU32(&aTData[8 + i*8]);
          delcsr.aiCell[0] = (int)btGetU32(&aTData[8 + i*8 + 4]);
          rc = sqlite4BtPageGet(db->pPager, lpgno, &delcsr.apPage[0]);







|







4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
    delcsr.nPg = 1;
    delcsr.base.pDb = db;

    while( rc==SQLITE4_OK && iTrunk!=0 ){
      BtPage *pTrunk = 0;
      rc = sqlite4BtPageGet(db->pPager, iTrunk, &pTrunk);
      if( rc==SQLITE4_OK ){
        u8 *aTData = btPageData(pTrunk);
        int nOvfl = btGetU32(aTData);
        int i;

        for(i=0; i<nOvfl; i++){
          u32 lpgno = btGetU32(&aTData[8 + i*8]);
          delcsr.aiCell[0] = (int)btGetU32(&aTData[8 + i*8 + 4]);
          rc = sqlite4BtPageGet(db->pPager, lpgno, &delcsr.apPage[0]);
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
  u32 iMax;                       /* Maximum input level number */
  u32 iOutLvl;                    /* Output level number */

  /* Find the schedule page. If there is no schedule page, allocate it now. */
  if( pHdr->iSRoot==0 ){
    rc = sqlite4BtPageAllocate(db->pPager, &pPg);
    if( rc==SQLITE4_OK ){
      u8 *aData = sqlite4BtPageData(pPg);
      memset(aData, 0, pHdr->pgsz);
      sqlite4BtPagerDbhdrDirty(db->pPager);
      pHdr->iSRoot = sqlite4BtPagePgno(pPg);
    }
  }else{
    rc = sqlite4BtPageGet(db->pPager, pHdr->iSRoot, &pPg);
  }

  /* Check if the schedule page is busy. If so, no new merge may be 
  ** scheduled. If the schedule page is not busy, call btFindMerge() to
  ** figure out which levels should be scheduled for merge.  */
  if( rc==SQLITE4_OK ){
    aData = sqlite4BtPageData(pPg);
    
    switch( btGetU32(aData) ){
      case BT_SCHEDULE_BUSY:
        rc = SQLITE4_NOTFOUND;
        break;

      case BT_SCHEDULE_DONE: {







|












|







4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
  u32 iMax;                       /* Maximum input level number */
  u32 iOutLvl;                    /* Output level number */

  /* Find the schedule page. If there is no schedule page, allocate it now. */
  if( pHdr->iSRoot==0 ){
    rc = sqlite4BtPageAllocate(db->pPager, &pPg);
    if( rc==SQLITE4_OK ){
      u8 *aData = btPageData(pPg);
      memset(aData, 0, pHdr->pgsz);
      sqlite4BtPagerDbhdrDirty(db->pPager);
      pHdr->iSRoot = sqlite4BtPagePgno(pPg);
    }
  }else{
    rc = sqlite4BtPageGet(db->pPager, pHdr->iSRoot, &pPg);
  }

  /* Check if the schedule page is busy. If so, no new merge may be 
  ** scheduled. If the schedule page is not busy, call btFindMerge() to
  ** figure out which levels should be scheduled for merge.  */
  if( rc==SQLITE4_OK ){
    aData = btPageData(pPg);
    
    switch( btGetU32(aData) ){
      case BT_SCHEDULE_BUSY:
        rc = SQLITE4_NOTFOUND;
        break;

      case BT_SCHEDULE_DONE: {
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
    if( rc==SQLITE4_OK ){
      u32 iRoot = btFirstOfBlock(pHdr, pHdr->iSubBlock);
      BtPage *pPg = 0;

      rc = sqlite4BtPageGet(db->pPager, iRoot, &pPg);
      if( rc==SQLITE4_OK ) rc = sqlite4BtPageWrite(pPg);
      if( rc==SQLITE4_OK ){
        u8 *aData = sqlite4BtPageData(pPg);
        memset(&aData[pHdr->pgsz-6], 0, 6);
        aData[0] = 0;
      }
      sqlite4BtPageRelease(pPg);
    }
  }








|







4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
    if( rc==SQLITE4_OK ){
      u32 iRoot = btFirstOfBlock(pHdr, pHdr->iSubBlock);
      BtPage *pPg = 0;

      rc = sqlite4BtPageGet(db->pPager, iRoot, &pPg);
      if( rc==SQLITE4_OK ) rc = sqlite4BtPageWrite(pPg);
      if( rc==SQLITE4_OK ){
        u8 *aData = btPageData(pPg);
        memset(&aData[pHdr->pgsz-6], 0, 6);
        aData[0] = 0;
      }
      sqlite4BtPageRelease(pPg);
    }
  }

4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
  BtPage *pPg;
  sqlite4_buffer buf;
  int pgsz;

  pgsz = sqlite4BtPagerPagesize(db->pPager);
  sqlite4_buffer_init(&buf, 0);
  sqlite4BtPageGet(db->pPager, iRoot, &pPg);
  btPageToAscii(iRoot, 1, db->pPager, sqlite4BtPageData(pPg), pgsz, &buf);
  fprintf(stderr, "%d TREE at %d:\n", iCall, (int)iRoot);
  fprintf(stderr, "%.*s", buf.n, (char*)buf.p);
  sqlite4_buffer_clear(&buf);
  sqlite4BtPageRelease(pPg);
}

void sqlite4BtDebugFastTree(bt_db *db, int iCall){







|







5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
  BtPage *pPg;
  sqlite4_buffer buf;
  int pgsz;

  pgsz = sqlite4BtPagerPagesize(db->pPager);
  sqlite4_buffer_init(&buf, 0);
  sqlite4BtPageGet(db->pPager, iRoot, &pPg);
  btPageToAscii(iRoot, 1, db->pPager, btPageData(pPg), pgsz, &buf);
  fprintf(stderr, "%d TREE at %d:\n", iCall, (int)iRoot);
  fprintf(stderr, "%.*s", buf.n, (char*)buf.p);
  sqlite4_buffer_clear(&buf);
  sqlite4BtPageRelease(pPg);
}

void sqlite4BtDebugFastTree(bt_db *db, int iCall){
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
        BtPage *pPg = 0;
        rc = sqlite4BtPageGet(db->pPager, pInfo->pgno, &pPg);
        if( rc==SQLITE4_OK ){
          BtPager *p = db->pPager;
          int bAscii = (pInfo->eType==BT_INFO_PAGEDUMP_ASCII);
          u8 *aData;
          int nData;
          aData = sqlite4BtPageData(pPg);
          nData = sqlite4BtPagerPagesize(p);
          btPageToAscii(pInfo->pgno, bAscii, p, aData, nData, &pInfo->output);
          sqlite4_buffer_append(&pInfo->output, "", 1);
          sqlite4BtPageRelease(pPg);
        }
        btControlTransactionDone(db, iCtx);
      }







|







5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
        BtPage *pPg = 0;
        rc = sqlite4BtPageGet(db->pPager, pInfo->pgno, &pPg);
        if( rc==SQLITE4_OK ){
          BtPager *p = db->pPager;
          int bAscii = (pInfo->eType==BT_INFO_PAGEDUMP_ASCII);
          u8 *aData;
          int nData;
          aData = btPageData(pPg);
          nData = sqlite4BtPagerPagesize(p);
          btPageToAscii(pInfo->pgno, bAscii, p, aData, nData, &pInfo->output);
          sqlite4_buffer_append(&pInfo->output, "", 1);
          sqlite4BtPageRelease(pPg);
        }
        btControlTransactionDone(db, iCtx);
      }
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331
5332

    while( rc==SQLITE4_OK && iTrunk ){
      BtPage *pPg = 0;
      rc = sqlite4BtPageGet(db->pPager, iTrunk, &pPg);
      if( rc==SQLITE4_OK ){
        int i;
        u32 nFree;
        u8 *aData = sqlite4BtPageData(pPg);

        nFree = btGetU32(aData);
        for(i=0; i<nFree; i++){
          u32 pgno = btGetU32(&aData[8 + i*4]);
          if( bBlocklist ){
            markBlockAsUsed(db, pgno, aUsed);
          }else{







|







5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399
5400
5401
5402
5403

    while( rc==SQLITE4_OK && iTrunk ){
      BtPage *pPg = 0;
      rc = sqlite4BtPageGet(db->pPager, iTrunk, &pPg);
      if( rc==SQLITE4_OK ){
        int i;
        u32 nFree;
        u8 *aData = btPageData(pPg);

        nFree = btGetU32(aData);
        for(i=0; i<nFree; i++){
          u32 pgno = btGetU32(&aData[8 + i*4]);
          if( bBlocklist ){
            markBlockAsUsed(db, pgno, aUsed);
          }else{
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
    int rc;

    rc = sqlite4BtPageGet(db->pPager, pHdr->iSRoot, &pPg);
    if( rc==SQLITE4_OK ){
      BtSchedule s;
      int i;

      btReadSchedule(db, sqlite4BtPageData(pPg), &s);
      sqlite4BtPageRelease(pPg);

      assert( s.eBusy!=BT_SCHEDULE_BUSY || s.aRoot[0]==0 );
      if( s.eBusy!=BT_SCHEDULE_EMPTY ){
        for(i=0; rc==SQLITE4_OK && i<array_size(s.aBlock); i++){
          markBlockAsUsed(db, s.aBlock[i], aUsed);
        }







|







5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432
5433
5434
5435
5436
    int rc;

    rc = sqlite4BtPageGet(db->pPager, pHdr->iSRoot, &pPg);
    if( rc==SQLITE4_OK ){
      BtSchedule s;
      int i;

      btReadSchedule(db, btPageData(pPg), &s);
      sqlite4BtPageRelease(pPg);

      assert( s.eBusy!=BT_SCHEDULE_BUSY || s.aRoot[0]==0 );
      if( s.eBusy!=BT_SCHEDULE_EMPTY ){
        for(i=0; rc==SQLITE4_OK && i<array_size(s.aBlock); i++){
          markBlockAsUsed(db, s.aBlock[i], aUsed);
        }
Changes to src/bt_pager.c.
59
60
61
62
63
64
65




66

67
68
69
70
71
72
73


74
75
76
77
78
79
80
  BtPage *pPg;                    /* Pointer to page this object belongs to */
  u8 *aData;                      /* Saved data */
  BtSavepage *pNext;              /* Next saved page in the same savepoint */
  int iSavepoint;                 /* Transaction number of savepoint */
  BtSavepage *pNextSavepage;      /* Next saved page on the same BtPage */
};





struct BtPage {

  BtPager *pPager;                /* Pager object that owns this page handle */
  u32 pgno;                       /* Current page number */
  int nRef;                       /* Number of references to this page */
  int flags;                      /* Mask of BTPAGE_XXX flags */
  u8 *aData;                      /* Pointer to current data */
  BtPage *pNextHash;              /* Next entry with same hash key */
  BtPage *pNextDirty;             /* Next page in BtPager.pDirty list */


  BtSavepage *pSavepage;          /* List of saved page images */
};

/*
** Candidate values for BtPage.flags
*/
#define BT_PAGE_DIRTY 0x0001      /* Set for pages in BtPager.pDirty list */







>
>
>
>

>




<


>
>







59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

76
77
78
79
80
81
82
83
84
85
86
  BtPage *pPg;                    /* Pointer to page this object belongs to */
  u8 *aData;                      /* Saved data */
  BtSavepage *pNext;              /* Next saved page in the same savepoint */
  int iSavepoint;                 /* Transaction number of savepoint */
  BtSavepage *pNextSavepage;      /* Next saved page on the same BtPage */
};

/*
** See macro btPageData() in bt_main.c for why the aData variable must be
** first in this structure.
*/
struct BtPage {
  u8 *aData;                      /* Pointer to current data. MUST BE FIRST */
  BtPager *pPager;                /* Pager object that owns this page handle */
  u32 pgno;                       /* Current page number */
  int nRef;                       /* Number of references to this page */
  int flags;                      /* Mask of BTPAGE_XXX flags */

  BtPage *pNextHash;              /* Next entry with same hash key */
  BtPage *pNextDirty;             /* Next page in BtPager.pDirty list */
  BtPage *pNextLru;               /* Next page in LRU list */
  BtPage *pPrevLru;               /* Previous page in LRU list */
  BtSavepage *pSavepage;          /* List of saved page images */
};

/*
** Candidate values for BtPage.flags
*/
#define BT_PAGE_DIRTY 0x0001      /* Set for pages in BtPager.pDirty list */
88
89
90
91
92
93
94




95
96
97
98
99
100
101
  BtLock btl;                     /* Variables shared with bt_lock module */
  BtLog *pLog;                    /* Logging module */
  int iTransactionLevel;          /* Current transaction level (see bt.h) */
  char *zFile;                    /* Database file name */
  int nFile;                      /* Length of string zFile in bytes */
  BtPageHash hash;                /* Hash table */
  BtPage *pDirty;                 /* List of all dirty pages */




  int nTotalRef;                  /* Total number of outstanding page refs */
  int bDoAutoCkpt;                /* Do auto-checkpoint after next unlock */
  BtSavepoint *aSavepoint;        /* Savepoint array */
  int nSavepoint;                 /* Number of entries in aSavepoint array */
  BtDbHdr *pHdr;                  /* Header object for current read snapshot */
  int bDirtyHdr;                  /* True if pHdr has been modified */
  void *pLogsizeCtx;              /* A copy of this is passed to xLogsize() */







>
>
>
>







94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
  BtLock btl;                     /* Variables shared with bt_lock module */
  BtLog *pLog;                    /* Logging module */
  int iTransactionLevel;          /* Current transaction level (see bt.h) */
  char *zFile;                    /* Database file name */
  int nFile;                      /* Length of string zFile in bytes */
  BtPageHash hash;                /* Hash table */
  BtPage *pDirty;                 /* List of all dirty pages */
  BtPage *pLru;                   /* Head of LRU list */
  BtPage *pLruTail;               /* Tail of LRU list */
  int nPageAlloc;                 /* Number of page objects allocated */
  int nPageLimit;                 /* Maximum page objects to allocate */
  int nTotalRef;                  /* Total number of outstanding page refs */
  int bDoAutoCkpt;                /* Do auto-checkpoint after next unlock */
  BtSavepoint *aSavepoint;        /* Savepoint array */
  int nSavepoint;                 /* Number of entries in aSavepoint array */
  BtDbHdr *pHdr;                  /* Header object for current read snapshot */
  int bDirtyHdr;                  /* True if pHdr has been modified */
  void *pLogsizeCtx;              /* A copy of this is passed to xLogsize() */
202
203
204
205
206
207
208




































209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

229
230
231
232
233
234
235
    }
  }
}
#endif
/*
** End of BtPageHash object interface.
**************************************************************************/





































/*
** Open a new pager database handle.
*/
int sqlite4BtPagerNew(sqlite4_env *pEnv, int nExtra, BtPager **pp){
  BtPager *p;
  int nByte;

  nByte = sizeof(BtPager) + nExtra;
  p = (BtPager*)sqlite4_malloc(pEnv, nByte);
  if( !p ) return btErrorBkpt(SQLITE4_NOMEM); 
  memset(p, 0, nByte);

  p->btl.pEnv = pEnv;
  p->btl.pVfs = sqlite4BtEnvDefault();
  p->btl.iSafetyLevel = BT_DEFAULT_SAFETY;
  p->btl.nAutoCkpt = BT_DEFAULT_AUTOCKPT;
  p->btl.bRequestMultiProc = BT_DEFAULT_MULTIPROC;
  p->btl.nBlksz = BT_DEFAULT_BLKSZ;
  p->btl.nPgsz = BT_DEFAULT_PGSZ;

  *pp = p;
  return SQLITE4_OK;
}

static void btFreePage(BtPager *p, BtPage *pPg){
  if( pPg ){
    sqlite4_free(p->btl.pEnv, pPg->aData);







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




















>







212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
    }
  }
}
#endif
/*
** End of BtPageHash object interface.
**************************************************************************/

static void btLruAdd(BtPager *pPager, BtPage *pPg){
  assert( pPg->pPrevLru==0 );
  assert( pPg->pNextLru==0 );
  if( pPager->pLru ){
    pPager->pLruTail->pNextLru = pPg;
    pPg->pPrevLru = pPager->pLruTail;
    pPager->pLruTail = pPg;
  }else{
    pPager->pLru = pPg;
    pPager->pLruTail = pPg;
  }
}

/*
** Remove page pPg from the LRU list. If pPg is not currently part of
** the LRU list, the results are undefined.
*/
static void btLruRemove(BtPager *pPager, BtPage *pPg){
  assert( (pPg==pPager->pLru)==(pPg->pPrevLru==0) );
  assert( (pPg==pPager->pLruTail)==(pPg->pNextLru==0) );

  if( pPg->pNextLru ){
    pPg->pNextLru->pPrevLru = pPg->pPrevLru;
  }else{
    pPager->pLruTail = pPg->pPrevLru;
  }
  if( pPg->pPrevLru ){
    pPg->pPrevLru->pNextLru = pPg->pNextLru;
  }else{
    pPager->pLru = pPg->pNextLru;
  }

  pPg->pNextLru = 0;
  pPg->pPrevLru = 0;
}

/*
** Open a new pager database handle.
*/
int sqlite4BtPagerNew(sqlite4_env *pEnv, int nExtra, BtPager **pp){
  BtPager *p;
  int nByte;

  nByte = sizeof(BtPager) + nExtra;
  p = (BtPager*)sqlite4_malloc(pEnv, nByte);
  if( !p ) return btErrorBkpt(SQLITE4_NOMEM); 
  memset(p, 0, nByte);

  p->btl.pEnv = pEnv;
  p->btl.pVfs = sqlite4BtEnvDefault();
  p->btl.iSafetyLevel = BT_DEFAULT_SAFETY;
  p->btl.nAutoCkpt = BT_DEFAULT_AUTOCKPT;
  p->btl.bRequestMultiProc = BT_DEFAULT_MULTIPROC;
  p->btl.nBlksz = BT_DEFAULT_BLKSZ;
  p->btl.nPgsz = BT_DEFAULT_PGSZ;
  p->nPageLimit = BT_DEFAULT_CACHESZ;
  *pp = p;
  return SQLITE4_OK;
}

static void btFreePage(BtPager *p, BtPage *pPg){
  if( pPg ){
    sqlite4_free(p->btl.pEnv, pPg->aData);
247
248
249
250
251
252
253



254
255
256
257
258
259
260
    BtPage *pNext;
    for(pPg=p->hash.aHash[i]; pPg; pPg=pNext){
      pNext = pPg->pNextHash;
      btFreePage(p, pPg);
    }
  }
  btHashClear(p);



}

static int btCheckpoint(BtLock *pLock){
  BtPager *p = (BtPager*)pLock;
  if( p->pLog==0 ) return SQLITE4_BUSY;
  return sqlite4BtLogCheckpoint(p->pLog, 0);
}







>
>
>







294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
    BtPage *pNext;
    for(pPg=p->hash.aHash[i]; pPg; pPg=pNext){
      pNext = pPg->pNextHash;
      btFreePage(p, pPg);
    }
  }
  btHashClear(p);

  p->pLruTail = 0;
  p->pLru = 0;
}

static int btCheckpoint(BtLock *pLock){
  BtPager *p = (BtPager*)pLock;
  if( p->pLog==0 ) return SQLITE4_BUSY;
  return sqlite4BtLogCheckpoint(p->pLog, 0);
}
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557

  assert( p->pHdr );
  p->pHdr = 0;
  rc = sqlite4BtLogSnapshotClose(p->pLog);

  /* Purge the page cache. */
  assert( p->pDirty==0 );
  btPurgeCache(p);

  if( rc==SQLITE4_OK && p->bDoAutoCkpt ){
    sqlite4BtLogCheckpoint(p->pLog, (p->btl.nAutoCkpt / 2));
  }
  p->bDoAutoCkpt = 0;

  return rc;







|







593
594
595
596
597
598
599
600
601
602
603
604
605
606
607

  assert( p->pHdr );
  p->pHdr = 0;
  rc = sqlite4BtLogSnapshotClose(p->pLog);

  /* Purge the page cache. */
  assert( p->pDirty==0 );
  //btPurgeCache(p);

  if( rc==SQLITE4_OK && p->bDoAutoCkpt ){
    sqlite4BtLogCheckpoint(p->pLog, (p->btl.nAutoCkpt / 2));
  }
  p->bDoAutoCkpt = 0;

  return rc;
583
584
585
586
587
588
589

590
591
592
593
594
595
596
597
598
    pNext = pPg->pNextDirty;
    pPg->flags &= ~(BT_PAGE_DIRTY);
    pPg->pNextDirty = 0;
    if( rc==SQLITE4_OK ){
      int nPg = ((pNext==0) ? p->pHdr->nPg : 0);
      rc = sqlite4BtLogWrite(p->pLog, pPg->pgno, pPg->aData, nPg);
    }

  }
  p->pDirty = 0;
  sqlite4BtLogSnapshotEndWrite(p->pLog);

  nLogsize = sqlite4BtLogSize(p->pLog);

  if( p->btl.nAutoCkpt && nLogsize>=p->btl.nAutoCkpt ){
    p->bDoAutoCkpt = 1;
  }







>

|







633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
    pNext = pPg->pNextDirty;
    pPg->flags &= ~(BT_PAGE_DIRTY);
    pPg->pNextDirty = 0;
    if( rc==SQLITE4_OK ){
      int nPg = ((pNext==0) ? p->pHdr->nPg : 0);
      rc = sqlite4BtLogWrite(p->pLog, pPg->pgno, pPg->aData, nPg);
    }
    if( pPg->nRef==0 ) btLruAdd(p, pPg);
  }
  p->pDirty = pPg;
  sqlite4BtLogSnapshotEndWrite(p->pLog);

  nLogsize = sqlite4BtLogSize(p->pLog);

  if( p->btl.nAutoCkpt && nLogsize>=p->btl.nAutoCkpt ){
    p->bDoAutoCkpt = 1;
  }
618
619
620
621
622
623
624
625
626
627



628









629











630

631
632
633
634
635
636
637
638
639
640
641

642
643
644
645
646
647
648
    rc = p->btl.pVfs->xRead(p->btl.pFd, iOff, pPg->aData, p->pHdr->pgsz);
  }

  return rc;
}

static int btAllocatePage(BtPager *p, BtPage **ppPg){
  int rc;                         /* Return code */
  BtPage *pRet;
  u8 *aData;













  pRet = (BtPage*)sqlite4_malloc(p->btl.pEnv, sizeof(BtPage));











  aData = (u8*)sqlite4_malloc(p->btl.pEnv, p->pHdr->pgsz);


  if( pRet && aData ){
    memset(pRet, 0, sizeof(BtPage));
    pRet->aData = aData;
    pRet->pPager = p;
    rc = SQLITE4_OK;
  }else{
    sqlite4_free(p->btl.pEnv, pRet);
    sqlite4_free(p->btl.pEnv, aData);
    rc = btErrorBkpt(SQLITE4_NOMEM);
    pRet = 0;

  }

  *ppPg = pRet;
  return rc;
}

/*







|

|
>
>
>

>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
|
>

|
|
|
|
<
|
|
|
|
|
>







669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710

711
712
713
714
715
716
717
718
719
720
721
722
723
    rc = p->btl.pVfs->xRead(p->btl.pFd, iOff, pPg->aData, p->pHdr->pgsz);
  }

  return rc;
}

static int btAllocatePage(BtPager *p, BtPage **ppPg){
  int rc = SQLITE4_OK;            /* Return code */
  BtPage *pRet;

  if( p->hash.nEntry>=p->nPageLimit && p->pLru ){
    BtPage **pp;
    int h;

    /* Remove the page from the head of the LRU list. */
    pRet = p->pLru;
    assert( (pRet->pNextLru==0)==(pRet==p->pLruTail) );
    p->pLru = pRet->pNextLru;
    if( p->pLru==0 ){
      p->pLruTail = 0;
    }else{
      p->pLru->pPrevLru = 0;
    }

    /* Remove the page from the hash table. */
    btHashRemove(p, pRet);

    assert( pRet->pPrevLru==0 );
    assert( pRet->nRef==0 );
    assert( pRet->pSavepage==0 );
    pRet->flags = 0;
    pRet->pNextHash = 0;
    pRet->pNextDirty = 0;
    pRet->pNextLru = 0;
  }else{
    u8 *aData = (u8*)sqlite4_malloc(p->btl.pEnv, p->pHdr->pgsz);
    pRet = (BtPage*)sqlite4_malloc(p->btl.pEnv, sizeof(BtPage));

    if( pRet && aData ){
      memset(pRet, 0, sizeof(BtPage));
      pRet->aData = aData;
      pRet->pPager = p;

    }else{
      sqlite4_free(p->btl.pEnv, pRet);
      sqlite4_free(p->btl.pEnv, aData);
      rc = btErrorBkpt(SQLITE4_NOMEM);
      pRet = 0;
    }
  }

  *ppPg = pRet;
  return rc;
}

/*
846
847
848
849
850
851
852


853
854
855
856
857
858
859
      if( rc!=SQLITE4_OK ){
        btFreePage(p, pRet);
        pRet = 0;
      }else{
        sqlite4BtDebugReadPage(&p->btl, pgno, pRet->aData, p->pHdr->pgsz);
      }
    }


  }

  assert( (pRet!=0)==(rc==SQLITE4_OK) );
  if( rc==SQLITE4_OK ){
    p->nTotalRef++;
    pRet->nRef++;
  }







>
>







921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
      if( rc!=SQLITE4_OK ){
        btFreePage(p, pRet);
        pRet = 0;
      }else{
        sqlite4BtDebugReadPage(&p->btl, pgno, pRet->aData, p->pHdr->pgsz);
      }
    }
  }else if( pRet->nRef==0 && (pRet->flags & BT_PAGE_DIRTY)==0 ){
    btLruRemove(p, pRet);
  }

  assert( (pRet!=0)==(rc==SQLITE4_OK) );
  if( rc==SQLITE4_OK ){
    p->nTotalRef++;
    pRet->nRef++;
  }
1007
1008
1009
1010
1011
1012
1013


1014
1015
1016






1017
1018
1019
1020
1021
1022
1023
*/
int sqlite4BtPageTrimPgno(BtPager *pPager, u32 pgno){
  return btFreelistAdd(pPager, 0, pgno);
}

int sqlite4BtPageRelease(BtPage *pPg){
  if( pPg ){


    assert( pPg->nRef>=1 );
    pPg->nRef--;
    pPg->pPager->nTotalRef--;






  }
  return SQLITE4_OK;
}

void sqlite4BtPageReference(BtPage *pPg){
  assert( pPg->nRef>=1 );
  pPg->nRef++;







>
>



>
>
>
>
>
>







1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
*/
int sqlite4BtPageTrimPgno(BtPager *pPager, u32 pgno){
  return btFreelistAdd(pPager, 0, pgno);
}

int sqlite4BtPageRelease(BtPage *pPg){
  if( pPg ){
    BtPager *pPager = pPg->pPager;

    assert( pPg->nRef>=1 );
    pPg->nRef--;
    pPg->pPager->nTotalRef--;

    /* If the refcount is now zero and the page is not dirty, add it to
    ** the LRU list.  */
    if( pPg->nRef==0 && (pPg->flags & BT_PAGE_DIRTY)==0 ){
      btLruAdd(pPager, pPg);
    }
  }
  return SQLITE4_OK;
}

void sqlite4BtPageReference(BtPage *pPg){
  assert( pPg->nRef>=1 );
  pPg->nRef++;
Changes to test/select1.test.
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# focus of this file is testing the SELECT statement.
#
# $Id: select1.test,v 1.70 2009/05/28 01:00:56 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

btenv BT
BT attach db

# Try to select on a non-existant table.
#
do_test select1-1.1 {
  set v [catch {execsql {SELECT * FROM test1}} msg]
  lappend v $msg
} {1 {no such table: test1}}








<
<
<







12
13
14
15
16
17
18



19
20
21
22
23
24
25
# focus of this file is testing the SELECT statement.
#
# $Id: select1.test,v 1.70 2009/05/28 01:00:56 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl




# Try to select on a non-existant table.
#
do_test select1-1.1 {
  set v [catch {execsql {SELECT * FROM test1}} msg]
  lappend v $msg
} {1 {no such table: test1}}