Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Further btree updates.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0b68007d899d2551a19fa101cc764b03ff2062d5
User & Date: dan 2013-09-19 19:19:29.482
Context
2013-09-24
20:39
Further progress on b-treee backend. check-in: c954958697 user: dan tags: trunk
2013-09-19
19:19
Further btree updates. check-in: 0b68007d89 user: dan tags: trunk
15:13
Add b-tree design notes to www/ directory. check-in: 8c828db5a6 user: dan tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/bt_main.c.
15
16
17
18
19
20
21











22














23
24
25




26



27
28
29




30
31
32
33
34


35
36
37
38


39
40
41
42


43
44
45
46


47
48
49
50


51
52
53
54
55
56
57
58













59
60
61
62
63
64
65
66

67
68
69
70
71
72












73





74
75
76



77
78
79
80
81
82
83
#include "btInt.h"

struct bt_db {
  sqlite4_env *pEnv;              /* SQLite environment */
  BtPager *pPager;                /* Underlying page-based database */
};












int sqlite4BtNew(sqlite4_env *pEnv, int nExtra, bt_db **ppDb){














  return SQLITE4_OK;
}





int sqlite4BtClose(bt_db *db){



  return SQLITE4_OK;
}





void *sqlite4BtExtra(bt_db *db){
  return SQLITE4_OK;
}

int sqlite4BtOpen(bt_db *db, const char *zFilename){


  return SQLITE4_OK;
}

int sqlite4BtBegin(bt_db *db, int iLevel){


  return SQLITE4_OK;
}

int sqlite4BtCommit(bt_db *db, int iLevel){


  return SQLITE4_OK;
}

int sqlite4BtRevert(bt_db *db, int iLevel){


  return SQLITE4_OK;
}

int sqlite4BtRollback(bt_db *db, int iLevel){


  return SQLITE4_OK;
}

int sqlite4BtTransactionLevel(bt_db *db){
  return sqlite4BtPagerTransactionLevel(db->pPager);
}

int sqlite4BtCsrOpen(bt_db *db, int nExtra, bt_cursor **ppCsr){













  return SQLITE4_OK;
}

int sqlite4BtCsrClose(bt_cursor *pCsr){
  return SQLITE4_OK;
}

void *sqlite4BtCsrExtra(bt_cursor *pCsr){

}

int sqlite4BtCsrSeek(bt_cursor *pCsr, const void *pK, int nK, int eSeek){
  return SQLITE4_OK;
}













int sqlite4BtCsrFirst(bt_cursor *pCsr){





  return SQLITE4_OK;
}




int sqlite4BtCsrLast(bt_cursor *pCsr){
  return SQLITE4_OK;
}

int sqlite4BtCsrNext(bt_cursor *pCsr){
  return SQLITE4_OK;
}







>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
|


>
>
>
>

>
>
>



>
>
>
>

|



>
>
|



>
>
|



>
>
|



>
>
|



>
>
|







>
>
>
>
>
>
>
>
>
>
>
>
>
|







>






>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>



>
>
>







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#include "btInt.h"

struct bt_db {
  sqlite4_env *pEnv;              /* SQLite environment */
  BtPager *pPager;                /* Underlying page-based database */
};

struct bt_cursor {
  bt_db *pDb;                     /* Database that owns this cursor */

  int nPg;                        /* Number of valid entries in apPage[] */
  int iCell;                      /* Current cell in apPage[nPg] */
  BtPage *apPage[32];             /* All pages from root to current leaf */
};

/*
** Allocate a new database handle.
*/
int sqlite4BtNew(sqlite4_env *pEnv, int nExtra, bt_db **ppDb){
  bt_db *db = 0;                  /* New database object */
  BtPager *pPager = 0;            /* Pager object for this database */
  int nReq;                       /* Bytes of space required for bt_db object */
  int rc;                         /* Return code */

  nReq = sizeof(bt_db);
  rc = sqlite4BtPagerNew(pEnv, nExtra + nReq, &pPager);
  if( rc==SQLITE4_OK ){
    db = (bt_db*)sqlite4BtPagerExtra(pPager);
    db->pPager = pPager;
    db->pEnv = pEnv;
  }

  *ppDb = db;
  return rc;
}

/*
** Close an existing database handle. Once this function has been 
** called, the handle may not be used for any purpose.
*/
int sqlite4BtClose(bt_db *db){
  if( db ){
    sqlite4BtPagerClose(db->pPager);
  }
  return SQLITE4_OK;
}

/*
** Return a pointer to the nExtra bytes of space allocated along with 
** the database handle. 
*/
void *sqlite4BtExtra(bt_db *db){
  return (void*)&db[1];
}

int sqlite4BtOpen(bt_db *db, const char *zFilename){
  int rc;
  rc = sqlite4BtPagerOpen(db->pPager, zFilename);
  return rc;
}

int sqlite4BtBegin(bt_db *db, int iLevel){
  int rc;
  rc = sqlite4BtPagerBegin(db->pPager, iLevel);
  return rc;
}

int sqlite4BtCommit(bt_db *db, int iLevel){
  int rc;
  rc = sqlite4BtPagerCommit(db->pPager, iLevel);
  return rc;
}

int sqlite4BtRevert(bt_db *db, int iLevel){
  int rc;
  rc = sqlite4BtPagerRevert(db->pPager, iLevel);
  return rc;
}

int sqlite4BtRollback(bt_db *db, int iLevel){
  int rc;
  rc = sqlite4BtPagerRollback(db->pPager, iLevel);
  return rc;
}

int sqlite4BtTransactionLevel(bt_db *db){
  return sqlite4BtPagerTransactionLevel(db->pPager);
}

int sqlite4BtCsrOpen(bt_db *db, int nExtra, bt_cursor **ppCsr){
  int rc;                         /* Return Code */
  int nByte;                      /* Total bytes of space to allocate */
  bt_cursor *pCsr;                /* New cursor object */

  nByte = sizeof(bt_cursor) + nExtra;
  pCsr = (bt_cursor*)sqlite4_malloc(db->pEnv, nByte);
  if( pCsr==0 ){
    rc = SQLITE4_NOMEM;
  }else{
    memset(pCsr, 0, nByte);
    pCsr->pDb = db;
  }

  return rc;
}

int sqlite4BtCsrClose(bt_cursor *pCsr){
  return SQLITE4_OK;
}

void *sqlite4BtCsrExtra(bt_cursor *pCsr){
  return (void*)&pCsr[1];
}

int sqlite4BtCsrSeek(bt_cursor *pCsr, const void *pK, int nK, int eSeek){
  return SQLITE4_OK;
}

static void btCsrReset(bt_cursor *pCsr){
  int i;
  for(i=0; i<pCsr->nPg; i++){
    sqlite4BtPageRelease(pCsr->apPage[i]);
  }
  pCsr->nPg = 0;
  pCsr->iCell = 0;
}

/*
** Position cursor pCsr to point to the smallest key in the database.
*/
int sqlite4BtCsrFirst(bt_cursor *pCsr){
  /* Reset the cursor */
  btCsrReset(pCsr);

  /* Load the root page into pCsr->apPage[0] */

  return SQLITE4_OK;
}

/*
** Position cursor pCsr to point to the largest key in the database.
*/
int sqlite4BtCsrLast(bt_cursor *pCsr){
  return SQLITE4_OK;
}

int sqlite4BtCsrNext(bt_cursor *pCsr){
  return SQLITE4_OK;
}
109
110
111
112
113
114
115
116
117
118
119
120
121
122
}

int sqlite4BtDelete(bt_cursor *pCsr){
  return SQLITE4_OK;
}

int sqlite4BtSetCookie(bt_db *db, unsigned int iVal){
  return 0;
}

int sqlite4BtGetCookie(bt_db *db, unsigned int *piVal){
  return 0;
}








|



|


189
190
191
192
193
194
195
196
197
198
199
200
201
202
}

int sqlite4BtDelete(bt_cursor *pCsr){
  return SQLITE4_OK;
}

int sqlite4BtSetCookie(bt_db *db, unsigned int iVal){
  return sqlite4BtPagerSetCookie(db->pPager, iVal);
}

int sqlite4BtGetCookie(bt_db *db, unsigned int *piVal){
  return sqlite4BtPagerGetCookie(db->pPager, piVal);
}

Changes to www/bt.wiki.


1
2
3











4
5
6
7
8
9
10



<title>BT Design Overview</title>
<nowiki>

















<div id=start_of_toc></div>
>
>



>
>
>
>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23



<title>BT Design Overview</title>
<nowiki>

















<div id=start_of_toc></div>
20
21
22
23
24
25
26


27
28
29
30
31
32
33
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#changes_to_locking style=text-decoration:none>2.1.3. Changes to Locking</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#read-only_clients_and_halted_databases style=text-decoration:none>2.1.3.5. Read-only Clients and Halted Databases</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#read-only_clients_and_live_databases style=text-decoration:none>2.1.3.6. Read-only Clients and Live Databases</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#b-tree_level_changes style=text-decoration:none>2.2. B-Tree Level Changes</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#free_page_management style=text-decoration:none>2.2.1. Free Page Management</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#large_records style=text-decoration:none>2.2.2. Large Records</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#b-tree_page_format style=text-decoration:none>2.2.3. B-Tree Page Format</a><br>


&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#merge-tree_database_notes style=text-decoration:none>3. Merge-Tree Database Notes</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#merge-tree_format style=text-decoration:none>3.1. Merge-Tree Format</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#in-memory_tree style=text-decoration:none>3.2. In-Memory Tree</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#use_of_a_background_thread_or_process style=text-decoration:none>3.3. Use of a Background Thread or Process</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#design_summary style=text-decoration:none>4. Design Summary</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#locking style=text-decoration:none>4.1. Locking</a><br>








>
>







33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#changes_to_locking style=text-decoration:none>2.1.3. Changes to Locking</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#read-only_clients_and_halted_databases style=text-decoration:none>2.1.3.5. Read-only Clients and Halted Databases</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#read-only_clients_and_live_databases style=text-decoration:none>2.1.3.6. Read-only Clients and Live Databases</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#b-tree_level_changes style=text-decoration:none>2.2. B-Tree Level Changes</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#free_page_management style=text-decoration:none>2.2.1. Free Page Management</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#large_records style=text-decoration:none>2.2.2. Large Records</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#b-tree_page_format style=text-decoration:none>2.2.3. B-Tree Page Format</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#page_formats style=text-decoration:none>2.2.3.7. Page Formats</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#cell_formats style=text-decoration:none>2.2.3.8. Cell Formats</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#merge-tree_database_notes style=text-decoration:none>3. Merge-Tree Database Notes</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#merge-tree_format style=text-decoration:none>3.1. Merge-Tree Format</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#in-memory_tree style=text-decoration:none>3.2. In-Memory Tree</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#use_of_a_background_thread_or_process style=text-decoration:none>3.3. Use of a Background Thread or Process</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#design_summary style=text-decoration:none>4. Design Summary</a><br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=#locking style=text-decoration:none>4.1. Locking</a><br>

466
467
468
469
470
471
472



473

















474
475
476
477
478
479
480
<ul>
  <li><p> Make it easy to avoid overreads.
  <li><p> Make binary searches as fast as possible.
</ul>

<p>
  Each b-tree page has a fixed-size header and a variable-sized footer. The



  purpose of the footer is to prevent buffer overreads.


















<p><b>B-Tree Nodes</b>

<p>Cell format:

<ul>
  <li> Number of bytes in the key-prefix (nKey), as a varint. Or, if the key-prefix for







>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
<ul>
  <li><p> Make it easy to avoid overreads.
  <li><p> Make binary searches as fast as possible.
</ul>

<p>
  Each b-tree page has a fixed-size header and a variable-sized footer. The
  purpose of moving some header data to a footer is to prevent buffer overreads
  occuring while attempting to read varint fields from the body of a corrupt
  b-tree page.

<h4 id=page_formats>2.2.3.7. Page Formats</h4>

<p><b>Page Header</b>

<p>Page headers are similar to those in SQLite3: 

<ul>
  <li> 1 byte - flags.
  <li> 2 bytes - Number of cells on this page.
  <li> 2 bytes - Offset of first byte after last cell.
  <li> Internal nodes only: 4 bytes - Page number of rightmost child node.
</ul>

<p><b>Page Footer</b>


<h4 id=cell_formats>2.2.3.8. Cell Formats</h4>

<p><b>B-Tree Nodes</b>

<p>Cell format:

<ul>
  <li> Number of bytes in the key-prefix (nKey), as a varint. Or, if the key-prefix for
1110
1111
1112
1113
1114
1115
1116


1117
1118
1119
1120
1121
1122
1123
  The pager level provides two sizes of allocation:
  <ul>
    <li> Page size (1-4 KB), and
    <li> Block size (512KB to 2MB).
  </ul>
</p>
-->
















>
>







1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
  The pager level provides two sizes of allocation:
  <ul>
    <li> Page size (1-4 KB), and
    <li> Block size (512KB to 2MB).
  </ul>
</p>
-->