︙ | | |
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
+
+
|
#include "vdbeInt.h"
#ifndef SQLITE_OMIT_MERGE_SORT
typedef struct VdbeSorterIter VdbeSorterIter;
/*
** NOTES ON DATA STRUCTURE USED FOR N-WAY MERGES:
**
** As keys are added to the sorter, they are written to disk in a series
** of sorted packed-memory-arrays (PMAs). The size of each PMA is roughly
** the same as the cache-size allowed for temporary databases. In order
** to allow the caller to extract keys from the sorter in sorted order,
** all PMAs currently stored on disk must be merged together. This comment
** describes the data structure used to do so. The structure supports
** merging any number of arrays in a single pass with no redundant comparison
|
︙ | | |
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
|
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
|
-
+
-
+
+
-
-
-
-
+
+
+
+
+
-
+
-
-
-
-
+
+
+
|
int nAlloc; /* Bytes of space at aAlloc */
u8 *aAlloc; /* Allocated space */
int nKey; /* Number of bytes in key */
u8 *aKey; /* Pointer to current key */
};
/* Minimum allowable value for the VdbeSorter.nWorking variable */
#define SORTER_MIN_SEGMENT_SIZE 10
#define SORTER_MIN_WORKING 10
/* Maximum number of segments to merge in a single pass. */
#define SORTER_MAX_MERGE_COUNT 16
/*
** Free all memory belonging to the VdbeSorterIter object passed as the second
** argument. All structure fields are set to zero before returning.
*/
static void vdbeSorterIterZero(sqlite3 *db, VdbeSorterIter *pIter){
sqlite3DbFree(db, pIter->aAlloc);
memset(pIter, 0, sizeof(VdbeSorterIter));
}
/*
** Advance iterator pIter to the next key in its PMA.
** Advance iterator pIter to the next key in its PMA. Return SQLITE_OK if
** no error occurs, or an SQLite error code if one does.
*/
static int vdbeSorterIterNext(
sqlite3 *db, /* Database handle (for sqlite3DbMalloc() ) */
VdbeSorterIter *pIter /* Iterator to advance */
){
int rc;
int nRead;
int nRec;
int iOff;
int rc; /* Return Code */
int nRead; /* Number of bytes read */
int nRec; /* Size of record in bytes */
int iOff; /* Size of serialized size varint in bytes */
nRead = pIter->iEof - pIter->iReadOff;
if( nRead>5 ) nRead = 5;
if( nRead<=0 ){
/* This is an EOF condition */
vdbeSorterIterZero(db, pIter);
return SQLITE_OK;
}
rc = sqlite3OsRead(pIter->pFile, pIter->aAlloc, nRead, pIter->iReadOff);
iOff = getVarint32(pIter->aAlloc, nRec);
if( rc==SQLITE_OK && (iOff+nRec)>nRead ){
int nRead2;
int nRead2; /* Number of extra bytes to read */
if( (iOff+nRec)>pIter->nAlloc ){
int nNew = pIter->nAlloc*2;
while( (iOff+nRec)>nNew ) nNew = nNew*2;
pIter->aAlloc = sqlite3DbReallocOrFree(db, pIter->aAlloc, nNew);
if( !pIter->aAlloc ) return SQLITE_NOMEM;
pIter->nAlloc = nNew;
}
nRead2 = iOff + nRec - nRead;
rc = sqlite3OsRead(
pIter->pFile, &pIter->aAlloc[nRead], nRead2, pIter->iReadOff+nRead
);
}
assert( nRec>0 || rc!=SQLITE_OK );
pIter->iReadOff += iOff+nRec;
pIter->nKey = nRec;
pIter->aKey = &pIter->aAlloc[iOff];
return rc;
}
/*
** Write a single varint, value iVal, to file-descriptor pFile. Return
** SQLITE_OK if successful, or an SQLite error code if some error occurs.
**
** The value of *piOffset when this function is called is used as the byte
** offset in file pFile to write to. Before returning, *piOffset is
** incremented by the number of bytes written.
*/
static int vdbeSorterWriteVarint(
sqlite3_file *pFile,
i64 iVal,
i64 *piOffset
sqlite3_file *pFile, /* File to write to */
i64 iVal, /* Value to write as a varint */
i64 *piOffset /* IN/OUT: Write offset in file pFile */
){
u8 aVarint[9]; /* Buffer large enough for a varint */
int nVarint; /* Number of used bytes in varint */
int rc; /* Result of write() call */
nVarint = sqlite3PutVarint(aVarint, iVal);
rc = sqlite3OsWrite(pFile, aVarint, nVarint, *piOffset);
|
︙ | | |
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
|
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
|
-
+
-
+
|
** byte offset in file pFile from whence to read the varint. If successful
** (i.e. if no IO error occurs), then *piOffset is set to the offset of
** the first byte past the end of the varint before returning. *piVal is
** set to the integer value read. If an error occurs, the final values of
** both *piOffset and *piVal are undefined.
*/
static int vdbeSorterReadVarint(
sqlite3_file *pFile,
sqlite3_file *pFile, /* File to read from */
i64 iEof, /* Total number of bytes in file */
i64 *piOffset, /* IN/OUT: Read offset */
i64 *piOffset, /* IN/OUT: Read offset in pFile */
i64 *piVal /* OUT: Value read from file */
){
u8 aVarint[9]; /* Buffer large enough for a varint */
i64 iOff = *piOffset; /* Offset in file to read from */
int nRead = 9; /* Number of bytes to read from file */
int rc; /* Return code */
|
︙ | | |
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
|
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
|
-
-
+
+
-
+
|
sqlite3 *db, /* Database handle */
VdbeSorter *pSorter, /* Sorter object */
i64 iStart, /* Start offset in pFile */
VdbeSorterIter *pIter, /* Iterator to populate */
i64 *pnByte /* IN/OUT: Increment this value by PMA size */
){
int rc;
i64 iEof = pSorter->iWriteOff;
assert( iEof>iStart );
assert( pSorter->iWriteOff>iStart );
assert( pIter->aAlloc==0 );
pIter->pFile = pSorter->pTemp1;
pIter->iReadOff = iStart;
pIter->nAlloc = 128;
pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc);
if( !pIter->aAlloc ){
rc = SQLITE_NOMEM;
}else{
i64 iEof = pSorter->iWriteOff; /* EOF of file pSorter->pTemp1 */
i64 nByte;
i64 nByte; /* Total size of PMA in bytes */
rc = vdbeSorterReadVarint(pSorter->pTemp1, iEof, &pIter->iReadOff, &nByte);
*pnByte += nByte;
pIter->iEof = pIter->iReadOff + nByte;
}
if( rc==SQLITE_OK ){
rc = vdbeSorterIterNext(db, pIter);
}
|
︙ | | |
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
|
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
|
-
-
-
-
-
+
-
-
+
-
|
return SQLITE_OK;
}
/*
** Initialize the temporary index cursor just opened as a sorter cursor.
*/
int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){
VdbeSorter *pSorter; /* Allocated sorter object */
/* Cursor must be a temp cursor and not open on an intkey table */
assert( pCsr->pKeyInfo && pCsr->pBt );
pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter));
pCsr->pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter));
if( !pSorter ) return SQLITE_NOMEM;
pCsr->pSorter = pSorter;
return (pCsr->pSorter ? SQLITE_NOMEM : SQLITE_OK);
return SQLITE_OK;
}
/*
** Free any cursor components allocated by sqlite3VdbeSorterXXX routines.
*/
void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){
VdbeSorter *pSorter = pCsr->pSorter;
|
︙ | | |
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
|
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
|
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
+
-
-
-
-
|
);
}
/*
** Write the current contents of the b-tree to a PMA. Return SQLITE_OK
** if successful, or an SQLite error code otherwise.
**
** The format of a PMA is:
**
** * A varint. This varint contains the total number of bytes of content
** in the PMA (not including the varint itself).
**
** * One or more records packed end-to-end in order of ascending keys.
** Each record consists of a varint followed by a blob of data (the
** key). The varint is the number of bytes in the blob of data.
*/
static int vdbeSorterBtreeToPMA(sqlite3 *db, VdbeCursor *pCsr){
int rc = SQLITE_OK; /* Return code */
VdbeSorter *pSorter = pCsr->pSorter;
i64 iWriteOff = pSorter->iWriteOff;
int res = 0;
void *aMalloc = 0;
int nMalloc = 0;
rc = sqlite3BtreeFirst(pCsr->pCursor, &res);
if( rc!=SQLITE_OK || res ) return rc;
assert( pSorter->nBtree>0 );
/* If the first temporary PMA file has not been opened, open it now. */
if( pSorter->pTemp1==0 ){
rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
assert( rc!=SQLITE_OK || pSorter->pTemp1 );
assert( pSorter->iWriteOff==0 );
assert( pSorter->nPMA==0 );
}
if( rc==SQLITE_OK ){
i64 iWriteOff = pSorter->iWriteOff;
void *aMalloc = 0; /* Array used to hold a single record */
int nMalloc = 0; /* Allocated size of aMalloc[] in bytes */
pSorter->nPMA++;
/* Write a varint containg the size of the PMA in bytes into the file. */
assert( pSorter->nBtree>0 );
for(
rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nBtree, &iWriteOff);
rc==SQLITE_OK && res==0;
rc = sqlite3BtreeNext(pCsr->pCursor, &res)
){
i64 nKey; /* Size of this key in bytes */
|
︙ | | |
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
|
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
|
+
+
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+
|
iWriteOff += nKey;
}
}
if( rc!=SQLITE_OK ) break;
}
/* This assert verifies that unless an error has occurred, the size of
** the PMA on disk is the same as the expected size stored in
** pSorter->nBtree. */
assert( rc!=SQLITE_OK || pSorter->nBtree==(
iWriteOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nBtree)
));
pSorter->iWriteOff = iWriteOff;
sqlite3DbFree(db, aMalloc);
}
pSorter->nBtree = 0;
return rc;
}
/*
** This function is called on a sorter cursor before each row is inserted.
** If the current b-tree being constructed is already considered "full",
** a new tree is started.
** This function is called on a sorter cursor by the VDBE before each row
** is inserted into VdbeCursor.pCsr. Argument nKey is the size of the key, in
** bytes, about to be inserted.
**
** If it is determined that the temporary b-tree accessed via VdbeCursor.pCsr
** is large enough, its contents are written to a sorted PMA on disk and the
** tree emptied. This prevents the b-tree (which must be small enough to
** fit entirely in the cache in order to support efficient inserts) from
** growing too large.
**
** An SQLite error code is returned if an error occurs. Otherwise, SQLITE_OK.
*/
int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr, int nKey){
int rc = SQLITE_OK; /* Return code */
VdbeSorter *pSorter = pCsr->pSorter;
if( pSorter ){
Pager *pPager = sqlite3BtreePager(pCsr->pBt);
int nPage; /* Current size of temporary file in pages */
/* Determine how many pages the temporary b-tree has grown to */
sqlite3PagerPagecount(pPager, &nPage);
/* If pSorter->nWorking is still zero, but the temporary file has been
** created in the file-system, then the most recent insert into the
** current b-tree segment probably caused the cache to overflow (it is
** also possible that sqlite3_release_memory() was called). So set the
** size of the working set to a little less than the current size of the
** file in pages. */
if( pSorter->nWorking==0 && sqlite3PagerFile(pPager)->pMethods ){
pSorter->nWorking = nPage-5;
if( pSorter->nWorking<SORTER_MIN_SEGMENT_SIZE ){
pSorter->nWorking = SORTER_MIN_SEGMENT_SIZE;
if( pSorter->nWorking<SORTER_MIN_WORKING ){
pSorter->nWorking = SORTER_MIN_WORKING;
}
}
/* If the number of pages used by the current b-tree segment is greater
** than the size of the working set (VdbeSorter.nWorking), start a new
** segment b-tree. */
if( pSorter->nWorking && nPage>=pSorter->nWorking ){
|
︙ | | |
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
|
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
|
-
+
|
*pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
return rc;
}
/*
** Copy the current sorter key into the memory cell pOut.
*/
int sqlite3VdbeSorterRowkey(sqlite3 *db, VdbeCursor *pCsr, Mem *pOut){
int sqlite3VdbeSorterRowkey(VdbeCursor *pCsr, Mem *pOut){
VdbeSorter *pSorter = pCsr->pSorter;
VdbeSorterIter *pIter;
pIter = &pSorter->aIter[ pSorter->aTree[1] ];
/* Coverage testing note: As things are currently, this call will always
** succeed. This is because the memory cell passed by the VDBE layer
|
︙ | | |