SQLite

Check-in [acdc7d1270]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Do not let the multi-core sorter use lookaside memory, which is not thread-safe.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | threads-sort-ex1
Files: files | file ages | folders
SHA1: acdc7d1270b6aacf4612296a8d4dd596042b058c
User & Date: drh 2012-08-22 15:16:36.722
Context
2012-08-22
15:16
Do not let the multi-core sorter use lookaside memory, which is not thread-safe. (Leaf check-in: acdc7d1270 user: drh tags: threads-sort-ex1)
2012-08-21
17:46
Merge in all the latest trunk changes. (check-in: 45cdc32f1e user: drh tags: threads-sort-ex1)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/threads.c.
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/* Create a new thread */
int sqlite3ThreadCreate(
  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
  void *(*xTask)(void*),    /* Routine to run in a separate thread */
  void *pIn                 /* Argument passed into xTask() */
){
  SQLiteThread *p;
  int rc;

  assert( ppThread!=0 );
  assert( xTask!=0 );
  *ppThread = 0;
  p = sqlite3Malloc(sizeof(*p));
  if( p==0 ) return SQLITE_NOMEM;
  memset(p, 0, sizeof(*p));







<







43
44
45
46
47
48
49

50
51
52
53
54
55
56
/* Create a new thread */
int sqlite3ThreadCreate(
  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
  void *(*xTask)(void*),    /* Routine to run in a separate thread */
  void *pIn                 /* Argument passed into xTask() */
){
  SQLiteThread *p;


  assert( ppThread!=0 );
  assert( xTask!=0 );
  *ppThread = 0;
  p = sqlite3Malloc(sizeof(*p));
  if( p==0 ) return SQLITE_NOMEM;
  memset(p, 0, sizeof(*p));
Changes to src/vdbe.c.
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415

  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
  pC = p->apCsr[pOp->p1];
  assert( pC!=0 );
  assert( pC->isSorter==(pOp->opcode==OP_SorterSort) );
  res = 1;
  if( isSorter(pC) ){
    rc = sqlite3VdbeSorterRewind(db, pC, &res);
  }else{
    pCrsr = pC->pCursor;
    assert( pCrsr );
    rc = sqlite3BtreeFirst(pCrsr, &res);
    pC->atFirst = res==0 ?1:0;
    pC->deferredMoveto = 0;
    pC->cacheStatus = CACHE_STALE;







|







4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415

  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
  pC = p->apCsr[pOp->p1];
  assert( pC!=0 );
  assert( pC->isSorter==(pOp->opcode==OP_SorterSort) );
  res = 1;
  if( isSorter(pC) ){
    rc = sqlite3VdbeSorterRewind(pC, &res);
  }else{
    pCrsr = pC->pCursor;
    assert( pCrsr );
    rc = sqlite3BtreeFirst(pCrsr, &res);
    pC->atFirst = res==0 ?1:0;
    pC->deferredMoveto = 0;
    pC->cacheStatus = CACHE_STALE;
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
  pC = p->apCsr[pOp->p1];
  if( pC==0 ){
    break;  /* See ticket #2273 */
  }
  assert( pC->isSorter==(pOp->opcode==OP_SorterNext) );
  if( isSorter(pC) ){
    assert( pOp->opcode==OP_SorterNext );
    rc = sqlite3VdbeSorterNext(db, pC, &res);
  }else{
    res = 1;
    assert( pC->deferredMoveto==0 );
    assert( pC->pCursor );
    assert( pOp->opcode!=OP_Next || pOp->p4.xAdvance==sqlite3BtreeNext );
    assert( pOp->opcode!=OP_Prev || pOp->p4.xAdvance==sqlite3BtreePrevious );
    rc = pOp->p4.xAdvance(pC->pCursor, &res);







|







4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
  pC = p->apCsr[pOp->p1];
  if( pC==0 ){
    break;  /* See ticket #2273 */
  }
  assert( pC->isSorter==(pOp->opcode==OP_SorterNext) );
  if( isSorter(pC) ){
    assert( pOp->opcode==OP_SorterNext );
    rc = sqlite3VdbeSorterNext(pC, &res);
  }else{
    res = 1;
    assert( pC->deferredMoveto==0 );
    assert( pC->pCursor );
    assert( pOp->opcode!=OP_Next || pOp->p4.xAdvance==sqlite3BtreeNext );
    assert( pOp->opcode!=OP_Prev || pOp->p4.xAdvance==sqlite3BtreePrevious );
    rc = pOp->p4.xAdvance(pC->pCursor, &res);
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
  assert( pIn2->flags & MEM_Blob );
  pCrsr = pC->pCursor;
  if( ALWAYS(pCrsr!=0) ){
    assert( pC->isTable==0 );
    rc = ExpandBlob(pIn2);
    if( rc==SQLITE_OK ){
      if( isSorter(pC) ){
        rc = sqlite3VdbeSorterWrite(db, pC, pIn2);
      }else{
        nKey = pIn2->n;
        zKey = pIn2->z;
        rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, 
            ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0)
            );
        assert( pC->deferredMoveto==0 );







|







4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
  assert( pIn2->flags & MEM_Blob );
  pCrsr = pC->pCursor;
  if( ALWAYS(pCrsr!=0) ){
    assert( pC->isTable==0 );
    rc = ExpandBlob(pIn2);
    if( rc==SQLITE_OK ){
      if( isSorter(pC) ){
        rc = sqlite3VdbeSorterWrite(pC, pIn2);
      }else{
        nKey = pIn2->n;
        zKey = pIn2->z;
        rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, 
            ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0)
            );
        assert( pC->deferredMoveto==0 );
Changes to src/vdbeInt.h.
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
void sqlite3VdbeFrameDelete(VdbeFrame*);
int sqlite3VdbeFrameRestore(VdbeFrame *);
void sqlite3VdbeMemStoreType(Mem *pMem);
int sqlite3VdbeTransferError(Vdbe *p);

#ifdef SQLITE_OMIT_MERGE_SORT
# define sqlite3VdbeSorterInit(Y,Z)      SQLITE_OK
# define sqlite3VdbeSorterWrite(X,Y,Z)   SQLITE_OK
# define sqlite3VdbeSorterClose(Y,Z)
# define sqlite3VdbeSorterRowkey(Y,Z)    SQLITE_OK
# define sqlite3VdbeSorterRewind(X,Y,Z)  SQLITE_OK
# define sqlite3VdbeSorterNext(X,Y,Z)    SQLITE_OK
# define sqlite3VdbeSorterCompare(X,Y,Z) SQLITE_OK
#else
int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *);
void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *);
int sqlite3VdbeSorterRowkey(const VdbeCursor *, Mem *);
int sqlite3VdbeSorterNext(sqlite3 *, const VdbeCursor *, int *);
int sqlite3VdbeSorterRewind(sqlite3 *, const VdbeCursor *, int *);
int sqlite3VdbeSorterWrite(sqlite3 *, const VdbeCursor *, Mem *);
int sqlite3VdbeSorterCompare(const VdbeCursor *, Mem *, int *);
#endif

#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0
  void sqlite3VdbeEnter(Vdbe*);
  void sqlite3VdbeLeave(Vdbe*);
#else







|
|

|
|
|


|

|
|
|







418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
void sqlite3VdbeFrameDelete(VdbeFrame*);
int sqlite3VdbeFrameRestore(VdbeFrame *);
void sqlite3VdbeMemStoreType(Mem *pMem);
int sqlite3VdbeTransferError(Vdbe *p);

#ifdef SQLITE_OMIT_MERGE_SORT
# define sqlite3VdbeSorterInit(Y,Z)      SQLITE_OK
# define sqlite3VdbeSorterWrite(Y,Z)     SQLITE_OK
# define sqlite3VdbeSorterClose(Z)
# define sqlite3VdbeSorterRowkey(Y,Z)    SQLITE_OK
# define sqlite3VdbeSorterRewind(Y,Z)    SQLITE_OK
# define sqlite3VdbeSorterNext(Y,Z)      SQLITE_OK
# define sqlite3VdbeSorterCompare(Y,Z)   SQLITE_OK
#else
int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *);
void sqlite3VdbeSorterClose(VdbeCursor *);
int sqlite3VdbeSorterRowkey(const VdbeCursor *, Mem *);
int sqlite3VdbeSorterNext(const VdbeCursor *, int *);
int sqlite3VdbeSorterRewind(const VdbeCursor *, int *);
int sqlite3VdbeSorterWrite(const VdbeCursor *, Mem *);
int sqlite3VdbeSorterCompare(const VdbeCursor *, Mem *, int *);
#endif

#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0
  void sqlite3VdbeEnter(Vdbe*);
  void sqlite3VdbeLeave(Vdbe*);
#else
Changes to src/vdbeaux.c.
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
** Close a VDBE cursor and release all the resources that cursor 
** happens to hold.
*/
void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){
  if( pCx==0 ){
    return;
  }
  sqlite3VdbeSorterClose(p->db, pCx);
  if( pCx->pBt ){
    sqlite3BtreeClose(pCx->pBt);
    /* The pCx->pCursor will be close automatically, if it exists, by
    ** the call above. */
  }else if( pCx->pCursor ){
    sqlite3BtreeCloseCursor(pCx->pCursor);
  }







|







1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
** Close a VDBE cursor and release all the resources that cursor 
** happens to hold.
*/
void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){
  if( pCx==0 ){
    return;
  }
  sqlite3VdbeSorterClose(pCx);
  if( pCx->pBt ){
    sqlite3BtreeClose(pCx->pBt);
    /* The pCx->pCursor will be close automatically, if it exists, by
    ** the call above. */
  }else if( pCx->pCursor ){
    sqlite3BtreeCloseCursor(pCx->pCursor);
  }
Changes to src/vdbesort.c.
103
104
105
106
107
108
109


110
111
112
113
114
115
116
117
118

119
120
121
122
123
124
125
  int mxPmaSize;                  /* Maximum PMA size, in bytes.  0==no limit */
  VdbeSorterIter *aIter;          /* Array of iterators to merge */
  int *aTree;                     /* Current state of incremental merge */
  sqlite3_file *pTemp1;           /* PMA file 1 */
  SorterRecord *pRecord;          /* Head of in-memory record list */
  int nRecord;                    /* Number of elements on the pRecord list */
  UnpackedRecord *pUnpacked;      /* Used to unpack keys */


};

/*
** The following type is an iterator for a PMA. It caches the current key in 
** variables nKey/aKey. If the iterator is at EOF, pFile==0.
*/
struct VdbeSorterIter {
  i64 iReadOff;                   /* Current read offset */
  i64 iEof;                       /* 1 byte past EOF for this iterator */

  int nAlloc;                     /* Bytes of space at aAlloc */
  int nKey;                       /* Number of bytes in key */
  sqlite3_file *pFile;            /* File iterator is reading from */
  u8 *aAlloc;                     /* Allocated space */
  u8 *aKey;                       /* Pointer to current key */
  u8 *aBuffer;                    /* Current read buffer */
  int nBuffer;                    /* Size of read buffer in bytes */







>
>









>







103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
  int mxPmaSize;                  /* Maximum PMA size, in bytes.  0==no limit */
  VdbeSorterIter *aIter;          /* Array of iterators to merge */
  int *aTree;                     /* Current state of incremental merge */
  sqlite3_file *pTemp1;           /* PMA file 1 */
  SorterRecord *pRecord;          /* Head of in-memory record list */
  int nRecord;                    /* Number of elements on the pRecord list */
  UnpackedRecord *pUnpacked;      /* Used to unpack keys */
  KeyInfo *pKeyInfo;              /* Copy of cursor KeyInfo without db ptr */
  sqlite3 *db;                    /* Database connection */
};

/*
** The following type is an iterator for a PMA. It caches the current key in 
** variables nKey/aKey. If the iterator is at EOF, pFile==0.
*/
struct VdbeSorterIter {
  i64 iReadOff;                   /* Current read offset */
  i64 iEof;                       /* 1 byte past EOF for this iterator */
  sqlite3 *db;                    /* Corresponding database connection */
  int nAlloc;                     /* Bytes of space at aAlloc */
  int nKey;                       /* Number of bytes in key */
  sqlite3_file *pFile;            /* File iterator is reading from */
  u8 *aAlloc;                     /* Allocated space */
  u8 *aKey;                       /* Pointer to current key */
  u8 *aBuffer;                    /* Current read buffer */
  int nBuffer;                    /* Size of read buffer in bytes */
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
** and return SQLITE_OK. Otherwise, if an error occurs, return an SQLite
** error code.
**
** The buffer indicated by *ppOut may only be considered valid until the
** next call to this function.
*/
static int vdbeSorterIterRead(
  sqlite3 *db,                    /* Database handle (for malloc) */
  VdbeSorterIter *p,              /* Iterator */
  int nByte,                      /* Bytes of data to read */
  u8 **ppOut                      /* OUT: Pointer to buffer containing data */
){
  int iBuf;                       /* Offset within buffer to read from */
  int nAvail;                     /* Bytes of data available in buffer */
  assert( p->aBuffer );







<







177
178
179
180
181
182
183

184
185
186
187
188
189
190
** and return SQLITE_OK. Otherwise, if an error occurs, return an SQLite
** error code.
**
** The buffer indicated by *ppOut may only be considered valid until the
** next call to this function.
*/
static int vdbeSorterIterRead(

  VdbeSorterIter *p,              /* Iterator */
  int nByte,                      /* Bytes of data to read */
  u8 **ppOut                      /* OUT: Pointer to buffer containing data */
){
  int iBuf;                       /* Offset within buffer to read from */
  int nAvail;                     /* Bytes of data available in buffer */
  assert( p->aBuffer );
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293

294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324

325
326
327
328
329
330
331
332

333
334
335
336
337
338
339
    ** range into. Then return a copy of pointer p->aAlloc to the caller.  */
    int nRem;                     /* Bytes remaining to copy */

    /* Extend the p->aAlloc[] allocation if required. */
    if( p->nAlloc<nByte ){
      int nNew = p->nAlloc*2;
      while( nByte>nNew ) nNew = nNew*2;
      p->aAlloc = sqlite3DbReallocOrFree(db, p->aAlloc, nNew);
      if( !p->aAlloc ) return SQLITE_NOMEM;
      p->nAlloc = nNew;
    }

    /* Copy as much data as is available in the buffer into the start of
    ** p->aAlloc[].  */
    memcpy(p->aAlloc, &p->aBuffer[iBuf], nAvail);
    p->iReadOff += nAvail;
    nRem = nByte - nAvail;

    /* The following loop copies up to p->nBuffer bytes per iteration into
    ** the p->aAlloc[] buffer.  */
    while( nRem>0 ){
      int rc;                     /* vdbeSorterIterRead() return code */
      int nCopy;                  /* Number of bytes to copy */
      u8 *aNext;                  /* Pointer to buffer to copy data from */

      nCopy = nRem;
      if( nRem>p->nBuffer ) nCopy = p->nBuffer;
      rc = vdbeSorterIterRead(db, p, nCopy, &aNext);
      if( rc!=SQLITE_OK ) return rc;
      assert( aNext!=p->aAlloc );
      memcpy(&p->aAlloc[nByte - nRem], aNext, nCopy);
      nRem -= nCopy;
    }

    *ppOut = p->aAlloc;
  }

  return SQLITE_OK;
}

/*
** Read a varint from the stream of data accessed by p. Set *pnOut to
** the value read.
*/
static int vdbeSorterIterVarint(sqlite3 *db, VdbeSorterIter *p, u64 *pnOut){
  int iBuf;

  iBuf = p->iReadOff % p->nBuffer;
  if( iBuf && (p->nBuffer-iBuf)>=9 ){
    p->iReadOff += sqlite3GetVarint(&p->aBuffer[iBuf], pnOut);
  }else{
    u8 aVarint[16], *a;
    int i = 0, rc;
    do{
      rc = vdbeSorterIterRead(db, p, 1, &a);
      if( rc ) return rc;
      aVarint[(i++)&0xf] = a[0];
    }while( (a[0]&0x80)!=0 );
    sqlite3GetVarint(aVarint, pnOut);
  }

  return SQLITE_OK;
}


/*
** Advance iterator pIter to the next key in its PMA. Return SQLITE_OK if
** no error occurs, or an SQLite error code if one does.
*/
static int vdbeSorterIterNext(
  sqlite3 *db,                    /* Database handle (for sqlite3DbMalloc() ) */
  VdbeSorterIter *pIter           /* Iterator to advance */
){
  int rc;                         /* Return Code */
  u64 nRec = 0;                   /* Size of record in bytes */


  if( pIter->iReadOff>=pIter->iEof ){
    /* This is an EOF condition */
    vdbeSorterIterZero(db, pIter);
    return SQLITE_OK;
  }

  rc = vdbeSorterIterVarint(db, pIter, &nRec);
  if( rc==SQLITE_OK ){
    pIter->nKey = (int)nRec;
    rc = vdbeSorterIterRead(db, pIter, (int)nRec, &pIter->aKey);
  }

  return rc;
}

/*
** Initialize iterator pIter to scan through the PMA stored in file pFile
** starting at offset iStart and ending at offset iEof-1. This function 
** leaves the iterator pointing to the first key in the PMA (or EOF if the 
** PMA is empty).
*/
static int vdbeSorterIterInit(
  sqlite3 *db,                    /* Database handle */
  const VdbeSorter *pSorter,      /* Sorter object */
  i64 iStart,                     /* Start offset in pFile */
  VdbeSorterIter *pIter,          /* Iterator to populate */
  i64 *pnByte                     /* IN/OUT: Increment this value by PMA size */
){
  int rc = SQLITE_OK;
  int nBuf;


  nBuf = sqlite3BtreeGetPageSize(db->aDb[0].pBt);

  assert( pSorter->iWriteOff>iStart );
  assert( pIter->aAlloc==0 );
  assert( pIter->aBuffer==0 );
  pIter->pFile = pSorter->pTemp1;
  pIter->iReadOff = iStart;

  pIter->nAlloc = 128;
  pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc);
  pIter->nBuffer = nBuf;
  pIter->aBuffer = (u8 *)sqlite3DbMallocRaw(db, nBuf);

  if( !pIter->aBuffer ){
    rc = SQLITE_NOMEM;







|



















|
















|









|















<




>







|


|












<







>








>







221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290

291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318

319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
    ** range into. Then return a copy of pointer p->aAlloc to the caller.  */
    int nRem;                     /* Bytes remaining to copy */

    /* Extend the p->aAlloc[] allocation if required. */
    if( p->nAlloc<nByte ){
      int nNew = p->nAlloc*2;
      while( nByte>nNew ) nNew = nNew*2;
      p->aAlloc = sqlite3DbReallocOrFree(p->db, p->aAlloc, nNew);
      if( !p->aAlloc ) return SQLITE_NOMEM;
      p->nAlloc = nNew;
    }

    /* Copy as much data as is available in the buffer into the start of
    ** p->aAlloc[].  */
    memcpy(p->aAlloc, &p->aBuffer[iBuf], nAvail);
    p->iReadOff += nAvail;
    nRem = nByte - nAvail;

    /* The following loop copies up to p->nBuffer bytes per iteration into
    ** the p->aAlloc[] buffer.  */
    while( nRem>0 ){
      int rc;                     /* vdbeSorterIterRead() return code */
      int nCopy;                  /* Number of bytes to copy */
      u8 *aNext;                  /* Pointer to buffer to copy data from */

      nCopy = nRem;
      if( nRem>p->nBuffer ) nCopy = p->nBuffer;
      rc = vdbeSorterIterRead(p, nCopy, &aNext);
      if( rc!=SQLITE_OK ) return rc;
      assert( aNext!=p->aAlloc );
      memcpy(&p->aAlloc[nByte - nRem], aNext, nCopy);
      nRem -= nCopy;
    }

    *ppOut = p->aAlloc;
  }

  return SQLITE_OK;
}

/*
** Read a varint from the stream of data accessed by p. Set *pnOut to
** the value read.
*/
static int vdbeSorterIterVarint(VdbeSorterIter *p, u64 *pnOut){
  int iBuf;

  iBuf = p->iReadOff % p->nBuffer;
  if( iBuf && (p->nBuffer-iBuf)>=9 ){
    p->iReadOff += sqlite3GetVarint(&p->aBuffer[iBuf], pnOut);
  }else{
    u8 aVarint[16], *a;
    int i = 0, rc;
    do{
      rc = vdbeSorterIterRead(p, 1, &a);
      if( rc ) return rc;
      aVarint[(i++)&0xf] = a[0];
    }while( (a[0]&0x80)!=0 );
    sqlite3GetVarint(aVarint, pnOut);
  }

  return SQLITE_OK;
}


/*
** Advance iterator pIter to the next key in its PMA. Return SQLITE_OK if
** no error occurs, or an SQLite error code if one does.
*/
static int vdbeSorterIterNext(

  VdbeSorterIter *pIter           /* Iterator to advance */
){
  int rc;                         /* Return Code */
  u64 nRec = 0;                   /* Size of record in bytes */
  sqlite3 *db = pIter->db;        /* Database connection */

  if( pIter->iReadOff>=pIter->iEof ){
    /* This is an EOF condition */
    vdbeSorterIterZero(db, pIter);
    return SQLITE_OK;
  }

  rc = vdbeSorterIterVarint(pIter, &nRec);
  if( rc==SQLITE_OK ){
    pIter->nKey = (int)nRec;
    rc = vdbeSorterIterRead(pIter, (int)nRec, &pIter->aKey);
  }

  return rc;
}

/*
** Initialize iterator pIter to scan through the PMA stored in file pFile
** starting at offset iStart and ending at offset iEof-1. This function 
** leaves the iterator pointing to the first key in the PMA (or EOF if the 
** PMA is empty).
*/
static int vdbeSorterIterInit(

  const VdbeSorter *pSorter,      /* Sorter object */
  i64 iStart,                     /* Start offset in pFile */
  VdbeSorterIter *pIter,          /* Iterator to populate */
  i64 *pnByte                     /* IN/OUT: Increment this value by PMA size */
){
  int rc = SQLITE_OK;
  int nBuf;
  sqlite3 *db = pSorter->db;

  nBuf = sqlite3BtreeGetPageSize(db->aDb[0].pBt);

  assert( pSorter->iWriteOff>iStart );
  assert( pIter->aAlloc==0 );
  assert( pIter->aBuffer==0 );
  pIter->pFile = pSorter->pTemp1;
  pIter->iReadOff = iStart;
  pIter->db = db;
  pIter->nAlloc = 128;
  pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc);
  pIter->nBuffer = nBuf;
  pIter->aBuffer = (u8 *)sqlite3DbMallocRaw(db, nBuf);

  if( !pIter->aBuffer ){
    rc = SQLITE_NOMEM;
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
      );
      assert( rc!=SQLITE_IOERR_SHORT_READ );
    }

    if( rc==SQLITE_OK ){
      u64 nByte;                       /* Size of PMA in bytes */
      pIter->iEof = pSorter->iWriteOff;
      rc = vdbeSorterIterVarint(db, pIter, &nByte);
      pIter->iEof = pIter->iReadOff + nByte;
      *pnByte += nByte;
    }
  }

  if( rc==SQLITE_OK ){
    rc = vdbeSorterIterNext(db, pIter);
  }
  return rc;
}


/*
** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, 







|






|







354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
      );
      assert( rc!=SQLITE_IOERR_SHORT_READ );
    }

    if( rc==SQLITE_OK ){
      u64 nByte;                       /* Size of PMA in bytes */
      pIter->iEof = pSorter->iWriteOff;
      rc = vdbeSorterIterVarint(pIter, &nByte);
      pIter->iEof = pIter->iReadOff + nByte;
      *pnByte += nByte;
    }
  }

  if( rc==SQLITE_OK ){
    rc = vdbeSorterIterNext(pIter);
  }
  return rc;
}


/*
** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, 
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
** is true and key1 contains even a single NULL value, it is considered to
** be less than key2. Even if key2 also contains NULL values.
**
** If pKey2 is passed a NULL pointer, then it is assumed that the pCsr->aSpace
** has been allocated and contains an unpacked record that is used as key2.
*/
static void vdbeSorterCompare(
  const VdbeCursor *pCsr,         /* Cursor object (for pKeyInfo) */
  int bOmitRowid,                 /* Ignore rowid field at end of keys */
  const void *pKey1, int nKey1,   /* Left side of comparison */
  const void *pKey2, int nKey2,   /* Right side of comparison */
  int *pRes,                      /* OUT: Result of comparison */
  UnpackedRecord *r2              /* Space to hold the unpacked Key2 record */
){
  KeyInfo *pKeyInfo = pCsr->pKeyInfo;
  int i;

  if( pKey2 ){
    assert( r2!=0 );
    sqlite3VdbeRecordUnpack(pKeyInfo, nKey2, pKey2, r2);
  }








|






|







383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
** is true and key1 contains even a single NULL value, it is considered to
** be less than key2. Even if key2 also contains NULL values.
**
** If pKey2 is passed a NULL pointer, then it is assumed that the pCsr->aSpace
** has been allocated and contains an unpacked record that is used as key2.
*/
static void vdbeSorterCompare(
  VdbeSorter *pSorter,            /* The sorter */
  int bOmitRowid,                 /* Ignore rowid field at end of keys */
  const void *pKey1, int nKey1,   /* Left side of comparison */
  const void *pKey2, int nKey2,   /* Right side of comparison */
  int *pRes,                      /* OUT: Result of comparison */
  UnpackedRecord *r2              /* Space to hold the unpacked Key2 record */
){
  KeyInfo *pKeyInfo = pSorter->pKeyInfo;
  int i;

  if( pKey2 ){
    assert( r2!=0 );
    sqlite3VdbeRecordUnpack(pKeyInfo, nKey2, pKey2, r2);
  }

415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
}

/*
** This function is called to compare two iterator keys when merging 
** multiple b-tree segments. Parameter iOut is the index of the aTree[] 
** value to recalculate.
*/
static int vdbeSorterDoCompare(const VdbeCursor *pCsr, int iOut){
  VdbeSorter *pSorter = pCsr->pSorter;
  int i1;
  int i2;
  int iRes;
  VdbeSorterIter *p1;
  VdbeSorterIter *p2;

  assert( iOut<pSorter->nTree && iOut>0 );







|
<







418
419
420
421
422
423
424
425

426
427
428
429
430
431
432
}

/*
** This function is called to compare two iterator keys when merging 
** multiple b-tree segments. Parameter iOut is the index of the aTree[] 
** value to recalculate.
*/
static int vdbeSorterDoCompare(VdbeSorter *pSorter, int iOut){

  int i1;
  int i2;
  int iRes;
  VdbeSorterIter *p1;
  VdbeSorterIter *p2;

  assert( iOut<pSorter->nTree && iOut>0 );
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
  if( p1->pFile==0 ){
    iRes = i2;
  }else if( p2->pFile==0 ){
    iRes = i1;
  }else{
    int res;
    vdbeSorterCompare(
        pCsr, 0, p1->aKey, p1->nKey, p2->aKey, p2->nKey, &res, 
        pSorter->pUnpacked
    );
    if( res<=0 ){
      iRes = i1;
    }else{
      iRes = i2;
    }







|







445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
  if( p1->pFile==0 ){
    iRes = i2;
  }else if( p2->pFile==0 ){
    iRes = i1;
  }else{
    int res;
    vdbeSorterCompare(
        pSorter, 0, p1->aKey, p1->nKey, p2->aKey, p2->nKey, &res,
        pSorter->pUnpacked
    );
    if( res<=0 ){
      iRes = i1;
    }else{
      iRes = i2;
    }
465
466
467
468
469
470
471

472
473
474
475
476
477

478
479
480
481













482
483
484
485
486
487
488
** Initialize the temporary index cursor just opened as a sorter cursor.
*/
int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){
  int pgsz;                       /* Page size of main database */
  int mxCache;                    /* Cache size */
  VdbeSorter *pSorter;            /* The new sorter */
  char *d;                        /* Dummy */


  assert( pCsr->pKeyInfo && pCsr->pBt==0 );
  pCsr->pSorter = pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter));
  if( pSorter==0 ){
    return SQLITE_NOMEM;
  }

  
  pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo, 0, 0, &d);
  if( pSorter->pUnpacked==0 ) return SQLITE_NOMEM;
  assert( pSorter->pUnpacked==(UnpackedRecord *)d );














  if( !sqlite3TempInMemory(db) ){
    pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
    pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz;
    mxCache = db->aDb[0].pSchema->cache_size;
    if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING;
    pSorter->mxPmaSize = mxCache * pgsz;







>






>




>
>
>
>
>
>
>
>
>
>
>
>
>







467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
** Initialize the temporary index cursor just opened as a sorter cursor.
*/
int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){
  int pgsz;                       /* Page size of main database */
  int mxCache;                    /* Cache size */
  VdbeSorter *pSorter;            /* The new sorter */
  char *d;                        /* Dummy */
  int nByte;                      /* Bytes in pKeyInfo */

  assert( pCsr->pKeyInfo && pCsr->pBt==0 );
  pCsr->pSorter = pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter));
  if( pSorter==0 ){
    return SQLITE_NOMEM;
  }
  pSorter->db = db;
  
  pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo, 0, 0, &d);
  if( pSorter->pUnpacked==0 ) return SQLITE_NOMEM;
  assert( pSorter->pUnpacked==(UnpackedRecord *)d );

  /* pSorter->pKeyInfo is a copy of pCsr->pKeyInfo with the db field set to
  ** zero.  We use this modified pKeyInfo for sorting so that no lookaside
  ** memory will be used, so that sorting can proceed in parallel in multiple
  ** threads.
  */
  nByte = sizeof(KeyInfo) + (pCsr->pKeyInfo->nField - 1)*sizeof(CollSeq*);
  pSorter->pKeyInfo = sqlite3DbMallocRaw(db, nByte);
  if( pSorter->pKeyInfo==0 ){
    return SQLITE_NOMEM;
  }
  memcpy(pSorter->pKeyInfo, pCsr->pKeyInfo, nByte);
  pSorter->pKeyInfo->db = 0;  

  if( !sqlite3TempInMemory(db) ){
    pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
    pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz;
    mxCache = db->aDb[0].pSchema->cache_size;
    if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING;
    pSorter->mxPmaSize = mxCache * pgsz;
502
503
504
505
506
507
508
509
510
511

512
513
514
515
516
517
518
519
520
521
522
523

524
525
526
527
528
529
530
    sqlite3DbFree(db, p);
  }
}

/*
** Free any cursor components allocated by sqlite3VdbeSorterXXX routines.
*/
void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){
  VdbeSorter *pSorter = pCsr->pSorter;
  if( pSorter ){

    if( pSorter->aIter ){
      int i;
      for(i=0; i<pSorter->nTree; i++){
        vdbeSorterIterZero(db, &pSorter->aIter[i]);
      }
      sqlite3DbFree(db, pSorter->aIter);
    }
    if( pSorter->pTemp1 ){
      sqlite3OsCloseFree(pSorter->pTemp1);
    }
    vdbeSorterRecordFree(db, pSorter->pRecord);
    sqlite3DbFree(db, pSorter->pUnpacked);

    sqlite3DbFree(db, pSorter);
    pCsr->pSorter = 0;
  }
}

/*
** Allocate space for a file-handle and open a temporary file. If successful,







|


>












>







519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
    sqlite3DbFree(db, p);
  }
}

/*
** Free any cursor components allocated by sqlite3VdbeSorterXXX routines.
*/
void sqlite3VdbeSorterClose(VdbeCursor *pCsr){
  VdbeSorter *pSorter = pCsr->pSorter;
  if( pSorter ){
    sqlite3 *db = pSorter->db;
    if( pSorter->aIter ){
      int i;
      for(i=0; i<pSorter->nTree; i++){
        vdbeSorterIterZero(db, &pSorter->aIter[i]);
      }
      sqlite3DbFree(db, pSorter->aIter);
    }
    if( pSorter->pTemp1 ){
      sqlite3OsCloseFree(pSorter->pTemp1);
    }
    vdbeSorterRecordFree(db, pSorter->pRecord);
    sqlite3DbFree(db, pSorter->pUnpacked);
    sqlite3DbFree(db, pSorter->pKeyInfo);
    sqlite3DbFree(db, pSorter);
    pCsr->pSorter = 0;
  }
}

/*
** Allocate space for a file-handle and open a temporary file. If successful,
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
}

/*
** Merge the two sorted lists p1 and p2 into a single list.
** Set *ppOut to the head of the new list.
*/
static void vdbeSorterMerge(
  const VdbeCursor *pCsr,         /* For pKeyInfo */
  SorterRecord *p1,               /* First list to merge */
  SorterRecord *p2,               /* Second list to merge */
  SorterRecord **ppOut,           /* OUT: Head of merged list */
  UnpackedRecord *pUnpacked       /* Space to hold an unpacked record */
){
  SorterRecord *pFinal = 0;
  SorterRecord **pp = &pFinal;
  void *pVal2 = p2 ? p2->pVal : 0;

  while( p1 && p2 ){
    int res;
    vdbeSorterCompare(pCsr, 0, p1->pVal, p1->nVal, pVal2, p2->nVal, &res,
                      pUnpacked);
    if( res<=0 ){
      *pp = p1;
      pp = &p1->pNext;
      p1 = p1->pNext;
      pVal2 = 0;
    }else{







|











|







560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
}

/*
** Merge the two sorted lists p1 and p2 into a single list.
** Set *ppOut to the head of the new list.
*/
static void vdbeSorterMerge(
  VdbeSorter *pSorter,            /* The sorter object */
  SorterRecord *p1,               /* First list to merge */
  SorterRecord *p2,               /* Second list to merge */
  SorterRecord **ppOut,           /* OUT: Head of merged list */
  UnpackedRecord *pUnpacked       /* Space to hold an unpacked record */
){
  SorterRecord *pFinal = 0;
  SorterRecord **pp = &pFinal;
  void *pVal2 = p2 ? p2->pVal : 0;

  while( p1 && p2 ){
    int res;
    vdbeSorterCompare(pSorter, 0, p1->pVal, p1->nVal, pVal2, p2->nVal, &res,
                      pUnpacked);
    if( res<=0 ){
      *pp = p1;
      pp = &p1->pNext;
      p1 = p1->pNext;
      pVal2 = 0;
    }else{
576
577
578
579
580
581
582
583
584
585

586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
  *ppOut = pFinal;
}

/*
** Background sorting task
*/
typedef struct SortTask {
  SorterRecord *pList;          /* List of elements to be sorted */
  const VdbeCursor *pCsr;       /* Cursor.  Needed for pCur->pKeyInfo */
  UnpackedRecord *pUnpacked;    /* Space to hold an unpacked key */

  SorterRecord **apSlot;        /* Temp memory for the merge sort */
} SortTask;

/*
** Do a sort in a background thread
*/
void *vdbeSorterBackgroundSort(SortTask *pTask){
  SorterRecord *p = pTask->pList;
  SorterRecord **a = pTask->apSlot;
  int i;
  for(i=0; i<64; i++) a[i] = 0;
  while( p ){
    SorterRecord *pNext = p->pNext;
    p->pNext = 0;
    for(i=0; a[i]; i++){
      if( a[i]==0 ) break;
      vdbeSorterMerge(pTask->pCsr, a[i], p, &p, pTask->pUnpacked);
      a[i] = 0;
    }
    a[i] = p;
    p = pNext;
  }
  p = 0;
  for(i=0; i<64; i++){
    vdbeSorterMerge(pTask->pCsr, a[i], p, &p, pTask->pUnpacked);
  }
  pTask->pList = p;
  return p;
}

/*
** Divide a linked list of SorterRecord objects into two separate







|
<

>
















|







|







595
596
597
598
599
600
601
602

603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
  *ppOut = pFinal;
}

/*
** Background sorting task
*/
typedef struct SortTask {
  VdbeSorter *pSorter;          /* The sorter for which this task works */

  UnpackedRecord *pUnpacked;    /* Space to hold an unpacked key */
  SorterRecord *pList;          /* List of elements to be sorted */
  SorterRecord **apSlot;        /* Temp memory for the merge sort */
} SortTask;

/*
** Do a sort in a background thread
*/
void *vdbeSorterBackgroundSort(SortTask *pTask){
  SorterRecord *p = pTask->pList;
  SorterRecord **a = pTask->apSlot;
  int i;
  for(i=0; i<64; i++) a[i] = 0;
  while( p ){
    SorterRecord *pNext = p->pNext;
    p->pNext = 0;
    for(i=0; a[i]; i++){
      if( a[i]==0 ) break;
      vdbeSorterMerge(pTask->pSorter, a[i], p, &p, pTask->pUnpacked);
      a[i] = 0;
    }
    a[i] = p;
    p = pNext;
  }
  p = 0;
  for(i=0; i<64; i++){
    vdbeSorterMerge(pTask->pSorter, a[i], p, &p, pTask->pUnpacked);
  }
  pTask->pList = p;
  return p;
}

/*
** Divide a linked list of SorterRecord objects into two separate
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
}

/*
** Sort the linked list of records headed at pCsr->pRecord. Return SQLITE_OK
** if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if an error
** occurs.
*/
static int vdbeSorterSort(const VdbeCursor *pCsr){
  int rc;
  SorterRecord *p;
  VdbeSorter *pSorter = pCsr->pSorter;
  char *pDummy = 0;
  int nByteA, nByteB;
  SortTask aTask[2];
  SQLiteThread *pThread;

  nByteA = 64*sizeof(SorterRecord*);
  nByteB = ROUND8(sizeof(UnpackedRecord));
  nByteB += sizeof(Mem)*(pCsr->pKeyInfo->nField+1);

  aTask[0].apSlot = (SorterRecord **)sqlite3MallocZero(2*(nByteA + nByteB));
  if( !aTask[0].apSlot ){
    return SQLITE_NOMEM;
  }
  aTask[0].pCsr = pCsr;
  aTask[0].pUnpacked = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo,
                          (char*)&aTask[0].apSlot[64], nByteB, &pDummy);
  assert( pDummy==0 );
  aTask[1].apSlot = (nByteA+nByteB)+(char*)aTask[0].apSlot;
  aTask[1].pCsr = pCsr;
  aTask[1].pUnpacked = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo,
                          (char*)&aTask[1].apSlot[64], nByteB, &pDummy);
  assert( pDummy==0 );

  vdbeSorterDivideList(pSorter->pRecord, &aTask[0].pList, &aTask[1].pList);
  rc = sqlite3ThreadCreate(&pThread, 
          (void*(*)(void*))vdbeSorterBackgroundSort, &aTask[0]);
  vdbeSorterBackgroundSort(&aTask[1]);
  if( rc==SQLITE_NOMEM ){
    vdbeSorterBackgroundSort(&aTask[0]);
  }else{
    rc = sqlite3ThreadJoin(pThread, &pDummy);
  }
  vdbeSorterMerge(pCsr, aTask[0].pList, aTask[1].pList, &pSorter->pRecord,
                  aTask[0].pUnpacked);
  sqlite3_free(aTask[0].apSlot);
  return rc;
}

/*
** Initialize a file-writer object.
*/







|

<
<







|





|
|


|
|
|










|

|
|







659
660
661
662
663
664
665
666
667


668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
}

/*
** Sort the linked list of records headed at pCsr->pRecord. Return SQLITE_OK
** if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if an error
** occurs.
*/
static int vdbeSorterSort(VdbeSorter *pSorter){
  int rc;


  char *pDummy = 0;
  int nByteA, nByteB;
  SortTask aTask[2];
  SQLiteThread *pThread;

  nByteA = 64*sizeof(SorterRecord*);
  nByteB = ROUND8(sizeof(UnpackedRecord));
  nByteB += sizeof(Mem)*(pSorter->pKeyInfo->nField+1);

  aTask[0].apSlot = (SorterRecord **)sqlite3MallocZero(2*(nByteA + nByteB));
  if( !aTask[0].apSlot ){
    return SQLITE_NOMEM;
  }
  aTask[0].pSorter = pSorter;
  aTask[0].pUnpacked = sqlite3VdbeAllocUnpackedRecord(pSorter->pKeyInfo,
                          (char*)&aTask[0].apSlot[64], nByteB, &pDummy);
  assert( pDummy==0 );
  aTask[1].apSlot = (SorterRecord**)((nByteA+nByteB)+(char*)aTask[0].apSlot);
  aTask[1].pSorter = pSorter;
  aTask[1].pUnpacked = sqlite3VdbeAllocUnpackedRecord(pSorter->pKeyInfo,
                          (char*)&aTask[1].apSlot[64], nByteB, &pDummy);
  assert( pDummy==0 );

  vdbeSorterDivideList(pSorter->pRecord, &aTask[0].pList, &aTask[1].pList);
  rc = sqlite3ThreadCreate(&pThread, 
          (void*(*)(void*))vdbeSorterBackgroundSort, &aTask[0]);
  vdbeSorterBackgroundSort(&aTask[1]);
  if( rc==SQLITE_NOMEM ){
    vdbeSorterBackgroundSort(&aTask[0]);
  }else{
    rc = sqlite3ThreadJoin(pThread, (void**)&pDummy);
  }
  vdbeSorterMerge(pSorter, aTask[0].pList, aTask[1].pList, &pSorter->pRecord,
                  pSorter->pUnpacked);
  sqlite3_free(aTask[0].apSlot);
  return rc;
}

/*
** Initialize a file-writer object.
*/
781
782
783
784
785
786
787
788
789
790
791

792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
**     * A varint. This varint contains the total number of bytes of content
**       in the PMA (not including the varint itself).
**
**     * One or more records packed end-to-end in order of ascending keys. 
**       Each record consists of a varint followed by a blob of data (the 
**       key). The varint is the number of bytes in the blob of data.
*/
static int vdbeSorterListToPMA(sqlite3 *db, const VdbeCursor *pCsr){
  int rc = SQLITE_OK;             /* Return code */
  VdbeSorter *pSorter = pCsr->pSorter;
  FileWriter writer;


  memset(&writer, 0, sizeof(FileWriter));

  if( pSorter->nInMemory==0 ){
    assert( pSorter->pRecord==0 );
    assert( pSorter->nRecord==0 );
    return rc;
  }

  rc = vdbeSorterSort(pCsr);

  /* If the first temporary PMA file has not been opened, open it now. */
  if( rc==SQLITE_OK && pSorter->pTemp1==0 ){
    rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
    assert( rc!=SQLITE_OK || pSorter->pTemp1 );
    assert( pSorter->iWriteOff==0 );
    assert( pSorter->nPMA==0 );







|

<

>









|







798
799
800
801
802
803
804
805
806

807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
**     * A varint. This varint contains the total number of bytes of content
**       in the PMA (not including the varint itself).
**
**     * One or more records packed end-to-end in order of ascending keys. 
**       Each record consists of a varint followed by a blob of data (the 
**       key). The varint is the number of bytes in the blob of data.
*/
static int vdbeSorterListToPMA(VdbeSorter *pSorter){
  int rc = SQLITE_OK;             /* Return code */

  FileWriter writer;
  sqlite3 *db = pSorter->db;

  memset(&writer, 0, sizeof(FileWriter));

  if( pSorter->nInMemory==0 ){
    assert( pSorter->pRecord==0 );
    assert( pSorter->nRecord==0 );
    return rc;
  }

  rc = vdbeSorterSort(pSorter);

  /* If the first temporary PMA file has not been opened, open it now. */
  if( rc==SQLITE_OK && pSorter->pTemp1==0 ){
    rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
    assert( rc!=SQLITE_OK || pSorter->pTemp1 );
    assert( pSorter->iWriteOff==0 );
    assert( pSorter->nPMA==0 );
829
830
831
832
833
834
835
836
837
838
839
840

841
842
843
844
845
846
847
  return rc;
}

/*
** Add a record to the sorter.
*/
int sqlite3VdbeSorterWrite(
  sqlite3 *db,                    /* Database handle */
  const VdbeCursor *pCsr,               /* Sorter cursor */
  Mem *pVal                       /* Memory cell containing record */
){
  VdbeSorter *pSorter = pCsr->pSorter;

  int rc = SQLITE_OK;             /* Return Code */
  SorterRecord *pNew;             /* New list element */

  assert( pSorter );
  pSorter->nInMemory += sqlite3VarintLen(pVal->n) + pVal->n;

  pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n + sizeof(SorterRecord));







<
|



>







846
847
848
849
850
851
852

853
854
855
856
857
858
859
860
861
862
863
864
  return rc;
}

/*
** Add a record to the sorter.
*/
int sqlite3VdbeSorterWrite(

  const VdbeCursor *pCsr,         /* Sorter cursor */
  Mem *pVal                       /* Memory cell containing record */
){
  VdbeSorter *pSorter = pCsr->pSorter;
  sqlite3 *db = pSorter->db;
  int rc = SQLITE_OK;             /* Return Code */
  SorterRecord *pNew;             /* New list element */

  assert( pSorter );
  pSorter->nInMemory += sqlite3VarintLen(pVal->n) + pVal->n;

  pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n + sizeof(SorterRecord));
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921

922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
     || (pSorter->nInMemory>pSorter->mnPmaSize && sqlite3HeapNearlyFull())
  )){
#ifdef SQLITE_DEBUG
    i64 nExpect = pSorter->iWriteOff
                + sqlite3VarintLen(pSorter->nInMemory)
                + pSorter->nInMemory;
#endif
    rc = vdbeSorterListToPMA(db, pCsr);
    pSorter->nInMemory = 0;
    assert( rc!=SQLITE_OK || (nExpect==pSorter->iWriteOff) );
  }

  return rc;
}

/*
** Helper function for sqlite3VdbeSorterRewind(). 
*/
static int vdbeSorterInitMerge(
  sqlite3 *db,                    /* Database handle */
  const VdbeCursor *pCsr,         /* Cursor handle for this sorter */
  i64 *pnByte                     /* Sum of bytes in all opened PMAs */
){
  VdbeSorter *pSorter = pCsr->pSorter;
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* Used to iterator through aIter[] */
  i64 nByte = 0;                  /* Total bytes in all opened PMAs */

  /* Initialize the iterators. */
  for(i=0; i<SORTER_MAX_MERGE_COUNT; i++){
    VdbeSorterIter *pIter = &pSorter->aIter[i];
    rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte);
    pSorter->iReadOff = pIter->iEof;
    assert( rc!=SQLITE_OK || pSorter->iReadOff<=pSorter->iWriteOff );
    if( rc!=SQLITE_OK || pSorter->iReadOff>=pSorter->iWriteOff ) break;
  }

  /* Initialize the aTree[] array. */
  for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){
    rc = vdbeSorterDoCompare(pCsr, i);
  }

  *pnByte = nByte;
  return rc;
}

/*
** Once the sorter has been populated, this function is called to prepare
** for iterating through its contents in sorted order.
*/
int sqlite3VdbeSorterRewind(sqlite3 *db, const VdbeCursor *pCsr, int *pbEof){
  VdbeSorter *pSorter = pCsr->pSorter;

  int rc;                         /* Return code */
  sqlite3_file *pTemp2 = 0;       /* Second temp file to use */
  i64 iWrite2 = 0;                /* Write offset for pTemp2 */
  int nIter;                      /* Number of iterators used */
  int nByte;                      /* Bytes of space required for aIter/aTree */
  int N = 2;                      /* Power of 2 >= nIter */

  assert( pSorter );

  /* If no data has been written to disk, then do not do so now. Instead,
  ** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly
  ** from the in-memory list.  */
  if( pSorter->nPMA==0 ){
    *pbEof = !pSorter->pRecord;
    assert( pSorter->aTree==0 );
    return vdbeSorterSort(pCsr);
  }

  /* Write the current in-memory list to a PMA. */
  rc = vdbeSorterListToPMA(db, pCsr);
  if( rc!=SQLITE_OK ) return rc;

  /* Allocate space for aIter[] and aTree[]. */
  nIter = pSorter->nPMA;
  if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT;
  assert( nIter>0 );
  while( N<nIter ) N += N;







|











|
<


<







|







|










|

>















|



|







887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906

907
908

909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
     || (pSorter->nInMemory>pSorter->mnPmaSize && sqlite3HeapNearlyFull())
  )){
#ifdef SQLITE_DEBUG
    i64 nExpect = pSorter->iWriteOff
                + sqlite3VarintLen(pSorter->nInMemory)
                + pSorter->nInMemory;
#endif
    rc = vdbeSorterListToPMA(pSorter);
    pSorter->nInMemory = 0;
    assert( rc!=SQLITE_OK || (nExpect==pSorter->iWriteOff) );
  }

  return rc;
}

/*
** Helper function for sqlite3VdbeSorterRewind(). 
*/
static int vdbeSorterInitMerge(
  VdbeSorter *pSorter,            /* The sorter */

  i64 *pnByte                     /* Sum of bytes in all opened PMAs */
){

  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* Used to iterator through aIter[] */
  i64 nByte = 0;                  /* Total bytes in all opened PMAs */

  /* Initialize the iterators. */
  for(i=0; i<SORTER_MAX_MERGE_COUNT; i++){
    VdbeSorterIter *pIter = &pSorter->aIter[i];
    rc = vdbeSorterIterInit(pSorter, pSorter->iReadOff, pIter, &nByte);
    pSorter->iReadOff = pIter->iEof;
    assert( rc!=SQLITE_OK || pSorter->iReadOff<=pSorter->iWriteOff );
    if( rc!=SQLITE_OK || pSorter->iReadOff>=pSorter->iWriteOff ) break;
  }

  /* Initialize the aTree[] array. */
  for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){
    rc = vdbeSorterDoCompare(pSorter, i);
  }

  *pnByte = nByte;
  return rc;
}

/*
** Once the sorter has been populated, this function is called to prepare
** for iterating through its contents in sorted order.
*/
int sqlite3VdbeSorterRewind(const VdbeCursor *pCsr, int *pbEof){
  VdbeSorter *pSorter = pCsr->pSorter;
  sqlite3 *db = pSorter->db;
  int rc;                         /* Return code */
  sqlite3_file *pTemp2 = 0;       /* Second temp file to use */
  i64 iWrite2 = 0;                /* Write offset for pTemp2 */
  int nIter;                      /* Number of iterators used */
  int nByte;                      /* Bytes of space required for aIter/aTree */
  int N = 2;                      /* Power of 2 >= nIter */

  assert( pSorter );

  /* If no data has been written to disk, then do not do so now. Instead,
  ** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly
  ** from the in-memory list.  */
  if( pSorter->nPMA==0 ){
    *pbEof = !pSorter->pRecord;
    assert( pSorter->aTree==0 );
    return vdbeSorterSort(pSorter);
  }

  /* Write the current in-memory list to a PMA. */
  rc = vdbeSorterListToPMA(pSorter);
  if( rc!=SQLITE_OK ) return rc;

  /* Allocate space for aIter[] and aTree[]. */
  nIter = pSorter->nPMA;
  if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT;
  assert( nIter>0 );
  while( N<nIter ) N += N;
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
      ** These iterators will be incrementally merged as the VDBE layer calls
      ** sqlite3VdbeSorterNext().
      **
      ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs,
      ** initialize interators for SORTER_MAX_MERGE_COUNT of them. These PMAs
      ** are merged into a single PMA that is written to file pTemp2.
      */
      rc = vdbeSorterInitMerge(db, pCsr, &nWrite);
      assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile );
      if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
        break;
      }

      /* Open the second temp file, if it is not already open. */
      if( pTemp2==0 ){







|







986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
      ** These iterators will be incrementally merged as the VDBE layer calls
      ** sqlite3VdbeSorterNext().
      **
      ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs,
      ** initialize interators for SORTER_MAX_MERGE_COUNT of them. These PMAs
      ** are merged into a single PMA that is written to file pTemp2.
      */
      rc = vdbeSorterInitMerge(pSorter, &nWrite);
      assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile );
      if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
        break;
      }

      /* Open the second temp file, if it is not already open. */
      if( pTemp2==0 ){
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
        fileWriterWriteVarint(&writer, nWrite);
        while( rc==SQLITE_OK && bEof==0 ){
          VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ];
          assert( pIter->pFile );

          fileWriterWriteVarint(&writer, pIter->nKey);
          fileWriterWrite(&writer, pIter->aKey, pIter->nKey);
          rc = sqlite3VdbeSorterNext(db, pCsr, &bEof);
        }
        rc2 = fileWriterFinish(db, &writer, &iWrite2);
        if( rc==SQLITE_OK ) rc = rc2;
      }
    }

    if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){







|







1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
        fileWriterWriteVarint(&writer, nWrite);
        while( rc==SQLITE_OK && bEof==0 ){
          VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ];
          assert( pIter->pFile );

          fileWriterWriteVarint(&writer, pIter->nKey);
          fileWriterWrite(&writer, pIter->aKey, pIter->nKey);
          rc = sqlite3VdbeSorterNext(pCsr, &bEof);
        }
        rc2 = fileWriterFinish(db, &writer, &iWrite2);
        if( rc==SQLITE_OK ) rc = rc2;
      }
    }

    if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
1022
1023
1024
1025
1026
1027
1028
1029
1030

1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
  *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
  return rc;
}

/*
** Advance to the next element in the sorter.
*/
int sqlite3VdbeSorterNext(sqlite3 *db, const VdbeCursor *pCsr, int *pbEof){
  VdbeSorter *pSorter = pCsr->pSorter;

  int rc;                         /* Return code */

  if( pSorter->aTree ){
    int iPrev = pSorter->aTree[1];/* Index of iterator to advance */
    int i;                        /* Index of aTree[] to recalculate */

    rc = vdbeSorterIterNext(db, &pSorter->aIter[iPrev]);
    for(i=(pSorter->nTree+iPrev)/2; rc==SQLITE_OK && i>0; i=i/2){
      rc = vdbeSorterDoCompare(pCsr, i);
    }

    *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
  }else{
    SorterRecord *pFree = pSorter->pRecord;
    pSorter->pRecord = pFree->pNext;
    pSorter->nRecord--;







|

>






|

|







1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
  *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
  return rc;
}

/*
** Advance to the next element in the sorter.
*/
int sqlite3VdbeSorterNext(const VdbeCursor *pCsr, int *pbEof){
  VdbeSorter *pSorter = pCsr->pSorter;
  sqlite3 *db = pSorter->db;
  int rc;                         /* Return code */

  if( pSorter->aTree ){
    int iPrev = pSorter->aTree[1];/* Index of iterator to advance */
    int i;                        /* Index of aTree[] to recalculate */

    rc = vdbeSorterIterNext(&pSorter->aIter[iPrev]);
    for(i=(pSorter->nTree+iPrev)/2; rc==SQLITE_OK && i>0; i=i/2){
      rc = vdbeSorterDoCompare(pSorter, i);
    }

    *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
  }else{
    SorterRecord *pFree = pSorter->pRecord;
    pSorter->pRecord = pFree->pNext;
    pSorter->nRecord--;
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
  Mem *pVal,                      /* Value to compare to current sorter key */
  int *pRes                       /* OUT: Result of comparison */
){
  VdbeSorter *pSorter = pCsr->pSorter;
  void *pKey; int nKey;           /* Sorter key to compare pVal with */

  pKey = vdbeSorterRowkey(pSorter, &nKey);
  vdbeSorterCompare(pCsr, 1, pVal->z, pVal->n, pKey, nKey, pRes,
                    pSorter->pUnpacked);
  return SQLITE_OK;
}

#endif /* #ifndef SQLITE_OMIT_MERGE_SORT */







|





1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
  Mem *pVal,                      /* Value to compare to current sorter key */
  int *pRes                       /* OUT: Result of comparison */
){
  VdbeSorter *pSorter = pCsr->pSorter;
  void *pKey; int nKey;           /* Sorter key to compare pVal with */

  pKey = vdbeSorterRowkey(pSorter, &nKey);
  vdbeSorterCompare(pSorter, 1, pVal->z, pVal->n, pKey, nKey, pRes,
                    pSorter->pUnpacked);
  return SQLITE_OK;
}

#endif /* #ifndef SQLITE_OMIT_MERGE_SORT */