/ Check-in [821d1ac4]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:When sorting data for a CREATE INDEX statement in single-threaded mode, assume that keys are delivered to the sorter in primary key order. Also fix various comments that had fallen out of date.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | threads
Files: files | file ages | folders
SHA1:821d1ac4504243fa13b9e3c0d56361ad9fb80d78
User & Date: dan 2014-04-01 18:41:51
Context
2014-04-02
14:38
Change the name of the SorterThread object to "SortSubtask" to avoid confusion with the SQLiteThread object. check-in: 4ee2d910 user: drh tags: threads
14:16
Incorrect change to a comment. Closed-Leaf check-in: abbdb925 user: drh tags: mistake
2014-04-01
18:41
When sorting data for a CREATE INDEX statement in single-threaded mode, assume that keys are delivered to the sorter in primary key order. Also fix various comments that had fallen out of date. check-in: 821d1ac4 user: dan tags: threads
15:38
Even if compile time option SQLITE_MAX_WORKER_THREADS is set to one or greater, set the default number of worker threads to zero. Distribute data more evenly between threads in sqlite3VdbeSorterWrite() to improve performance when sorting large amounts of data. Add new test file sort2.test. check-in: 643c86a0 user: dan tags: threads
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/build.c.

2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
  }else{
    tnum = pIndex->tnum;
  }
  pKey = sqlite3KeyInfoOfIndex(pParse, pIndex);

  /* Open the sorter cursor if we are to use one. */
  iSorter = pParse->nTab++;
  sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, 0, (char*)
                    sqlite3KeyInfoRef(pKey), P4_KEYINFO);

  /* Open the table. Loop through all rows of the table, inserting index
  ** records into the sorter. */
  sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
  addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0); VdbeCoverage(v);
  regRecord = sqlite3GetTempReg(pParse);







|







2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
  }else{
    tnum = pIndex->tnum;
  }
  pKey = sqlite3KeyInfoOfIndex(pParse, pIndex);

  /* Open the sorter cursor if we are to use one. */
  iSorter = pParse->nTab++;
  sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, pIndex->nKeyCol, (char*)
                    sqlite3KeyInfoRef(pKey), P4_KEYINFO);

  /* Open the table. Loop through all rows of the table, inserting index
  ** records into the sorter. */
  sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
  addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0); VdbeCoverage(v);
  regRecord = sqlite3GetTempReg(pParse);

Changes to src/vdbe.c.

3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397




3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
      pCx->isTable = 1;
    }
  }
  pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED);
  break;
}

/* Opcode: SorterOpen P1 P2 * P4 *
**
** This opcode works like OP_OpenEphemeral except that it opens
** a transient index that is specifically designed to sort large
** tables using an external merge-sort algorithm.




*/
case OP_SorterOpen: {
  VdbeCursor *pCx;

  assert( pOp->p1>=0 );
  assert( pOp->p2>=0 );
  pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1);
  if( pCx==0 ) goto no_mem;
  pCx->pKeyInfo = pOp->p4.pKeyInfo;
  assert( pCx->pKeyInfo->db==db );
  assert( pCx->pKeyInfo->enc==ENC(db) );
  rc = sqlite3VdbeSorterInit(db, pCx);
  break;
}

/* Opcode: SequenceTest P1 P2 * * *
** Synopsis: if( cursor[P1].ctr++ ) pc = P2
**
** P1 is a sorter cursor. If the sequence counter is currently zero, jump







|




>
>
>
>











|







3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
      pCx->isTable = 1;
    }
  }
  pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED);
  break;
}

/* Opcode: SorterOpen P1 P2 P3 P4 *
**
** This opcode works like OP_OpenEphemeral except that it opens
** a transient index that is specifically designed to sort large
** tables using an external merge-sort algorithm.
**
** If argument P3 is non-zero, then it indicates that the sorter may
** assume that a stable sort considering the first P3 fields of each
** key is sufficient to produce the required results.
*/
case OP_SorterOpen: {
  VdbeCursor *pCx;

  assert( pOp->p1>=0 );
  assert( pOp->p2>=0 );
  pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1);
  if( pCx==0 ) goto no_mem;
  pCx->pKeyInfo = pOp->p4.pKeyInfo;
  assert( pCx->pKeyInfo->db==db );
  assert( pCx->pKeyInfo->enc==ENC(db) );
  rc = sqlite3VdbeSorterInit(db, pOp->p3, pCx);
  break;
}

/* Opcode: SequenceTest P1 P2 * * *
** Synopsis: if( cursor[P1].ctr++ ) pc = P2
**
** P1 is a sorter cursor. If the sequence counter is currently zero, jump

Changes to src/vdbeInt.h.

433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
const char *sqlite3OpcodeName(int);
int sqlite3VdbeMemGrow(Mem *pMem, int n, int preserve);
int sqlite3VdbeCloseStatement(Vdbe *, int);
void sqlite3VdbeFrameDelete(VdbeFrame*);
int sqlite3VdbeFrameRestore(VdbeFrame *);
int sqlite3VdbeTransferError(Vdbe *p);

int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *);
void sqlite3VdbeSorterReset(sqlite3 *, VdbeSorter *);
void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *);
int sqlite3VdbeSorterRowkey(const VdbeCursor *, Mem *);
int sqlite3VdbeSorterNext(sqlite3 *, const VdbeCursor *, int *);
int sqlite3VdbeSorterRewind(sqlite3 *, const VdbeCursor *, int *);
int sqlite3VdbeSorterWrite(sqlite3 *, const VdbeCursor *, Mem *);
int sqlite3VdbeSorterCompare(const VdbeCursor *, Mem *, int, int *);







|







433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
const char *sqlite3OpcodeName(int);
int sqlite3VdbeMemGrow(Mem *pMem, int n, int preserve);
int sqlite3VdbeCloseStatement(Vdbe *, int);
void sqlite3VdbeFrameDelete(VdbeFrame*);
int sqlite3VdbeFrameRestore(VdbeFrame *);
int sqlite3VdbeTransferError(Vdbe *p);

int sqlite3VdbeSorterInit(sqlite3 *, int, VdbeCursor *);
void sqlite3VdbeSorterReset(sqlite3 *, VdbeSorter *);
void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *);
int sqlite3VdbeSorterRowkey(const VdbeCursor *, Mem *);
int sqlite3VdbeSorterNext(sqlite3 *, const VdbeCursor *, int *);
int sqlite3VdbeSorterRewind(sqlite3 *, const VdbeCursor *, int *);
int sqlite3VdbeSorterWrite(sqlite3 *, const VdbeCursor *, Mem *);
int sqlite3VdbeSorterCompare(const VdbeCursor *, Mem *, int, int *);

Changes to src/vdbesort.c.

485
486
487
488
489
490
491
492
493
494
495
496


497
498
499
500
501
502
503


504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
...
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
...
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
...
604
605
606
607
608
609
610

611
612
613
614
615
616
617
...
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
....
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
....
1591
1592
1593
1594
1595
1596
1597



1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611


1612
1613



1614
1615




1616
1617





  }
  return rc;
}


/*
** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, 
** size nKey2 bytes).  Argument pKeyInfo supplies the collation functions
** used by the comparison. If an error occurs, return an SQLite error code.
** Otherwise, return SQLITE_OK and set *pRes to a negative, zero or positive
** value, depending on whether key1 is smaller, equal to or larger than key2.
**


** If the bOmitRowid argument is non-zero, assume both keys end in a rowid
** field. For the purposes of the comparison, ignore it. Also, if bOmitRowid
** is true and key1 contains even a single NULL value, it is considered to
** be less than key2. Even if key2 also contains NULL values.
**
** If pKey2 is passed a NULL pointer, then it is assumed that the pCsr->aSpace
** has been allocated and contains an unpacked record that is used as key2.


*/
static void vdbeSorterCompare(
  SorterThread *pThread,          /* Thread context (for pKeyInfo) */
  int nIgnore,                    /* Ignore the last nIgnore fields */
  const void *pKey1, int nKey1,   /* Left side of comparison */
  const void *pKey2, int nKey2,   /* Right side of comparison */
  int *pRes                       /* OUT: Result of comparison */
){
  KeyInfo *pKeyInfo = pThread->pKeyInfo;
  UnpackedRecord *r2 = pThread->pUnpacked;
  int i;

  if( pKey2 ){
    sqlite3VdbeRecordUnpack(pKeyInfo, nKey2, pKey2, r2);
  }

  if( nIgnore ){
    r2->nField = pKeyInfo->nField - nIgnore;
    assert( r2->nField>0 );
    for(i=0; i<r2->nField; i++){
      if( r2->aMem[i].flags & MEM_Null ){
        *pRes = -1;
        return;
      }
    }
    assert( r2->default_rc==0 );
  }

  *pRes = sqlite3VdbeRecordCompare(nKey1, pKey1, r2, 0);
}

/*
** This function is called to compare two iterator keys when merging 
** multiple b-tree segments. Parameter iOut is the index of the aTree[] 
** value to recalculate.
*/
................................................................................
  if( p1->pFile==0 ){
    iRes = i2;
  }else if( p2->pFile==0 ){
    iRes = i1;
  }else{
    int res;
    assert( pThread->pUnpacked!=0 );  /* allocated in vdbeSorterThreadMain() */
    vdbeSorterCompare(
        pThread, 0, p1->aKey, p1->nKey, p2->aKey, p2->nKey, &res
    );
    if( res<=0 ){
      iRes = i1;
    }else{
      iRes = i2;
    }
  }
................................................................................
  pMerger->aTree[iOut] = iRes;
  return SQLITE_OK;
}

/*
** Initialize the temporary index cursor just opened as a sorter cursor.
*/
int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){
  int pgsz;                       /* Page size of main database */
  int i;                          /* Used to iterate through aThread[] */
  int mxCache;                    /* Cache size */
  VdbeSorter *pSorter;            /* The new sorter */
  KeyInfo *pKeyInfo;              /* Copy of pCsr->pKeyInfo with db==0 */
  int szKeyInfo;                  /* Size of pCsr->pKeyInfo in bytes */
  int sz;                         /* Size of pSorter in bytes */
................................................................................
  pCsr->pSorter = pSorter;
  if( pSorter==0 ){
    rc = SQLITE_NOMEM;
  }else{
    pKeyInfo = (KeyInfo*)((u8*)pSorter + sz);
    memcpy(pKeyInfo, pCsr->pKeyInfo, szKeyInfo);
    pKeyInfo->db = 0;

    pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);

    pSorter->nThread = nWorker + 1;
    for(i=0; i<pSorter->nThread; i++){
      SorterThread *pThread = &pSorter->aThread[i];
      pThread->pKeyInfo = pKeyInfo;
      pThread->pVfs = db->pVfs;
................................................................................
){
  SorterRecord *pFinal = 0;
  SorterRecord **pp = &pFinal;
  void *pVal2 = p2 ? SRVAL(p2) : 0;

  while( p1 && p2 ){
    int res;
    vdbeSorterCompare(pThread, 0, SRVAL(p1), p1->nVal, pVal2, p2->nVal, &res);
    if( res<=0 ){
      *pp = p1;
      pp = &p1->u.pNext;
      p1 = p1->u.pNext;
      pVal2 = 0;
    }else{
      *pp = p2;
................................................................................
      /* Compare pIter1 and pIter2. Store the result in variable iRes. */
      int iRes;
      if( pIter1->pFile==0 ){
        iRes = +1;
      }else if( pIter2->pFile==0 ){
        iRes = -1;
      }else{
        vdbeSorterCompare(pThread, 0,
            pIter1->aKey, pIter1->nKey, pKey2, pIter2->nKey, &iRes
        );
      }

      /* If pIter1 contained the smaller value, set aTree[i] to its index.
      ** Then set pIter2 to the next iterator to compare to pIter1. In this
      ** case there is no cache of pIter2 in pThread->pUnpacked, so set
      ** pKey2 to point to the record belonging to pIter2.
................................................................................
  return SQLITE_OK;
}

/*
** Compare the key in memory cell pVal with the key that the sorter cursor
** passed as the first argument currently points to. For the purposes of
** the comparison, ignore the rowid field at the end of each record.



**
** If an error occurs, return an SQLite error code (i.e. SQLITE_NOMEM).
** Otherwise, set *pRes to a negative, zero or positive value if the
** key in pVal is smaller than, equal to or larger than the current sorter
** key.
*/
int sqlite3VdbeSorterCompare(
  const VdbeCursor *pCsr,         /* Sorter cursor */
  Mem *pVal,                      /* Value to compare to current sorter key */
  int nIgnore,                    /* Ignore this many fields at the end */
  int *pRes                       /* OUT: Result of comparison */
){
  VdbeSorter *pSorter = pCsr->pSorter;
  SorterThread *pMain = &pSorter->aThread[0];


  void *pKey; int nKey;           /* Sorter key to compare pVal with */




  pKey = vdbeSorterRowkey(pSorter, &nKey);
  vdbeSorterCompare(pMain, nIgnore, pVal->z, pVal->n, pKey, nKey, pRes);




  return SQLITE_OK;
}












|
|
<
<

>
>
|
|
<
<

<
<
>
>

|

<

|
<

<

<
<

|

<
<
<
<
<
<
<
<
<
<
<
<
<
|







 







|
|







 







|







 







>







 







|







 







|
|







 







>
>
>













|
>
>


>
>
>

<
>
>
>
>
|
|
>
>
>
>
>
485
486
487
488
489
490
491
492
493


494
495
496
497
498


499


500
501
502
503
504

505
506

507

508


509
510
511













512
513
514
515
516
517
518
519
...
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
...
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
...
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
...
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
....
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
....
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603

1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
  }
  return rc;
}


/*
** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, 
** size nKey2 bytes). Use (pThread->pKeyInfo) for the collation sequences
** used by the comparison. Return the result of the comparison.


**
** Before returning, object (pThread->pUnpacked) is populated with the
** unpacked version of key2. Or, if pKey2 is passed a NULL pointer, then it 
** is assumed that the (pThread->pUnpacked) structure already contains the 
** unpacked key to use as key2.


**


** If an OOM error is encountered, (pThread->pUnpacked->error_rc) is set
** to SQLITE_NOMEM.
*/
static int vdbeSorterCompare(
  SorterThread *pThread,          /* Thread context (for pKeyInfo) */

  const void *pKey1, int nKey1,   /* Left side of comparison */
  const void *pKey2, int nKey2    /* Right side of comparison */

){

  UnpackedRecord *r2 = pThread->pUnpacked;


  if( pKey2 ){
    sqlite3VdbeRecordUnpack(pThread->pKeyInfo, nKey2, pKey2, r2);
  }













  return sqlite3VdbeRecordCompare(nKey1, pKey1, r2, 0);
}

/*
** This function is called to compare two iterator keys when merging 
** multiple b-tree segments. Parameter iOut is the index of the aTree[] 
** value to recalculate.
*/
................................................................................
  if( p1->pFile==0 ){
    iRes = i2;
  }else if( p2->pFile==0 ){
    iRes = i1;
  }else{
    int res;
    assert( pThread->pUnpacked!=0 );  /* allocated in vdbeSorterThreadMain() */
    res = vdbeSorterCompare(
        pThread, p1->aKey, p1->nKey, p2->aKey, p2->nKey
    );
    if( res<=0 ){
      iRes = i1;
    }else{
      iRes = i2;
    }
  }
................................................................................
  pMerger->aTree[iOut] = iRes;
  return SQLITE_OK;
}

/*
** Initialize the temporary index cursor just opened as a sorter cursor.
*/
int sqlite3VdbeSorterInit(sqlite3 *db, int nField, VdbeCursor *pCsr){
  int pgsz;                       /* Page size of main database */
  int i;                          /* Used to iterate through aThread[] */
  int mxCache;                    /* Cache size */
  VdbeSorter *pSorter;            /* The new sorter */
  KeyInfo *pKeyInfo;              /* Copy of pCsr->pKeyInfo with db==0 */
  int szKeyInfo;                  /* Size of pCsr->pKeyInfo in bytes */
  int sz;                         /* Size of pSorter in bytes */
................................................................................
  pCsr->pSorter = pSorter;
  if( pSorter==0 ){
    rc = SQLITE_NOMEM;
  }else{
    pKeyInfo = (KeyInfo*)((u8*)pSorter + sz);
    memcpy(pKeyInfo, pCsr->pKeyInfo, szKeyInfo);
    pKeyInfo->db = 0;
    if( nField && nWorker==0 ) pKeyInfo->nField = nField;
    pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);

    pSorter->nThread = nWorker + 1;
    for(i=0; i<pSorter->nThread; i++){
      SorterThread *pThread = &pSorter->aThread[i];
      pThread->pKeyInfo = pKeyInfo;
      pThread->pVfs = db->pVfs;
................................................................................
){
  SorterRecord *pFinal = 0;
  SorterRecord **pp = &pFinal;
  void *pVal2 = p2 ? SRVAL(p2) : 0;

  while( p1 && p2 ){
    int res;
    res = vdbeSorterCompare(pThread, SRVAL(p1), p1->nVal, pVal2, p2->nVal);
    if( res<=0 ){
      *pp = p1;
      pp = &p1->u.pNext;
      p1 = p1->u.pNext;
      pVal2 = 0;
    }else{
      *pp = p2;
................................................................................
      /* Compare pIter1 and pIter2. Store the result in variable iRes. */
      int iRes;
      if( pIter1->pFile==0 ){
        iRes = +1;
      }else if( pIter2->pFile==0 ){
        iRes = -1;
      }else{
        iRes = vdbeSorterCompare(pThread, 
            pIter1->aKey, pIter1->nKey, pKey2, pIter2->nKey
        );
      }

      /* If pIter1 contained the smaller value, set aTree[i] to its index.
      ** Then set pIter2 to the next iterator to compare to pIter1. In this
      ** case there is no cache of pIter2 in pThread->pUnpacked, so set
      ** pKey2 to point to the record belonging to pIter2.
................................................................................
  return SQLITE_OK;
}

/*
** Compare the key in memory cell pVal with the key that the sorter cursor
** passed as the first argument currently points to. For the purposes of
** the comparison, ignore the rowid field at the end of each record.
**
** If the sorter cursor key contains any NULL values, consider it to be
** less than pVal. Evn if pVal also contains NULL values.
**
** If an error occurs, return an SQLite error code (i.e. SQLITE_NOMEM).
** Otherwise, set *pRes to a negative, zero or positive value if the
** key in pVal is smaller than, equal to or larger than the current sorter
** key.
*/
int sqlite3VdbeSorterCompare(
  const VdbeCursor *pCsr,         /* Sorter cursor */
  Mem *pVal,                      /* Value to compare to current sorter key */
  int nIgnore,                    /* Ignore this many fields at the end */
  int *pRes                       /* OUT: Result of comparison */
){
  VdbeSorter *pSorter = pCsr->pSorter;
  UnpackedRecord *r2 = pSorter->aThread[0].pUnpacked;
  KeyInfo *pKeyInfo = pCsr->pKeyInfo;
  int i;
  void *pKey; int nKey;           /* Sorter key to compare pVal with */

  assert( r2->nField>=pKeyInfo->nField-nIgnore );
  r2->nField = pKeyInfo->nField-nIgnore;

  pKey = vdbeSorterRowkey(pSorter, &nKey);

  sqlite3VdbeRecordUnpack(pKeyInfo, nKey, pKey, r2);
  for(i=0; i<r2->nField; i++){
    if( r2->aMem[i].flags & MEM_Null ){
      *pRes = -1;
      return SQLITE_OK;
    }
  }

  *pRes = sqlite3VdbeRecordCompare(pVal->n, pVal->z, r2, 0);
  return SQLITE_OK;
}

Changes to test/sort2.test.

28
29
30
31
32
33
34














35
36
37
38
39
40
41
    SELECT x+1, randomblob(100) FROM r
    LIMIT 100000
  )
  SELECT count(x), length(y) FROM r GROUP BY (x%5)
} {
  20000 100 20000 100 20000 100 20000 100 20000 100
}















db close
sqlite3_shutdown
sqlite3_config_worker_threads 0
sqlite3_initialize
finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>







28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
    SELECT x+1, randomblob(100) FROM r
    LIMIT 100000
  )
  SELECT count(x), length(y) FROM r GROUP BY (x%5)
} {
  20000 100 20000 100 20000 100 20000 100 20000 100
}

do_execsql_test 2.1 {
  CREATE TABLE t1(a, b);
  WITH r(x,y) AS (
    SELECT 1, randomblob(100)
    UNION ALL
    SELECT x+1, randomblob(100) FROM r
    LIMIT 10000
  ) INSERT INTO t1 SELECT * FROM r;
}

do_execsql_test 2.2 {
  CREATE UNIQUE INDEX i1 ON t1(b, a);
}

db close
sqlite3_shutdown
sqlite3_config_worker_threads 0
sqlite3_initialize
finish_test