SQLite

Check-in [02610cd9b7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve performance in single-threaded mode by having the final merge pass keys directly to the VDBE, instead of going via a final PMA.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | threads-experimental
Files: files | file ages | folders
SHA1: 02610cd9b77caa2c181210056088beb3ad6ce30f
User & Date: dan 2014-04-14 18:41:21.894
Context
2014-04-14
19:23
Allow the sorter to begin returning data to the VDBE as soon as it is available, instead of waiting until all keys have been sorted. (check-in: cb0ab20c48 user: dan tags: threads)
18:41
Improve performance in single-threaded mode by having the final merge pass keys directly to the VDBE, instead of going via a final PMA. (Closed-Leaf check-in: 02610cd9b7 user: dan tags: threads-experimental)
08:45
Minor fixes so that builds with SQLITE_MAX_WORKER_THREADS=0 work. (check-in: e400bbbf26 user: dan tags: threads-experimental)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/vdbesort.c.
276
277
278
279
280
281
282
283
284
285

286
287
288
289
290
291
292
293
294
295


296
297
298
299
300
301
302
303
304
**   As records are added to the sorter by calls to sqlite3VdbeSorterWrite(),
**   this variable is updated so as to be set to the size on disk of the
**   largest record in the sorter.
*/
struct VdbeSorter {
  int mnPmaSize;                  /* Minimum PMA size, in bytes */
  int mxPmaSize;                  /* Maximum PMA size, in bytes.  0==no limit */
  int bUsePMA;                    /* True if one or more PMAs created */
  int bUseThreads;                /* True if one or more PMAs created */
  PmaReader *pReader;             /* Read data from here after Rewind() */

  int mxKeysize;                  /* Largest serialized key seen so far */
  UnpackedRecord *pUnpacked;      /* Used by VdbeSorterCompare() */
#if 0
  int nInMemory;                  /* Current size of pRecord list as PMA */
  SorterRecord *pRecord;          /* Head of in-memory record list */
  u8 *aMemory;                    /* Block of memory to alloc records from */
#endif
  SorterList list;
  int iMemory;                    /* Offset of first free byte in aMemory */
  int nMemory;                    /* Size of aMemory allocation in bytes */


  int iPrev;                      /* Previous thread used to flush PMA */
  int nTask;                      /* Size of aTask[] array */
  SortSubtask aTask[1];           /* One or more subtasks */
};

/*
** An instance of the following object is used to read records out of a
** PMA, in sorted order.  The next key to be read is cached in nKey/aKey.
** pFile==0 at EOF.







<
<

>


<
<
<
<
<
|
|
|
>
>
|
|







276
277
278
279
280
281
282


283
284
285
286





287
288
289
290
291
292
293
294
295
296
297
298
299
300
**   As records are added to the sorter by calls to sqlite3VdbeSorterWrite(),
**   this variable is updated so as to be set to the size on disk of the
**   largest record in the sorter.
*/
struct VdbeSorter {
  int mnPmaSize;                  /* Minimum PMA size, in bytes */
  int mxPmaSize;                  /* Maximum PMA size, in bytes.  0==no limit */


  PmaReader *pReader;             /* Read data from here after Rewind() */
  MergeEngine *pMerger;           /* Or here, if bUseThreads==0 */
  int mxKeysize;                  /* Largest serialized key seen so far */
  UnpackedRecord *pUnpacked;      /* Used by VdbeSorterCompare() */





  SorterList list;                /* List of in-memory records */
  int iMemory;                    /* Offset of free space in list.aMemory */
  int nMemory;                    /* Size of list.aMemory allocation in bytes */
  u8 bUsePMA;                     /* True if one or more PMAs created */
  u8 bUseThreads;                 /* True to use background threads */
  u8 iPrev;                       /* Previous thread used to flush PMA */
  u8 nTask;                       /* Size of aTask[] array */
  SortSubtask aTask[1];           /* One or more subtasks */
};

/*
** An instance of the following object is used to read records out of a
** PMA, in sorted order.  The next key to be read is cached in nKey/aKey.
** pFile==0 at EOF.
1001
1002
1003
1004
1005
1006
1007


1008
1009
1010
1011
1012
1013
1014
  int i;
  (void)vdbeSorterJoinAll(pSorter, SQLITE_OK);
  if( pSorter->pReader ){
    vdbePmaReaderClear(pSorter->pReader);
    sqlite3DbFree(db, pSorter->pReader);
    pSorter->pReader = 0;
  }


  for(i=0; i<pSorter->nTask; i++){
    SortSubtask *pTask = &pSorter->aTask[i];
    vdbeSortSubtaskCleanup(db, pTask);
  }
  if( pSorter->list.aMemory==0 ){
    vdbeSorterRecordFree(0, pSorter->list.pList);
  }







>
>







997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
  int i;
  (void)vdbeSorterJoinAll(pSorter, SQLITE_OK);
  if( pSorter->pReader ){
    vdbePmaReaderClear(pSorter->pReader);
    sqlite3DbFree(db, pSorter->pReader);
    pSorter->pReader = 0;
  }
  vdbeMergeEngineFree(pSorter->pMerger);
  pSorter->pMerger = 0;
  for(i=0; i<pSorter->nTask; i++){
    SortSubtask *pTask = &pSorter->aTask[i];
    vdbeSortSubtaskCleanup(db, pTask);
  }
  if( pSorter->list.aMemory==0 ){
    vdbeSorterRecordFree(0, pSorter->list.pList);
  }
1709
1710
1711
1712
1713
1714
1715


























1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
    pIncr->pTask->file2.iEof -= pIncr->mxSz;
  }
}

#define INCRINIT2_NORMAL 0
#define INCRINIT2_TASK   1
#define INCRINIT2_ROOT   2


























static int vdbeIncrInit2(PmaReader *pIter, int eMode){
  int rc = SQLITE_OK;
  IncrMerger *pIncr = pIter->pIncr;
  if( pIncr ){
    SortSubtask *pTask = pIncr->pTask;
    int i;
    MergeEngine *pMerger = pIncr->pMerger;

    for(i=0; rc==SQLITE_OK && i<pMerger->nTree; i++){
      IncrMerger *p;
      if( eMode==INCRINIT2_ROOT ){
        rc = vdbePmaReaderNext(&pMerger->aIter[i]);
      }else{
        rc = vdbeIncrInit2(&pMerger->aIter[i], INCRINIT2_NORMAL);
      }
    }

    /* Set up the required files for pIncr */
    if( rc==SQLITE_OK ){
      if( pIncr->bUseThread==0 ){
        if( pTask->file2.pFd==0 ){
          rc = vdbeSorterOpenTempFile(pTask->db->pVfs, &pTask->file2.pFd);
          assert( pTask->file2.iEof>0 );







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





<
<

<
<
<
<
<
|
<
<







1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744


1745





1746


1747
1748
1749
1750
1751
1752
1753
    pIncr->pTask->file2.iEof -= pIncr->mxSz;
  }
}

#define INCRINIT2_NORMAL 0
#define INCRINIT2_TASK   1
#define INCRINIT2_ROOT   2

static int vdbeIncrInit2(PmaReader *pIter, int eMode);

static int vdbeIncrInitMerger(
  SortSubtask *pTask, 
  MergeEngine *pMerger, 
  int eMode
){
  int i;
  int rc = SQLITE_OK;

  for(i=0; rc==SQLITE_OK && i<pMerger->nTree; i++){
    if( eMode==INCRINIT2_ROOT ){
      rc = vdbePmaReaderNext(&pMerger->aIter[i]);
    }else{
      rc = vdbeIncrInit2(&pMerger->aIter[i], INCRINIT2_NORMAL);
    }
  }

  for(i=pMerger->nTree-1; rc==SQLITE_OK && i>0; i--){
    rc = vdbeSorterDoCompare(pTask, pMerger, i);
  }

  return rc;
}

static int vdbeIncrInit2(PmaReader *pIter, int eMode){
  int rc = SQLITE_OK;
  IncrMerger *pIncr = pIter->pIncr;
  if( pIncr ){
    SortSubtask *pTask = pIncr->pTask;








    rc = vdbeIncrInitMerger(pTask, pIncr->pMerger, eMode);



    /* Set up the required files for pIncr */
    if( rc==SQLITE_OK ){
      if( pIncr->bUseThread==0 ){
        if( pTask->file2.pFd==0 ){
          rc = vdbeSorterOpenTempFile(pTask->db->pVfs, &pTask->file2.pFd);
          assert( pTask->file2.iEof>0 );
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
        rc = vdbeSorterOpenTempFile(pTask->db->pVfs, &pIncr->aFile[0].pFd);
        if( rc==SQLITE_OK ){
          rc = vdbeSorterOpenTempFile(pTask->db->pVfs, &pIncr->aFile[1].pFd);
        }
      }
    }

    for(i=pMerger->nTree-1; rc==SQLITE_OK && i>0; i--){
      rc = vdbeSorterDoCompare(pIncr->pTask, pMerger, i);
    }

    if( rc==SQLITE_OK && pIncr->bUseThread ){
      /* Use the current thread */
      assert( eMode==INCRINIT2_ROOT || eMode==INCRINIT2_TASK );
      rc = vdbeIncrPopulate(pIncr);
    }

    if( rc==SQLITE_OK && eMode!=INCRINIT2_TASK ){







<
<
<
<







1765
1766
1767
1768
1769
1770
1771




1772
1773
1774
1775
1776
1777
1778
        rc = vdbeSorterOpenTempFile(pTask->db->pVfs, &pIncr->aFile[0].pFd);
        if( rc==SQLITE_OK ){
          rc = vdbeSorterOpenTempFile(pTask->db->pVfs, &pIncr->aFile[1].pFd);
        }
      }
    }





    if( rc==SQLITE_OK && pIncr->bUseThread ){
      /* Use the current thread */
      assert( eMode==INCRINIT2_ROOT || eMode==INCRINIT2_TASK );
      rc = vdbeIncrPopulate(pIncr);
    }

    if( rc==SQLITE_OK && eMode!=INCRINIT2_TASK ){
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
  return rc;
}

/*
** Populate iterator *pIter so that it may be used to iterate through all 
** keys stored in all PMAs created by this sorter.
*/
static int vdbePmaReaderIncrInit(VdbeSorter *pSorter, PmaReader *pIter){
  SortSubtask *pTask0 = &pSorter->aTask[0];
  MergeEngine *pMain = 0;
  sqlite3 *db = pTask0->db;
  int rc = SQLITE_OK;
  int iTask;

  IncrBuilder *aMerge;







|







1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
  return rc;
}

/*
** Populate iterator *pIter so that it may be used to iterate through all 
** keys stored in all PMAs created by this sorter.
*/
static int vdbePmaReaderIncrInit(VdbeSorter *pSorter){
  SortSubtask *pTask0 = &pSorter->aTask[0];
  MergeEngine *pMain = 0;
  sqlite3 *db = pTask0->db;
  int rc = SQLITE_OK;
  int iTask;

  IncrBuilder *aMerge;
1939
1940
1941
1942
1943
1944
1945



1946
1947
1948
1949




1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977







1978
1979
1980
1981
1982
1983
1984
        if( pNew==0 ) rc = SQLITE_NOMEM;
      }
      memset(aMerge, 0, nMerge*sizeof(aMerge[0]));
    }
  }

  if( rc==SQLITE_OK ){



    SortSubtask *pLast = &pSorter->aTask[pSorter->nTask-1];

    rc = vdbeSortAllocUnpacked(pLast);
    if( rc==SQLITE_OK ){




      pIter->pIncr = vdbeIncrNew(pLast, pMain);
      if( pIter->pIncr==0 ){
        rc = SQLITE_NOMEM;
      }
#if SQLITE_MAX_WORKER_THREADS>0
      else{
        vdbeIncrSetThreads(pIter->pIncr, pSorter->bUseThreads);
        for(iTask=0; iTask<(pSorter->nTask-1); iTask++){
          IncrMerger *pIncr;
          if( (pIncr = pMain->aIter[iTask].pIncr) ){
            vdbeIncrSetThreads(pIncr, pSorter->bUseThreads);
            assert( pIncr->pTask!=pLast );
          }
        }
        if( pSorter->nTask>1 ){
          for(iTask=0; rc==SQLITE_OK && iTask<pSorter->nTask; iTask++){
            PmaReader *p = &pMain->aIter[iTask];
            assert( p->pIncr==0 || p->pIncr->pTask==&pSorter->aTask[iTask] );
            if( p->pIncr ){ rc = vdbeIncrBgInit2(p); }
          }
        }
      }
#endif
    }
  }
  if( rc==SQLITE_OK ){
    int eMode = (pSorter->nTask>1 ? INCRINIT2_ROOT : INCRINIT2_NORMAL);
    rc = vdbeIncrInit2(pIter, eMode);







  }

  sqlite3_free(aMerge);
  return rc;
}









>
>
>
|
<
|
|
>
>
>
>
|
|
|
|
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
|
<
|
|
|
>
>
>
>
>
>
>







1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960

1961
1962
1963
1964
1965
1966
1967
1968
1969
1970

1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987

1988

1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
        if( pNew==0 ) rc = SQLITE_NOMEM;
      }
      memset(aMerge, 0, nMerge*sizeof(aMerge[0]));
    }
  }

  if( rc==SQLITE_OK ){
#if SQLITE_MAX_WORKER_THREADS
    if( pSorter->bUseThreads ){
      PmaReader *pIter;
      SortSubtask *pLast = &pSorter->aTask[pSorter->nTask-1];

      rc = vdbeSortAllocUnpacked(pLast);
      if( rc==SQLITE_OK ){
        pIter = (PmaReader*)sqlite3DbMallocZero(db, sizeof(PmaReader));
        pSorter->pReader = pIter;
      }
      if( rc==SQLITE_OK ){
        pIter->pIncr = vdbeIncrNew(pLast, pMain);
        if( pIter->pIncr==0 ){
          rc = SQLITE_NOMEM;
        }

        else{
          vdbeIncrSetThreads(pIter->pIncr, pSorter->bUseThreads);
          for(iTask=0; iTask<(pSorter->nTask-1); iTask++){
            IncrMerger *pIncr;
            if( (pIncr = pMain->aIter[iTask].pIncr) ){
              vdbeIncrSetThreads(pIncr, pSorter->bUseThreads);
              assert( pIncr->pTask!=pLast );
            }
          }
          if( pSorter->nTask>1 ){
            for(iTask=0; rc==SQLITE_OK && iTask<pSorter->nTask; iTask++){
              PmaReader *p = &pMain->aIter[iTask];
              assert( p->pIncr==0 || p->pIncr->pTask==&pSorter->aTask[iTask] );
              if( p->pIncr ){ rc = vdbeIncrBgInit2(p); }
            }
          }
        }

      }

      if( rc==SQLITE_OK ){
        int eMode = (pSorter->nTask>1 ? INCRINIT2_ROOT : INCRINIT2_NORMAL);
        rc = vdbeIncrInit2(pIter, eMode);
      }
    }else
#endif
    {
      pSorter->pMerger = pMain;
      rc = vdbeIncrInitMerger(pTask0, pMain, INCRINIT2_NORMAL);
    }
  }

  sqlite3_free(aMerge);
  return rc;
}


2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041

2042




2043
2044



2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065



2066
2067
2068
2069
2070
2071
2072
2073
2074

  vdbeSorterRewindDebug(db, "rewind");

  /* Assuming no errors have occurred, set up a merger structure to 
  ** incrementally read and merge all remaining PMAs.  */
  assert( pSorter->pReader==0 );
  if( rc==SQLITE_OK ){
    PmaReader *pReader;
    pReader = (PmaReader*)sqlite3DbMallocZero(db, sizeof(PmaReader));
    pSorter->pReader = pReader;
    rc = vdbePmaReaderIncrInit(pSorter, pReader);
    assert( rc!=SQLITE_OK || pReader->pFile );
    *pbEof = 0;
  }

  vdbeSorterRewindDebug(db, "rewinddone");
  return rc;
}

/*
** Advance to the next element in the sorter.
*/
int sqlite3VdbeSorterNext(sqlite3 *db, const VdbeCursor *pCsr, int *pbEof){
  VdbeSorter *pSorter = pCsr->pSorter;
  int rc;                         /* Return code */


  if( pSorter->pReader ){




    rc = vdbePmaReaderNext(pSorter->pReader);
    *pbEof = (pSorter->pReader->pFile==0);



  }else{
    SorterRecord *pFree = pSorter->list.pList;
    pSorter->list.pList = pFree->u.pNext;
    pFree->u.pNext = 0;
    if( pSorter->list.aMemory==0 ) vdbeSorterRecordFree(db, pFree);
    *pbEof = !pSorter->list.pList;
    rc = SQLITE_OK;
  }
  return rc;
}

/*
** Return a pointer to a buffer owned by the sorter that contains the 
** current key.
*/
static void *vdbeSorterRowkey(
  const VdbeSorter *pSorter,      /* Sorter object */
  int *pnKey                      /* OUT: Size of current key in bytes */
){
  void *pKey;
  if( pSorter->pReader ){



    *pnKey = pSorter->pReader->nKey;
    pKey = pSorter->pReader->aKey;
  }else{
    *pnKey = pSorter->list.pList->nVal;
    pKey = SRVAL(pSorter->list.pList);
  }
  return pKey;
}








<
<
<
|
<














>
|
>
>
>
>
|
|
>
>
>




















|
>
>
>
|
|







2037
2038
2039
2040
2041
2042
2043



2044

2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102

  vdbeSorterRewindDebug(db, "rewind");

  /* Assuming no errors have occurred, set up a merger structure to 
  ** incrementally read and merge all remaining PMAs.  */
  assert( pSorter->pReader==0 );
  if( rc==SQLITE_OK ){



    rc = vdbePmaReaderIncrInit(pSorter);

    *pbEof = 0;
  }

  vdbeSorterRewindDebug(db, "rewinddone");
  return rc;
}

/*
** Advance to the next element in the sorter.
*/
int sqlite3VdbeSorterNext(sqlite3 *db, const VdbeCursor *pCsr, int *pbEof){
  VdbeSorter *pSorter = pCsr->pSorter;
  int rc;                         /* Return code */

  assert( pSorter->bUsePMA || (pSorter->pReader==0 && pSorter->pMerger==0) );
  if( pSorter->bUsePMA ){
    assert( pSorter->pReader==0 || pSorter->pMerger==0 );
    assert( pSorter->bUseThreads==0 || pSorter->pReader );
    assert( pSorter->bUseThreads==1 || pSorter->pMerger );
    if( pSorter->bUseThreads ){
      rc = vdbePmaReaderNext(pSorter->pReader);
      *pbEof = (pSorter->pReader->pFile==0);
    }else{
      rc = vdbeSorterNext(&pSorter->aTask[0], pSorter->pMerger, pbEof);
    }
  }else{
    SorterRecord *pFree = pSorter->list.pList;
    pSorter->list.pList = pFree->u.pNext;
    pFree->u.pNext = 0;
    if( pSorter->list.aMemory==0 ) vdbeSorterRecordFree(db, pFree);
    *pbEof = !pSorter->list.pList;
    rc = SQLITE_OK;
  }
  return rc;
}

/*
** Return a pointer to a buffer owned by the sorter that contains the 
** current key.
*/
static void *vdbeSorterRowkey(
  const VdbeSorter *pSorter,      /* Sorter object */
  int *pnKey                      /* OUT: Size of current key in bytes */
){
  void *pKey;
  if( pSorter->bUsePMA ){
    PmaReader *pReader = (pSorter->bUseThreads ?
        pSorter->pReader : &pSorter->pMerger->aIter[pSorter->pMerger->aTree[1]]
    );
    *pnKey = pReader->nKey;
    pKey = pReader->aKey;
  }else{
    *pnKey = pSorter->list.pList->nVal;
    pKey = SRVAL(pSorter->list.pList);
  }
  return pKey;
}