Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Instead of allocating a single large buffer at the beginning of each sort operation, start with a small buffer and extend it using realloc() as required. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | orderby-planning |
Files: | files | file ages | folders |
SHA1: |
81987c8ceb64f051528a6ca42673821d |
User & Date: | dan 2014-03-27 19:25:02.222 |
Context
2014-03-28
| ||
18:35 | Merge the latest changes from trunk. (check-in: 3047a25f1c user: drh tags: orderby-planning) | |
2014-03-27
| ||
19:25 | Instead of allocating a single large buffer at the beginning of each sort operation, start with a small buffer and extend it using realloc() as required. (check-in: 81987c8ceb user: dan tags: orderby-planning) | |
17:23 | Use xFetch() to access temporary files in vdbesort.c. Use a single large allocation instead of many small allocations when accumulating records in vdbesort.c. This is an interim commit - it allocates a buffer the size of the page-cache every time data is sorted. (check-in: f4ac1bf28c user: dan tags: orderby-planning) | |
Changes
Changes to src/vdbesort.c.
︙ | ︙ | |||
103 104 105 106 107 108 109 110 111 112 113 114 115 116 | VdbeSorterIter *aIter; /* Array of iterators to merge */ int *aTree; /* Current state of incremental merge */ sqlite3_file *pTemp1; /* PMA file 1 */ SorterRecord *pRecord; /* Head of in-memory record list */ UnpackedRecord *pUnpacked; /* Used to unpack keys */ u8* aMemory; /* Block to allocate records from */ int iMemory; /* Offset of free space in aMemory */ }; /* ** The following type is an iterator for a PMA. It caches the current key in ** variables nKey/aKey. If the iterator is at EOF, pFile==0. */ struct VdbeSorterIter { | > | 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 | VdbeSorterIter *aIter; /* Array of iterators to merge */ int *aTree; /* Current state of incremental merge */ sqlite3_file *pTemp1; /* PMA file 1 */ SorterRecord *pRecord; /* Head of in-memory record list */ UnpackedRecord *pUnpacked; /* Used to unpack keys */ u8* aMemory; /* Block to allocate records from */ int iMemory; /* Offset of free space in aMemory */ int nMemory; /* Current size of allocation at aMemory */ }; /* ** The following type is an iterator for a PMA. It caches the current key in ** variables nKey/aKey. If the iterator is at EOF, pFile==0. */ struct VdbeSorterIter { |
︙ | ︙ | |||
140 141 142 143 144 145 146 | int iBufEnd; /* Last byte of buffer to write */ i64 iWriteOff; /* Offset of start of buffer in file */ sqlite3_file *pFile; /* File to write to */ }; /* ** A structure to store a single record. All in-memory records are connected | | > > > > | > > > > > > > > > < > | > > > > > > > > > | 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 | int iBufEnd; /* Last byte of buffer to write */ i64 iWriteOff; /* Offset of start of buffer in file */ sqlite3_file *pFile; /* File to write to */ }; /* ** A structure to store a single record. All in-memory records are connected ** together into a linked list headed at VdbeSorter.pRecord. ** ** How the linked list is connected depends on how memory is being managed ** by this module. If using a separate allocation for each in-memory record ** (VdbeSorter.aMemory==0), then the list is always connected using the ** SorterRecord.u.pNext pointers. ** ** Or, if using the single large allocation method (VdbeSorter.aMemory!=0), ** then while records are being accumulated the list is linked using the ** SorterRecord.u.iNext offset. This is because the aMemory[] array may ** be sqlite3Realloc()ed while records are being accumulated. Once the VM ** has finished passing records to the sorter, or when the in-memory buffer ** is full, the list is sorted. As part of the sorting process, it is ** converted to use the SorterRecord.u.pNext pointers. See function ** vdbeSorterSort() for details. */ struct SorterRecord { int nVal; union { SorterRecord *pNext; /* Pointer to next record in list */ int iNext; /* Offset within aMemory of next record */ } u; }; /* Return a pointer to the buffer containing the record data for SorterRecord ** object p. Should be used as if: ** ** void *SRVAL(SorterRecord *p) { return (void*)&p[1]; } */ #define SRVAL(p) ((void*)((SorterRecord*)(p) + 1)) /* Minimum allowable value for the VdbeSorter.nWorking variable */ #define SORTER_MIN_WORKING 10 /* Maximum number of segments to merge in a single pass. */ #define SORTER_MAX_MERGE_COUNT 16 |
︙ | ︙ | |||
509 510 511 512 513 514 515 | if( !sqlite3TempInMemory(db) ){ pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt); pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz; mxCache = db->aDb[0].pSchema->cache_size; if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING; pSorter->mxPmaSize = mxCache * pgsz; | > > | > | > > | > | | 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 | if( !sqlite3TempInMemory(db) ){ pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt); pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz; mxCache = db->aDb[0].pSchema->cache_size; if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING; pSorter->mxPmaSize = mxCache * pgsz; /* If the application is using memsys3 or memsys5, use a separate ** allocation for each sort-key in memory. Otherwise, use a single big ** allocation at pSorter->aMemory for all sort-keys. */ if( sqlite3GlobalConfig.pHeap==0 ){ assert( pSorter->iMemory==0 ); pSorter->nMemory = pgsz; pSorter->aMemory = (u8*)sqlite3Malloc(pSorter->nMemory); if( !pSorter->aMemory ) return SQLITE_NOMEM; } } return SQLITE_OK; } /* ** Free the list of sorted records starting at pRecord. */ static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){ SorterRecord *p; SorterRecord *pNext; for(p=pRecord; p; p=pNext){ pNext = p->u.pNext; sqlite3DbFree(db, p); } } /* ** Reset a sorting cursor back to its original empty state. */ |
︙ | ︙ | |||
607 608 609 610 611 612 613 | const VdbeCursor *pCsr, /* For pKeyInfo */ SorterRecord *p1, /* First list to merge */ SorterRecord *p2, /* Second list to merge */ SorterRecord **ppOut /* OUT: Head of merged list */ ){ SorterRecord *pFinal = 0; SorterRecord **pp = &pFinal; | | | | | | | | | 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 | const VdbeCursor *pCsr, /* For pKeyInfo */ SorterRecord *p1, /* First list to merge */ SorterRecord *p2, /* Second list to merge */ SorterRecord **ppOut /* OUT: Head of merged list */ ){ SorterRecord *pFinal = 0; SorterRecord **pp = &pFinal; void *pVal2 = p2 ? SRVAL(p2) : 0; while( p1 && p2 ){ int res; vdbeSorterCompare(pCsr, 0, SRVAL(p1), p1->nVal, pVal2, p2->nVal, &res); if( res<=0 ){ *pp = p1; pp = &p1->u.pNext; p1 = p1->u.pNext; pVal2 = 0; }else{ *pp = p2; pp = &p2->u.pNext; p2 = p2->u.pNext; if( p2==0 ) break; pVal2 = SRVAL(p2); } } *pp = p1 ? p1 : p2; *ppOut = pFinal; } /* |
︙ | ︙ | |||
652 653 654 655 656 657 658 | aSlot = (SorterRecord **)sqlite3MallocZero(64 * sizeof(SorterRecord *)); if( !aSlot ){ return SQLITE_NOMEM; } p = pSorter->pRecord; while( p ){ | | > > > | > > > > > > > | 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 | aSlot = (SorterRecord **)sqlite3MallocZero(64 * sizeof(SorterRecord *)); if( !aSlot ){ return SQLITE_NOMEM; } p = pSorter->pRecord; while( p ){ SorterRecord *pNext; if( pSorter->aMemory ){ assert( p->u.iNext<pSorter->nMemory ); if( (u8*)p==pSorter->aMemory ){ pNext = 0; }else{ pNext = (SorterRecord*)&pSorter->aMemory[p->u.iNext]; } }else{ pNext = p->u.pNext; } p->u.pNext = 0; for(i=0; aSlot[i]; i++){ vdbeSorterMerge(pCsr, p, aSlot[i], &p); aSlot[i] = 0; } aSlot[i] = p; p = pNext; } |
︙ | ︙ | |||
831 832 833 834 835 836 837 | SorterRecord *p; SorterRecord *pNext = 0; fileWriterInit(db, pSorter->pTemp1, &writer, pSorter->iWriteOff); pSorter->nPMA++; fileWriterWriteVarint(&writer, pSorter->nInMemory); for(p=pSorter->pRecord; p; p=pNext){ | | | < < < | < | < < < < < < > | < < | < | < | < < < < < < < | > > | > > > | > > | | > > | > > | | | | | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 | SorterRecord *p; SorterRecord *pNext = 0; fileWriterInit(db, pSorter->pTemp1, &writer, pSorter->iWriteOff); pSorter->nPMA++; fileWriterWriteVarint(&writer, pSorter->nInMemory); for(p=pSorter->pRecord; p; p=pNext){ pNext = p->u.pNext; fileWriterWriteVarint(&writer, p->nVal); fileWriterWrite(&writer, SRVAL(p), p->nVal); if( pSorter->aMemory==0 ) sqlite3DbFree(db, p); } pSorter->pRecord = p; rc = fileWriterFinish(db, &writer, &pSorter->iWriteOff); } if( pSorter->aMemory ) pSorter->pRecord = 0; assert( pSorter->pRecord==0 || rc!=SQLITE_OK ); return rc; } /* ** Add a record to the sorter. */ int sqlite3VdbeSorterWrite( sqlite3 *db, /* Database handle */ const VdbeCursor *pCsr, /* Sorter cursor */ Mem *pVal /* Memory cell containing record */ ){ VdbeSorter *pSorter = pCsr->pSorter; int rc = SQLITE_OK; /* Return Code */ SorterRecord *pNew; /* New list element */ int bFlush; /* True to flush contents of memory to PMA */ int nReq; /* Bytes of memory required */ int nPMA; /* Bytes of PMA space required */ assert( pSorter ); /* Figure out whether or not the current contents of memory should be ** flushed to a PMA before continuing. If so, do so. ** ** If using the single large allocation mode (pSorter->aMemory!=0), then ** flush the contents of memory to a new PMA if (a) at least one value is ** already in memory and (b) the new value will not fit in memory. ** ** Or, if using separate allocations for each record, flush the contents ** of memory to a PMA if either of the following are true: ** ** * The total memory allocated for the in-memory list is greater ** than (page-size * cache-size), or ** ** * The total memory allocated for the in-memory list is greater ** than (page-size * 10) and sqlite3HeapNearlyFull() returns true. */ nReq = pVal->n + sizeof(SorterRecord); nPMA = pVal->n + sqlite3VarintLen(pVal->n); if( pSorter->aMemory ){ bFlush = pSorter->iMemory && (pSorter->iMemory+nReq) > pSorter->mxPmaSize; }else{ bFlush = ( (pSorter->nInMemory > pSorter->mxPmaSize) || (pSorter->nInMemory > pSorter->mnPmaSize && sqlite3HeapNearlyFull()) ); } if( bFlush ){ #ifdef SQLITE_DEBUG i64 nExpect = pSorter->iWriteOff + sqlite3VarintLen(pSorter->nInMemory) + pSorter->nInMemory; #endif rc = vdbeSorterListToPMA(db, pCsr); pSorter->nInMemory = 0; pSorter->iMemory = 0; assert( rc!=SQLITE_OK || (nExpect==pSorter->iWriteOff) ); assert( rc!=SQLITE_OK || pSorter->pRecord==0 ); } pSorter->nInMemory += nPMA; if( pSorter->aMemory ){ int nMin = pSorter->iMemory + nReq; if( nMin>pSorter->nMemory ){ u8 *aNew; int nNew = pSorter->nMemory * 2; while( nNew < nMin ) nNew = nNew*2; if( nNew > pSorter->mxPmaSize ) nNew = pSorter->mxPmaSize; if( nNew < nMin ) nNew = nMin; aNew = sqlite3Realloc(pSorter->aMemory, nNew); if( !aNew ) return SQLITE_NOMEM; pSorter->pRecord = aNew + ((u8*)pSorter->pRecord - pSorter->aMemory); pSorter->aMemory = aNew; pSorter->nMemory = nNew; } pNew = (SorterRecord*)&pSorter->aMemory[pSorter->iMemory]; pSorter->iMemory += ROUND8(nReq); pNew->u.iNext = (u8*)(pSorter->pRecord) - pSorter->aMemory; }else{ pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n+sizeof(SorterRecord)); if( pNew==0 ){ return SQLITE_NOMEM; } pNew->u.pNext = pSorter->pRecord; } memcpy(SRVAL(pNew), pVal->z, pVal->n); pNew->nVal = pVal->n; pSorter->pRecord = pNew; return rc; } /* ** Helper function for sqlite3VdbeSorterRewind(). */ |
︙ | ︙ | |||
1123 1124 1125 1126 1127 1128 1129 | pIter1 = &pSorter->aIter[ pSorter->aTree[i ^ 0x0001] ]; } } *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0); } }else{ SorterRecord *pFree = pSorter->pRecord; | | | | 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 | pIter1 = &pSorter->aIter[ pSorter->aTree[i ^ 0x0001] ]; } } *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0); } }else{ SorterRecord *pFree = pSorter->pRecord; pSorter->pRecord = pFree->u.pNext; pFree->u.pNext = 0; if( pSorter->aMemory==0 ){ vdbeSorterRecordFree(db, pFree); } *pbEof = !pSorter->pRecord; rc = SQLITE_OK; } return rc; |
︙ | ︙ | |||
1150 1151 1152 1153 1154 1155 1156 | if( pSorter->aTree ){ VdbeSorterIter *pIter; pIter = &pSorter->aIter[ pSorter->aTree[1] ]; *pnKey = pIter->nKey; pKey = pIter->aKey; }else{ *pnKey = pSorter->pRecord->nVal; | | | 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 | if( pSorter->aTree ){ VdbeSorterIter *pIter; pIter = &pSorter->aIter[ pSorter->aTree[1] ]; *pnKey = pIter->nKey; pKey = pIter->aKey; }else{ *pnKey = pSorter->pRecord->nVal; pKey = SRVAL(pSorter->pRecord); } return pKey; } /* ** Copy the current sorter key into the memory cell pOut. */ |
︙ | ︙ |