/ Check-in [63597097]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Experimental change: If SQLITE_PAGECACHE_BLOCKALLOC is defined, instead of allocating pages one at a time, allocate blocks of between 15 and 63 pages in a single allocation.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | pager-blockalloc
Files: files | file ages | folders
SHA1: 63597097eedf80080fab0c1978cfd66ecaaa79fa
User & Date: dan 2011-08-19 18:15:00
Context
2011-08-22
14:55
Modify test cases so that veryquick.test passes with PAGECACHE_BLOCKALLOC defined. check-in: c6100070 user: dan tags: pager-blockalloc
2011-08-19
18:15
Experimental change: If SQLITE_PAGECACHE_BLOCKALLOC is defined, instead of allocating pages one at a time, allocate blocks of between 15 and 63 pages in a single allocation. check-in: 63597097 user: dan tags: pager-blockalloc
14:54
When retrying a write() after an EINTR error on unix, be sure to also rerun the previous lseek(). Ticket [e59bdf6116036a] check-in: 21452f3a user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/pcache1.c.

    20     20   #include "sqliteInt.h"
    21     21   
    22     22   typedef struct PCache1 PCache1;
    23     23   typedef struct PgHdr1 PgHdr1;
    24     24   typedef struct PgFreeslot PgFreeslot;
    25     25   typedef struct PGroup PGroup;
    26     26   
           27  +typedef struct PGroupBlock PGroupBlock;
           28  +typedef struct PGroupBlockList PGroupBlockList;
           29  +
    27     30   /* Each page cache (or PCache) belongs to a PGroup.  A PGroup is a set 
    28     31   ** of one or more PCaches that are able to recycle each others unpinned
    29     32   ** pages when they are under memory pressure.  A PGroup is an instance of
    30     33   ** the following object.
    31     34   **
    32     35   ** This page cache implementation works in one of two modes:
    33     36   **
................................................................................
    49     52   struct PGroup {
    50     53     sqlite3_mutex *mutex;          /* MUTEX_STATIC_LRU or NULL */
    51     54     int nMaxPage;                  /* Sum of nMax for purgeable caches */
    52     55     int nMinPage;                  /* Sum of nMin for purgeable caches */
    53     56     int mxPinned;                  /* nMaxpage + 10 - nMinPage */
    54     57     int nCurrentPage;              /* Number of purgeable pages allocated */
    55     58     PgHdr1 *pLruHead, *pLruTail;   /* LRU list of unpinned pages */
           59  +  PGroupBlockList *pBlockList;   /* List of block-lists for this group */
           60  +};
           61  +
           62  +/*
           63  +** If SQLITE_PAGECACHE_BLOCKALLOC is defined when the library is built,
           64  +** each PGroup structure has a linked list of the the following starting
           65  +** at PGroup.pBlockList. There is one entry for each distinct page-size 
           66  +** currently used by members of the PGroup (i.e. 1024 bytes, 4096 bytes
           67  +** etc.). Variable PGroupBlockList.nByte is set to the actual allocation
           68  +** size requested by each pcache, which is the database page-size plus
           69  +** the various header structures used by the pcache, pager and btree layers.
           70  +** Usually around (pgsz+200) bytes.
           71  +**
           72  +** This size (pgsz+200) bytes is not allocated efficiently by some
           73  +** implementations of malloc. In particular, some implementations are only
           74  +** able to allocate blocks of memory chunks of 2^N bytes, where N is some
           75  +** integer value. Since the page-size is a power of 2, this means we
           76  +** end up wasting (pgsz-200) bytes in each allocation.
           77  +**
           78  +** If SQLITE_PAGECACHE_BLOCKALLOC is defined, the (pgsz+200) byte blocks
           79  +** are not allocated directly. Instead, blocks of roughly M*(pgsz+200) bytes 
           80  +** are requested from malloc allocator. After a block is returned,
           81  +** sqlite3MallocSize() is used to determine how many (pgsz+200) byte
           82  +** allocations can fit in the space returned by malloc(). This value may
           83  +** be more than M.
           84  +**
           85  +** The blocks are stored in a doubly-linked list. Variable PGroupBlock.nEntry
           86  +** contains the number of allocations that will fit in the aData[] space.
           87  +** nEntry is limited to the number of bits in bitmask mUsed. If a slot
           88  +** within aData is in use, the corresponding bit in mUsed is set. Thus
           89  +** when (mUsed+1==(1 << nEntry)) the block is completely full.
           90  +**
           91  +** Each time a slot within a block is freed, the block is moved to the start
           92  +** of the linked-list. And if a block becomes completely full, then it is
           93  +** moved to the end of the list. As a result, when searching for a free
           94  +** slot, only the first block in the list need be examined. If it is full,
           95  +** then it is guaranteed that all blocks are full.
           96  +*/
           97  +struct PGroupBlockList {
           98  +  int nByte;                     /* Size of each allocation in bytes */
           99  +  PGroupBlock *pFirst;           /* First PGroupBlock in list */
          100  +  PGroupBlock *pLast;            /* Last PGroupBlock in list */
          101  +  PGroupBlockList *pNext;        /* Next block-list attached to group */
          102  +};
          103  +
          104  +struct PGroupBlock {
          105  +  Bitmask mUsed;                 /* Mask of used slots */
          106  +  int nEntry;                    /* Maximum number of allocations in aData[] */
          107  +  u8 *aData;                     /* Pointer to data block */
          108  +  PGroupBlock *pNext;            /* Next PGroupBlock in list */
          109  +  PGroupBlock *pPrev;            /* Previous PGroupBlock in list */
          110  +  PGroupBlockList *pList;        /* Owner list */
    56    111   };
          112  +
          113  +/* Minimum value for PGroupBlock.nEntry */
          114  +#define PAGECACHE_BLOCKALLOC_MINENTRY 15
    57    115   
    58    116   /* Each page cache is an instance of the following object.  Every
    59    117   ** open database file (including each in-memory database and each
    60    118   ** temporary or transient database) has a single page cache which
    61    119   ** is an instance of this object.
    62    120   **
    63    121   ** Pointers to structures of this type are cast and returned as 
................................................................................
   153    211   ** a pointer to a block of szPage bytes of data and the return value is
   154    212   ** a pointer to the associated PgHdr1 structure.
   155    213   **
   156    214   **   assert( PGHDR1_TO_PAGE(PAGE_TO_PGHDR1(pCache, X))==X );
   157    215   */
   158    216   #define PGHDR1_TO_PAGE(p)    (void*)(((char*)p) - p->pCache->szPage)
   159    217   #define PAGE_TO_PGHDR1(c, p) (PgHdr1*)(((char*)p) + c->szPage)
          218  +
          219  +/*
          220  +** Blocks used by the SQLITE_PAGECACHE_BLOCKALLOC blocks to store/retrieve 
          221  +** a PGroupBlock pointer based on a pointer to a page buffer. 
          222  +*/
          223  +#define PAGE_SET_BLOCKPTR(pCache, pPg, pBlock) \
          224  +  ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) = pBlock )
          225  +
          226  +#define PAGE_GET_BLOCKPTR(pCache, pPg) \
          227  +  ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) )
          228  +
   160    229   
   161    230   /*
   162    231   ** Macros to enter and leave the PCache LRU mutex.
   163    232   */
   164    233   #define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex)
   165    234   #define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex)
   166    235   
................................................................................
   278    347       sqlite3MemdebugSetType(p, MEMTYPE_HEAP);
   279    348       iSize = sqlite3MallocSize(p);
   280    349       sqlite3MemdebugSetType(p, MEMTYPE_PCACHE);
   281    350       return iSize;
   282    351     }
   283    352   }
   284    353   #endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
          354  +
          355  +/*
          356  +** The block pBlock belongs to list pList but is not currently linked in.
          357  +** Insert it into the start of the list.
          358  +*/
          359  +static void addBlockToList(PGroupBlockList *pList, PGroupBlock *pBlock){
          360  +  pBlock->pPrev = 0;
          361  +  pBlock->pNext = pList->pFirst;
          362  +  pList->pFirst = pBlock;
          363  +  if( pBlock->pNext ){
          364  +    pBlock->pNext->pPrev = pBlock;
          365  +  }else{
          366  +    assert( pList->pLast==0 );
          367  +    pList->pLast = pBlock;
          368  +  }
          369  +}
          370  +
          371  +/*
          372  +** If there are no blocks in the list headed by pList, remove pList
          373  +** from the pGroup->pBlockList list and free it with sqlite3_free().
          374  +*/
          375  +static void freeListIfEmpty(PGroup *pGroup, PGroupBlockList *pList){
          376  +  assert( sqlite3_mutex_held(pGroup->mutex) );
          377  +  if( pList->pFirst==0 ){
          378  +    PGroupBlockList **pp;
          379  +    for(pp=&pGroup->pBlockList; *pp!=pList; pp=&(*pp)->pNext);
          380  +    *pp = (*pp)->pNext;
          381  +    sqlite3_free(pList);
          382  +  }
          383  +}
   285    384   
   286    385   /*
   287    386   ** Allocate a new page object initially associated with cache pCache.
   288    387   */
   289    388   static PgHdr1 *pcache1AllocPage(PCache1 *pCache){
   290    389     int nByte = sizeof(PgHdr1) + pCache->szPage;
   291         -  void *pPg = pcache1Alloc(nByte);
          390  +  void *pPg = 0;
   292    391     PgHdr1 *p;
          392  +
          393  +#ifdef SQLITE_PAGECACHE_BLOCKALLOC
          394  +  PGroup *pGroup = pCache->pGroup;
          395  +  PGroupBlockList *pList;
          396  +  PGroupBlock *pBlock;
          397  +  int i;
          398  +
          399  +  nByte += sizeof(PGroupBlockList *);
          400  +  nByte = ROUND8(nByte);
          401  +
          402  +  do{
          403  +    for(pList=pGroup->pBlockList; pList; pList=pList->pNext){
          404  +      if( pList->nByte==nByte ) break;
          405  +    }
          406  +    if( pList==0 ){
          407  +      PGroupBlockList *pNew;
          408  +      pcache1LeaveMutex(pCache->pGroup);
          409  +      pNew = (PGroupBlockList *)sqlite3MallocZero(sizeof(PGroupBlockList));
          410  +      pcache1EnterMutex(pCache->pGroup);
          411  +      if( pNew==0 ){
          412  +        /* malloc() failure. Return early. */
          413  +        return 0;
          414  +      }
          415  +      for(pList=pGroup->pBlockList; pList; pList=pList->pNext){
          416  +        if( pList->nByte==nByte ) break;
          417  +      }
          418  +      if( pList ){
          419  +        sqlite3_free(pNew);
          420  +      }else{
          421  +        pNew->nByte = nByte;
          422  +        pNew->pNext = pGroup->pBlockList;
          423  +        pGroup->pBlockList = pNew;
          424  +        pList = pNew;
          425  +      }
          426  +    }
          427  +  }while( pList==0 );
          428  +
          429  +  pBlock = pList->pFirst;
          430  +  if( pBlock==0 || pBlock->mUsed==(((Bitmask)1<<pBlock->nEntry)-1) ){
          431  +    int sz;
          432  +
          433  +    /* Allocate a new block. Try to allocate enough space for the PGroupBlock
          434  +    ** structure and MINENTRY allocations of nByte bytes each. If the 
          435  +    ** allocator returns more memory than requested, then more than MINENTRY 
          436  +    ** allocations may fit in it. */
          437  +    pcache1LeaveMutex(pCache->pGroup);
          438  +    sz = sizeof(PGroupBlock) + PAGECACHE_BLOCKALLOC_MINENTRY * nByte;
          439  +    pBlock = (PGroupBlock *)sqlite3Malloc(sz);
          440  +    pcache1EnterMutex(pCache->pGroup);
          441  +
          442  +    if( !pBlock ){
          443  +      freeListIfEmpty(pGroup, pList);
          444  +      return 0;
          445  +    }
          446  +    pBlock->nEntry = (sqlite3MallocSize(pBlock) - sizeof(PGroupBlock)) / nByte;
          447  +    if( pBlock->nEntry>=BMS ){
          448  +      pBlock->nEntry = BMS-1;
          449  +    }
          450  +    pBlock->pList = pList;
          451  +    pBlock->mUsed = 0;
          452  +    pBlock->aData = (u8 *)&pBlock[1];
          453  +    addBlockToList(pList, pBlock);
          454  +
          455  +    sz = sqlite3MallocSize(pBlock);
          456  +    sqlite3_mutex_enter(pcache1.mutex);
          457  +    sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz);
          458  +    sqlite3_mutex_leave(pcache1.mutex);
          459  +  }
          460  +
          461  +  for(i=0; pPg==0 && ALWAYS(i<pBlock->nEntry); i++){
          462  +    if( 0==(pBlock->mUsed & ((Bitmask)1<<i)) ){
          463  +      pBlock->mUsed |= ((Bitmask)1<<i);
          464  +      pPg = (void *)&pBlock->aData[pList->nByte * i];
          465  +    }
          466  +  }
          467  +  assert( pPg );
          468  +  PAGE_SET_BLOCKPTR(pCache, pPg, pBlock);
          469  +
          470  +  /* If the block is now full, shift it to the end of the list */
          471  +  if( pBlock->mUsed==(((Bitmask)1<<pBlock->nEntry)-1) && pList->pLast!=pBlock ){
          472  +    assert( pList->pFirst==pBlock );
          473  +    assert( pBlock->pPrev==0 );
          474  +    assert( pList->pLast->pNext==0 );
          475  +    pList->pFirst = pBlock->pNext;
          476  +    pList->pFirst->pPrev = 0;
          477  +    pBlock->pPrev = pList->pLast;
          478  +    pBlock->pNext = 0;
          479  +    pList->pLast->pNext = pBlock;
          480  +    pList->pLast = pBlock;
          481  +  }
          482  +#else
          483  +  /* The group mutex must be released before pcache1Alloc() is called. This
          484  +  ** is because it may call sqlite3_release_memory(), which assumes that 
          485  +  ** this mutex is not held. */
          486  +  assert( sqlite3_mutex_held(pCache->pGroup->mutex) );
          487  +  pcache1LeaveMutex(pCache->pGroup);
          488  +  pPg = pcache1Alloc(nByte);
          489  +  pcache1EnterMutex(pCache->pGroup);
          490  +#endif
          491  +
   293    492     if( pPg ){
   294    493       p = PAGE_TO_PGHDR1(pCache, pPg);
   295    494       if( pCache->bPurgeable ){
   296    495         pCache->pGroup->nCurrentPage++;
   297    496       }
   298    497     }else{
   299    498       p = 0;
................................................................................
   307    506   ** The pointer is allowed to be NULL, which is prudent.  But it turns out
   308    507   ** that the current implementation happens to never call this routine
   309    508   ** with a NULL pointer, so we mark the NULL test with ALWAYS().
   310    509   */
   311    510   static void pcache1FreePage(PgHdr1 *p){
   312    511     if( ALWAYS(p) ){
   313    512       PCache1 *pCache = p->pCache;
          513  +    void *pPg = PGHDR1_TO_PAGE(p);
          514  +
          515  +#ifdef SQLITE_PAGECACHE_BLOCKALLOC
          516  +    PGroupBlock *pBlock = PAGE_GET_BLOCKPTR(pCache, pPg);
          517  +    PGroupBlockList *pList = pBlock->pList;
          518  +    int i = ((u8 *)pPg - pBlock->aData) / pList->nByte;
          519  +
          520  +    assert( pPg==(void *)&pBlock->aData[i*pList->nByte] );
          521  +    assert( pBlock->mUsed & ((Bitmask)1<<i) );
          522  +    pBlock->mUsed &= ~((Bitmask)1<<i);
          523  +
          524  +    /* Remove the block from the list. If it is completely empty, free it.
          525  +    ** Or if it is not completely empty, re-insert it at the start of the
          526  +    ** list. */
          527  +    if( pList->pFirst==pBlock ){
          528  +      pList->pFirst = pBlock->pNext;
          529  +      if( pList->pFirst ) pList->pFirst->pPrev = 0;
          530  +    }else{
          531  +      pBlock->pPrev->pNext = pBlock->pNext;
          532  +    }
          533  +    if( pList->pLast==pBlock ){
          534  +      pList->pLast = pBlock->pPrev;
          535  +      if( pList->pLast ) pList->pLast->pNext = 0;
          536  +    }else{
          537  +      pBlock->pNext->pPrev = pBlock->pPrev;
          538  +    }
          539  +
          540  +    if( pBlock->mUsed==0 ){
          541  +      PGroup *pGroup = p->pCache->pGroup;
          542  +
          543  +      int sz = sqlite3MallocSize(pBlock);
          544  +      sqlite3_mutex_enter(pcache1.mutex);
          545  +      sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, -sz);
          546  +      sqlite3_mutex_leave(pcache1.mutex);
          547  +      freeListIfEmpty(pGroup, pList);
          548  +      sqlite3_free(pBlock);
          549  +    }else{
          550  +      addBlockToList(pList, pBlock);
          551  +    }
          552  +#else
          553  +    assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) );
          554  +    pcache1Free(pPg);
          555  +#endif
   314    556       if( pCache->bPurgeable ){
   315    557         pCache->pGroup->nCurrentPage--;
   316    558       }
   317         -    pcache1Free(PGHDR1_TO_PAGE(p));
   318    559     }
   319    560   }
   320    561   
   321    562   /*
   322    563   ** Malloc function used by SQLite to obtain space from the buffer configured
   323    564   ** using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no such buffer
   324    565   ** exists, this function falls back to sqlite3Malloc().
................................................................................
   748    989     }
   749    990   
   750    991     /* Step 5. If a usable page buffer has still not been found, 
   751    992     ** attempt to allocate a new one. 
   752    993     */
   753    994     if( !pPage ){
   754    995       if( createFlag==1 ) sqlite3BeginBenignMalloc();
   755         -    pcache1LeaveMutex(pGroup);
   756    996       pPage = pcache1AllocPage(pCache);
   757         -    pcache1EnterMutex(pGroup);
   758    997       if( createFlag==1 ) sqlite3EndBenignMalloc();
   759    998     }
   760    999   
   761   1000     if( pPage ){
   762   1001       unsigned int h = iKey % pCache->nHash;
   763   1002       pCache->nPage++;
   764   1003       pPage->iKey = iKey;