/ Check-in [63597097]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Experimental change: If SQLITE_PAGECACHE_BLOCKALLOC is defined, instead of allocating pages one at a time, allocate blocks of between 15 and 63 pages in a single allocation.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | pager-blockalloc
Files: files | file ages | folders
SHA1:63597097eedf80080fab0c1978cfd66ecaaa79fa
User & Date: dan 2011-08-19 18:15:00
Context
2011-08-22
14:55
Modify test cases so that veryquick.test passes with PAGECACHE_BLOCKALLOC defined. check-in: c6100070 user: dan tags: pager-blockalloc
2011-08-19
18:15
Experimental change: If SQLITE_PAGECACHE_BLOCKALLOC is defined, instead of allocating pages one at a time, allocate blocks of between 15 and 63 pages in a single allocation. check-in: 63597097 user: dan tags: pager-blockalloc
14:54
When retrying a write() after an EINTR error on unix, be sure to also rerun the previous lseek(). Ticket [e59bdf6116036a] check-in: 21452f3a user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/pcache1.c.

20
21
22
23
24
25
26



27
28
29
30
31
32
33
..
49
50
51
52
53
54
55

56






















































57
58
59
60
61
62
63
...
153
154
155
156
157
158
159











160
161
162
163
164
165
166
...
278
279
280
281
282
283
284






























285
286
287
288
289
290
291
292




































































































293
294
295
296
297
298
299
...
307
308
309
310
311
312
313











































314
315
316
317
318
319
320
321
322
323
324
...
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
#include "sqliteInt.h"

typedef struct PCache1 PCache1;
typedef struct PgHdr1 PgHdr1;
typedef struct PgFreeslot PgFreeslot;
typedef struct PGroup PGroup;




/* Each page cache (or PCache) belongs to a PGroup.  A PGroup is a set 
** of one or more PCaches that are able to recycle each others unpinned
** pages when they are under memory pressure.  A PGroup is an instance of
** the following object.
**
** This page cache implementation works in one of two modes:
**
................................................................................
struct PGroup {
  sqlite3_mutex *mutex;          /* MUTEX_STATIC_LRU or NULL */
  int nMaxPage;                  /* Sum of nMax for purgeable caches */
  int nMinPage;                  /* Sum of nMin for purgeable caches */
  int mxPinned;                  /* nMaxpage + 10 - nMinPage */
  int nCurrentPage;              /* Number of purgeable pages allocated */
  PgHdr1 *pLruHead, *pLruTail;   /* LRU list of unpinned pages */

};























































/* Each page cache is an instance of the following object.  Every
** open database file (including each in-memory database and each
** temporary or transient database) has a single page cache which
** is an instance of this object.
**
** Pointers to structures of this type are cast and returned as 
................................................................................
** a pointer to a block of szPage bytes of data and the return value is
** a pointer to the associated PgHdr1 structure.
**
**   assert( PGHDR1_TO_PAGE(PAGE_TO_PGHDR1(pCache, X))==X );
*/
#define PGHDR1_TO_PAGE(p)    (void*)(((char*)p) - p->pCache->szPage)
#define PAGE_TO_PGHDR1(c, p) (PgHdr1*)(((char*)p) + c->szPage)












/*
** Macros to enter and leave the PCache LRU mutex.
*/
#define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex)
#define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex)

................................................................................
    sqlite3MemdebugSetType(p, MEMTYPE_HEAP);
    iSize = sqlite3MallocSize(p);
    sqlite3MemdebugSetType(p, MEMTYPE_PCACHE);
    return iSize;
  }
}
#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */































/*
** Allocate a new page object initially associated with cache pCache.
*/
static PgHdr1 *pcache1AllocPage(PCache1 *pCache){
  int nByte = sizeof(PgHdr1) + pCache->szPage;
  void *pPg = pcache1Alloc(nByte);
  PgHdr1 *p;




































































































  if( pPg ){
    p = PAGE_TO_PGHDR1(pCache, pPg);
    if( pCache->bPurgeable ){
      pCache->pGroup->nCurrentPage++;
    }
  }else{
    p = 0;
................................................................................
** The pointer is allowed to be NULL, which is prudent.  But it turns out
** that the current implementation happens to never call this routine
** with a NULL pointer, so we mark the NULL test with ALWAYS().
*/
static void pcache1FreePage(PgHdr1 *p){
  if( ALWAYS(p) ){
    PCache1 *pCache = p->pCache;











































    if( pCache->bPurgeable ){
      pCache->pGroup->nCurrentPage--;
    }
    pcache1Free(PGHDR1_TO_PAGE(p));
  }
}

/*
** Malloc function used by SQLite to obtain space from the buffer configured
** using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no such buffer
** exists, this function falls back to sqlite3Malloc().
................................................................................
  }

  /* Step 5. If a usable page buffer has still not been found, 
  ** attempt to allocate a new one. 
  */
  if( !pPage ){
    if( createFlag==1 ) sqlite3BeginBenignMalloc();
    pcache1LeaveMutex(pGroup);
    pPage = pcache1AllocPage(pCache);
    pcache1EnterMutex(pGroup);
    if( createFlag==1 ) sqlite3EndBenignMalloc();
  }

  if( pPage ){
    unsigned int h = iKey % pCache->nHash;
    pCache->nPage++;
    pPage->iKey = iKey;







>
>
>







 







>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



<







 







<

<







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
..
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
...
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
...
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
...
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558

559
560
561
562
563
564
565
...
989
990
991
992
993
994
995

996

997
998
999
1000
1001
1002
1003
#include "sqliteInt.h"

typedef struct PCache1 PCache1;
typedef struct PgHdr1 PgHdr1;
typedef struct PgFreeslot PgFreeslot;
typedef struct PGroup PGroup;

typedef struct PGroupBlock PGroupBlock;
typedef struct PGroupBlockList PGroupBlockList;

/* Each page cache (or PCache) belongs to a PGroup.  A PGroup is a set 
** of one or more PCaches that are able to recycle each others unpinned
** pages when they are under memory pressure.  A PGroup is an instance of
** the following object.
**
** This page cache implementation works in one of two modes:
**
................................................................................
struct PGroup {
  sqlite3_mutex *mutex;          /* MUTEX_STATIC_LRU or NULL */
  int nMaxPage;                  /* Sum of nMax for purgeable caches */
  int nMinPage;                  /* Sum of nMin for purgeable caches */
  int mxPinned;                  /* nMaxpage + 10 - nMinPage */
  int nCurrentPage;              /* Number of purgeable pages allocated */
  PgHdr1 *pLruHead, *pLruTail;   /* LRU list of unpinned pages */
  PGroupBlockList *pBlockList;   /* List of block-lists for this group */
};

/*
** If SQLITE_PAGECACHE_BLOCKALLOC is defined when the library is built,
** each PGroup structure has a linked list of the the following starting
** at PGroup.pBlockList. There is one entry for each distinct page-size 
** currently used by members of the PGroup (i.e. 1024 bytes, 4096 bytes
** etc.). Variable PGroupBlockList.nByte is set to the actual allocation
** size requested by each pcache, which is the database page-size plus
** the various header structures used by the pcache, pager and btree layers.
** Usually around (pgsz+200) bytes.
**
** This size (pgsz+200) bytes is not allocated efficiently by some
** implementations of malloc. In particular, some implementations are only
** able to allocate blocks of memory chunks of 2^N bytes, where N is some
** integer value. Since the page-size is a power of 2, this means we
** end up wasting (pgsz-200) bytes in each allocation.
**
** If SQLITE_PAGECACHE_BLOCKALLOC is defined, the (pgsz+200) byte blocks
** are not allocated directly. Instead, blocks of roughly M*(pgsz+200) bytes 
** are requested from malloc allocator. After a block is returned,
** sqlite3MallocSize() is used to determine how many (pgsz+200) byte
** allocations can fit in the space returned by malloc(). This value may
** be more than M.
**
** The blocks are stored in a doubly-linked list. Variable PGroupBlock.nEntry
** contains the number of allocations that will fit in the aData[] space.
** nEntry is limited to the number of bits in bitmask mUsed. If a slot
** within aData is in use, the corresponding bit in mUsed is set. Thus
** when (mUsed+1==(1 << nEntry)) the block is completely full.
**
** Each time a slot within a block is freed, the block is moved to the start
** of the linked-list. And if a block becomes completely full, then it is
** moved to the end of the list. As a result, when searching for a free
** slot, only the first block in the list need be examined. If it is full,
** then it is guaranteed that all blocks are full.
*/
struct PGroupBlockList {
  int nByte;                     /* Size of each allocation in bytes */
  PGroupBlock *pFirst;           /* First PGroupBlock in list */
  PGroupBlock *pLast;            /* Last PGroupBlock in list */
  PGroupBlockList *pNext;        /* Next block-list attached to group */
};

struct PGroupBlock {
  Bitmask mUsed;                 /* Mask of used slots */
  int nEntry;                    /* Maximum number of allocations in aData[] */
  u8 *aData;                     /* Pointer to data block */
  PGroupBlock *pNext;            /* Next PGroupBlock in list */
  PGroupBlock *pPrev;            /* Previous PGroupBlock in list */
  PGroupBlockList *pList;        /* Owner list */
};

/* Minimum value for PGroupBlock.nEntry */
#define PAGECACHE_BLOCKALLOC_MINENTRY 15

/* Each page cache is an instance of the following object.  Every
** open database file (including each in-memory database and each
** temporary or transient database) has a single page cache which
** is an instance of this object.
**
** Pointers to structures of this type are cast and returned as 
................................................................................
** a pointer to a block of szPage bytes of data and the return value is
** a pointer to the associated PgHdr1 structure.
**
**   assert( PGHDR1_TO_PAGE(PAGE_TO_PGHDR1(pCache, X))==X );
*/
#define PGHDR1_TO_PAGE(p)    (void*)(((char*)p) - p->pCache->szPage)
#define PAGE_TO_PGHDR1(c, p) (PgHdr1*)(((char*)p) + c->szPage)

/*
** Blocks used by the SQLITE_PAGECACHE_BLOCKALLOC blocks to store/retrieve 
** a PGroupBlock pointer based on a pointer to a page buffer. 
*/
#define PAGE_SET_BLOCKPTR(pCache, pPg, pBlock) \
  ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) = pBlock )

#define PAGE_GET_BLOCKPTR(pCache, pPg) \
  ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) )


/*
** Macros to enter and leave the PCache LRU mutex.
*/
#define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex)
#define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex)

................................................................................
    sqlite3MemdebugSetType(p, MEMTYPE_HEAP);
    iSize = sqlite3MallocSize(p);
    sqlite3MemdebugSetType(p, MEMTYPE_PCACHE);
    return iSize;
  }
}
#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */

/*
** The block pBlock belongs to list pList but is not currently linked in.
** Insert it into the start of the list.
*/
static void addBlockToList(PGroupBlockList *pList, PGroupBlock *pBlock){
  pBlock->pPrev = 0;
  pBlock->pNext = pList->pFirst;
  pList->pFirst = pBlock;
  if( pBlock->pNext ){
    pBlock->pNext->pPrev = pBlock;
  }else{
    assert( pList->pLast==0 );
    pList->pLast = pBlock;
  }
}

/*
** If there are no blocks in the list headed by pList, remove pList
** from the pGroup->pBlockList list and free it with sqlite3_free().
*/
static void freeListIfEmpty(PGroup *pGroup, PGroupBlockList *pList){
  assert( sqlite3_mutex_held(pGroup->mutex) );
  if( pList->pFirst==0 ){
    PGroupBlockList **pp;
    for(pp=&pGroup->pBlockList; *pp!=pList; pp=&(*pp)->pNext);
    *pp = (*pp)->pNext;
    sqlite3_free(pList);
  }
}

/*
** Allocate a new page object initially associated with cache pCache.
*/
static PgHdr1 *pcache1AllocPage(PCache1 *pCache){
  int nByte = sizeof(PgHdr1) + pCache->szPage;
  void *pPg = 0;
  PgHdr1 *p;

#ifdef SQLITE_PAGECACHE_BLOCKALLOC
  PGroup *pGroup = pCache->pGroup;
  PGroupBlockList *pList;
  PGroupBlock *pBlock;
  int i;

  nByte += sizeof(PGroupBlockList *);
  nByte = ROUND8(nByte);

  do{
    for(pList=pGroup->pBlockList; pList; pList=pList->pNext){
      if( pList->nByte==nByte ) break;
    }
    if( pList==0 ){
      PGroupBlockList *pNew;
      pcache1LeaveMutex(pCache->pGroup);
      pNew = (PGroupBlockList *)sqlite3MallocZero(sizeof(PGroupBlockList));
      pcache1EnterMutex(pCache->pGroup);
      if( pNew==0 ){
        /* malloc() failure. Return early. */
        return 0;
      }
      for(pList=pGroup->pBlockList; pList; pList=pList->pNext){
        if( pList->nByte==nByte ) break;
      }
      if( pList ){
        sqlite3_free(pNew);
      }else{
        pNew->nByte = nByte;
        pNew->pNext = pGroup->pBlockList;
        pGroup->pBlockList = pNew;
        pList = pNew;
      }
    }
  }while( pList==0 );

  pBlock = pList->pFirst;
  if( pBlock==0 || pBlock->mUsed==(((Bitmask)1<<pBlock->nEntry)-1) ){
    int sz;

    /* Allocate a new block. Try to allocate enough space for the PGroupBlock
    ** structure and MINENTRY allocations of nByte bytes each. If the 
    ** allocator returns more memory than requested, then more than MINENTRY 
    ** allocations may fit in it. */
    pcache1LeaveMutex(pCache->pGroup);
    sz = sizeof(PGroupBlock) + PAGECACHE_BLOCKALLOC_MINENTRY * nByte;
    pBlock = (PGroupBlock *)sqlite3Malloc(sz);
    pcache1EnterMutex(pCache->pGroup);

    if( !pBlock ){
      freeListIfEmpty(pGroup, pList);
      return 0;
    }
    pBlock->nEntry = (sqlite3MallocSize(pBlock) - sizeof(PGroupBlock)) / nByte;
    if( pBlock->nEntry>=BMS ){
      pBlock->nEntry = BMS-1;
    }
    pBlock->pList = pList;
    pBlock->mUsed = 0;
    pBlock->aData = (u8 *)&pBlock[1];
    addBlockToList(pList, pBlock);

    sz = sqlite3MallocSize(pBlock);
    sqlite3_mutex_enter(pcache1.mutex);
    sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz);
    sqlite3_mutex_leave(pcache1.mutex);
  }

  for(i=0; pPg==0 && ALWAYS(i<pBlock->nEntry); i++){
    if( 0==(pBlock->mUsed & ((Bitmask)1<<i)) ){
      pBlock->mUsed |= ((Bitmask)1<<i);
      pPg = (void *)&pBlock->aData[pList->nByte * i];
    }
  }
  assert( pPg );
  PAGE_SET_BLOCKPTR(pCache, pPg, pBlock);

  /* If the block is now full, shift it to the end of the list */
  if( pBlock->mUsed==(((Bitmask)1<<pBlock->nEntry)-1) && pList->pLast!=pBlock ){
    assert( pList->pFirst==pBlock );
    assert( pBlock->pPrev==0 );
    assert( pList->pLast->pNext==0 );
    pList->pFirst = pBlock->pNext;
    pList->pFirst->pPrev = 0;
    pBlock->pPrev = pList->pLast;
    pBlock->pNext = 0;
    pList->pLast->pNext = pBlock;
    pList->pLast = pBlock;
  }
#else
  /* The group mutex must be released before pcache1Alloc() is called. This
  ** is because it may call sqlite3_release_memory(), which assumes that 
  ** this mutex is not held. */
  assert( sqlite3_mutex_held(pCache->pGroup->mutex) );
  pcache1LeaveMutex(pCache->pGroup);
  pPg = pcache1Alloc(nByte);
  pcache1EnterMutex(pCache->pGroup);
#endif

  if( pPg ){
    p = PAGE_TO_PGHDR1(pCache, pPg);
    if( pCache->bPurgeable ){
      pCache->pGroup->nCurrentPage++;
    }
  }else{
    p = 0;
................................................................................
** The pointer is allowed to be NULL, which is prudent.  But it turns out
** that the current implementation happens to never call this routine
** with a NULL pointer, so we mark the NULL test with ALWAYS().
*/
static void pcache1FreePage(PgHdr1 *p){
  if( ALWAYS(p) ){
    PCache1 *pCache = p->pCache;
    void *pPg = PGHDR1_TO_PAGE(p);

#ifdef SQLITE_PAGECACHE_BLOCKALLOC
    PGroupBlock *pBlock = PAGE_GET_BLOCKPTR(pCache, pPg);
    PGroupBlockList *pList = pBlock->pList;
    int i = ((u8 *)pPg - pBlock->aData) / pList->nByte;

    assert( pPg==(void *)&pBlock->aData[i*pList->nByte] );
    assert( pBlock->mUsed & ((Bitmask)1<<i) );
    pBlock->mUsed &= ~((Bitmask)1<<i);

    /* Remove the block from the list. If it is completely empty, free it.
    ** Or if it is not completely empty, re-insert it at the start of the
    ** list. */
    if( pList->pFirst==pBlock ){
      pList->pFirst = pBlock->pNext;
      if( pList->pFirst ) pList->pFirst->pPrev = 0;
    }else{
      pBlock->pPrev->pNext = pBlock->pNext;
    }
    if( pList->pLast==pBlock ){
      pList->pLast = pBlock->pPrev;
      if( pList->pLast ) pList->pLast->pNext = 0;
    }else{
      pBlock->pNext->pPrev = pBlock->pPrev;
    }

    if( pBlock->mUsed==0 ){
      PGroup *pGroup = p->pCache->pGroup;

      int sz = sqlite3MallocSize(pBlock);
      sqlite3_mutex_enter(pcache1.mutex);
      sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, -sz);
      sqlite3_mutex_leave(pcache1.mutex);
      freeListIfEmpty(pGroup, pList);
      sqlite3_free(pBlock);
    }else{
      addBlockToList(pList, pBlock);
    }
#else
    assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) );
    pcache1Free(pPg);
#endif
    if( pCache->bPurgeable ){
      pCache->pGroup->nCurrentPage--;
    }

  }
}

/*
** Malloc function used by SQLite to obtain space from the buffer configured
** using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no such buffer
** exists, this function falls back to sqlite3Malloc().
................................................................................
  }

  /* Step 5. If a usable page buffer has still not been found, 
  ** attempt to allocate a new one. 
  */
  if( !pPage ){
    if( createFlag==1 ) sqlite3BeginBenignMalloc();

    pPage = pcache1AllocPage(pCache);

    if( createFlag==1 ) sqlite3EndBenignMalloc();
  }

  if( pPage ){
    unsigned int h = iKey % pCache->nHash;
    pCache->nPage++;
    pPage->iKey = iKey;