/ Check-in [2b755def]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allocate page cache headers and page cache data buffers separately. The data buffer will be a power of two in size, and this gives some malloc implementations additional optimization opportunities. (CVS 4409)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 2b755defe51a565a2b6ace58381d6e91f6f17db8
User & Date: drh 2007-09-06 22:19:15
Context
2007-09-06
23:28
Base the name of the statement journal on the original database filename. Remember the statement journal name for the lifetime of the Pager so that the name pointer passed to xOpen persists as long as the file exists. (CVS 4410) check-in: 44d8d1e9 user: drh tags: trunk
22:19
Allocate page cache headers and page cache data buffers separately. The data buffer will be a power of two in size, and this gives some malloc implementations additional optimization opportunities. (CVS 4409) check-in: 2b755def user: drh tags: trunk
13:49
Updated comments on journal.c. No changes to code. (CVS 4408) check-in: 32984410 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btree.c.

     5      5   ** a legal notice, here is a blessing:
     6      6   **
     7      7   **    May you do good and not evil.
     8      8   **    May you find forgiveness for yourself and forgive others.
     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12         -** $Id: btree.c,v 1.422 2007/09/03 22:00:39 drh Exp $
           12  +** $Id: btree.c,v 1.423 2007/09/06 22:19:15 drh Exp $
    13     13   **
    14     14   ** This file implements an external (disk-based) database using BTrees.
    15     15   ** See the header comment on "btreeInt.h" for additional information.
    16     16   ** Including a description of file format and an overview of operation.
    17     17   */
    18     18   #include "btreeInt.h"
    19     19   
................................................................................
   898    898     int top;           /* First byte of the cell content area */
   899    899   
   900    900     pBt = pPage->pBt;
   901    901     assert( pBt!=0 );
   902    902     assert( pParent==0 || pParent->pBt==pBt );
   903    903     assert( sqlite3_mutex_held(pBt->mutex) );
   904    904     assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
   905         -  assert( pPage->aData == &((unsigned char*)pPage)[-pBt->pageSize] );
          905  +  assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
          906  +  assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
   906    907     if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){
   907    908       /* The parent page should never change unless the file is corrupt */
   908    909       return SQLITE_CORRUPT_BKPT;
   909    910     }
   910    911     if( pPage->isInit ) return SQLITE_OK;
   911    912     if( pPage->pParent==0 && pParent!=0 ){
   912    913       pPage->pParent = pParent;
................................................................................
   965    966   static void zeroPage(MemPage *pPage, int flags){
   966    967     unsigned char *data = pPage->aData;
   967    968     BtShared *pBt = pPage->pBt;
   968    969     int hdr = pPage->hdrOffset;
   969    970     int first;
   970    971   
   971    972     assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno );
   972         -  assert( &data[pBt->pageSize] == (unsigned char*)pPage );
          973  +  assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
          974  +  assert( sqlite3PagerGetData(pPage->pDbPage) == data );
   973    975     assert( sqlite3PagerIswriteable(pPage->pDbPage) );
   974    976     assert( sqlite3_mutex_held(pBt->mutex) );
   975    977     memset(&data[hdr], 0, pBt->usableSize - hdr);
   976    978     data[hdr] = flags;
   977    979     first = hdr + 8 + 4*((flags&PTF_LEAF)==0);
   978    980     memset(&data[hdr+1], 0, 4);
   979    981     data[hdr+7] = 0;
................................................................................
  1049   1051   ** Release a MemPage.  This should be called once for each prior
  1050   1052   ** call to sqlite3BtreeGetPage.
  1051   1053   */
  1052   1054   static void releasePage(MemPage *pPage){
  1053   1055     if( pPage ){
  1054   1056       assert( pPage->aData );
  1055   1057       assert( pPage->pBt );
  1056         -    assert( &pPage->aData[pPage->pBt->pageSize]==(unsigned char*)pPage );
         1058  +    assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
         1059  +    assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
  1057   1060       assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  1058   1061       sqlite3PagerUnref(pPage->pDbPage);
  1059   1062     }
  1060   1063   }
  1061   1064   
  1062   1065   /*
  1063   1066   ** This routine is called when the reference count for a page
................................................................................
  1727   1730   */
  1728   1731   static void unlockBtreeIfUnused(BtShared *pBt){
  1729   1732     assert( sqlite3_mutex_held(pBt->mutex) );
  1730   1733     if( pBt->inTransaction==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){
  1731   1734       if( sqlite3PagerRefcount(pBt->pPager)>=1 ){
  1732   1735         if( pBt->pPage1->aData==0 ){
  1733   1736           MemPage *pPage = pBt->pPage1;
  1734         -        pPage->aData = &((u8*)pPage)[-pBt->pageSize];
         1737  +        pPage->aData = sqlite3PagerGetData(pPage->pDbPage);
  1735   1738           pPage->pBt = pBt;
  1736   1739           pPage->pgno = 1;
  1737   1740         }
  1738   1741         releasePage(pBt->pPage1);
  1739   1742       }
  1740   1743       pBt->pPage1 = 0;
  1741   1744       pBt->inStmt = 0;
................................................................................
  4344   4347     assert( pNewParent!=0 );
  4345   4348     if( pgno==0 ) return SQLITE_OK;
  4346   4349     assert( pBt->pPager!=0 );
  4347   4350     pDbPage = sqlite3PagerLookup(pBt->pPager, pgno);
  4348   4351     if( pDbPage ){
  4349   4352       pThis = (MemPage *)sqlite3PagerGetExtra(pDbPage);
  4350   4353       if( pThis->isInit ){
  4351         -      assert( pThis->aData==(sqlite3PagerGetData(pDbPage)) );
         4354  +      assert( pThis->aData==sqlite3PagerGetData(pDbPage) );
  4352   4355         if( pThis->pParent!=pNewParent ){
  4353   4356           if( pThis->pParent ) sqlite3PagerUnref(pThis->pParent->pDbPage);
  4354   4357           pThis->pParent = pNewParent;
  4355   4358           sqlite3PagerRef(pNewParent->pDbPage);
  4356   4359         }
  4357   4360         pThis->idxParent = idx;
  4358   4361       }
................................................................................
  4895   4898     /*
  4896   4899     ** Make copies of the content of pPage and its siblings into aOld[].
  4897   4900     ** The rest of this function will use data from the copies rather
  4898   4901     ** than the original pages since the original pages will be in the
  4899   4902     ** process of being overwritten.
  4900   4903     */
  4901   4904     for(i=0; i<nOld; i++){
  4902         -    MemPage *p = apCopy[i] = (MemPage*)&aCopy[i][pBt->pageSize];
  4903         -    p->aData = &((u8*)p)[-pBt->pageSize];
  4904         -    memcpy(p->aData, apOld[i]->aData, pBt->pageSize + sizeof(MemPage));
  4905         -    /* The memcpy() above changes the value of p->aData so we have to
  4906         -    ** set it again. */
  4907         -    p->aData = &((u8*)p)[-pBt->pageSize];
         4905  +    MemPage *p = apCopy[i] = (MemPage*)aCopy[i];
         4906  +    memcpy(p, apOld[i], sizeof(MemPage));
         4907  +    p->aData = (void*)&p[1];
         4908  +    memcpy(p->aData, apOld[i]->aData, pBt->pageSize);
  4908   4909     }
  4909   4910   
  4910   4911     /*
  4911   4912     ** Load pointers to all cells on sibling pages and the divider cells
  4912   4913     ** into the local apCell[] array.  Make copies of the divider cells
  4913   4914     ** into space obtained from aSpace[] and remove the divider Cells
  4914   4915     ** from pParent.

Changes to src/pager.c.

    14     14   ** The pager is used to access a database disk file.  It implements
    15     15   ** atomic commit and rollback through the use of a journal file that
    16     16   ** is separate from the database file.  The pager also implements file
    17     17   ** locking to prevent two processes from writing the same database
    18     18   ** file simultaneously, or one process from reading the database while
    19     19   ** another is writing.
    20     20   **
    21         -** @(#) $Id: pager.c,v 1.385 2007/09/03 15:19:35 drh Exp $
           21  +** @(#) $Id: pager.c,v 1.386 2007/09/06 22:19:15 drh Exp $
    22     22   */
    23     23   #ifndef SQLITE_OMIT_DISKIO
    24     24   #include "sqliteInt.h"
    25     25   #include <assert.h>
    26     26   #include <string.h>
    27     27   
    28     28   /*
................................................................................
   267    267     u8 alwaysRollback;             /* Disable DontRollback() for this page */
   268    268     u8 needRead;                   /* Read content if PagerWrite() is called */
   269    269     short int nRef;                /* Number of users of this page */
   270    270     PgHdr *pDirty, *pPrevDirty;    /* Dirty pages */
   271    271   #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
   272    272     PagerLruLink gfree;            /* Global list of nRef==0 pages */
   273    273   #endif
   274         -  u32 notUsed;                   /* Buffer space */
   275    274   #ifdef SQLITE_CHECK_PAGES
   276    275     u32 pageHash;
   277    276   #endif
   278         -  /* pPager->pageSize bytes of page data follow this header */
   279         -  /* Pager.nExtra bytes of local data follow the page data */
          277  +  void *pData;                   /* Page data */
          278  +  /* Pager.nExtra bytes of local data appended to this header */
   280    279   };
   281    280   
   282    281   /*
   283    282   ** For an in-memory only database, some extra information is recorded about
   284    283   ** each page so that changes can be rolled back.  (Journal files are not
   285    284   ** used for in-memory databases.)  The following information is added to
   286    285   ** the end of every EXTRA block for in-memory databases.
................................................................................
   309    308   # define CODEC2(P,D,N,X) ((char*)D)
   310    309   #endif
   311    310   
   312    311   /*
   313    312   ** Convert a pointer to a PgHdr into a pointer to its data
   314    313   ** and back again.
   315    314   */
   316         -#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
   317         -#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
   318         -#define PGHDR_TO_EXTRA(G,P) ((void*)&((char*)(&(G)[1]))[(P)->pageSize])
          315  +#define PGHDR_TO_DATA(P)    ((P)->pData)
          316  +#define PGHDR_TO_EXTRA(G,P) ((void*)&((G)[1]))
   319    317   #define PGHDR_TO_HIST(P,PGR)  \
   320         -            ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->pageSize+(PGR)->nExtra])
          318  +            ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->nExtra])
   321    319   
   322    320   /*
   323    321   ** A open page cache is an instance of the following structure.
   324    322   **
   325    323   ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or
   326    324   ** or SQLITE_FULL. Once one of the first three errors occurs, it persists
   327    325   ** and is returned as the result of every major pager API call.  The
................................................................................
  1241   1239     PgHdr *pPg, *pNext;
  1242   1240     if( pPager->errCode ) return;
  1243   1241     for(pPg=pPager->pAll; pPg; pPg=pNext){
  1244   1242       IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
  1245   1243       PAGER_INCR(sqlite3_pager_pgfree_count);
  1246   1244       pNext = pPg->pNextAll;
  1247   1245       lruListRemove(pPg);
         1246  +    sqlite3_free(pPg->pData);
  1248   1247       sqlite3_free(pPg);
  1249   1248     }
  1250   1249     assert(pPager->lru.pFirst==0);
  1251   1250     assert(pPager->lru.pFirstSynced==0);
  1252   1251     assert(pPager->lru.pLast==0);
  1253   1252     pPager->pStmt = 0;
  1254   1253     pPager->pAll = 0;
................................................................................
  2491   2490         ppPg = &pPg->pNextAll;
  2492   2491       }else{
  2493   2492         *ppPg = pPg->pNextAll;
  2494   2493         IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
  2495   2494         PAGER_INCR(sqlite3_pager_pgfree_count);
  2496   2495         unlinkPage(pPg);
  2497   2496         makeClean(pPg);
         2497  +      sqlite3_free(pPg->pData);
  2498   2498         sqlite3_free(pPg);
  2499   2499         pPager->nPage--;
  2500   2500       }
  2501   2501     }
  2502   2502   }
  2503   2503   
  2504   2504   /*
................................................................................
  3154   3154         nReleased += (
  3155   3155             sizeof(*pPg) + pPager->pageSize
  3156   3156             + sizeof(u32) + pPager->nExtra
  3157   3157             + MEMDB*sizeof(PgHistory) 
  3158   3158         );
  3159   3159         IOTRACE(("PGFREE %p %d *\n", pPager, pPg->pgno));
  3160   3160         PAGER_INCR(sqlite3_pager_pgfree_count);
         3161  +      sqlite3_free(pPg->pData);
  3161   3162         sqlite3_free(pPg);
  3162   3163         pPager->nPage--;
  3163   3164       }else{
  3164   3165         /* An error occurred whilst writing to the database file or 
  3165   3166         ** journal in pager_recycle(). The error is not returned to the 
  3166   3167         ** caller of this function. Instead, set the Pager.errCode variable.
  3167   3168         ** The error will be returned to the user (or users, in the case 
................................................................................
  3390   3391   **     (4)  Either there is an available PgHdr that does not need
  3391   3392   **          to be synced to disk or else disk syncing is currently
  3392   3393   **          allowed.
  3393   3394   */
  3394   3395   static int pagerAllocatePage(Pager *pPager, PgHdr **ppPg){
  3395   3396     int rc = SQLITE_OK;
  3396   3397     PgHdr *pPg;
         3398  +  void *pData;
  3397   3399   
  3398   3400     /* Create a new PgHdr if any of the four conditions defined 
  3399   3401     ** above are met: */
  3400   3402     if( pPager->nPage<pPager->mxPage
  3401   3403      || pPager->lru.pFirst==0 
  3402   3404      || MEMDB
  3403   3405      || (pPager->lru.pFirstSynced==0 && pPager->doNotSync)
................................................................................
  3407   3409            pPager->nHash<256 ? 256 : pPager->nHash*2);
  3408   3410         if( pPager->nHash==0 ){
  3409   3411           rc = SQLITE_NOMEM;
  3410   3412           goto pager_allocate_out;
  3411   3413         }
  3412   3414       }
  3413   3415       pagerLeave(pPager);
  3414         -    pPg = sqlite3_malloc( sizeof(*pPg) + pPager->pageSize
  3415         -                            + sizeof(u32) + pPager->nExtra
         3416  +    pPg = sqlite3_malloc( sizeof(*pPg) + sizeof(u32) + pPager->nExtra
  3416   3417                               + MEMDB*sizeof(PgHistory) );
         3418  +    if( pPg ){
         3419  +      pData = sqlite3_malloc( pPager->pageSize );
         3420  +      if( pData==0 ){
         3421  +        sqlite3_free(pPg);
         3422  +        pPg = 0;
         3423  +      }
         3424  +    }
  3417   3425       pagerEnter(pPager);
  3418   3426       if( pPg==0 ){
  3419   3427         rc = SQLITE_NOMEM;
  3420   3428         goto pager_allocate_out;
  3421   3429       }
  3422   3430       memset(pPg, 0, sizeof(*pPg));
  3423   3431       if( MEMDB ){
  3424   3432         memset(PGHDR_TO_HIST(pPg, pPager), 0, sizeof(PgHistory));
  3425   3433       }
         3434  +    pPg->pData = pData;
  3426   3435       pPg->pPager = pPager;
  3427   3436       pPg->pNextAll = pPager->pAll;
  3428   3437       pPager->pAll = pPg;
  3429   3438       pPager->nPage++;
  3430   3439     }else{
  3431   3440       /* Recycle an existing page with a zero ref-count. */
  3432   3441       rc = pager_recycle(pPager, &pPg);
................................................................................
  3983   3992     
  3984   3993       /* The transaction journal now exists and we have a RESERVED or an
  3985   3994       ** EXCLUSIVE lock on the main database file.  Write the current page to
  3986   3995       ** the transaction journal if it is not there already.
  3987   3996       */
  3988   3997       if( !pPg->inJournal && (pPager->useJournal || MEMDB) ){
  3989   3998         if( (int)pPg->pgno <= pPager->origDbSize ){
  3990         -        int szPg;
  3991   3999           if( MEMDB ){
  3992   4000             PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  3993   4001             PAGERTRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  3994   4002             assert( pHist->pOrig==0 );
  3995   4003             pHist->pOrig = sqlite3_malloc( pPager->pageSize );
  3996   4004             if( pHist->pOrig ){
  3997   4005               memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
  3998   4006             }
  3999   4007           }else{
  4000         -          u32 cksum, saved;
  4001         -          char *pData2, *pEnd;
         4008  +          u32 cksum;
         4009  +          char *pData2;
  4002   4010   
  4003   4011             /* We should never write to the journal file the page that
  4004   4012             ** contains the database locks.  The following assert verifies
  4005   4013             ** that we do not. */
  4006   4014             assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
  4007   4015             pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
  4008   4016             cksum = pager_cksum(pPager, (u8*)pData2);
  4009         -          pEnd = pData2 + pPager->pageSize;
  4010         -          pData2 -= 4;
  4011         -          saved = *(u32*)pEnd;
  4012         -          put32bits(pEnd, cksum);
  4013         -          szPg = pPager->pageSize+8;
  4014         -          put32bits(pData2, pPg->pgno);
  4015         -          rc = sqlite3OsWrite(pPager->jfd, pData2, szPg, pPager->journalOff);
  4016         -          IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno,
         4017  +          rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
         4018  +          if( rc==SQLITE_OK ){
         4019  +            rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize,
         4020  +                                pPager->journalOff + 4);
         4021  +            pPager->journalOff += pPager->pageSize+4;
         4022  +          }
         4023  +          if( rc==SQLITE_OK ){
         4024  +            rc = write32bits(pPager->jfd, pPager->journalOff, cksum);
         4025  +            pPager->journalOff += 4;
         4026  +          }
         4027  +          IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 
  4017   4028                      pPager->journalOff, szPg));
  4018   4029             PAGER_INCR(sqlite3_pager_writej_count);
  4019         -          pPager->journalOff += szPg;
  4020   4030             PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n",
  4021   4031                  PAGERID(pPager), pPg->pgno, pPg->needSync, pager_pagehash(pPg));
  4022         -          *(u32*)pEnd = saved;
  4023   4032   
  4024   4033             /* An error has occurred writing to the journal file. The 
  4025   4034             ** transaction will be rolled back by the layer above.
  4026   4035             */
  4027   4036             if( rc!=SQLITE_OK ){
  4028   4037               return rc;
  4029   4038             }
................................................................................
  4064   4073           if( pHist->pStmt ){
  4065   4074             memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
  4066   4075           }
  4067   4076           PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  4068   4077           page_add_to_stmt_list(pPg);
  4069   4078         }else{
  4070   4079           i64 offset = pPager->stmtNRec*(4+pPager->pageSize);
  4071         -        char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7)-4;
  4072         -        put32bits(pData2, pPg->pgno);
  4073         -        rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize+4, offset);
         4080  +        char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
         4081  +        rc = write32bits(pPager->stfd, offset, pPg->pgno);
         4082  +        if( rc==SQLITE_OK ){
         4083  +          rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4);
         4084  +        }
  4074   4085           PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  4075   4086           if( rc!=SQLITE_OK ){
  4076   4087             return rc;
  4077   4088           }
  4078   4089           pPager->stmtNRec++;
  4079   4090           assert( pPager->aInStmt!=0 );
  4080   4091           pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);

Changes to test/btree.test.

     7      7   #    May you find forgiveness for yourself and forgive others.
     8      8   #    May you share freely, never taking more than you give.
     9      9   #
    10     10   #***********************************************************************
    11     11   # This file implements regression tests for SQLite library.  The
    12     12   # focus of this script is btree database backend
    13     13   #
    14         -# $Id: btree.test,v 1.40 2007/06/25 08:16:58 danielk1977 Exp $
           14  +# $Id: btree.test,v 1.41 2007/09/06 22:19:15 drh Exp $
    15     15   
    16     16   
    17     17   set testdir [file dirname $argv0]
    18     18   source $testdir/tester.tcl
    19     19   
    20     20   ifcapable default_autovacuum {
    21     21     finish_test
................................................................................
   778    778     lindex [btree_pager_stats $::b1] 1
   779    779   } {1}
   780    780   do_test btree-10.2 {
   781    781     set ::c1 [btree_cursor $::b1 2 1]
   782    782     lindex [btree_pager_stats $::b1] 1
   783    783   } {2}
   784    784   do_test btree-10.3 {
          785  +btree_breakpoint
   785    786     for {set i 1} {$i<=30} {incr i} {
   786    787       set key [format %03d $i]
   787    788       set data "*** $key *** $key *** $key *** $key ***"
   788    789       btree_insert $::c1 $key $data
   789    790     }
   790    791     select_keys $::c1
   791    792   } {001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 018 019 020 021 022 023 024 025 026 027 028 029 030}