/ Check-in [6f21d9cb]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Use mmap() to read from the database file in rollback mode. This branch is unix only for now.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | experimental-mmap
Files: files | file ages | folders
SHA1: 6f21d9cbf5d457e63a7282015a89ae785526cf6d
User & Date: dan 2013-03-14 18:34:37
Context
2013-03-15
18:29
Allow read-only cursors to use mmap pages even if there is an open write transaction. check-in: b387e2f9 user: dan tags: experimental-mmap
2013-03-14
18:34
Use mmap() to read from the database file in rollback mode. This branch is unix only for now. check-in: 6f21d9cb user: dan tags: experimental-mmap
2013-03-13
07:02
Enhance tests for ticket [4dd95f6943]. check-in: 0b452734 user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btree.c.

  2559   2559     int rc;
  2560   2560     sqlite3BtreeEnter(p);
  2561   2561     p->pBt->nPage = 0;
  2562   2562     rc = newDatabase(p->pBt);
  2563   2563     sqlite3BtreeLeave(p);
  2564   2564     return rc;
  2565   2565   }
         2566  +
         2567  +/*
         2568  +** For each cursor open on shared-btree pBt, replace every page in the
         2569  +** cursor's page stack that is flagged PGHDR_MMAP with a read/write page
         2570  +** obtained from btreeGetPage(). Return SQLITE_OK or the first error code.
         2571  +*/
         2572  +static int btreeSwapOutMmap(BtShared *pBt){
         2573  +  BtCursor *pCsr;
         2574  +  for(pCsr=pBt->pCursor; pCsr; pCsr=pCsr->pNext){
         2575  +    int i;
         2576  +    for(i=0; i<=pCsr->iPage; i++){   /* visit apPage[0] .. apPage[iPage] */
         2577  +      MemPage *pPg = pCsr->apPage[i];
         2578  +      if( pPg->pDbPage->flags & PGHDR_MMAP ){
         2579  +        int rc;
         2580  +        MemPage *pNew = 0;
         2581  +        rc = btreeGetPage(pBt, pPg->pgno, &pNew, 0);
         2582  +        if( rc==SQLITE_OK && i==pCsr->iPage ){  /* rebase info.pCell onto pNew */
         2583  +          pCsr->info.pCell = pNew->aData + (pCsr->info.pCell - pPg->aData);
         2584  +        }
         2585  +        pCsr->apPage[i] = pNew;  /* NOTE(review): may be 0 if the fetch failed */
         2586  +        releasePage(pPg);        /* old page is released even on error */
         2587  +        if( rc!=SQLITE_OK ) return rc;
         2588  +      }
         2589  +    }
         2590  +  }
         2591  +
         2592  +  return SQLITE_OK;
         2593  +}
  2566   2594   
  2567   2595   /*
  2568   2596   ** Attempt to start a new transaction. A write-transaction
  2569   2597   ** is started if the second argument is nonzero, otherwise a read-
  2570   2598   ** transaction.  If the second argument is 2 or more an exclusive
  2571   2599   ** transaction is started, meaning that no other process is allowed
  2572   2600   ** to access the database.  A preexisting transaction may not be
................................................................................
  2666   2694       while( pBt->pPage1==0 && SQLITE_OK==(rc = lockBtree(pBt)) );
  2667   2695   
  2668   2696       if( rc==SQLITE_OK && wrflag ){
  2669   2697         if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){
  2670   2698           rc = SQLITE_READONLY;
  2671   2699         }else{
  2672   2700           rc = sqlite3PagerBegin(pBt->pPager,wrflag>1,sqlite3TempInMemory(p->db));
         2701  +        if( rc==SQLITE_OK ){
         2702  +          rc = btreeSwapOutMmap(pBt);
         2703  +        }
  2673   2704           if( rc==SQLITE_OK ){
  2674   2705             rc = newDatabase(pBt);
  2675   2706           }
  2676   2707         }
  2677   2708       }
  2678   2709     
  2679   2710       if( rc!=SQLITE_OK ){

Changes to src/os_unix.c.

  3618   3618       case SQLITE_FCNTL_TEMPFILENAME: {
  3619   3619         char *zTFile = sqlite3_malloc( pFile->pVfs->mxPathname );
  3620   3620         if( zTFile ){
  3621   3621           unixGetTempname(pFile->pVfs->mxPathname, zTFile);
  3622   3622           *(char**)pArg = zTFile;
  3623   3623         }
  3624   3624         return SQLITE_OK;
         3625  +    }
         3626  +    case SQLITE_FCNTL_GETFD: {
         3627  +      *(int*)pArg = pFile->h;
         3628  +      return SQLITE_OK;
  3625   3629       }
  3626   3630   #ifdef SQLITE_DEBUG
  3627   3631       /* The pager calls this method to signal that it has done
  3628   3632       ** a rollback and that the database is therefore unchanged and
  3629   3633       ** hence it is OK for the transaction change counter to be
  3630   3634       ** unchanged.
  3631   3635       */

Changes to src/pager.c.

   651    651     sqlite3_file *sjfd;         /* File descriptor for sub-journal */
   652    652     i64 journalOff;             /* Current write offset in the journal file */
   653    653     i64 journalHdr;             /* Byte offset to previous journal header */
   654    654     sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */
   655    655     PagerSavepoint *aSavepoint; /* Array of active savepoints */
   656    656     int nSavepoint;             /* Number of elements in aSavepoint[] */
   657    657     char dbFileVers[16];        /* Changes whenever database file changes */
          658  +
          659  +  void *pMap;                 /* Memory mapped prefix of database file */
          660  +  i64 nMap;                   /* Size of mapping at pMap in bytes */ 
          661  +  int nMmapOut;               /* Number of mmap pages currently outstanding */
          662  +  PgHdr *pFree;               /* List of free mmap page headers (pDirty) */
   658    663     /*
   659    664     ** End of the routinely-changing class members
   660    665     ***************************************************************************/
   661    666   
   662    667     u16 nExtra;                 /* Add this many bytes to each in-memory page */
   663    668     i16 nReserve;               /* Number of unused bytes at end of each page */
   664    669     u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
................................................................................
  3798   3803       rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_NORMAL);
  3799   3804     }
  3800   3805     if( rc==SQLITE_OK ){
  3801   3806       rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr);
  3802   3807     }
  3803   3808     return rc;
  3804   3809   }
         3810  +
         3811  +#include <sys/mman.h>
         3812  +
         3813  +/*
         3814  +** Discard the mapping of the database file, if any. Always SQLITE_OK.
         3815  +*/
         3816  +static int pagerUnmap(Pager *pPager){
         3817  +  void *pOld = pPager->pMap;
         3818  +  if( pOld==0 ) return SQLITE_OK;   /* nothing is mapped */
         3819  +  munmap(pOld, pPager->nMap);       /* return value is not checked */
         3820  +  pPager->pMap = 0;
         3821  +  pPager->nMap = 0;
         3822  +  return SQLITE_OK;
         3823  +}
         3824  +
         3825  +/*
         3826  +** Map the whole database file read-only (PROT_READ, MAP_SHARED) and record
         3827  +** the mapping in pPager->pMap/nMap. A zero-length file is left unmapped.
         3828  +** Returns SQLITE_OK, a VFS error code, or SQLITE_IOERR if mmap() fails.
         3829  +*/
         3830  +static int pagerMap(Pager *pPager){
         3831  +  int rc;
         3832  +  int fd = -1;
         3833  +  i64 sz = 0;
         3834  +  void *pMap;
         3835  +
         3836  +  assert( pPager->pMap==0 && pPager->nMap==0 );
         3837  +  rc = sqlite3OsFileSize(pPager->fd, &sz);
         3838  +  if( rc!=SQLITE_OK || sz<=0 ) return rc;
         3839  +  rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_GETFD, (void *)&fd);
         3840  +  if( rc!=SQLITE_OK ) return rc;
         3841  +  /* mmap() can fail legitimately (e.g. ENOMEM): report it, never assert. */
         3842  +  pMap = mmap(0, sz, PROT_READ, MAP_SHARED, fd, 0);
         3843  +  if( pMap==MAP_FAILED ) return SQLITE_IOERR;
         3844  +  pPager->pMap = pMap;
         3845  +  pPager->nMap = sz;
         3846  +  return SQLITE_OK;
         3847  +}
         3848  +/* Try to satisfy a request for page pgno directly from the mmap region. */
         3849  +static int pagerAcquireMapPage(Pager *pPager, Pgno pgno, PgHdr **ppPage){
         3850  +  int rc;
         3851  +  *ppPage = 0;                 /* stays 0 unless an mmap page is handed out */
         3852  +  /* SQLITE_OK with *ppPage==0 means "not served here"; errors are returned. */
         3853  +  assert( pPager->pWal==0 );
         3854  +
         3855  +  if( MEMDB==0 && pPager->tempFile==0 ){
         3856  +    if( pPager->pMap==0 ){      /* no mapping yet: try to create one now */
         3857  +      rc = pagerMap(pPager);
         3858  +      if( rc!=SQLITE_OK ) return rc;
         3859  +    }
         3860  +    /* Page 1 is never served from the map; pgno must lie wholly inside it. */
         3861  +    if( pgno!=1 && pPager->pMap && pPager->nMap>=((i64)pgno*pPager->pageSize) ){
         3862  +      PgHdr *p;
         3863  +      if( pPager->pFree ){      /* recycle a header; pDirty links the free list */
         3864  +        p = pPager->pFree;
         3865  +        pPager->pFree = p->pDirty;
         3866  +        p->pDirty = 0;
         3867  +        memset(p->pExtra, 0, pPager->nExtra);
         3868  +      }else{                    /* else allocate a zeroed header + nExtra bytes */
         3869  +        p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra);
         3870  +        if( p==0 ) return SQLITE_NOMEM;
         3871  +        p->pExtra = (void *)&p[1];
         3872  +        p->flags = PGHDR_MMAP;
         3873  +        p->nRef = 1;
         3874  +        p->pPager = pPager;
         3875  +      }
         3876  +      /* Recycled and freshly allocated headers must be in the same state. */
         3877  +      assert( p->pExtra==(void *)&p[1] );
         3878  +      assert( p->pPage==0 );
         3879  +      assert( p->flags==PGHDR_MMAP );
         3880  +      assert( p->pPager==pPager );
         3881  +      assert( p->nRef==1 );
         3882  +      /* Point the header at this page's bytes inside the mapping. */
         3883  +      p->pData = &((u8 *)pPager->pMap)[(i64)(pgno-1) * pPager->pageSize];
         3884  +      p->pgno = pgno;
         3885  +      pPager->nMmapOut++;       /* decremented in pagerReleaseMapPage() */
         3886  +      *ppPage = p;
         3887  +    }
         3888  +  }
         3889  +
         3890  +  return SQLITE_OK;
         3891  +}
         3892  +/* Return mmap page header pPg to its pager's free list for later reuse. */
         3893  +static void pagerReleaseMapPage(PgHdr *pPg){
         3894  +  Pager *pOwner = pPg->pPager;
         3895  +  pPg->pDirty = pOwner->pFree;   /* push onto the pDirty-linked free list */
         3896  +  pOwner->pFree = pPg;
         3897  +  pOwner->nMmapOut -= 1;         /* one fewer mmap page outstanding */
         3898  +}
         3899  +/* Free every header on the pager's mmap free list and leave it empty. */
         3900  +static void pagerFreeMapHdrs(Pager *pPager){
         3901  +  PgHdr *p = pPager->pFree;
         3902  +  pPager->pFree = 0;            /* do not leave a dangling list head */
         3903  +  while( p ){
         3904  +    PgHdr *pNext = p->pDirty;   /* read the link before freeing the node */
         3905  +    sqlite3_free(p);
         3906  +    p = pNext;
         3907  +  }
         3908  +}
  3805   3909   
  3806   3910   /*
  3807   3911   ** Shutdown the page cache.  Free all memory and close all files.
  3808   3912   **
  3809   3913   ** If a transaction was in progress when this routine is called, that
  3810   3914   ** transaction is rolled back.  All outstanding pages are invalidated
  3811   3915   ** and their memory is freed.  Any attempt to use a page associated
................................................................................
  3819   3923   */
  3820   3924   int sqlite3PagerClose(Pager *pPager){
  3821   3925     u8 *pTmp = (u8 *)pPager->pTmpSpace;
  3822   3926   
  3823   3927     assert( assert_pager_state(pPager) );
  3824   3928     disable_simulated_io_errors();
  3825   3929     sqlite3BeginBenignMalloc();
         3930  +  pagerUnmap(pPager);
         3931  +  pagerFreeMapHdrs(pPager);
  3826   3932     /* pPager->errCode = 0; */
  3827   3933     pPager->exclusiveMode = 0;
  3828   3934   #ifndef SQLITE_OMIT_WAL
  3829   3935     sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, pTmp);
  3830   3936     pPager->pWal = 0;
  3831   3937   #endif
  3832   3938     pager_reset(pPager);
................................................................................
  4962   5068           }
  4963   5069         }else{
  4964   5070           memset(dbFileVers, 0, sizeof(dbFileVers));
  4965   5071         }
  4966   5072   
  4967   5073         if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
  4968   5074           pager_reset(pPager);
         5075  +        pagerUnmap(pPager);
  4969   5076         }
  4970   5077       }
  4971   5078   
  4972   5079       /* If there is a WAL file in the file-system, open this database in WAL
  4973   5080       ** mode. Otherwise, the following function call is a no-op.
  4974   5081       */
  4975   5082       rc = pagerOpenWalIfPresent(pPager);
................................................................................
  5003   5110   ** transaction and unlock the pager.
  5004   5111   **
  5005   5112   ** Except, in locking_mode=EXCLUSIVE when there is nothing in
  5006   5113   ** the rollback journal, the unlock is not performed and there is
  5007   5114   ** nothing to rollback, so this routine is a no-op.
  5008   5115   */ 
  5009   5116   static void pagerUnlockIfUnused(Pager *pPager){
  5010         -  if( (sqlite3PcacheRefCount(pPager->pPCache)==0) ){
         5117  +  if( (sqlite3PcacheRefCount(pPager->pPCache)==0) && pPager->nMmapOut==0 ){
  5011   5118       pagerUnlockAndRollback(pPager);
  5012   5119     }
  5013   5120   }
  5014   5121   
  5015   5122   /*
  5016   5123   ** Acquire a reference to page number pgno in pager pPager (a page
  5017   5124   ** reference has type DbPage*). If the requested reference is 
................................................................................
  5079   5186     }
  5080   5187   
  5081   5188     /* If the pager is in the error state, return an error immediately. 
  5082   5189     ** Otherwise, request the page from the PCache layer. */
  5083   5190     if( pPager->errCode!=SQLITE_OK ){
  5084   5191       rc = pPager->errCode;
  5085   5192     }else{
         5193  +    if( pPager->eState==PAGER_READER && pPager->pWal==0 ){
         5194  +      rc = pagerAcquireMapPage(pPager, pgno, &pPg);
         5195  +      if( rc!=SQLITE_OK ) goto pager_acquire_err;
         5196  +      if( pPg ){
         5197  +        *ppPage = pPg;
         5198  +        return SQLITE_OK;
         5199  +      }
         5200  +    }
         5201  +
  5086   5202       rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage);
  5087   5203     }
  5088   5204   
  5089   5205     if( rc!=SQLITE_OK ){
  5090   5206       /* Either the call to sqlite3PcacheFetch() returned an error or the
  5091   5207       ** pager was already in the error-state when this function was called.
  5092   5208       ** Set pPg to 0 and jump to the exception handler.  */
................................................................................
  5192   5308   ** page is added to the LRU list.  When all references to all pages
  5193   5309   ** are released, a rollback occurs and the lock on the database is
  5194   5310   ** removed.
  5195   5311   */
  5196   5312   void sqlite3PagerUnref(DbPage *pPg){
  5197   5313     if( pPg ){
  5198   5314       Pager *pPager = pPg->pPager;
  5199         -    sqlite3PcacheRelease(pPg);
         5315  +    if( pPg->flags & PGHDR_MMAP ){
         5316  +      pagerReleaseMapPage(pPg);
         5317  +    }else{
         5318  +      sqlite3PcacheRelease(pPg);
         5319  +    }
  5200   5320       pagerUnlockIfUnused(pPager);
  5201   5321     }
  5202   5322   }
  5203   5323   
  5204   5324   /*
  5205   5325   ** This function is called at the start of every write transaction.
  5206   5326   ** There must already be a RESERVED or EXCLUSIVE lock on the database 
................................................................................
  5308   5428   */
  5309   5429   int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
  5310   5430     int rc = SQLITE_OK;
  5311   5431   
  5312   5432     if( pPager->errCode ) return pPager->errCode;
  5313   5433     assert( pPager->eState>=PAGER_READER && pPager->eState<PAGER_ERROR );
  5314   5434     pPager->subjInMemory = (u8)subjInMemory;
         5435  +
         5436  +  pagerUnmap(pPager);
  5315   5437   
  5316   5438     if( ALWAYS(pPager->eState==PAGER_READER) ){
  5317   5439       assert( pPager->pInJournal==0 );
  5318   5440   
  5319   5441       if( pagerUseWal(pPager) ){
  5320   5442         /* If the pager is configured to use locking_mode=exclusive, and an
  5321   5443         ** exclusive lock on the database is not already held, obtain it now.
................................................................................
  5530   5652     PgHdr *pPg = pDbPage;
  5531   5653     Pager *pPager = pPg->pPager;
  5532   5654     Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
  5533   5655   
  5534   5656     assert( pPager->eState>=PAGER_WRITER_LOCKED );
  5535   5657     assert( pPager->eState!=PAGER_ERROR );
  5536   5658     assert( assert_pager_state(pPager) );
         5659  +
         5660  +  /* There must not be any outstanding mmap pages at this point */
         5661  +  assert( pPager->nMmapOut==0 );
  5537   5662   
  5538   5663     if( nPagePerSector>1 ){
  5539   5664       Pgno nPageCount;          /* Total number of pages in database file */
  5540   5665       Pgno pg1;                 /* First page of the sector pPg is located on. */
  5541   5666       int nPage = 0;            /* Number of pages starting at pg1 to journal */
  5542   5667       int ii;                   /* Loop counter */
  5543   5668       int needSync = 0;         /* True if any page has PGHDR_NEED_SYNC */

Changes to src/pcache.h.

    48     48   /* Bit values for PgHdr.flags */
    49     49   #define PGHDR_DIRTY             0x002  /* Page has changed */
    50     50   #define PGHDR_NEED_SYNC         0x004  /* Fsync the rollback journal before
    51     51                                          ** writing this page to the database */
    52     52   #define PGHDR_NEED_READ         0x008  /* Content is unread */
    53     53   #define PGHDR_REUSE_UNLIKELY    0x010  /* A hint that reuse is unlikely */
    54     54   #define PGHDR_DONT_WRITE        0x020  /* Do not write content to disk */
           55  +
           56  +#define PGHDR_MMAP              0x040  /* This is an mmap page object */
    55     57   
    56     58   /* Initialize and shutdown the page cache subsystem */
    57     59   int sqlite3PcacheInitialize(void);
    58     60   void sqlite3PcacheShutdown(void);
    59     61   
    60     62   /* Page cache buffer management:
    61     63   ** These routines implement SQLITE_CONFIG_PAGECACHE.

Changes to src/sqlite.h.in.

   896    896   #define SQLITE_FCNTL_PERSIST_WAL            10
   897    897   #define SQLITE_FCNTL_OVERWRITE              11
   898    898   #define SQLITE_FCNTL_VFSNAME                12
   899    899   #define SQLITE_FCNTL_POWERSAFE_OVERWRITE    13
   900    900   #define SQLITE_FCNTL_PRAGMA                 14
   901    901   #define SQLITE_FCNTL_BUSYHANDLER            15
   902    902   #define SQLITE_FCNTL_TEMPFILENAME           16
          903  +#define SQLITE_FCNTL_GETFD                  17
   903    904   
   904    905   /*
   905    906   ** CAPI3REF: Mutex Handle
   906    907   **
   907    908   ** The mutex module within SQLite defines [sqlite3_mutex] to be an
   908    909   ** abstract type for a mutex object.  The SQLite core never looks
   909    910   ** at the internal representation of an [sqlite3_mutex].  It only

Changes to test/permutations.test.

   131    131   lappend ::testsuitelist xxx
   132    132   
   133    133   test_suite "veryquick" -prefix "" -description {
   134    134     "Very" quick test suite. Runs in less than 5 minutes on a workstation. 
   135    135     This test suite is the same as the "quick" tests, except that some files
   136    136     that test malloc and IO errors are omitted.
   137    137   } -files [
   138         -  test_set $allquicktests -exclude *malloc* *ioerr* *fault*
          138  +  test_set $allquicktests -exclude *malloc* *ioerr* *fault* \
          139  +  multiplex* server1.test shared2.test shared6.test
   139    140   ]
   140    141   
   141    142   test_suite "valgrind" -prefix "" -description {
   142    143     Run the "veryquick" test suite with a couple of multi-process tests (that
   143    144     fail under valgrind) omitted.
   144    145   } -files [
   145    146     test_set $allquicktests -exclude *malloc* *ioerr* *fault* wal.test atof1.test