/ Check-in [1431be95]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Replace the sqlite3_io_methods.xMremap interface with sqlite3_io_methods.xFetch and xUnfetch.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | experimental-mmap
Files: files | file ages | folders
SHA1:1431be95579160fb70408d43e17fc23c7b69ab4a
User & Date: dan 2013-03-23 21:00:41
Context
2013-03-25
13:50
Fix a case in the pager where an xFetch() reference was being leaked following an OOM error. check-in: 5885ba6c user: dan tags: experimental-mmap
2013-03-23
21:00
Replace the sqlite3_io_methods.xMremap interface with sqlite3_io_methods.xFetch and xUnfetch. check-in: 1431be95 user: dan tags: experimental-mmap
17:29
Improve a comment in wal.c. No code changes. check-in: 60b9f5e4 user: dan tags: experimental-mmap
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/backup.c.

   516    516               iOff+=pgszSrc
   517    517             ){
   518    518               PgHdr *pSrcPg = 0;
   519    519               const Pgno iSrcPg = (Pgno)((iOff/pgszSrc)+1);
   520    520               rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg);
   521    521               if( rc==SQLITE_OK ){
   522    522                 u8 *zData = sqlite3PagerGetData(pSrcPg);
   523         -              rc = sqlite3PagerWriteData(pDestPager, zData, pgszSrc, iOff);
          523  +              rc = sqlite3OsWrite(pFile, zData, pgszSrc, iOff);
   524    524               }
   525    525               sqlite3PagerUnref(pSrcPg);
   526    526             }
   527    527             if( rc==SQLITE_OK ){
   528    528               rc = backupTruncateFile(pFile, iSize);
   529    529             }
   530    530   

Changes to src/os.c.

   136    136     int pgsz,
   137    137     int bExtend,                    /* True to extend file if necessary */
   138    138     void volatile **pp              /* OUT: Pointer to mapping */
   139    139   ){
   140    140     DO_OS_MALLOC_TEST(id);
   141    141     return id->pMethods->xShmMap(id, iPage, pgsz, bExtend, pp);
   142    142   }
   143         -int sqlite3OsMremap(
   144         -  sqlite3_file *id,               /* Database file handle */
   145         -  int flags,                      /* SQLITE_MREMAP_XXX flags */
   146         -  i64 iOff,                       /* Offset at which mapping(s) start */
   147         -  i64 nOld,                       /* Size of old mapping */
   148         -  i64 nNew,                       /* Size of requested mapping */
   149         -  void **pp                       /* IN/OUT: Pointer to mapped region */
   150         -){
   151         -  return id->pMethods->xMremap(id, flags, iOff, nOld, nNew, pp);
          143  +
          144  +int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){
          145  +  return id->pMethods->xFetch(id, iOff, iAmt, pp);
          146  +}
          147  +int sqlite3OsUnfetch(sqlite3_file *id, void *p){
          148  +  return id->pMethods->xUnfetch(id, p);
   152    149   }
   153    150   
   154    151   /*
   155    152   ** The next group of routines are convenience wrappers around the
   156    153   ** VFS methods.
   157    154   */
   158    155   int sqlite3OsOpen(

Changes to src/os.h.

   255    255   #define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0
   256    256   int sqlite3OsSectorSize(sqlite3_file *id);
   257    257   int sqlite3OsDeviceCharacteristics(sqlite3_file *id);
   258    258   int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **);
   259    259   int sqlite3OsShmLock(sqlite3_file *id, int, int, int);
   260    260   void sqlite3OsShmBarrier(sqlite3_file *id);
   261    261   int sqlite3OsShmUnmap(sqlite3_file *id, int);
   262         -int sqlite3OsMremap(sqlite3_file *id, int, i64, i64, i64, void **);
          262  +int sqlite3OsFetch(sqlite3_file *id, i64, int, void **);
          263  +int sqlite3OsUnfetch(sqlite3_file *, void *);
   263    264   
   264    265   
   265    266   /* 
   266    267   ** Functions for accessing sqlite3_vfs methods 
   267    268   */
   268    269   int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *);
   269    270   int sqlite3OsDelete(sqlite3_vfs *, const char *, int);

Changes to src/os_unix.c.

   245    245     ** occur if a file is updated without also updating the transaction
   246    246     ** counter.  This test is made to avoid new problems similar to the
   247    247     ** one described by ticket #3584. 
   248    248     */
   249    249     unsigned char transCntrChng;   /* True if the transaction counter changed */
   250    250     unsigned char dbUpdate;        /* True if any part of database file changed */
   251    251     unsigned char inNormalWrite;   /* True if in a normal write operation */
   252         -  sqlite3_int64 mmapSize;        /* Size of xMremap() */
   253         -  void *pMapRegion;              /* Area memory mapped */
          252  +
   254    253   #endif
          254  +  sqlite3_int64 mmapSize;         /* Usable size of mapping at pMapRegion */
          255  +  sqlite3_int64 mmapOrigsize;     /* Actual size of mapping at pMapRegion */
          256  +  sqlite3_int64 mmapLimit;        /* Configured FCNTL_MMAP_SIZE value */
          257  +  void *pMapRegion;               /* Memory mapped region */
          258  +  int nFetchOut;                  /* Number of outstanding xFetch refs */
          259  +
   255    260   #ifdef SQLITE_TEST
   256    261     /* In test mode, increase the size of this structure a bit so that 
   257    262     ** it is larger than the struct CrashFile defined in test6.c.
   258    263     */
   259    264     char aPadding[32];
   260    265   #endif
   261    266   };
................................................................................
  1801   1806   ** If the locking level of the file descriptor is already at or below
  1802   1807   ** the requested locking level, this routine is a no-op.
  1803   1808   */
  1804   1809   static int unixUnlock(sqlite3_file *id, int eFileLock){
  1805   1810     return posixUnlock(id, eFileLock, 0);
  1806   1811   }
  1807   1812   
         1813  +static int unixMapfile(unixFile *pFd, i64 nByte);
         1814  +static void unixUnmapfile(unixFile *pFd);
         1815  +
  1808   1816   /*
  1809   1817   ** This function performs the parts of the "close file" operation 
  1810   1818   ** common to all locking schemes. It closes the directory and file
  1811   1819   ** handles, if they are valid, and sets all fields of the unixFile
  1812   1820   ** structure to 0.
  1813   1821   **
  1814   1822   ** It is *not* necessary to hold the mutex when this routine is called,
  1815   1823   ** even on VxWorks.  A mutex will be acquired on VxWorks by the
  1816   1824   ** vxworksReleaseFileId() routine.
  1817   1825   */
  1818   1826   static int closeUnixFile(sqlite3_file *id){
  1819   1827     unixFile *pFile = (unixFile*)id;
         1828  +  unixUnmapfile(pFile);
  1820   1829     if( pFile->h>=0 ){
  1821   1830       robust_close(pFile, pFile->h, __LINE__);
  1822   1831       pFile->h = -1;
  1823   1832     }
  1824   1833   #if OS_VXWORKS
  1825   1834     if( pFile->pId ){
  1826   1835       if( pFile->ctrlFlags & UNIXFILE_DELETE ){
................................................................................
  3070   3079     void *pBuf, 
  3071   3080     int amt,
  3072   3081     sqlite3_int64 offset
  3073   3082   ){
  3074   3083     unixFile *pFile = (unixFile *)id;
  3075   3084     int got;
  3076   3085     assert( id );
  3077         -  assert( offset>=pFile->mmapSize );  /* Never read from the mmapped region */
  3078   3086   
  3079   3087     /* If this is a database file (not a journal, master-journal or temp
  3080   3088     ** file), the bytes in the locking range should never be read or written. */
  3081   3089   #if 0
  3082   3090     assert( pFile->pUnused==0
  3083   3091          || offset>=PENDING_BYTE+512
  3084   3092          || offset+amt<=PENDING_BYTE 
  3085   3093     );
  3086   3094   #endif
         3095  +
         3096  +  /* Deal with as much of this read request as possible by transferring
         3097  +  ** data from the memory mapping using memcpy().  */
         3098  +  if( offset<pFile->mmapSize ){
         3099  +    if( offset+amt <= pFile->mmapSize ){
         3100  +      memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
         3101  +      return SQLITE_OK;
         3102  +    }else{
         3103  +      int nCopy = pFile->mmapSize - offset;
         3104  +      memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
         3105  +      pBuf = &((u8 *)pBuf)[nCopy];
         3106  +      amt -= nCopy;
         3107  +      offset += nCopy;
         3108  +    }
         3109  +  }
  3087   3110   
  3088   3111     got = seekAndRead(pFile, offset, pBuf, amt);
  3089   3112     if( got==amt ){
  3090   3113       return SQLITE_OK;
  3091   3114     }else if( got<0 ){
  3092   3115       /* lastErrno set by seekAndRead */
  3093   3116       return SQLITE_IOERR_READ;
................................................................................
  3153   3176     int amt,
  3154   3177     sqlite3_int64 offset 
  3155   3178   ){
  3156   3179     unixFile *pFile = (unixFile*)id;
  3157   3180     int wrote = 0;
  3158   3181     assert( id );
  3159   3182     assert( amt>0 );
  3160         -  assert( offset>=pFile->mmapSize );   /* Never write into the mmapped region */
  3161   3183   
  3162   3184     /* If this is a database file (not a journal, master-journal or temp
  3163   3185     ** file), the bytes in the locking range should never be read or written. */
  3164   3186   #if 0
  3165   3187     assert( pFile->pUnused==0
  3166   3188          || offset>=PENDING_BYTE+512
  3167   3189          || offset+amt<=PENDING_BYTE 
................................................................................
  3185   3207         SimulateIOErrorBenign(0);
  3186   3208         if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){
  3187   3209           pFile->transCntrChng = 1;  /* The transaction counter has changed */
  3188   3210         }
  3189   3211       }
  3190   3212     }
  3191   3213   #endif
         3214  +
         3215  +  /* Deal with as much of this write request as possible by transferring
         3216  +  ** data to the memory mapping using memcpy().  */
         3217  +  if( offset<pFile->mmapSize ){
         3218  +    if( offset+amt <= pFile->mmapSize ){
         3219  +      memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
         3220  +      return SQLITE_OK;
         3221  +    }else{
         3222  +      int nCopy = pFile->mmapSize - offset;
         3223  +      memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
         3224  +      pBuf = &((u8 *)pBuf)[nCopy];
         3225  +      amt -= nCopy;
         3226  +      offset += nCopy;
         3227  +    }
         3228  +  }
  3192   3229   
  3193   3230     while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
  3194   3231       amt -= wrote;
  3195   3232       offset += wrote;
  3196   3233       pBuf = &((char*)pBuf)[wrote];
  3197   3234     }
  3198   3235     SimulateIOError(( wrote=(-1), amt=1 ));
................................................................................
  3466   3503       ** that effectively updates the change counter.  This might happen
  3467   3504       ** when restoring a database using the backup API from a zero-length
  3468   3505       ** source.
  3469   3506       */
  3470   3507       if( pFile->inNormalWrite && nByte==0 ){
  3471   3508         pFile->transCntrChng = 1;
  3472   3509       }
         3510  +#endif
  3473   3511   
  3474   3512       /* If the file was just truncated to a size smaller than the currently
  3475   3513       ** mapped region, reduce the effective mapping size as well. SQLite will
  3476   3514       ** use read() and write() to access data beyond this point from now on.  
  3477   3515       */
  3478   3516       if( nByte<pFile->mmapSize ){
  3479   3517         pFile->mmapSize = nByte;
  3480   3518       }
  3481         -#endif
  3482   3519   
  3483   3520       return SQLITE_OK;
  3484   3521     }
  3485   3522   }
  3486   3523   
  3487   3524   /*
  3488   3525   ** Determine the current size of a file in bytes
................................................................................
  3563   3600           int nWrite = seekAndWrite(pFile, iWrite, "", 1);
  3564   3601           if( nWrite!=1 ) return SQLITE_IOERR_WRITE;
  3565   3602           iWrite += nBlk;
  3566   3603         }
  3567   3604   #endif
  3568   3605       }
  3569   3606     }
         3607  +
         3608  +  if( pFile->mmapLimit>0 ){
         3609  +    int rc;
         3610  +    if( pFile->szChunk<=0 ){
         3611  +      if( robust_ftruncate(pFile->h, nByte) ){
         3612  +        pFile->lastErrno = errno;
         3613  +        return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
         3614  +      }
         3615  +    }
         3616  +
         3617  +    rc = unixMapfile(pFile, nByte);
         3618  +    return rc;
         3619  +  }
  3570   3620   
  3571   3621     return SQLITE_OK;
  3572   3622   }
  3573   3623   
  3574   3624   /*
  3575   3625   ** If *pArg is initially negative then this is a query.  Set *pArg to
  3576   3626   ** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set.
................................................................................
  3631   3681         char *zTFile = sqlite3_malloc( pFile->pVfs->mxPathname );
  3632   3682         if( zTFile ){
  3633   3683           unixGetTempname(pFile->pVfs->mxPathname, zTFile);
  3634   3684           *(char**)pArg = zTFile;
  3635   3685         }
  3636   3686         return SQLITE_OK;
  3637   3687       }
  3638         -    case SQLITE_FCNTL_GETFD: {
  3639         -      *(int*)pArg = pFile->h;
         3688  +    case SQLITE_FCNTL_MMAP_SIZE: {
         3689  +      pFile->mmapLimit = *(i64*)pArg;
  3640   3690         return SQLITE_OK;
  3641   3691       }
  3642   3692   #ifdef SQLITE_DEBUG
  3643   3693       /* The pager calls this method to signal that it has done
  3644   3694       ** a rollback and that the database is therefore unchanged and
  3645   3695       ** hence it is OK for the transaction change counter to be
  3646   3696       ** unchanged.
................................................................................
  4447   4497   **
  4448   4498   **     ROUNDUP(0,  8) ->  0
  4449   4499   **     ROUNDUP(13, 8) -> 16
  4450   4500   **     ROUNDUP(32, 8) -> 32
  4451   4501   */
  4452   4502   #define ROUNDUP(x,y)     (((x)+y-1)&~(y-1))
  4453   4503   
  4454         -/*
  4455         -** Map, remap or unmap part of the database file.
  4456         -*/
  4457         -static int unixMremap(
  4458         -  sqlite3_file *fd,               /* Main database file */
  4459         -  int flags,                      /* Mask of SQLITE_MREMAP_XXX flags */
  4460         -  sqlite3_int64 iOff,             /* Offset to start mapping at */
  4461         -  sqlite3_int64 nOld,             /* Size of old mapping, or zero */
  4462         -  sqlite3_int64 nNew,             /* Size of new mapping, or zero */
  4463         -  void **ppMap                    /* IN/OUT: Old/new mappings */
  4464         -){
  4465         -  unixFile *p = (unixFile *)fd;   /* The underlying database file */
  4466         -  int rc = SQLITE_OK;             /* Return code */
  4467         -  void *pNew = 0;                 /* New mapping */
  4468         -  i64 nNewRnd;                    /* nNew rounded up */
  4469         -  i64 nOldRnd;                    /* nOld rounded up */
  4470         -
  4471         -  assert( iOff==0 );
  4472         -  /* assert( p->mmapSize==nOld ); */
  4473         -  assert( p->pMapRegion==0 || p->pMapRegion==(*ppMap) );
  4474         -
  4475         -  /* If the SQLITE_MREMAP_EXTEND flag is set, then the size of the requested 
  4476         -  ** mapping (nNew bytes) may be greater than the size of the database file.
  4477         -  ** If this is the case, extend the file on disk using ftruncate().  */
  4478         -  assert( nNew>0 || (flags & SQLITE_MREMAP_EXTEND)==0 );
  4479         -  if( flags & SQLITE_MREMAP_EXTEND ){
         4504  +static void unixUnmapfile(unixFile *pFd){
         4505  +  assert( pFd->nFetchOut==0 );
         4506  +  if( pFd->pMapRegion ){
         4507  +    munmap(pFd->pMapRegion, pFd->mmapOrigsize);
         4508  +    pFd->pMapRegion = 0;
         4509  +    pFd->mmapSize = 0;
         4510  +    pFd->mmapOrigsize = 0;
         4511  +  }
         4512  +}
         4513  +
         4514  +static int unixMapfile(unixFile *pFd, i64 nByte){
         4515  +  i64 nMap = nByte;
         4516  +  int rc;
         4517  +
         4518  +  assert( nMap>=0 || pFd->nFetchOut==0 );
         4519  +  if( pFd->nFetchOut>0 ) return SQLITE_OK;
         4520  +
         4521  +  if( nMap<0 ){
  4480   4522       struct stat statbuf;          /* Low-level file information */
  4481         -    rc = osFstat(p->h, &statbuf);
  4482         -    if( rc==SQLITE_OK && nNew>statbuf.st_size ){
  4483         -      rc = robust_ftruncate(p->h, nNew);
  4484         -    }
  4485         -    if( rc!=SQLITE_OK ) return rc;
  4486         -  }
  4487         -
  4488         -  /* According to some sources, the effect of changing the size of the
  4489         -  ** underlying file on mapped regions that correspond to the added or
  4490         -  ** removed pages is undefined. However, there is reason to believe that
  4491         -  ** on modern platforms like Linux or OSX, things just work. For example,
  4492         -  ** it is possible to create a mapping larger than the file on disk and
  4493         -  ** extend the file on disk later on.
  4494         -  **
  4495         -  ** Exploit this on Linux and OSX to reduce the number of munmap()/mmap() 
  4496         -  ** calls required if the file size is changing. In this case all mappings 
  4497         -  ** are rounded up to the nearest 4MB. And if a new mapping is requested 
  4498         -  ** that has the same rounded size as an old mapping, the old mapping can 
  4499         -  ** be reused as is. */
  4500         -#if defined(__APPLE__) || defined(__linux__)
  4501         -  nNewRnd = ROUNDUP(nNew, 4096*1024);
  4502         -  nOldRnd = ROUNDUP(nOld, 4096*1024);
  4503         -#else
  4504         -  nNewRnd = ROUNDUP(nNew, 4096*1);
  4505         -  nOldRnd = ROUNDUP(nOld, 4096*1);
  4506         -#endif
  4507         -
  4508         -  /* On OSX or Linux, reuse the old mapping if it is the right size. */
  4509         -#if defined(__APPLE__) || defined(__linux__)
  4510         -  if( nNewRnd==nOldRnd ){
  4511         -    VVA_ONLY( p->mmapSize = nNew; )
  4512         -    return SQLITE_OK;
  4513         -  }
  4514         -#endif
  4515         -
  4516         -  /* If we get this far, unmap any old mapping. */
  4517         -  if( nOldRnd!=0 ){
  4518         -    void *pOld = *ppMap;
  4519         -    munmap(pOld, nOldRnd);
  4520         -    VVA_ONLY( p->mmapSize = 0; p->pMapRegion = 0; );
  4521         -  }
  4522         -
  4523         -  /* And, if required, use mmap() to create a new mapping. */
  4524         -  if( nNewRnd>0 ){
  4525         -    int flags = PROT_READ;
  4526         -    if( (p->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
  4527         -    pNew = mmap(0, nNewRnd, flags, MAP_SHARED, p->h, iOff);
  4528         -    if( pNew==MAP_FAILED ){
  4529         -      pNew = 0;
  4530         -      VVA_ONLY( p->mmapSize = 0; p->pMapRegion = 0; )
  4531         -      rc = SQLITE_IOERR_MREMAP;
  4532         -    }else{
  4533         -      VVA_ONLY( p->mmapSize = nNew; p->pMapRegion = pNew; )
  4534         -    }
  4535         -  }
  4536         -
  4537         -  *ppMap = pNew;
  4538         -  return rc;
         4523  +    rc = osFstat(pFd->h, &statbuf);
         4524  +    if( rc!=SQLITE_OK ){
         4525  +      return SQLITE_IOERR_FSTAT;
         4526  +    }
         4527  +    nMap = statbuf.st_size;
         4528  +  }
         4529  +  if( nMap>pFd->mmapLimit ){
         4530  +    nMap = pFd->mmapLimit;
         4531  +  }
         4532  +
         4533  +  if( nMap!=pFd->mmapSize ){
         4534  +    void *pNew;
         4535  +    unixUnmapfile(pFd);
         4536  +
         4537  +    if( nMap>0 ){
         4538  +      void *pNew;
         4539  +      int flags = PROT_READ;
         4540  +      if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
         4541  +      pNew = mmap(0, ROUNDUP(nMap, 4096), flags, MAP_SHARED, pFd->h, 0);
         4542  +      if( pNew==MAP_FAILED ){
         4543  +        return SQLITE_IOERR_MREMAP;
         4544  +      }
         4545  +
         4546  +      pFd->pMapRegion = pNew;
         4547  +      pFd->mmapOrigsize = pFd->mmapSize = nMap;
         4548  +    }
         4549  +  }
         4550  +
         4551  +  return SQLITE_OK;
         4552  +}
         4553  +
         4554  +static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
         4555  +  unixFile *pFd = (unixFile *)fd;   /* The underlying database file */
         4556  +  *pp = 0;
         4557  +
         4558  +  if( pFd->mmapLimit>0 ){
         4559  +    if( pFd->pMapRegion==0 ){
         4560  +      int rc = unixMapfile(pFd, -1);
         4561  +      if( rc!=SQLITE_OK ) return rc;
         4562  +    }
         4563  +    if( pFd->mmapSize >= iOff+nAmt ){
         4564  +      *pp = &((u8 *)pFd->pMapRegion)[iOff];
         4565  +      pFd->nFetchOut++;
         4566  +    }
         4567  +  }
         4568  +  return SQLITE_OK;
         4569  +}
         4570  +
         4571  +static int unixUnfetch(sqlite3_file *fd, void *p){
         4572  +  unixFile *pFd = (unixFile *)fd;   /* The underlying database file */
         4573  +
         4574  +  assert( (p==0)==(pFd->nFetchOut==0) );
         4575  +
         4576  +  if( p ){
         4577  +    pFd->nFetchOut--;
         4578  +  }else{
         4579  +    unixUnmapfile(pFd);
         4580  +  }
         4581  +
         4582  +  assert( pFd->nFetchOut>=0 );
         4583  +  return SQLITE_OK;
  4539   4584   }
  4540   4585   
  4541   4586   /*
  4542   4587   ** Here ends the implementation of all sqlite3_file methods.
  4543   4588   **
  4544   4589   ********************** End sqlite3_file Methods *******************************
  4545   4590   ******************************************************************************/
................................................................................
  4593   4638      unixFileControl,            /* xFileControl */                            \
  4594   4639      unixSectorSize,             /* xSectorSize */                             \
  4595   4640      unixDeviceCharacteristics,  /* xDeviceCapabilities */                     \
  4596   4641      unixShmMap,                 /* xShmMap */                                 \
  4597   4642      unixShmLock,                /* xShmLock */                                \
  4598   4643      unixShmBarrier,             /* xShmBarrier */                             \
  4599   4644      unixShmUnmap,               /* xShmUnmap */                               \
  4600         -   unixMremap,                 /* xMremap */                                 \
         4645  +   unixFetch,                  /* xFetch */                                  \
         4646  +   unixUnfetch,                /* xUnfetch */                                \
  4601   4647   };                                                                           \
  4602   4648   static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){   \
  4603   4649     UNUSED_PARAMETER(z); UNUSED_PARAMETER(p);                                  \
  4604   4650     return &METHOD;                                                            \
  4605   4651   }                                                                            \
  4606   4652   static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p)    \
  4607   4653       = FINDER##Impl;
................................................................................
  4861   4907     assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 );
  4862   4908   
  4863   4909     OSTRACE(("OPEN    %-3d %s\n", h, zFilename));
  4864   4910     pNew->h = h;
  4865   4911     pNew->pVfs = pVfs;
  4866   4912     pNew->zPath = zFilename;
  4867   4913     pNew->ctrlFlags = (u8)ctrlFlags;
  4868         -  VVA_ONLY( pNew->mmapSize = 0; )
  4869   4914     if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),
  4870   4915                              "psow", SQLITE_POWERSAFE_OVERWRITE) ){
  4871   4916       pNew->ctrlFlags |= UNIXFILE_PSOW;
  4872   4917     }
  4873   4918     if( strcmp(pVfs->zName,"unix-excl")==0 ){
  4874   4919       pNew->ctrlFlags |= UNIXFILE_EXCL;
  4875   4920     }

Changes to src/pager.c.

   652    652     i64 journalOff;             /* Current write offset in the journal file */
   653    653     i64 journalHdr;             /* Byte offset to previous journal header */
   654    654     sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */
   655    655     PagerSavepoint *aSavepoint; /* Array of active savepoints */
   656    656     int nSavepoint;             /* Number of elements in aSavepoint[] */
   657    657     char dbFileVers[16];        /* Changes whenever database file changes */
   658    658   
   659         -  void *pMap;                 /* Memory mapped prefix of database file */
   660         -  i64 nMap;                   /* Size of mapping at pMap in bytes */ 
   661         -  i64 nMapValid;              /* Bytes at pMap known to be valid */
   662         -  i64 nMapLimit;              /* Maximum permitted mapping size */
          659  +  u8 bUseFetch;               /* True to use xFetch() */
   663    660     int nMapCfgLimit;           /* Configured limit value */
   664    661     int nMmapOut;               /* Number of mmap pages currently outstanding */
   665    662     PgHdr *pFree;               /* List of free mmap page headers (pDirty) */
   666         -  int bMapResize;             /* Check if the mapping should be resized */
   667    663     /*
   668    664     ** End of the routinely-changing class members
   669    665     ***************************************************************************/
   670    666   
   671    667     u16 nExtra;                 /* Add this many bytes to each in-memory page */
   672    668     i16 nReserve;               /* Number of unused bytes at end of each page */
   673    669     u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
................................................................................
  2083   2079                              (int)pPager->nReserve);
  2084   2080     }
  2085   2081   }
  2086   2082   #else
  2087   2083   # define pagerReportSize(X)     /* No-op if we do not support a codec */
  2088   2084   #endif
  2089   2085   
  2090         -/*
  2091         -** Write nBuf bytes of data from buffer pBuf to offset iOff of the 
  2092         -** database file. If this part of the database file is memory mapped,
  2093         -** use memcpy() to do so. Otherwise, call sqlite3OsWrite().
  2094         -**
  2095         -** Return SQLITE_OK if successful, or an SQLite error code if an error 
  2096         -** occurs.
  2097         -*/
  2098         -int sqlite3PagerWriteData(Pager *pPager, const void *pBuf, int nBuf, i64 iOff){
  2099         -  int rc = SQLITE_OK;
  2100         -  if( pPager->nMapValid>=(iOff+nBuf) ){
  2101         -    memcpy(&((u8 *)(pPager->pMap))[iOff], pBuf, nBuf);
  2102         -  }else{
  2103         -    rc = sqlite3OsWrite(pPager->fd, pBuf, nBuf, iOff);
  2104         -  }
  2105         -  return rc;
  2106         -}
  2107         -
  2108   2086   /*
  2109   2087   ** Read a single page from either the journal file (if isMainJrnl==1) or
  2110   2088   ** from the sub-journal (if isMainJrnl==0) and playback that page.
  2111   2089   ** The page begins at offset *pOffset into the file. The *pOffset
  2112   2090   ** value is increased to the start of the next page in the journal.
  2113   2091   **
  2114   2092   ** The main rollback journal uses checksums - the statement journal does 
................................................................................
  2275   2253     if( isOpen(pPager->fd)
  2276   2254      && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN)
  2277   2255      && isSynced
  2278   2256     ){
  2279   2257       i64 ofst = (pgno-1)*(i64)pPager->pageSize;
  2280   2258       testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 );
  2281   2259       assert( !pagerUseWal(pPager) );
  2282         -    rc = sqlite3PagerWriteData(pPager, aData, pPager->pageSize, ofst);
         2260  +    rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst);
  2283   2261       if( pgno>pPager->dbFileSize ){
  2284   2262         pPager->dbFileSize = pgno;
  2285   2263       }
  2286   2264       if( pPager->pBackup ){
  2287   2265         CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM);
  2288   2266         sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)aData);
  2289   2267         CODEC2(pPager, aData, pgno, 7, rc=SQLITE_NOMEM, aData);
................................................................................
  2530   2508       assert( pPager->eLock==EXCLUSIVE_LOCK );
  2531   2509       /* TODO: Is it safe to use Pager.dbFileSize here? */
  2532   2510       rc = sqlite3OsFileSize(pPager->fd, &currentSize);
  2533   2511       newSize = szPage*(i64)nPage;
  2534   2512       if( rc==SQLITE_OK && currentSize!=newSize ){
  2535   2513         if( currentSize>newSize ){
  2536   2514           rc = sqlite3OsTruncate(pPager->fd, newSize);
  2537         -        if( newSize<pPager->nMapValid ){
  2538         -          pPager->nMapValid = newSize;
  2539         -        }
  2540   2515         }else if( (currentSize+szPage)<=newSize ){
  2541   2516           char *pTmp = pPager->pTmpSpace;
  2542   2517           memset(pTmp, 0, szPage);
  2543   2518           testcase( (newSize-szPage) == currentSize );
  2544   2519           testcase( (newSize-szPage) >  currentSize );
  2545   2520           rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage);
  2546   2521         }
................................................................................
  2880   2855     }
  2881   2856   
  2882   2857     if( iFrame ){
  2883   2858       /* Try to pull the page from the write-ahead log. */
  2884   2859       rc = sqlite3WalReadFrame(pPager->pWal, iFrame, pgsz, pPg->pData);
  2885   2860     }else{
  2886   2861       i64 iOffset = (pgno-1)*(i64)pPager->pageSize;
  2887         -    if( pPager->pMap && pPager->nMapValid>=iOffset+pPager->pageSize ){
  2888         -      memcpy(pPg->pData, &((u8 *)(pPager->pMap))[iOffset], pPager->pageSize);
  2889         -    }else{
  2890         -      rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
  2891         -      if( rc==SQLITE_IOERR_SHORT_READ ){
  2892         -        rc = SQLITE_OK;
  2893         -      }
         2862  +    rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
         2863  +    if( rc==SQLITE_IOERR_SHORT_READ ){
         2864  +      rc = SQLITE_OK;
  2894   2865       }
  2895   2866     }
  2896   2867   
  2897   2868     if( pgno==1 ){
  2898   2869       if( rc ){
  2899   2870         /* If the read is unsuccessful, set the dbFileVers[] to something
  2900   2871         ** that will never be a valid file version.  dbFileVers[] is a copy
................................................................................
  3116   3087     ** the duplicate call is harmless.
  3117   3088     */
  3118   3089     sqlite3WalEndReadTransaction(pPager->pWal);
  3119   3090   
  3120   3091     rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed);
  3121   3092     if( rc!=SQLITE_OK || changed ){
  3122   3093       pager_reset(pPager);
         3094  +    if( pPager->bUseFetch ) sqlite3OsUnfetch(pPager->fd, 0);
  3123   3095     }
  3124   3096   
  3125   3097     return rc;
  3126   3098   }
  3127   3099   #endif
  3128   3100   
  3129   3101   /*
................................................................................
  3378   3350   ** Change the maximum number of in-memory pages that are allowed.
  3379   3351   */
  3380   3352   void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
  3381   3353     sqlite3PcacheSetCachesize(pPager->pPCache, mxPage);
  3382   3354   }
  3383   3355   
  3384   3356   /*
  3385         -** Set Pager.nMapLimit, the maximum permitted mapping size, based on the
  3386         -** current values of Pager.nMapCfgLimit and Pager.pageSize.
  3387         -**
  3388         -** If this connection should not use mmap at all, set nMapLimit to zero.
         3357  +** Invoke SQLITE_FCNTL_MMAP_SIZE based on the current value of nMapCfgLimit.
  3389   3358   */
  3390   3359   static void pagerFixMaplimit(Pager *pPager){
  3391         -  if( isOpen(pPager->fd)==0 
  3392         -   || pPager->fd->pMethods->iVersion<3 
  3393         -   || pPager->fd->pMethods->xMremap==0 
  3394         -   || pPager->tempFile 
  3395         -  ){
  3396         -    pPager->nMapLimit = 0;
  3397         -  }else if( pPager->nMapCfgLimit<0 ){
  3398         -    pPager->nMapLimit = (i64)pPager->nMapCfgLimit * -1024;
  3399         -  }else{
  3400         -    pPager->nMapLimit = (i64)pPager->nMapCfgLimit * pPager->pageSize;
         3360  +  sqlite3_file *fd = pPager->fd;
         3361  +  if( isOpen(fd) ){
         3362  +    pPager->bUseFetch = (fd->pMethods->iVersion>=3) && pPager->nMapCfgLimit!=0;
         3363  +    if( pPager->bUseFetch ){
         3364  +      void *p;
         3365  +      i64 nMapLimit;
         3366  +      if( pPager->nMapCfgLimit<0 ){
         3367  +        nMapLimit = (i64)pPager->nMapCfgLimit * -1024;
         3368  +      }else{
         3369  +        nMapLimit = (i64)pPager->nMapCfgLimit * pPager->pageSize;
         3370  +      }
         3371  +
         3372  +      p = (void *)&nMapLimit;
         3373  +      sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_SIZE, p);
         3374  +    }
  3401   3375     }
  3402   3376   }
  3403   3377   
  3404   3378   /*
  3405   3379   ** Change the maximum size of any memory mapping made of the database file.
  3406   3380   */
  3407   3381   void sqlite3PagerSetMmapsize(Pager *pPager, int nMap){
................................................................................
  3866   3840     }
  3867   3841     if( rc==SQLITE_OK ){
  3868   3842       rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr);
  3869   3843     }
  3870   3844     return rc;
  3871   3845   }
  3872   3846   
  3873         -/*
  3874         -** Unmap any memory mapping of the database file.
  3875         -*/
  3876         -static int pagerUnmap(Pager *pPager){
  3877         -  assert( pPager->nMmapOut==0 );
  3878         -  if( pPager->pMap ){
  3879         -    sqlite3OsMremap(pPager->fd, 0, 0, pPager->nMap, 0, &pPager->pMap);
  3880         -    pPager->nMap = 0;
  3881         -    pPager->nMapValid = 0;
  3882         -  }
  3883         -  return SQLITE_OK;
  3884         -}
  3885         -
  3886         -/*
  3887         -** Create, or recreate, the memory mapping of the database file.
  3888         -*/
  3889         -static int pagerMap(Pager *pPager, int bExtend){
  3890         -  int rc = SQLITE_OK;             /* Return code */
  3891         -  Pgno nPg;                       /* Size of mapping to request in pages */
  3892         -  i64 sz;                         /* Size of mapping to request in bytes */
  3893         -
  3894         -  assert( isOpen(pPager->fd) && pPager->tempFile==0 );
  3895         -  assert( pPager->pMap==0 || pPager->nMap>0 );
  3896         -  /* assert( pPager->eState>=1 ); */
  3897         -  assert( pPager->nMmapOut==0 );
  3898         -  assert( pPager->nMapLimit>0 );
  3899         -
  3900         -  /* Figure out how large a mapping to request. Set variable sz to this 
  3901         -  ** value in bytes. */
  3902         -  nPg = (pPager->eState==1) ? pPager->dbSize : pPager->dbFileSize;
  3903         -  sz = (i64)nPg * pPager->pageSize;
  3904         -  if( sz>pPager->nMapLimit ) sz = pPager->nMapLimit;
  3905         -
  3906         -  if( sz!=pPager->nMapValid ){
  3907         -    int flags = (bExtend ? SQLITE_MREMAP_EXTEND : 0);
  3908         -    rc = sqlite3OsMremap(pPager->fd, flags, 0, pPager->nMap, sz, &pPager->pMap);
  3909         -    if( rc==SQLITE_OK ){
  3910         -      assert( pPager->pMap!=0 );
  3911         -      pPager->nMap = sz;
  3912         -    }else{
  3913         -      assert( pPager->pMap==0 );
  3914         -      pPager->nMap = 0;
  3915         -    }
  3916         -    pPager->nMapValid = pPager->nMap;
  3917         -  }
  3918         -  pPager->bMapResize = 0;
  3919         -
  3920         -  return rc;
  3921         -}
  3922         -
  3923   3847   /*
  3924   3848   ** Obtain a reference to a memory mapped page object for page number pgno. 
  3925         -** The caller must ensure that page pgno lies within the currently mapped 
  3926         -** region. If successful, set *ppPage to point to the new page reference
         3849  +** The new object will use the pointer pData, obtained from xFetch().
         3850  +** If successful, set *ppPage to point to the new page reference
  3927   3851   ** and return SQLITE_OK. Otherwise, return an SQLite error code and set
  3928   3852   ** *ppPage to zero.
  3929   3853   **
  3930   3854   ** Page references obtained by calling this function should be released
  3931   3855   ** by calling pagerReleaseMapPage().
  3932   3856   */
  3933         -static int pagerAcquireMapPage(Pager *pPager, Pgno pgno, PgHdr **ppPage){
         3857  +static int pagerAcquireMapPage(
         3858  +  Pager *pPager,                  /* Pager object */
         3859  +  Pgno pgno,                      /* Page number */
         3860  +  void *pData,                    /* xFetch()'d data for this page */
         3861  +  PgHdr **ppPage                  /* OUT: Acquired page object */
         3862  +){
  3934   3863     PgHdr *p;                       /* Memory mapped page to return */
  3935   3864   
  3936   3865     if( pPager->pFree ){
  3937   3866       *ppPage = p = pPager->pFree;
  3938   3867       pPager->pFree = p->pDirty;
  3939   3868       p->pDirty = 0;
  3940   3869       memset(p->pExtra, 0, pPager->nExtra);
................................................................................
  3951   3880   
  3952   3881     assert( p->pExtra==(void *)&p[1] );
  3953   3882     assert( p->pPage==0 );
  3954   3883     assert( p->flags==PGHDR_MMAP );
  3955   3884     assert( p->pPager==pPager );
  3956   3885     assert( p->nRef==1 );
  3957   3886   
  3958         -  p->pData = &((u8 *)pPager->pMap)[(i64)(pgno-1) * pPager->pageSize];
  3959   3887     p->pgno = pgno;
         3888  +  p->pData = pData;
  3960   3889     pPager->nMmapOut++;
  3961   3890   
  3962   3891     return SQLITE_OK;
  3963   3892   }
  3964   3893   
  3965   3894   /*
  3966   3895   ** Release a reference to page pPg. pPg must have been returned by an 
................................................................................
  3967   3896   ** earlier call to pagerAcquireMapPage().
  3968   3897   */
  3969   3898   static void pagerReleaseMapPage(PgHdr *pPg){
  3970   3899     Pager *pPager = pPg->pPager;
  3971   3900     pPager->nMmapOut--;
  3972   3901     pPg->pDirty = pPager->pFree;
  3973   3902     pPager->pFree = pPg;
         3903  +
         3904  +  assert( pPager->fd->pMethods->iVersion>=3 );
         3905  +  sqlite3OsUnfetch(pPager->fd, pPg->pData);
  3974   3906   }
  3975   3907   
  3976   3908   /*
  3977   3909   ** Free all PgHdr objects stored in the Pager.pFree list.
  3978   3910   */
  3979   3911   static void pagerFreeMapHdrs(Pager *pPager){
  3980   3912     PgHdr *p;
................................................................................
  4002   3934   */
  4003   3935   int sqlite3PagerClose(Pager *pPager){
  4004   3936     u8 *pTmp = (u8 *)pPager->pTmpSpace;
  4005   3937   
  4006   3938     assert( assert_pager_state(pPager) );
  4007   3939     disable_simulated_io_errors();
  4008   3940     sqlite3BeginBenignMalloc();
  4009         -  pagerUnmap(pPager);
  4010   3941     pagerFreeMapHdrs(pPager);
  4011   3942     /* pPager->errCode = 0; */
  4012   3943     pPager->exclusiveMode = 0;
  4013   3944   #ifndef SQLITE_OMIT_WAL
  4014   3945     sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, pTmp);
  4015   3946     pPager->pWal = 0;
  4016   3947   #endif
................................................................................
  4212   4143     */
  4213   4144     sqlite3PcacheClearSyncFlags(pPager->pPCache);
  4214   4145     pPager->eState = PAGER_WRITER_DBMOD;
  4215   4146     assert( assert_pager_state(pPager) );
  4216   4147     return SQLITE_OK;
  4217   4148   }
  4218   4149   
  4219         -/*
  4220         -** This is called by the wal.c module at the start of a checkpoint. If the
  4221         -** checkpoint runs to completion, it will set the database file size to
  4222         -** szReq bytes. This function performs two tasks:
  4223         -**
  4224         -**   * If the file is currently less than szReq bytes in size, an
  4225         -**     xFileControl(SQLITE_FCNTL_SIZE_HINT) is issued to inform the OS
  4226         -**     layer of the expected file size, and
  4227         -**
  4228         -**   * If mmap is being used, then the mapping is extended to szReq
  4229         -**     bytes in size.
  4230         -**
  4231         -** SQLITE_OK is returned if successful, or an error code if an error occurs.
  4232         -*/
  4233         -int sqlite3PagerSetFilesize(Pager *pPager, i64 szReq){
  4234         -  int rc;
  4235         -  i64 sz;                         /* Size of file on disk in bytes */
  4236         -
  4237         -  assert( pPager->eState==PAGER_OPEN );
  4238         -  assert( pPager->nMmapOut==0 );
  4239         -
  4240         -  rc = sqlite3OsFileSize(pPager->fd, &sz);
  4241         -  if( rc==SQLITE_OK ){
  4242         -    if( sz>szReq ){
  4243         -      sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &sz);
  4244         -    }
  4245         -  }
  4246         -
  4247         -
  4248         -  if( rc==SQLITE_OK ){
  4249         -    i64 szMap = (szReq > pPager->nMapLimit) ? pPager->nMapLimit : szReq;
  4250         -    if( pPager->nMapValid!=pPager->nMap || szMap!=pPager->nMap ){
  4251         -      pPager->dbFileSize = (szReq / pPager->pageSize);
  4252         -      rc = pagerMap(pPager, 1);
  4253         -    }
  4254         -  }
  4255         -
  4256         -  return rc;
  4257         -}
  4258         -
  4259   4150   /*
  4260   4151   ** The argument is the first in a linked list of dirty pages connected
  4261   4152   ** by the PgHdr.pDirty pointer. This function writes each one of the
  4262   4153   ** in-memory pages in the list to the database file. The argument may
  4263   4154   ** be NULL, representing an empty list. In this case this function is
  4264   4155   ** a no-op.
  4265   4156   **
................................................................................
  4311   4202     assert( rc!=SQLITE_OK || isOpen(pPager->fd) );
  4312   4203     if( rc==SQLITE_OK 
  4313   4204      && (pList->pDirty ? pPager->dbSize : pList->pgno+1)>pPager->dbHintSize 
  4314   4205     ){
  4315   4206       sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize;
  4316   4207       sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile);
  4317   4208       pPager->dbHintSize = pPager->dbSize;
  4318         -
  4319         -    if( pPager->nMmapOut==0 && pPager->nMapLimit>0 ){
  4320         -      pPager->dbFileSize = pPager->dbSize;
  4321         -      rc = pagerMap(pPager, 1);
  4322         -    }
  4323   4209     }
  4324   4210   
  4325   4211     while( rc==SQLITE_OK && pList ){
  4326   4212       Pgno pgno = pList->pgno;
  4327   4213   
  4328   4214       /* If there are dirty pages in the page cache with page numbers greater
  4329   4215       ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to
................................................................................
  4340   4226         assert( (pList->flags&PGHDR_NEED_SYNC)==0 );
  4341   4227         if( pList->pgno==1 ) pager_write_changecounter(pList);
  4342   4228   
  4343   4229         /* Encode the database */
  4344   4230         CODEC2(pPager, pList->pData, pgno, 6, return SQLITE_NOMEM, pData);
  4345   4231   
  4346   4232         /* Write out the page data. */
  4347         -      rc = sqlite3PagerWriteData(pPager, pData, pPager->pageSize, offset);
         4233  +      rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
  4348   4234   
  4349   4235         /* If page 1 was just written, update Pager.dbFileVers to match
  4350   4236         ** the value now stored in the database file. If writing this 
  4351   4237         ** page caused the database file to grow, update dbFileSize. 
  4352   4238         */
  4353   4239         if( pgno==1 ){
  4354   4240           memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
................................................................................
  5160   5046              || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK)
  5161   5047         );
  5162   5048       }
  5163   5049   
  5164   5050       if( !pPager->tempFile && (
  5165   5051           pPager->pBackup 
  5166   5052        || sqlite3PcachePagecount(pPager->pPCache)>0 
  5167         -     || pPager->pMap
         5053  +     || pPager->bUseFetch /* TODO: Currently required for xUnfetch(0) only. */
  5168   5054       )){
  5169   5055         /* The shared-lock has just been acquired on the database file
  5170   5056         ** and there are already pages in the cache (from a previous
  5171   5057         ** read or write transaction).  Check to see if the database
  5172   5058         ** has been modified.  If the database has changed, flush the
  5173   5059         ** cache.
  5174   5060         **
................................................................................
  5184   5070         */
  5185   5071         Pgno nPage = 0;
  5186   5072         char dbFileVers[sizeof(pPager->dbFileVers)];
  5187   5073   
  5188   5074         rc = pagerPagecount(pPager, &nPage);
  5189   5075         if( rc ) goto failed;
  5190   5076   
  5191         -      if( nPage>0 || pPager->pMap ){
         5077  +      if( nPage>0 ){
  5192   5078           IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
  5193         -        if( pPager->pMap ){
  5194         -          memcpy(&dbFileVers, &((u8 *)(pPager->pMap))[24], sizeof(dbFileVers));
  5195         -        }else{
  5196         -          rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
  5197         -        }
         5079  +        rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
  5198   5080           if( rc!=SQLITE_OK ){
  5199   5081             goto failed;
  5200   5082           }
  5201   5083         }else{
  5202   5084           memset(dbFileVers, 0, sizeof(dbFileVers));
  5203   5085         }
  5204   5086   
................................................................................
  5207   5089   
  5208   5090           /* Unmap the database file. It is possible that external processes
  5209   5091           ** may have truncated the database file and then extended it back
  5210   5092           ** to its original size while this process was not holding a lock.
  5211   5093           ** In this case there may exist a Pager.pMap mapping that appears
  5212   5094           ** to be the right size but is not actually valid. Avoid this
  5213   5095           ** possibility by unmapping the db here. */
  5214         -        pagerUnmap(pPager);
  5215         -      }else if( pPager->pMap ){
  5216         -        pPager->bMapResize = 1;
         5096  +        if( pPager->bUseFetch ){
         5097  +          sqlite3OsUnfetch(pPager->fd, 0);
         5098  +        }
  5217   5099         }
  5218   5100       }
  5219   5101   
  5220   5102       /* If there is a WAL file in the file-system, open this database in WAL
  5221   5103       ** mode. Otherwise, the following function call is a no-op.
  5222   5104       */
  5223   5105       rc = pagerOpenWalIfPresent(pPager);
................................................................................
  5321   5203     u32 iFrame = 0;                 /* Frame to read from WAL file */
  5322   5204     const int noContent = (flags & PAGER_ACQUIRE_NOCONTENT);
  5323   5205   
  5324   5206     /* It is acceptable to use a read-only (mmap) page for any page except
  5325   5207     ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY
  5326   5208     ** flag was specified by the caller. And so long as the db is not a 
  5327   5209     ** temporary or in-memory database.  */
  5328         -  const int bMmapOk = (pPager->nMapLimit>0 && pgno!=1
         5210  +  const int bMmapOk = (pgno!=1 && pPager->bUseFetch
  5329   5211      && (pPager->eState==PAGER_READER || (flags & PAGER_ACQUIRE_READONLY))
  5330   5212     );
  5331   5213   
  5332   5214     assert( pPager->eState>=PAGER_READER );
  5333   5215     assert( assert_pager_state(pPager) );
  5334   5216     assert( noContent==0 || bMmapOk==0 );
  5335   5217   
................................................................................
  5345   5227   
  5346   5228       if( bMmapOk && pagerUseWal(pPager) ){
  5347   5229         rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame);
  5348   5230         if( rc!=SQLITE_OK ) goto pager_acquire_err;
  5349   5231       }
  5350   5232   
  5351   5233       if( iFrame==0 && bMmapOk ){
  5352         -      if( pPager->pMap==0 || (pPager->bMapResize && pPager->nMmapOut==0) ){
  5353         -        rc = pagerMap(pPager, 0);
  5354         -      }
  5355         -      if( rc==SQLITE_OK && pPager->nMap>=((i64)pgno * pPager->pageSize) ){
         5234  +      void *pData = 0;
         5235  +
         5236  +      rc = sqlite3OsFetch(pPager->fd, 
         5237  +          (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData
         5238  +      );
         5239  +
         5240  +      if( rc==SQLITE_OK && pData ){
  5356   5241           if( pPager->eState>PAGER_READER ){
  5357   5242             (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
  5358   5243           }
  5359   5244           if( pPg==0 ){
  5360         -          rc = pagerAcquireMapPage(pPager, pgno, &pPg);
         5245  +          rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg);
         5246  +        }else{
         5247  +          sqlite3OsUnfetch(pPager->fd, pData);
  5361   5248           }
  5362   5249           if( pPg ){
  5363   5250             assert( rc==SQLITE_OK );
  5364   5251             *ppPage = pPg;
  5365   5252             return SQLITE_OK;
  5366   5253           }
  5367   5254         }
................................................................................
  7113   7000       rc = pagerExclusiveLock(pPager);
  7114   7001     }
  7115   7002   
  7116   7003     /* Open the connection to the log file. If this operation fails, 
  7117   7004     ** (e.g. due to malloc() failure), return an error code.
  7118   7005     */
  7119   7006     if( rc==SQLITE_OK ){
  7120         -    rc = sqlite3WalOpen(pPager->pVfs, pPager,
         7007  +    rc = sqlite3WalOpen(pPager->pVfs,
  7121   7008           pPager->fd, pPager->zWal, pPager->exclusiveMode,
  7122   7009           pPager->journalSizeLimit, &pPager->pWal
  7123   7010       );
  7124   7011     }
  7125   7012     pagerFixMaplimit(pPager);
  7126   7013   
  7127   7014     return rc;
................................................................................
  7151   7038   
  7152   7039     assert( assert_pager_state(pPager) );
  7153   7040     assert( pPager->eState==PAGER_OPEN   || pbOpen );
  7154   7041     assert( pPager->eState==PAGER_READER || !pbOpen );
  7155   7042     assert( pbOpen==0 || *pbOpen==0 );
  7156   7043     assert( pbOpen!=0 || (!pPager->tempFile && !pPager->pWal) );
  7157   7044   
  7158         -  pagerUnmap(pPager);
  7159         -
  7160   7045     if( !pPager->tempFile && !pPager->pWal ){
  7161   7046       if( !sqlite3PagerWalSupported(pPager) ) return SQLITE_CANTOPEN;
  7162   7047   
  7163   7048       /* Close any rollback journal previously open */
  7164   7049       sqlite3OsClose(pPager->jfd);
  7165   7050   
  7166   7051       rc = pagerOpenWal(pPager);

Changes to src/pager.h.

   172    172   void sqlite3PagerClearCache(Pager *);
   173    173   int sqlite3SectorSize(sqlite3_file *);
   174    174   
   175    175   /* Functions used to truncate the database file. */
   176    176   void sqlite3PagerTruncateImage(Pager*,Pgno);
   177    177   int sqlite3PagerSetFilesize(Pager *, i64);
   178    178   
   179         -/* Write data to the database file */
   180         -int sqlite3PagerWriteData(Pager *pPager, const void *pBuf, int nBuf, i64 iOff);
   181         -
   182    179   #if defined(SQLITE_HAS_CODEC) && !defined(SQLITE_OMIT_WAL)
   183    180   void *sqlite3PagerCodec(DbPage *);
   184    181   #endif
   185    182   
   186    183   /* Functions to support testing and debugging. */
   187    184   #if !defined(NDEBUG) || defined(SQLITE_TEST)
   188    185     Pgno sqlite3PagerPagenumber(DbPage*);

Changes to src/sqlite.h.in.

   703    703   ** to xWrite().
   704    704   **
   705    705   ** If xRead() returns SQLITE_IOERR_SHORT_READ it must also fill
   706    706   ** in the unread portions of the buffer with zeros.  A VFS that
   707    707   ** fails to zero-fill short reads might seem to work.  However,
   708    708   ** failure to zero-fill short reads will eventually lead to
   709    709   ** database corruption.
   710         -**
   711         -** Assuming parameter nNew is non-zero, the xMremap method should attempt
   712         -** to memory map a region nNew bytes in size starting at offset iOffset
   713         -** of the file.  If successful, it should set *ppMap to point to the
   714         -** mapping and return SQLITE_OK. If the file is opened for read-write
   715         -** access, then the mapping should also be read-write.
   716         -**
   717         -** If nOld is non-zero, then the initial value of *ppMap points to a
   718         -** mapping returned by a previous call to xMremap. The existing mapping
   719         -** is nOld bytes in size and starts at offset iOffset of the file. In
   720         -** this case the xMremap method is expected to unmap the existing mapping
   721         -** and overwrite *ppMap with the pointer to the new mapping. If nOld is
   722         -** zero, then the initial value of *ppMap is undefined.
   723         -**
   724         -** If nNew is zero, then no new mapping should be created. Any old
   725         -** mapping must still be unmapped if nOld is non-zero. If the nOld
   726         -** parameter is non-zero, then the existing mapping is always unmapped -
   727         -** even if an error occurs.
   728    710   */
   729    711   typedef struct sqlite3_io_methods sqlite3_io_methods;
   730    712   struct sqlite3_io_methods {
   731    713     int iVersion;
   732    714     int (*xClose)(sqlite3_file*);
   733    715     int (*xRead)(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst);
   734    716     int (*xWrite)(sqlite3_file*, const void*, int iAmt, sqlite3_int64 iOfst);
................................................................................
   743    725     int (*xDeviceCharacteristics)(sqlite3_file*);
   744    726     /* Methods above are valid for version 1 */
   745    727     int (*xShmMap)(sqlite3_file*, int iPg, int pgsz, int, void volatile**);
   746    728     int (*xShmLock)(sqlite3_file*, int offset, int n, int flags);
   747    729     void (*xShmBarrier)(sqlite3_file*);
   748    730     int (*xShmUnmap)(sqlite3_file*, int deleteFlag);
   749    731     /* Methods above are valid for version 2 */
   750         -  int (*xMremap)(sqlite3_file *fd, int flags,
   751         -      sqlite3_int64 iOff, sqlite3_int64 nOld, sqlite3_int64 nNew, void **ppMap);
          732  +  int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp);
          733  +  int (*xUnfetch)(sqlite3_file*, void *p);
   752    734     /* Methods above are valid for version 3 */
   753    735     /* Additional methods may be added in future releases */
   754    736   };
   755    737   
   756         -#define SQLITE_MREMAP_EXTEND  0x0001        /* xMremap call may extend file */
   757         -
   758    738   /*
   759    739   ** CAPI3REF: Standard File Control Opcodes
   760    740   **
   761    741   ** These integer constants are opcodes for the xFileControl method
   762    742   ** of the [sqlite3_io_methods] object and for the [sqlite3_file_control()]
   763    743   ** interface.
   764    744   **
................................................................................
   902    882   ** ^Application can invoke this file-control to have SQLite generate a
   903    883   ** temporary filename using the same algorithm that is followed to generate
   904    884   ** temporary filenames for TEMP tables and other internal uses.  The
   905    885   ** argument should be a char** which will be filled with the filename
   906    886   ** written into memory obtained from [sqlite3_malloc()].  The caller should
   907    887   ** invoke [sqlite3_free()] on the result to avoid a memory leak.
   908    888   **
          889  +** <li>[[SQLITE_FCNTL_MMAP_SIZE]]
          890  +** The argument is assumed to point to a value of type sqlite3_int64 that
          891  +** specifies an advisory maximum number of bytes of this file to memory map.
          892  +**
   909    893   ** </ul>
   910    894   */
   911    895   #define SQLITE_FCNTL_LOCKSTATE               1
   912    896   #define SQLITE_GET_LOCKPROXYFILE             2
   913    897   #define SQLITE_SET_LOCKPROXYFILE             3
   914    898   #define SQLITE_LAST_ERRNO                    4
   915    899   #define SQLITE_FCNTL_SIZE_HINT               5
................................................................................
   920    904   #define SQLITE_FCNTL_PERSIST_WAL            10
   921    905   #define SQLITE_FCNTL_OVERWRITE              11
   922    906   #define SQLITE_FCNTL_VFSNAME                12
   923    907   #define SQLITE_FCNTL_POWERSAFE_OVERWRITE    13
   924    908   #define SQLITE_FCNTL_PRAGMA                 14
   925    909   #define SQLITE_FCNTL_BUSYHANDLER            15
   926    910   #define SQLITE_FCNTL_TEMPFILENAME           16
   927         -#define SQLITE_FCNTL_GETFD                  17
          911  +#define SQLITE_FCNTL_MMAP_SIZE              18
   928    912   
   929    913   /*
   930    914   ** CAPI3REF: Mutex Handle
   931    915   **
   932    916   ** The mutex module within SQLite defines [sqlite3_mutex] to be an
   933    917   ** abstract type for a mutex object.  The SQLite core never looks
   934    918   ** at the internal representation of an [sqlite3_mutex].  It only

Changes to src/wal.c.

   408    408   ** An open write-ahead log file is represented by an instance of the
   409    409   ** following object.
   410    410   */
   411    411   struct Wal {
   412    412     sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
   413    413     sqlite3_file *pDbFd;       /* File handle for the database file */
   414    414     sqlite3_file *pWalFd;      /* File handle for WAL file */
   415         -  Pager *pPager;             /* Pager object */
   416    415     u32 iCallback;             /* Value to pass to log callback (or 0) */
   417    416     i64 mxWalSize;             /* Truncate WAL to this size upon reset */
   418    417     int nWiData;               /* Size of array apWiData */
   419    418     int szFirstBlock;          /* Size of first block written to WAL file */
   420    419     volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
   421    420     u32 szPage;                /* Database page size */
   422    421     i16 readLock;              /* Which read lock is being held.  -1 for none */
................................................................................
  1248   1247   **
  1249   1248   ** If the log file is successfully opened, SQLITE_OK is returned and 
  1250   1249   ** *ppWal is set to point to a new WAL handle. If an error occurs,
  1251   1250   ** an SQLite error code is returned and *ppWal is left unmodified.
  1252   1251   */
  1253   1252   int sqlite3WalOpen(
  1254   1253     sqlite3_vfs *pVfs,              /* vfs module to open wal and wal-index */
  1255         -  Pager *pPager,                  /* Pager object handle */
  1256   1254     sqlite3_file *pDbFd,            /* The open database file */
  1257   1255     const char *zWalName,           /* Name of the WAL file */
  1258   1256     int bNoShm,                     /* True to run in heap-memory mode */
  1259   1257     i64 mxWalSize,                  /* Truncate WAL to this size on reset */
  1260   1258     Wal **ppWal                     /* OUT: Allocated Wal handle */
  1261   1259   ){
  1262   1260     int rc;                         /* Return Code */
................................................................................
  1289   1287     pRet->pWalFd = (sqlite3_file *)&pRet[1];
  1290   1288     pRet->pDbFd = pDbFd;
  1291   1289     pRet->readLock = -1;
  1292   1290     pRet->mxWalSize = mxWalSize;
  1293   1291     pRet->zWalName = zWalName;
  1294   1292     pRet->syncHeader = 1;
  1295   1293     pRet->padToSectorBoundary = 1;
  1296         -  pRet->pPager = pPager;
  1297   1294     pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE);
  1298   1295   
  1299   1296     /* Open file handle on the write-ahead log file. */
  1300   1297     flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL);
  1301   1298     rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags);
  1302   1299     if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){
  1303   1300       pRet->readOnly = WAL_RDONLY;
................................................................................
  1721   1718       u32 nBackfill = pInfo->nBackfill;
  1722   1719   
  1723   1720       /* Sync the WAL to disk */
  1724   1721       if( sync_flags ){
  1725   1722         rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
  1726   1723       }
  1727   1724   
  1728         -    /* If the database file is currently smaller than mxPage pages in size,
  1729         -    ** the call below issues an SQLITE_FCNTL_SIZE_HINT to the OS layer to
  1730         -    ** inform it that it is likely to grow to that size.
  1731         -    **
  1732         -    ** Additionally, if the pager is using mmap(), then the call to 
  1733         -    ** SetFilesize() guarantees that the mapping is not larger than mxPage
  1734         -    ** pages. This makes the sqlite3OsTruncate() call below safe - no pages
  1735         -    ** that are part of the mapped region will be truncated away.  */
         1725  +    /* If the database may grow as a result of this checkpoint, hint
         1726  +    ** about the eventual size of the db file to the VFS layer.
         1727  +    */
  1736   1728       if( rc==SQLITE_OK ){
  1737   1729         i64 nReq = ((i64)mxPage * szPage);
  1738         -      rc = sqlite3PagerSetFilesize(pWal->pPager, nReq);
         1730  +      rc = sqlite3OsFileSize(pWal->pDbFd, &nSize);
         1731  +      if( rc==SQLITE_OK && nSize<nReq ){
         1732  +        sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq);
         1733  +      }
  1739   1734       }
         1735  +
  1740   1736   
  1741   1737       /* Iterate through the contents of the WAL, copying data to the db file. */
  1742   1738       while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
  1743   1739         i64 iOffset;
  1744   1740         assert( walFramePgno(pWal, iFrame)==iDbpage );
  1745   1741         if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ) continue;
  1746   1742         iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE;
  1747   1743         /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */
  1748   1744         rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset);
  1749   1745         if( rc!=SQLITE_OK ) break;
  1750   1746         iOffset = (iDbpage-1)*(i64)szPage;
  1751   1747         testcase( IS_BIG_INT(iOffset) );
  1752         -      rc = sqlite3PagerWriteData(pWal->pPager, zBuf, szPage, iOffset);
         1748  +      rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset);
  1753   1749         if( rc!=SQLITE_OK ) break;
  1754   1750       }
  1755   1751   
  1756   1752       /* If work was actually accomplished... */
  1757   1753       if( rc==SQLITE_OK ){
  1758   1754         if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){
  1759   1755           i64 szDb = pWal->hdr.nPage*(i64)szPage;

Changes to src/wal.h.

    49     49   
    50     50   /* Connection to a write-ahead log (WAL) file. 
    51     51   ** There is one object of this type for each pager. 
    52     52   */
    53     53   typedef struct Wal Wal;
    54     54   
    55     55   /* Open and close a connection to a write-ahead log. */
    56         -int sqlite3WalOpen(
    57         -  sqlite3_vfs*, Pager *, sqlite3_file*, const char *, int, i64, Wal**);
           56  +int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *, int, i64, Wal**);
    58     57   int sqlite3WalClose(Wal *pWal, int sync_flags, int, u8 *);
    59     58   
    60     59   /* Set the limiting size of a WAL file. */
    61     60   void sqlite3WalLimit(Wal*, i64);
    62     61   
    63     62   /* Used by readers to open (lock) and close (unlock) a snapshot.  A 
    64     63   ** snapshot is like a read-transaction.  It is the state of the database