SQLite

Check-in [e7698cba9b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change os_unix.c to use either one or two mappings internally.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | two-mappings
Files: files | file ages | folders
SHA1: e7698cba9bcffbfadd30d9319669add4d60fcc65
User & Date: dan 2013-03-26 20:32:39.622
Context
2013-03-27
19:53
Modify various test cases so that they work if the file is extended in units of the system page-size. (check-in: 0e3d511927 user: dan tags: two-mappings)
2013-03-26
20:32
Change os_unix.c to use either one or two mappings internally. (check-in: e7698cba9b user: dan tags: two-mappings)
14:16
In btree.c, save the positions of any open cursors before moving any pages around to auto-vacuum the database on commit. (check-in: 30c0a69363 user: dan tags: experimental-mmap)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/os_unix.c.
202
203
204
205
206
207
208








209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232


233
234
235
236
237
238
239
** opportunity to either close or reuse it.
*/
struct UnixUnusedFd {
  int fd;                   /* File descriptor to close */
  int flags;                /* Flags this file descriptor was opened with */
  UnixUnusedFd *pNext;      /* Next unused file descriptor on same file */
};









/*
** The unixFile structure is subclass of sqlite3_file specific to the unix
** VFS implementations.
*/
typedef struct unixFile unixFile;
struct unixFile {
  sqlite3_io_methods const *pMethod;  /* Always the first entry */
  sqlite3_vfs *pVfs;                  /* The VFS that created this unixFile */
  unixInodeInfo *pInode;              /* Info about locks on this inode */
  int h;                              /* The file descriptor */
  unsigned char eFileLock;            /* The type of lock held on this fd */
  unsigned short int ctrlFlags;       /* Behavioral bits.  UNIXFILE_* flags */
  int lastErrno;                      /* The unix errno from last I/O error */
  void *lockingContext;               /* Locking style specific state */
  UnixUnusedFd *pUnused;              /* Pre-allocated UnixUnusedFd */
  const char *zPath;                  /* Name of the file */
  unixShm *pShm;                      /* Shared memory segment information */
  int szChunk;                        /* Configured by FCNTL_CHUNK_SIZE */
  int nFetchOut;                      /* Number of outstanding xFetch refs */
  sqlite3_int64 mmapSize;             /* Usable size of mapping at pMapRegion */
  sqlite3_int64 mmapOrigsize;         /* Actual size of mapping at pMapRegion */
  sqlite3_int64 mmapLimit;            /* Configured FCNTL_MMAP_LIMIT value */
  void *pMapRegion;                   /* Memory mapped region */


#ifdef __QNXNTO__
  int sectorSize;                     /* Device sector size */
  int deviceCharacteristics;          /* Precomputed device characteristics */
#endif
#if SQLITE_ENABLE_LOCKING_STYLE
  int openFlags;                      /* The flags specified at open() */
#endif







>
>
>
>
>
>
>
>




















<
<

|
>
>







202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236


237
238
239
240
241
242
243
244
245
246
247
** opportunity to either close or reuse it.
*/
struct UnixUnusedFd {
  int fd;                   /* File descriptor to close */
  int flags;                /* Flags this file descriptor was opened with */
  UnixUnusedFd *pNext;      /* Next unused file descriptor on same file */
};

/*
** Description of a single memory mapped region of a file.  A unixFile
** holds an array of these (see unixFile.aMmap[]).  When pMapRegion is 0
** the slot is unused and both size fields are zero.
*/
typedef struct unixMapping unixMapping;
struct unixMapping {
  sqlite3_int64 mmapSize;       /* Usable size of mapping at pMapRegion */
  sqlite3_int64 mmapOrigsize;   /* Actual size of mapping (passed to munmap) */
  void *pMapRegion;             /* Memory mapped region, or 0 if none */
};


/*
** The unixFile structure is subclass of sqlite3_file specific to the unix
** VFS implementations.
*/
typedef struct unixFile unixFile;
struct unixFile {
  sqlite3_io_methods const *pMethod;  /* Always the first entry */
  sqlite3_vfs *pVfs;                  /* The VFS that created this unixFile */
  unixInodeInfo *pInode;              /* Info about locks on this inode */
  int h;                              /* The file descriptor */
  unsigned char eFileLock;            /* The type of lock held on this fd */
  unsigned short int ctrlFlags;       /* Behavioral bits.  UNIXFILE_* flags */
  int lastErrno;                      /* The unix errno from last I/O error */
  void *lockingContext;               /* Locking style specific state */
  UnixUnusedFd *pUnused;              /* Pre-allocated UnixUnusedFd */
  const char *zPath;                  /* Name of the file */
  unixShm *pShm;                      /* Shared memory segment information */
  int szChunk;                        /* Configured by FCNTL_CHUNK_SIZE */
  int nFetchOut;                      /* Number of outstanding xFetch refs */


  sqlite3_int64 mmapLimit;            /* Configured FCNTL_MMAP_LIMIT value */
  int szSyspage;                      /* System page size */
  unixMapping aMmap[2];               /* Up to two memory mapped regions */

#ifdef __QNXNTO__
  int sectorSize;                     /* Device sector size */
  int deviceCharacteristics;          /* Precomputed device characteristics */
#endif
#if SQLITE_ENABLE_LOCKING_STYLE
  int openFlags;                      /* The flags specified at open() */
#endif
308
309
310
311
312
313
314




315
316
317
318
319
320
321
** testing and debugging only.
*/
#if SQLITE_THREADSAFE
#define threadid pthread_self()
#else
#define threadid 0
#endif





/*
** Different Unix systems declare open() in different ways.  Some use
** open(const char*,int,mode_t).  Others use open(const char*,int,...).
** The difference is important when using a pointer to the function.
**
** The safest way to deal with the problem is to always use this wrapper







>
>
>
>







316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
** testing and debugging only.
*/
#if SQLITE_THREADSAFE
#define threadid pthread_self()
#else
#define threadid 0
#endif

/*
** HAVE_MREMAP is defined when compiling for Linux with _GNU_SOURCE set.
** Code elsewhere in this file tests it to decide whether the real mremap()
** is available for resizing an existing memory mapping.
*/
#if defined(__linux__) && defined(_GNU_SOURCE)
# define HAVE_MREMAP
#endif

/*
** Different Unix systems declare open() in different ways.  Some use
** open(const char*,int,mode_t).  Others use open(const char*,int,...).
** The difference is important when using a pointer to the function.
**
** The safest way to deal with the problem is to always use this wrapper
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460

  { "mmap",       (sqlite3_syscall_ptr)mmap,     0 },
#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent)

  { "munmap",       (sqlite3_syscall_ptr)munmap,          0 },
#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent)

#if defined(__linux__) && defined(_GNU_SOURCE)
  { "mremap",       (sqlite3_syscall_ptr)mremap,          0 },
#else
  { "mremap",       (sqlite3_syscall_ptr)0,               0 },
#endif
#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent)

}; /* End of the overrideable system calls */







|







458
459
460
461
462
463
464
465
466
467
468
469
470
471
472

  { "mmap",       (sqlite3_syscall_ptr)mmap,     0 },
#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent)

  { "munmap",       (sqlite3_syscall_ptr)munmap,          0 },
#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent)

#if defined(HAVE_MREMAP)
  { "mremap",       (sqlite3_syscall_ptr)mremap,          0 },
#else
  { "mremap",       (sqlite3_syscall_ptr)0,               0 },
#endif
#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent)

}; /* End of the overrideable system calls */
1862
1863
1864
1865
1866
1867
1868

1869
1870
1871
1872
1873
1874
1875

/*
** Close a file.
*/
static int unixClose(sqlite3_file *id){
  int rc = SQLITE_OK;
  unixFile *pFile = (unixFile *)id;

  unixUnlock(id, NO_LOCK);
  unixEnterMutex();

  /* unixFile.pInode is always valid here. Otherwise, a different close
  ** routine (e.g. nolockClose()) would be called instead.
  */
  assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 );







>







1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888

/*
** Close a file.
*/
static int unixClose(sqlite3_file *id){
  int rc = SQLITE_OK;
  unixFile *pFile = (unixFile *)id;
  unixUnmapfile(pFile);
  unixUnlock(id, NO_LOCK);
  unixEnterMutex();

  /* unixFile.pInode is always valid here. Otherwise, a different close
  ** routine (e.g. nolockClose()) would be called instead.
  */
  assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 );
3093
3094
3095
3096
3097
3098
3099


3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111



3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122


3123
3124
3125
3126
3127
3128
3129
  void *pBuf, 
  int amt,
  sqlite3_int64 offset
){
  unixFile *pFile = (unixFile *)id;
  int got;
  assert( id );



  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
#if 0
  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );
#endif

  /* Deal with as much of this write request as possible by transfering
  ** data to the memory mapping using memcpy().  */



  if( offset<pFile->mmapSize ){
    if( offset+amt <= pFile->mmapSize ){
      memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
      return SQLITE_OK;
    }else{
      int nCopy = pFile->mmapSize - offset;
      memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
      pBuf = &((u8 *)pBuf)[nCopy];
      amt -= nCopy;
      offset += nCopy;
    }


  }

  got = seekAndRead(pFile, offset, pBuf, amt);
  if( got==amt ){
    return SQLITE_OK;
  }else if( got<0 ){
    /* lastErrno set by seekAndRead */







>
>










|
|
>
>
>
|
|
|
|
|
|
|
|
|
|
|
>
>







3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
  void *pBuf, 
  int amt,
  sqlite3_int64 offset
){
  unixFile *pFile = (unixFile *)id;
  int got;
  assert( id );
  sqlite3_int64 iMap = 0;         /* File offset of start of mapping i */
  int i;                          /* Used to iterate through mappings */

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
#if 0
  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );
#endif

  /* Deal with as much of this read request as possible by transfering
  ** data from the memory mapping using memcpy().  */
  for(i=0; i<2; i++){
    unixMapping *pMap = &pFile->aMmap[i];
    sqlite3_int64 iEnd = iMap + pMap->mmapSize;
    if( offset<iEnd ){
      if( offset+amt <= iEnd ){
        memcpy(pBuf, &((u8 *)(pMap->pMapRegion))[offset-iMap], amt);
        return SQLITE_OK;
      }else{
        int nCopy = iEnd - offset;
        memcpy(pBuf, &((u8 *)(pMap->pMapRegion))[offset-iMap], nCopy);
        pBuf = &((u8 *)pBuf)[nCopy];
        amt -= nCopy;
        offset += nCopy;
      }
    }
    iMap = pMap->mmapSize;
  }

  got = seekAndRead(pFile, offset, pBuf, amt);
  if( got==amt ){
    return SQLITE_OK;
  }else if( got<0 ){
    /* lastErrno set by seekAndRead */
3190
3191
3192
3193
3194
3195
3196


3197
3198
3199
3200
3201
3202
3203
  int amt,
  sqlite3_int64 offset 
){
  unixFile *pFile = (unixFile*)id;
  int wrote = 0;
  assert( id );
  assert( amt>0 );



  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
#if 0
  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 







>
>







3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
  int amt,
  sqlite3_int64 offset 
){
  unixFile *pFile = (unixFile*)id;
  int wrote = 0;
  assert( id );
  assert( amt>0 );
  int i;
  sqlite3_int64 iMap = 0;

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
#if 0
  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
3222
3223
3224
3225
3226
3227
3228
3229
3230

3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241


3242
3243
3244
3245
3246
3247
3248
      if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){
        pFile->transCntrChng = 1;  /* The transaction counter has changed */
      }
    }
  }
#endif

  /* Deal with as much of this write request as possible by transfering
  ** data from the memory mapping using memcpy().  */

  if( offset<pFile->mmapSize ){
    if( offset+amt <= pFile->mmapSize ){
      memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
      return SQLITE_OK;
    }else{
      int nCopy = pFile->mmapSize - offset;
      memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
      pBuf = &((u8 *)pBuf)[nCopy];
      amt -= nCopy;
      offset += nCopy;
    }


  }

  while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
    amt -= wrote;
    offset += wrote;
    pBuf = &((char*)pBuf)[wrote];
  }







|
|
>
|
|
|
|
|
|
|
|
|
|
|
>
>







3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
      if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){
        pFile->transCntrChng = 1;  /* The transaction counter has changed */
      }
    }
  }
#endif

  for(i=0; i<2; i++){
    unixMapping *pMap = &pFile->aMmap[i];
    sqlite3_int64 iEnd = iMap + pMap->mmapSize;
    if( offset<iEnd ){
      if( offset+amt <= iEnd ){
        memcpy(&((u8 *)(pMap->pMapRegion))[offset-iMap], pBuf, amt);
        return SQLITE_OK;
      }else{
        int nCopy = iEnd - offset;
        memcpy(&((u8 *)(pMap->pMapRegion))[offset-iMap], pBuf, nCopy);
        pBuf = &((u8 *)pBuf)[nCopy];
        amt -= nCopy;
        offset += nCopy;
      }
    }
    iMap = pMap->mmapSize;
  }

  while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
    amt -= wrote;
    offset += wrote;
    pBuf = &((char*)pBuf)[wrote];
  }
3506
3507
3508
3509
3510
3511
3512


3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529



3530
3531


3532
3533
3534
3535
3536
3537
3538
  }

  rc = robust_ftruncate(pFile->h, (off_t)nByte);
  if( rc ){
    pFile->lastErrno = errno;
    return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
  }else{


#ifdef SQLITE_DEBUG
    /* If we are doing a normal write to a database file (as opposed to
    ** doing a hot-journal rollback or a write to some file other than a
    ** normal database file) and we truncate the file to zero length,
    ** that effectively updates the change counter.  This might happen
    ** when restoring a database using the backup API from a zero-length
    ** source.
    */
    if( pFile->inNormalWrite && nByte==0 ){
      pFile->transCntrChng = 1;
    }
#endif

    /* If the file was just truncated to a size smaller than the currently
    ** mapped region, reduce the effective mapping size as well. SQLite will
    ** use read() and write() to access data beyond this point from now on.  
    */



    if( nByte<pFile->mmapSize ){
      pFile->mmapSize = nByte;


    }

    return SQLITE_OK;
  }
}

/*







>
>

















>
>
>
|
|
>
>







3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
  }

  rc = robust_ftruncate(pFile->h, (off_t)nByte);
  if( rc ){
    pFile->lastErrno = errno;
    return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
  }else{
    int i;

#ifdef SQLITE_DEBUG
    /* If we are doing a normal write to a database file (as opposed to
    ** doing a hot-journal rollback or a write to some file other than a
    ** normal database file) and we truncate the file to zero length,
    ** that effectively updates the change counter.  This might happen
    ** when restoring a database using the backup API from a zero-length
    ** source.
    */
    if( pFile->inNormalWrite && nByte==0 ){
      pFile->transCntrChng = 1;
    }
#endif

    /* If the file was just truncated to a size smaller than the currently
    ** mapped region, reduce the effective mapping size as well. SQLite will
    ** use read() and write() to access data beyond this point from now on.  
    */
    for(i=1; i>=0; i--){
      unixMapping *pMap = &pFile->aMmap[i];
      sqlite3_int64 iEnd = pMap->mmapSize + (i==1 ? pMap[-1].mmapSize : 0);
      if( nByte<iEnd ){
        pMap->mmapSize -= (iEnd - nByte);
        if( pMap->mmapSize<0 ) pMap->mmapSize = 0;
      }
    }

    return SQLITE_OK;
  }
}

/*
3617
3618
3619
3620
3621
3622
3623

3624

3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
      }
#endif
    }
  }

  if( pFile->mmapLimit>0 ){
    int rc;

    if( pFile->szChunk<=0 ){

      if( robust_ftruncate(pFile->h, nByte) ){
        pFile->lastErrno = errno;
        return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
      }
    }

    rc = unixMapfile(pFile, nByte);
    return rc;
  }

  return SQLITE_OK;
}

/*







>

>
|





|







3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
      }
#endif
    }
  }

  if( pFile->mmapLimit>0 ){
    int rc;
    sqlite3_int64 nSz = nByte;
    if( pFile->szChunk<=0 ){
      nSz = ((nSz+pFile->szSyspage-1) / pFile->szSyspage) * pFile->szSyspage;
      if( robust_ftruncate(pFile->h, nSz) ){
        pFile->lastErrno = errno;
        return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
      }
    }

    rc = unixMapfile(pFile, nSz);
    return rc;
  }

  return SQLITE_OK;
}

/*
4505
4506
4507
4508
4509
4510
4511

4512


4513
4514
4515
4516
4517
4518
4519








4520
4521
4522
4523
4524
4525
4526
# define unixShmUnmap   0
#endif /* #ifndef SQLITE_OMIT_WAL */

/*
** Release the memory mapping of file pFd, if there is one.  The caller
** must not hold any outstanding xFetch references (nFetchOut==0).
*/
static void unixUnmapfile(unixFile *pFd){
  assert( pFd->nFetchOut==0 );
  if( pFd->pMapRegion==0 ) return;    /* Nothing is mapped */

  /* Unmap using the size the region was originally created with, then
  ** reset all bookkeeping so that the file reads as "not mapped". */
  osMunmap(pFd->pMapRegion, pFd->mmapOrigsize);
  pFd->mmapOrigsize = 0;
  pFd->mmapSize = 0;
  pFd->pMapRegion = 0;
}









/*
** Memory map or remap the file opened by file-descriptor pFd (if the file
** is already mapped, the existing mapping is replaced by the new). Or, if 
** there already exists a mapping for this file, and there are still 
** outstanding xFetch() references to it, this function is a no-op.
**







>

>
>
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>







4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
# define unixShmUnmap   0
#endif /* #ifndef SQLITE_OMIT_WAL */

/*
** Release every memory mapping currently held on file pFd.  Slots of
** pFd->aMmap[] that are not in use are left untouched.  The caller must
** not hold any outstanding xFetch references (nFetchOut==0).
*/
static void unixUnmapfile(unixFile *pFd){
  unixMapping *pRegion = &pFd->aMmap[0];   /* Mapping slot being examined */
  unixMapping *pPastEnd = pRegion + 2;     /* One past the final slot */

  assert( pFd->nFetchOut==0 );
  for(; pRegion<pPastEnd; pRegion++){
    if( pRegion->pMapRegion==0 ) continue;

    /* Unmap using the size the region was created with, then reset the
    ** slot so that it reads as unused. */
    osMunmap(pRegion->pMapRegion, pRegion->mmapOrigsize);
    pRegion->mmapOrigsize = 0;
    pRegion->mmapSize = 0;
    pRegion->pMapRegion = 0;
  }
}

/*
** Return the system page size in bytes.
**
** The value is obtained from sysconf(_SC_PAGESIZE) when that interface is
** visible (it is declared by <unistd.h>).  Systems with pages larger than
** 4KiB would otherwise be given a wrong value, and file offsets passed to
** mmap() are required to be multiples of the real page size.
**
** If _SC_PAGESIZE is not available at compile time, or if sysconf()
** reports an error or a value that does not fit in an int, the historical
** default of 4096 is returned.
*/
static int unixGetPagesize(void){
#if defined(_SC_PAGESIZE)
  long szPage = sysconf(_SC_PAGESIZE);
  if( szPage>0 && szPage<=0x7fffffff ){
    return (int)szPage;
  }
#endif
  return 4096;
}

/*
** Memory map or remap the file opened by file-descriptor pFd (if the file
** is already mapped, the existing mapping is replaced by the new). Or, if 
** there already exists a mapping for this file, and there are still 
** outstanding xFetch() references to it, this function is a no-op.
**
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559



4560
4561
4562


4563
4564

4565



4566
4567
4568



4569


4570



4571









4572



















4573
4574
4575




4576
4577
4578




4579
4580
4581
4582
4583
4584
4585
    }
    nMap = statbuf.st_size;
  }
  if( nMap>pFd->mmapLimit ){
    nMap = pFd->mmapLimit;
  }

  if( nMap!=pFd->mmapSize ){
    void *pNew = 0;

#if defined(__linux__) && defined(_GNU_SOURCE)



    if( pFd->pMapRegion && nMap>0 ){
      pNew = osMremap(pFd->pMapRegion, pFd->mmapOrigsize, nMap, MREMAP_MAYMOVE);
    }else


#endif
    {

      unixUnmapfile(pFd);



      if( nMap>0 ){
        int flags = PROT_READ;
        if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;



        pNew = osMmap(0, nMap, flags, MAP_SHARED, pFd->h, 0);


      }



    }





























    if( pNew==MAP_FAILED ){
      return SQLITE_IOERR_MMAP;
    }




    pFd->pMapRegion = pNew;
    pFd->mmapSize = nMap;
    pFd->mmapOrigsize = nMap;




  }

  return SQLITE_OK;
}

/*
** If possible, return a pointer to a mapping of file fd starting at offset







|


|
>
>
>
|
|
<
>
>

<
>

>
>
>
|
|
|
>
>
>

>
>
|
>
>
>
|
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
>
>
>
>
|
|
|
>
>
>
>







4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609

4610
4611
4612

4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
    }
    nMap = statbuf.st_size;
  }
  if( nMap>pFd->mmapLimit ){
    nMap = pFd->mmapLimit;
  }

  if( nMap!=(pFd->aMmap[0].mmapSize + pFd->aMmap[1].mmapSize) ){
    void *pNew = 0;

    /* If the request is for a mapping zero bytes in size, or there are 
     ** currently already two mapping regions, or there is already a mapping
     ** region that is not a multiple of the page-size in size, unmap
     ** everything.  */
    if( nMap==0 
#ifndef HAVE_MREMAP

        || (pFd->aMmap[0].pMapRegion && pFd->aMmap[1].pMapRegion) 
        || (pFd->aMmap[0].mmapSize % pFd->szSyspage)
#endif

      ){
      unixUnmapfile(pFd);
    }
    assert( pFd->aMmap[1].pMapRegion==0 );

    if( nMap>0 ){
      int flags = PROT_READ;
      if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;

      /* If there are currently no mappings, create a new one */
      if( pFd->aMmap[0].pMapRegion==0 ){
        pNew = osMmap(0, nMap, flags, MAP_SHARED, pFd->h, 0);
        if( pNew==MAP_FAILED ){
          return SQLITE_IOERR_MMAP;
        }
        pFd->aMmap[0].pMapRegion = pNew;
        pFd->aMmap[0].mmapSize = nMap;
        pFd->aMmap[0].mmapOrigsize = nMap;
      }
#ifdef HAVE_MREMAP
      /* If we have an mremap() call, resize the existing mapping. */
      else{
        unixMapping *pMap = &pFd->aMmap[0];
        pNew = osMremap(
            pMap->pMapRegion, pMap->mmapOrigsize, nMap, MREMAP_MAYMOVE
            );
        if( pNew==MAP_FAILED ){
          return SQLITE_IOERR_MMAP;
        }
        pFd->aMmap[0].pMapRegion = pNew;
        pFd->aMmap[0].mmapSize = nMap;
        pFd->aMmap[0].mmapOrigsize = nMap;
      }
#else
      /* Otherwise, create a second mapping. If the existing mapping is
      ** a multiple of the page-size in size, then request that the new
      ** mapping immediately follow the old in virtual memory.  */
      else{
        unixMapping *pMap = &pFd->aMmap[0];
        void *pAddr = 0;

        nMap -= pMap->mmapSize;

        if( pMap->mmapSize==pMap->mmapOrigsize ){
          pAddr = (void *)&((u8 *)pMap->pMapRegion)[pMap->mmapSize];
        }

        pNew = osMmap(pAddr, nMap, flags, MAP_SHARED, pFd->h, pMap->mmapSize);
        if( pNew==MAP_FAILED ){
          return SQLITE_IOERR_MMAP;
        }
        if( pNew==pAddr ){
          pMap->mmapOrigsize += nMap;
          pMap->mmapSize += nMap;
        }else{
          pFd->aMmap[1].pMapRegion = pNew;
          pFd->aMmap[1].mmapSize = nMap;
          pFd->aMmap[1].mmapOrigsize = nMap;
        }
      }
#endif
    }
  }

  return SQLITE_OK;
}

/*
** If possible, return a pointer to a mapping of file fd starting at offset
4594
4595
4596
4597
4598
4599
4600



4601
4602
4603
4604
4605



4606
4607



4608
4609
4610
4611
4612
4613
4614
** release the reference by calling unixUnfetch().
*/
static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
  unixFile *pFile = (unixFile *)fd;   /* The underlying database file */

  /* Default result: no mapping pointer available. */
  *pp = 0;
  if( pFile->mmapLimit<=0 ) return SQLITE_OK;

  /* Create the mapping on first use. */
  if( !pFile->pMapRegion ){
    int rcMap = unixMapfile(pFile, -1);
    if( rcMap!=SQLITE_OK ) return rcMap;
  }

  /* Hand out a pointer only if the whole requested range is mapped.  Each
  ** pointer handed out counts as one outstanding xFetch reference. */
  if( iOff+nAmt <= pFile->mmapSize ){
    pFile->nFetchOut++;
    *pp = (u8 *)pFile->pMapRegion + iOff;
  }
  return SQLITE_OK;
}

/*
** If the third argument is non-NULL, then this function releases a 







>
>
>
|



|
>
>
>
|
|
>
>
>







4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
** release the reference by calling unixUnfetch().
*/
static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
  unixFile *pFile = (unixFile *)fd;   /* The underlying database file */
  sqlite3_int64 iStart = 0;           /* File offset where mapping k begins */
  int k;                              /* Index into pFile->aMmap[] */

  /* Default result: no mapping pointer available. */
  *pp = 0;
  if( pFile->mmapLimit<=0 ) return SQLITE_OK;

  /* Create a mapping on first use. */
  if( !pFile->aMmap[0].pMapRegion ){
    int rcMap = unixMapfile(pFile, -1);
    if( rcMap!=SQLITE_OK ) return rcMap;
  }

  /* Search for a mapping that contains the entire requested range.  A
  ** range that is not wholly inside one mapping yields no pointer.  Each
  ** pointer handed out counts as one outstanding xFetch reference. */
  for(k=0; k<2; k++){
    unixMapping *pRegion = &pFile->aMmap[k];
    sqlite3_int64 iStop = iStart + pRegion->mmapSize;
    if( iOff>=iStart && iOff+nAmt<=iStop ){
      pFile->nFetchOut++;
      *pp = (u8 *)pRegion->pMapRegion + (iOff-iStart);
      return SQLITE_OK;
    }
    iStart = pRegion->mmapSize;
  }
  return SQLITE_OK;
}

/*
** If the third argument is non-NULL, then this function releases a 
4625
4626
4627
4628
4629
4630
4631

4632

4633
4634
4635
4636
4637
4638
4639

  /* If p==0 (unmap the entire file) then there must be no outstanding 
  ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
  ** then there must be at least one outstanding.  */
  assert( (p==0)==(pFd->nFetchOut==0) );

  /* If p!=0, it must match the iOff value. */

  assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );


  if( p ){
    pFd->nFetchOut--;
  }else{
    unixUnmapfile(pFd);
  }








>

>







4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746

  /* If p==0 (unmap the entire file) then there must be no outstanding 
  ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
  ** then there must be at least one outstanding.  */
  assert( (p==0)==(pFd->nFetchOut==0) );

  /* If p!=0, it must match the iOff value. */
  #if 0
  assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );
  #endif

  if( p ){
    pFd->nFetchOut--;
  }else{
    unixUnmapfile(pFd);
  }

5587
5588
5589
5590
5591
5592
5593

5594
5595
5596
5597
5598
5599
5600
        }
      }
      goto open_finished;
    }
  }
#endif
  

  rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);

open_finished:
  if( rc!=SQLITE_OK ){
    sqlite3_free(p->pUnused);
  }
  return rc;







>







5694
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
        }
      }
      goto open_finished;
    }
  }
#endif
  
  p->szSyspage = unixGetPagesize();
  rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);

open_finished:
  if( rc!=SQLITE_OK ){
    sqlite3_free(p->pUnused);
  }
  return rc;
Changes to src/pager.c.
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
/*
** Recompute Pager.bUseFetch from the current value of mxMmap and, when
** fetching is enabled, pass mxMmap to the file via the
** SQLITE_FCNTL_MMAP_LIMIT file-control hint.  Nothing happens if the
** pager's file is not open.
*/
static void pagerFixMaplimit(Pager *pPager){
  sqlite3_file *pFile = pPager->fd;
  if( !(isOpen(pFile)) ) return;

  /* xFetch is used only with version 3 (or later) io-methods and a
  ** positive mmap limit. */
  pPager->bUseFetch = (pFile->pMethods->iVersion>=3) && pPager->mxMmap>0;
  if( !pPager->bUseFetch ) return;

  sqlite3OsFileControlHint(
      pFile, SQLITE_FCNTL_MMAP_LIMIT, (void*)&pPager->mxMmap
  );
}

/*
** Change the maximum size of any memory mapping made of the database file.
*/
void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 mxMmap){







<
|

<







3356
3357
3358
3359
3360
3361
3362

3363
3364

3365
3366
3367
3368
3369
3370
3371
/*
** Recompute Pager.bUseFetch from the current value of mxMmap and pass
** mxMmap to the file via the SQLITE_FCNTL_MMAP_LIMIT file-control hint.
** The hint is sent whether or not fetching ends up enabled.  Nothing
** happens if the pager's file is not open.
*/
static void pagerFixMaplimit(Pager *pPager){
  sqlite3_file *pFile = pPager->fd;
  if( !(isOpen(pFile)) ) return;

  /* xFetch is used only with version 3 (or later) io-methods and a
  ** positive mmap limit. */
  pPager->bUseFetch = (pFile->pMethods->iVersion>=3) && pPager->mxMmap>0;

  sqlite3OsFileControlHint(
      pFile, SQLITE_FCNTL_MMAP_LIMIT, (void*)&pPager->mxMmap
  );
}

/*
** Change the maximum size of any memory mapping made of the database file.
*/
void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 mxMmap){
3635
3636
3637
3638
3639
3640
3641

3642

3643
3644
3645
3646
3647
3648
3649
** Regardless of mxPage, return the current maximum page count.
*/
int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
  /* A non-positive mxPage is a pure query: the stored limit is unchanged. */
  if( mxPage>=1 ) pPager->mxPgno = mxPage;

  /* Called only by OP_MaxPgcnt, which also enforces the second condition. */
  assert( pPager->eState!=PAGER_OPEN );
  assert( pPager->mxPgno>=pPager->dbSize );

  return pPager->mxPgno;
}

/*
** The following set of routines are used to disable the simulated
** I/O error mechanism.  These routines are used to avoid simulated
** errors in places where we do not care about errors.







>

>







3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
** Regardless of mxPage, return the current maximum page count.
*/
int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
  /* A non-positive mxPage is a pure query: the stored limit is unchanged. */
  if( mxPage>=1 ) pPager->mxPgno = mxPage;

  /* Called only by OP_MaxPgcnt. */
  assert( pPager->eState!=PAGER_OPEN );

  /* The check that mxPgno is at least dbSize is compiled out here. */
#if 0
  assert( pPager->mxPgno>=pPager->dbSize );  /* OP_MaxPgcnt enforces this */
#endif

  return pPager->mxPgno;
}

/*
** The following set of routines are used to disable the simulated
** I/O error mechanism.  These routines are used to avoid simulated
** errors in places where we do not care about errors.