/ Check-in [10707d35]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance the unix VFS so that it keeps track of the size of unlinked files internally and thus avoids the need to call fstat() on those files, since fstat() does not work reliably on unlinked files on some implementations of FuseFS.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 10707d35786403ea5392d980f593bfecdae063dd
User & Date: drh 2014-06-09 20:24:46
Context
2014-06-16
16:41
Back out the unix VFS changes that seeks to avoid fstat() calls after an unlink(). That change did not clear the problem on FuseFS. check-in: 0617e20a user: drh tags: trunk
2014-06-09
20:39
Avoid an unnecessary initialization of the szFile field of unixFile in the unix VFS. check-in: 6484fb5a user: drh tags: trunk
20:24
Enhance the unix VFS so that it keeps track of the size of unlinked files internally and thus avoids the need to call fstat() on those files, since fstat() does not work reliably on unlinked files on some implementations of FuseFS. check-in: 10707d35 user: drh tags: trunk
20:06
Enhance the unix VFS so that it keeps track of the size of unlinked files internally and thus avoids the need to call fstat() on those files, since fstat() does not work reliably on unlinked files on some implementations of FuseFS. Closed-Leaf check-in: c41df393 user: drh tags: omit-fstat-after-unlink
13:11
Modify the %nonassoc directive in lemon so that it generates a run-time error rather than a parsing conflict. This changes is due to a bug report on the mailing list. SQLite does not use the %nonassoc directive in its grammar so this change does not affect SQLite. check-in: 1925f3a0 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

187
188
189
190
191
192
193

194
195
196
197
198
199
200
....
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326


1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
....
3276
3277
3278
3279
3280
3281
3282




3283
3284
3285
3286
3287
3288
3289
....
3617
3618
3619
3620
3621
3622
3623

3624
3625
3626
3627
3628
3629
3630
3631
3632

3633
3634














3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
....
3667
3668
3669
3670
3671
3672
3673

3674
3675
3676
3677
3678
3679
3680
....
4215
4216
4217
4218
4219
4220
4221

4222
4223
4224
4225
4226
4227
4228
....
4761
4762
4763
4764
4765
4766
4767

4768
4769
4770
4771
4772
4773
4774
....
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
....
5766
5767
5768
5769
5770
5771
5772

5773
5774
5775
5776
5777
5778
5779
  unsigned char eFileLock;            /* The type of lock held on this fd */
  unsigned short int ctrlFlags;       /* Behavioral bits.  UNIXFILE_* flags */
  int lastErrno;                      /* The unix errno from last I/O error */
  void *lockingContext;               /* Locking style specific state */
  UnixUnusedFd *pUnused;              /* Pre-allocated UnixUnusedFd */
  const char *zPath;                  /* Name of the file */
  unixShm *pShm;                      /* Shared memory segment information */

  int szChunk;                        /* Configured by FCNTL_CHUNK_SIZE */
#if SQLITE_MAX_MMAP_SIZE>0
  int nFetchOut;                      /* Number of outstanding xFetch refs */
  sqlite3_int64 mmapSize;             /* Usable size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeActual;       /* Actual size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeMax;          /* Configured FCNTL_MMAP_SIZE value */
  void *pMapRegion;                   /* Memory mapped region */
................................................................................
** (3) The file has not been renamed or unlinked
**
** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.
*/
static void verifyDbFile(unixFile *pFile){
  struct stat buf;
  int rc;
  if( pFile->ctrlFlags & UNIXFILE_WARNED ){
    /* One or more of the following warnings have already been issued.  Do not
    ** repeat them so as not to clutter the error log */


    return;
  }
  rc = osFstat(pFile->h, &buf);
  if( rc!=0 ){
    sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);
    pFile->ctrlFlags |= UNIXFILE_WARNED;
    return;
  }
  if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){
    sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);
    pFile->ctrlFlags |= UNIXFILE_WARNED;
    return;
  }
  if( buf.st_nlink>1 ){
    sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);
    pFile->ctrlFlags |= UNIXFILE_WARNED;
................................................................................
  int amt,
  sqlite3_int64 offset 
){
  unixFile *pFile = (unixFile*)id;
  int wrote = 0;
  assert( id );
  assert( amt>0 );





  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
#if 0
  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
................................................................................
    ** use read() and write() to access data beyond this point from now on.  
    */
    if( nByte<pFile->mmapSize ){
      pFile->mmapSize = nByte;
    }
#endif


    return SQLITE_OK;
  }
}

/*
** Determine the current size of a file in bytes
*/
static int unixFileSize(sqlite3_file *id, i64 *pSize){
  int rc;

  struct stat buf;
  assert( id );














  rc = osFstat(((unixFile*)id)->h, &buf);
  SimulateIOError( rc=1 );
  if( rc!=0 ){
    ((unixFile*)id)->lastErrno = errno;
    return SQLITE_IOERR_FSTAT;
  }
  *pSize = buf.st_size;

  /* When opening a zero-size database, the findInodeInfo() procedure
  ** writes a single byte into that file in order to work around a bug
  ** in the OS-X msdos filesystem.  In order to avoid problems with upper
................................................................................
** nBytes or larger, this routine is a no-op.
*/
static int fcntlSizeHint(unixFile *pFile, i64 nByte){
  if( pFile->szChunk>0 ){
    i64 nSize;                    /* Required file size */
    struct stat buf;              /* Used to hold return values of fstat() */
   

    if( osFstat(pFile->h, &buf) ) return SQLITE_IOERR_FSTAT;

    nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk;
    if( nSize>(i64)buf.st_size ){

#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
      /* The code below is handling the return value of osFallocate() 
................................................................................
  int nShmFilename;               /* Size of the SHM filename in bytes */

  /* Allocate space for the new unixShm object. */
  p = sqlite3_malloc( sizeof(*p) );
  if( p==0 ) return SQLITE_NOMEM;
  memset(p, 0, sizeof(*p));
  assert( pDbFd->pShm==0 );


  /* Check to see if a unixShmNode object already exists. Reuse an existing
  ** one if present. Create a new one if necessary.
  */
  unixEnterMutex();
  pInode = pDbFd->pInode;
  pShmNode = pInode->pShmNode;
................................................................................
    /* If the mmap() above failed, assume that all subsequent mmap() calls
    ** will probably fail too. Fall back to using xRead/xWrite exclusively
    ** in this case.  */
    pFd->mmapSizeMax = 0;
  }
  pFd->pMapRegion = (void *)pNew;
  pFd->mmapSize = pFd->mmapSizeActual = nNew;

}

/*
** Memory map or remap the file opened by file-descriptor pFd (if the file
** is already mapped, the existing mapping is replaced by the new). Or, if 
** there already exists a mapping for this file, and there are still 
** outstanding xFetch() references to it, this function is a no-op.
................................................................................
  i64 nMap = nByte;
  int rc;

  assert( nMap>=0 || pFd->nFetchOut==0 );
  if( pFd->nFetchOut>0 ) return SQLITE_OK;

  if( nMap<0 ){
    struct stat statbuf;          /* Low-level file information */
    rc = osFstat(pFd->h, &statbuf);
    if( rc!=SQLITE_OK ){
      return SQLITE_IOERR_FSTAT;
    }
    nMap = statbuf.st_size;
  }
  if( nMap>pFd->mmapSizeMax ){
    nMap = pFd->mmapSizeMax;
  }

  if( nMap!=pFd->mmapSize ){
    if( nMap>0 ){
................................................................................

  if( p->pUnused ){
    p->pUnused->fd = fd;
    p->pUnused->flags = flags;
  }

  if( isDelete ){

#if OS_VXWORKS
    zPath = zName;
#else
    osUnlink(zName);
#endif
  }
#if SQLITE_ENABLE_LOCKING_STYLE







>







 







|
|
|
>
>








|







 







>
>
>
>







 







>









>

|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|


|







 







>







 







>







 







>







 







|
<



<







 







>







187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
....
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
....
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
....
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
....
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
....
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
....
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
....
4813
4814
4815
4816
4817
4818
4819
4820

4821
4822
4823

4824
4825
4826
4827
4828
4829
4830
....
5790
5791
5792
5793
5794
5795
5796
5797
5798
5799
5800
5801
5802
5803
5804
  unsigned char eFileLock;            /* The type of lock held on this fd */
  unsigned short int ctrlFlags;       /* Behavioral bits.  UNIXFILE_* flags */
  int lastErrno;                      /* The unix errno from last I/O error */
  void *lockingContext;               /* Locking style specific state */
  UnixUnusedFd *pUnused;              /* Pre-allocated UnixUnusedFd */
  const char *zPath;                  /* Name of the file */
  unixShm *pShm;                      /* Shared memory segment information */
  sqlite3_int64 szFile;               /* File size for UNIXFILE_DELETE files */
  int szChunk;                        /* Configured by FCNTL_CHUNK_SIZE */
#if SQLITE_MAX_MMAP_SIZE>0
  int nFetchOut;                      /* Number of outstanding xFetch refs */
  sqlite3_int64 mmapSize;             /* Usable size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeActual;       /* Actual size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeMax;          /* Configured FCNTL_MMAP_SIZE value */
  void *pMapRegion;                   /* Memory mapped region */
................................................................................
** (3) The file has not been renamed or unlinked
**
** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.
*/
static void verifyDbFile(unixFile *pFile){
  struct stat buf;
  int rc;
  if( pFile->ctrlFlags & (UNIXFILE_WARNED|UNIXFILE_DELETE) ){
    /* UNIXFILE_WARNED means that one or more of the following warnings have
    ** already been issued.  Do not* repeat them so as not to clutter the error
    ** log.  Do not investigate unlinked files since fstat() does not always
    ** work following an unlink(). */
    return;
  }
  rc = osFstat(pFile->h, &buf);
  if( rc!=0 ){
    sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);
    pFile->ctrlFlags |= UNIXFILE_WARNED;
    return;
  }
  if( buf.st_nlink==0 ){
    sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);
    pFile->ctrlFlags |= UNIXFILE_WARNED;
    return;
  }
  if( buf.st_nlink>1 ){
    sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);
    pFile->ctrlFlags |= UNIXFILE_WARNED;
................................................................................
  int amt,
  sqlite3_int64 offset 
){
  unixFile *pFile = (unixFile*)id;
  int wrote = 0;
  assert( id );
  assert( amt>0 );

  /* Update the internally tracked file size.  The internal file size is only
  ** accurate for unlinked files */
  if( offset+amt>pFile->szFile ) pFile->szFile = offset+amt;

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
#if 0
  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
................................................................................
    ** use read() and write() to access data beyond this point from now on.  
    */
    if( nByte<pFile->mmapSize ){
      pFile->mmapSize = nByte;
    }
#endif

    pFile->szFile = nByte;
    return SQLITE_OK;
  }
}

/*
** Determine the current size of a file in bytes
*/
static int unixFileSize(sqlite3_file *id, i64 *pSize){
  int rc;
  unixFile *pFile = (unixFile*)id;
  struct stat buf;
  assert( pFile );

  /* For files that have been unlinked, simply return the szFile which we keep
  ** track of internally.  There is no need to fstat() as SQLite has exclusive
  ** access to the file and no other process can modify the file and thus change
  ** the file size without our knowing it.  We do this because fstat() will
  ** fail on unlinked files on some (broken) unix filesystems.
  */
  if( pFile->ctrlFlags & UNIXFILE_DELETE ){
    *pSize = pFile->szFile;
    /* The following assert() confirms that the internal filesize is correct */
    assert( osFstat(pFile->h, &buf)!=0 || buf.st_size==pFile->szFile );
    return SQLITE_OK;
  }

  rc = osFstat(pFile->h, &buf);
  SimulateIOError( rc=1 );
  if( rc!=0 ){
    pFile->lastErrno = errno;
    return SQLITE_IOERR_FSTAT;
  }
  *pSize = buf.st_size;

  /* When opening a zero-size database, the findInodeInfo() procedure
  ** writes a single byte into that file in order to work around a bug
  ** in the OS-X msdos filesystem.  In order to avoid problems with upper
................................................................................
** nBytes or larger, this routine is a no-op.
*/
static int fcntlSizeHint(unixFile *pFile, i64 nByte){
  if( pFile->szChunk>0 ){
    i64 nSize;                    /* Required file size */
    struct stat buf;              /* Used to hold return values of fstat() */
   
    if( pFile->ctrlFlags & UNIXFILE_DELETE ) return SQLITE_OK;
    if( osFstat(pFile->h, &buf) ) return SQLITE_IOERR_FSTAT;

    nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk;
    if( nSize>(i64)buf.st_size ){

#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
      /* The code below is handling the return value of osFallocate() 
................................................................................
  int nShmFilename;               /* Size of the SHM filename in bytes */

  /* Allocate space for the new unixShm object. */
  p = sqlite3_malloc( sizeof(*p) );
  if( p==0 ) return SQLITE_NOMEM;
  memset(p, 0, sizeof(*p));
  assert( pDbFd->pShm==0 );
  assert( (pDbFd->ctrlFlags & UNIXFILE_DELETE)==0 );

  /* Check to see if a unixShmNode object already exists. Reuse an existing
  ** one if present. Create a new one if necessary.
  */
  unixEnterMutex();
  pInode = pDbFd->pInode;
  pShmNode = pInode->pShmNode;
................................................................................
    /* If the mmap() above failed, assume that all subsequent mmap() calls
    ** will probably fail too. Fall back to using xRead/xWrite exclusively
    ** in this case.  */
    pFd->mmapSizeMax = 0;
  }
  pFd->pMapRegion = (void *)pNew;
  pFd->mmapSize = pFd->mmapSizeActual = nNew;
  if( nNew>pFd->szFile ) pFd->szFile = nNew;
}

/*
** Memory map or remap the file opened by file-descriptor pFd (if the file
** is already mapped, the existing mapping is replaced by the new). Or, if 
** there already exists a mapping for this file, and there are still 
** outstanding xFetch() references to it, this function is a no-op.
................................................................................
  i64 nMap = nByte;
  int rc;

  assert( nMap>=0 || pFd->nFetchOut==0 );
  if( pFd->nFetchOut>0 ) return SQLITE_OK;

  if( nMap<0 ){
    rc = unixFileSize((sqlite3_file*)pFd, &nMap);

    if( rc!=SQLITE_OK ){
      return SQLITE_IOERR_FSTAT;
    }

  }
  if( nMap>pFd->mmapSizeMax ){
    nMap = pFd->mmapSizeMax;
  }

  if( nMap!=pFd->mmapSize ){
    if( nMap>0 ){
................................................................................

  if( p->pUnused ){
    p->pUnused->fd = fd;
    p->pUnused->flags = flags;
  }

  if( isDelete ){
    p->szFile = 0;
#if OS_VXWORKS
    zPath = zName;
#else
    osUnlink(zName);
#endif
  }
#if SQLITE_ENABLE_LOCKING_STYLE