SQLite

Check-in [aa6acfa8ca]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a problem in os_unix.c where a malloc failure could lead to a leaked file descriptor.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: aa6acfa8caa2ef59b4c16dfe42c4b5644da96905
User & Date: dan 2009-08-22 11:39:47.000
References
2009-08-25
12:11
Merge together the os_unix.c fix of [aa6acfa8ca] and the trigger fix of [dee1b8eb40]. (check-in: 1e2c6e134e user: drh tags: trunk)
Context
2009-08-22
19:17
Remove an obsolete documentation file left over from SQLite version 1.0. (check-in: f7eb1efc37 user: drh tags: trunk)
11:39
Fix a problem in os_unix.c where a malloc failure could lead to a leaked file descriptor. (check-in: aa6acfa8ca user: dan tags: trunk)
2009-08-21
17:18
When a database file is opened, try to find an unused file descriptor to reuse. This change affects unix (and other systems that use os_unix.c) only. Fix for cvstrac ticket #4018. (check-in: 9b4d9ab62d user: dan tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/os_unix.c.
163
164
165
166
167
168
169













170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
/*
** Only set the lastErrno if the error code is a real error and not 
** a normal expected return code of SQLITE_BUSY or SQLITE_OK
*/
#define IS_LOCK_ERROR(x)  ((x != SQLITE_OK) && (x != SQLITE_BUSY))















/*
** The unixFile structure is subclass of sqlite3_file specific to the unix
** VFS implementations.
*/
typedef struct unixFile unixFile;
struct unixFile {
  sqlite3_io_methods const *pMethod;  /* Always the first entry */
  struct unixOpenCnt *pOpen;       /* Info about all open fd's on this inode */
  struct unixLockInfo *pLock;      /* Info about locks on this inode */
  int h;                           /* The file descriptor */
  int dirfd;                       /* File descriptor for the directory */
  unsigned char locktype;          /* The type of lock held on this fd */
  int lastErrno;                   /* The unix errno from the last I/O error */
  void *lockingContext;            /* Locking style specific state */
  int flags;                       /* Flags value returned by xOpen() */
#if SQLITE_ENABLE_LOCKING_STYLE
  int openFlags;                   /* The flags specified at open() */
#endif
#if SQLITE_THREADSAFE && defined(__linux__)
  pthread_t tid;                   /* The thread that "owns" this unixFile */
#endif
#if OS_VXWORKS







>
>
>
>
>
>
>
>
>
>
>
>
>














|







163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
/*
** Only set the lastErrno if the error code is a real error and not 
** a normal expected return code of SQLITE_BUSY or SQLITE_OK
*/
#define IS_LOCK_ERROR(x)  ((x != SQLITE_OK) && (x != SQLITE_BUSY))


/*
** Sometimes, after a file handle is closed by SQLite, the file descriptor
** cannot be closed immediately. In these cases, instances of the following
** structure are used to store the file descriptor while waiting for an
** opportunity to either close or reuse it.
*/
typedef struct UnixUnusedFd UnixUnusedFd;
struct UnixUnusedFd {
  int fd;                   /* File descriptor to close */
  int flags;                /* Flags this file descriptor was opened with */
  UnixUnusedFd *pNext;      /* Next unused file descriptor on same file */
};

/*
** The unixFile structure is subclass of sqlite3_file specific to the unix
** VFS implementations.
*/
typedef struct unixFile unixFile;
struct unixFile {
  sqlite3_io_methods const *pMethod;  /* Always the first entry */
  struct unixOpenCnt *pOpen;       /* Info about all open fd's on this inode */
  struct unixLockInfo *pLock;      /* Info about locks on this inode */
  int h;                           /* The file descriptor */
  int dirfd;                       /* File descriptor for the directory */
  unsigned char locktype;          /* The type of lock held on this fd */
  int lastErrno;                   /* The unix errno from the last I/O error */
  void *lockingContext;            /* Locking style specific state */
  UnixUnusedFd *pUnused;           /* Pre-allocated UnixUnusedFd */
#if SQLITE_ENABLE_LOCKING_STYLE
  int openFlags;                   /* The flags specified at open() */
#endif
#if SQLITE_THREADSAFE && defined(__linux__)
  pthread_t tid;                   /* The thread that "owns" this unixFile */
#endif
#if OS_VXWORKS
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
** The close() system call would only occur when the last database
** using the file closes.
*/
struct unixOpenCnt {
  struct unixFileId fileId;   /* The lookup key */
  int nRef;                   /* Number of pointers to this structure */
  int nLock;                  /* Number of outstanding locks */
  int nPending;               /* Number of pending close() operations */
  struct PendingClose {
    int fd;                   /* File descriptor to close */
    int flags;                /* Flags this file descriptor was opened with */
  } *aPending;                /* Malloced space holding fds awaiting close() */
#if OS_VXWORKS
  sem_t *pSem;                     /* Named POSIX semaphore */
  char aSemName[MAX_PATHNAME+1];   /* Name of that semaphore */
#endif
  struct unixOpenCnt *pNext, *pPrev;   /* List of all unixOpenCnt objects */
};








<
<
|
<
<







757
758
759
760
761
762
763


764


765
766
767
768
769
770
771
** The close() system call would only occur when the last database
** using the file closes.
*/
struct unixOpenCnt {
  struct unixFileId fileId;   /* The lookup key */
  int nRef;                   /* Number of pointers to this structure */
  int nLock;                  /* Number of outstanding locks */


  UnixUnusedFd *pUnused;      /* Unused file descriptors to close */


#if OS_VXWORKS
  sem_t *pSem;                     /* Named POSIX semaphore */
  char aSemName[MAX_PATHNAME+1];   /* Name of that semaphore */
#endif
  struct unixOpenCnt *pNext, *pPrev;   /* List of all unixOpenCnt objects */
};

906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
        assert( openList==pOpen );
        openList = pOpen->pNext;
      }
      if( pOpen->pNext ){
        assert( pOpen->pNext->pPrev==pOpen );
        pOpen->pNext->pPrev = pOpen->pPrev;
      }
      sqlite3_free(pOpen->aPending);
      sqlite3_free(pOpen);
    }
  }
}

/*
** Given a file descriptor, locate unixLockInfo and unixOpenCnt structures that







|







915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
        assert( openList==pOpen );
        openList = pOpen->pNext;
      }
      if( pOpen->pNext ){
        assert( pOpen->pNext->pPrev==pOpen );
        pOpen->pNext->pPrev = pOpen->pPrev;
      }
      assert( !pOpen->pUnused );
      sqlite3_free(pOpen);
    }
  }
}

/*
** Given a file descriptor, locate unixLockInfo and unixOpenCnt structures that
1024
1025
1026
1027
1028
1029
1030

1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
    if( pOpen==0 ){
      pOpen = sqlite3_malloc( sizeof(*pOpen) );
      if( pOpen==0 ){
        releaseLockInfo(pLock);
        rc = SQLITE_NOMEM;
        goto exit_findlockinfo;
      }

      pOpen->fileId = fileId;
      pOpen->nRef = 1;
      pOpen->nLock = 0;
      pOpen->nPending = 0;
      pOpen->aPending = 0;
      pOpen->pNext = openList;
      pOpen->pPrev = 0;
      if( openList ) openList->pPrev = pOpen;
      openList = pOpen;
#if OS_VXWORKS
      pOpen->pSem = NULL;
      pOpen->aSemName[0] = '\0';
#endif
    }else{
      pOpen->nRef++;
    }
    *ppOpen = pOpen;
  }

exit_findlockinfo:







>


<
<
<

<


<
<
<
<







1033
1034
1035
1036
1037
1038
1039
1040
1041
1042



1043

1044
1045




1046
1047
1048
1049
1050
1051
1052
    if( pOpen==0 ){
      pOpen = sqlite3_malloc( sizeof(*pOpen) );
      if( pOpen==0 ){
        releaseLockInfo(pLock);
        rc = SQLITE_NOMEM;
        goto exit_findlockinfo;
      }
      memset(pOpen, 0, sizeof(*pOpen));
      pOpen->fileId = fileId;
      pOpen->nRef = 1;



      pOpen->pNext = openList;

      if( openList ) openList->pPrev = pOpen;
      openList = pOpen;




    }else{
      pOpen->nRef++;
    }
    *ppOpen = pOpen;
  }

exit_findlockinfo:
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416

1417
1418
1419
1420
1421

1422
1423
1424
1425
1426



1427
1428

1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458

1459
1460
1461
1462
1463
1464
1465
  unixLeaveMutex();
  OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
      rc==SQLITE_OK ? "ok" : "failed");
  return rc;
}

/*
** Close all file descriptors accumuated in the p->aPending[] array. If
** all such file descriptors are closed without error, the aPending[] 
** array is deleted and SQLITE_OK returned.
**
** Otherwise, if an error occurs, then successfully closed file descriptor
** entries in the aPending[] array are set to -1, the aPending[] array
** not deleted and SQLITE_IOERR_CLOSE returned.
*/ 
static int closePendingFds(unixFile *pFile){

  struct unixOpenCnt *pOpen = pFile->pOpen;
  struct PendingClose *aPending = pOpen->aPending;
  int i;
  int rc = SQLITE_OK;
  assert( unixMutexHeld() );

  for(i=0; i<pOpen->nPending; i++){
    if( aPending[i].fd>=0 ){
      if( close(aPending[i].fd) ){
        pFile->lastErrno = errno;
        rc = SQLITE_IOERR_CLOSE;



      }else{
        aPending[i].fd = -1;

      }
    }
  }
  if( rc==SQLITE_OK ){
    sqlite3_free(aPending);
    pOpen->nPending = 0;
    pOpen->aPending = 0;
  }
  return rc;
}

/*
** Add the file descriptor used by file handle pFile to the corresponding
** aPending[] array to be closed after some other connection releases
** a lock.
*/
static void setPendingFd(unixFile *pFile){
  struct PendingClose *aNew;
  struct unixOpenCnt *pOpen = pFile->pOpen;
  int nByte = (pOpen->nPending+1)*sizeof(pOpen->aPending[0]);
  aNew = sqlite3_realloc(pOpen->aPending, nByte);
  if( aNew==0 ){
    /* If a malloc fails, just leak the file descriptor */
  }else{
    pOpen->aPending = aNew;
    pOpen->aPending[pOpen->nPending].fd = pFile->h;
    pOpen->aPending[pOpen->nPending].flags = pFile->flags;
    pOpen->nPending++;
    pFile->h = -1;
  }

}

/*
** Lower the locking level on file descriptor pFile to locktype.  locktype
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below







|
|
|


|



>

<
<
|
|
>
|
|
|
|
|
>
>
>
|
<
>
|
|
<
<
<
|
<
<





<
|


<

<
<
<
<
<
<
|
|
|
|
<
>







1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420


1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432

1433
1434
1435



1436


1437
1438
1439
1440
1441

1442
1443
1444

1445






1446
1447
1448
1449

1450
1451
1452
1453
1454
1455
1456
1457
  unixLeaveMutex();
  OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
      rc==SQLITE_OK ? "ok" : "failed");
  return rc;
}

/*
** Close all file descriptors accumuated in the unixOpenCnt->pUnused list.
** If all such file descriptors are closed without error, the list is
** cleared and SQLITE_OK returned.
**
** Otherwise, if an error occurs, then successfully closed file descriptor
** entries are removed from the list, and SQLITE_IOERR_CLOSE returned. 
** not deleted and SQLITE_IOERR_CLOSE returned.
*/ 
static int closePendingFds(unixFile *pFile){
  int rc = SQLITE_OK;
  struct unixOpenCnt *pOpen = pFile->pOpen;


  UnixUnusedFd *pError = 0;
  UnixUnusedFd *p;
  UnixUnusedFd *pNext;
  for(p=pOpen->pUnused; p; p=pNext){
    pNext = p->pNext;
    if( close(p->fd) ){
      pFile->lastErrno = errno;
      rc = SQLITE_IOERR_CLOSE;
      p->pNext = pError;
      pError = p;
      assert(0);
    }else{

      sqlite3_free(p);
    }
  }



  pOpen->pUnused = pError;


  return rc;
}

/*
** Add the file descriptor used by file handle pFile to the corresponding

** pUnused list.
*/
static void setPendingFd(unixFile *pFile){

  struct unixOpenCnt *pOpen = pFile->pOpen;






  UnixUnusedFd *p = pFile->pUnused;
  p->pNext = pOpen->pUnused;
  pOpen->pUnused = p;
  pFile->h = -1;

  pFile->pUnused = 0;
}

/*
** Lower the locking level on file descriptor pFile to locktype.  locktype
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
    /* Decrement the count of locks against this same file.  When the
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    pOpen = pFile->pOpen;
    pOpen->nLock--;
    assert( pOpen->nLock>=0 );
    if( pOpen->nLock==0 && pOpen->nPending>0 ){
      int rc2 = closePendingFds(pFile);
      if( rc==SQLITE_OK ){
        rc = rc2;
      }
    }
  }
	







|







1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
    /* Decrement the count of locks against this same file.  When the
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    pOpen = pFile->pOpen;
    pOpen->nLock--;
    assert( pOpen->nLock>=0 );
    if( pOpen->nLock==0 ){
      int rc2 = closePendingFds(pFile);
      if( rc==SQLITE_OK ){
        rc = rc2;
      }
    }
  }
	
1623
1624
1625
1626
1627
1628
1629

1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
      }
      vxworksReleaseFileId(pFile->pId);
      pFile->pId = 0;
    }
#endif
    OSTRACE2("CLOSE   %-3d\n", pFile->h);
    OpenCounter(-1);

    memset(pFile, 0, sizeof(unixFile));
  }
  return SQLITE_OK;
}

/*
** Close a file.
*/
static int unixClose(sqlite3_file *id){
  int rc = SQLITE_OK;
  if( id ){
    unixFile *pFile = (unixFile *)id;
    unixUnlock(id, NO_LOCK);
    unixEnterMutex();
    if( pFile->pOpen && pFile->pOpen->nLock ){
      /* If there are outstanding locks, do not actually close the file just
      ** yet because that would clear those locks.  Instead, add the file
      ** descriptor to pOpen->aPending.  It will be automatically closed when
      ** the last lock is cleared.
      */
      setPendingFd(pFile);
    }
    releaseLockInfo(pFile->pLock);
    releaseOpenCnt(pFile->pOpen);
    rc = closeUnixFile(id);
    unixLeaveMutex();







>

















|
|







1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
      }
      vxworksReleaseFileId(pFile->pId);
      pFile->pId = 0;
    }
#endif
    OSTRACE2("CLOSE   %-3d\n", pFile->h);
    OpenCounter(-1);
    sqlite3_free(pFile->pUnused);
    memset(pFile, 0, sizeof(unixFile));
  }
  return SQLITE_OK;
}

/*
** Close a file.
*/
static int unixClose(sqlite3_file *id){
  int rc = SQLITE_OK;
  if( id ){
    unixFile *pFile = (unixFile *)id;
    unixUnlock(id, NO_LOCK);
    unixEnterMutex();
    if( pFile->pOpen && pFile->pOpen->nLock ){
      /* If there are outstanding locks, do not actually close the file just
      ** yet because that would clear those locks.  Instead, add the file
      ** descriptor to pOpen->pUnused list.  It will be automatically closed 
      ** when the last lock is cleared.
      */
      setPendingFd(pFile);
    }
    releaseLockInfo(pFile->pLock);
    releaseOpenCnt(pFile->pOpen);
    rc = closeUnixFile(id);
    unixLeaveMutex();
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
){
  unixFile *pFile = (unixFile *)id;
  int got;
  assert( id );

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
  assert( (pFile->flags&SQLITE_OPEN_MAIN_DB)==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );

  got = seekAndRead(pFile, offset, pBuf, amt);
  if( got==amt ){
    return SQLITE_OK;







|







2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
){
  unixFile *pFile = (unixFile *)id;
  int got;
  assert( id );

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );

  got = seekAndRead(pFile, offset, pBuf, amt);
  if( got==amt ){
    return SQLITE_OK;
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
  unixFile *pFile = (unixFile*)id;
  int wrote = 0;
  assert( id );
  assert( amt>0 );

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
  assert( (pFile->flags&SQLITE_OPEN_MAIN_DB)==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );

#ifndef NDEBUG
  /* If we are doing a normal write to a database file (as opposed to
  ** doing a hot-journal rollback or a write to some file other than a







|







2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
  unixFile *pFile = (unixFile*)id;
  int wrote = 0;
  assert( id );
  assert( amt>0 );

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );

#ifndef NDEBUG
  /* If we are doing a normal write to a database file (as opposed to
  ** doing a hot-journal rollback or a write to some file other than a
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
** For finder-funtion F, two objects are created:
**
**    (1) The real finder-function named "FImpt()".
**
**    (2) A constant pointer to this functio named just "F".
**
**
** A pointer to the F pointer is used as the pAppData value for VFS
** objects.  We have to do this instead of letting pAppData point
** directly at the finder-function since C90 rules prevent a void*
** from be cast into a function pointer.
**







|







3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
** For finder-funtion F, two objects are created:
**
**    (1) The real finder-function named "FImpt()".
**
**    (2) A constant pointer to this function named just "F".
**
**
** A pointer to the F pointer is used as the pAppData value for VFS
** objects.  We have to do this instead of letting pAppData point
** directly at the finder-function since C90 rules prevent a void*
** from be cast into a function pointer.
**
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
  const sqlite3_io_methods *pLockingStyle;
  unixFile *pNew = (unixFile *)pId;
  int rc = SQLITE_OK;

  assert( pNew->pLock==NULL );
  assert( pNew->pOpen==NULL );

  /* Parameter isDelete is only used on vxworks.
  ** Express this explicitly here to prevent compiler warnings
  ** about unused parameters.
  */
#if !OS_VXWORKS
  UNUSED_PARAMETER(isDelete);
#endif

  OSTRACE3("OPEN    %-3d %s\n", h, zFilename);    
  pNew->h = h;
  pNew->dirfd = dirfd;
  SET_THREADID(pNew);

#if OS_VXWORKS







|
|
<

<

<







3427
3428
3429
3430
3431
3432
3433
3434
3435

3436

3437

3438
3439
3440
3441
3442
3443
3444
  const sqlite3_io_methods *pLockingStyle;
  unixFile *pNew = (unixFile *)pId;
  int rc = SQLITE_OK;

  assert( pNew->pLock==NULL );
  assert( pNew->pOpen==NULL );

  /* Parameter isDelete is only used on vxworks. Express this explicitly 
  ** here to prevent compiler warnings about unused parameters.

  */

  UNUSED_PARAMETER(isDelete);


  OSTRACE3("OPEN    %-3d %s\n", h, zFilename);    
  pNew->h = h;
  pNew->dirfd = dirfd;
  SET_THREADID(pNew);

#if OS_VXWORKS
3470
3471
3472
3473
3474
3475
3476






















3477
3478
3479
3480
3481
3482
3483
    pNew->lockingContext = (void*)zFilename;
#endif
  }

  if( pLockingStyle == &posixIoMethods ){
    unixEnterMutex();
    rc = findLockInfo(pNew, &pNew->pLock, &pNew->pOpen);






















    unixLeaveMutex();
  }

#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
  else if( pLockingStyle == &afpIoMethods ){
    /* AFP locking uses the file path so it needs to be included in
    ** the afpLockingContext.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
    pNew->lockingContext = (void*)zFilename;
#endif
  }

  if( pLockingStyle == &posixIoMethods ){
    unixEnterMutex();
    rc = findLockInfo(pNew, &pNew->pLock, &pNew->pOpen);
    if( rc!=SQLITE_OK ){
      /* If an error occured in findLockInfo(), close the file descriptor
      ** immediately, before releasing the mutex. findLockInfo() may fail
      ** in two scenarios:
      **
      **   (a) A call to fstat() failed.
      **   (b) A malloc failed.
      **
      ** Scenario (b) may only occur if the process is holding no other
      ** file descriptors open on the same file. If there were other file
      ** descriptors on this file, then no malloc would be required by
      ** findLockInfo(). If this is the case, it is quite safe to close
      ** handle h - as it is guaranteed that no posix locks will be released
      ** by doing so.
      **
      ** If scenario (a) caused the error then things are not so safe. The
      ** implicit assumption here is that if fstat() fails, things are in
      ** such bad shape that dropping a lock or two doesn't matter much.
      */
      close(h);
      h = -1;
    }
    unixLeaveMutex();
  }

#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
  else if( pLockingStyle == &afpIoMethods ){
    /* AFP locking uses the file path so it needs to be included in
    ** the afpLockingContext.
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
    unlink(zFilename);
    isDelete = 0;
  }
  pNew->isDelete = isDelete;
#endif
  if( rc!=SQLITE_OK ){
    if( dirfd>=0 ) close(dirfd); /* silent leak if fail, already in error */
    close(h);
  }else{
    pNew->pMethod = pLockingStyle;
    OpenCounter(+1);
  }
  return rc;
}








|







3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
    unlink(zFilename);
    isDelete = 0;
  }
  pNew->isDelete = isDelete;
#endif
  if( rc!=SQLITE_OK ){
    if( dirfd>=0 ) close(dirfd); /* silent leak if fail, already in error */
    if( h>=0 ) close(h);
  }else{
    pNew->pMethod = pLockingStyle;
    OpenCounter(+1);
  }
  return rc;
}

3670
3671
3672
3673
3674
3675
3676
3677

3678






3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702

3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
** Refer to comments in the unixClose() function and the lengthy comment
** describing "Posix Advisory Locking" at the start of this file for 
** further details. Also, ticket #4018.
**
** If a suitable file descriptor is found, then it is returned. If no
** such file descriptor is located, -1 is returned.
*/
static int findReusableFd(const char *zPath, int flags){

  int fd = -1;                         /* Return value */






  struct stat sStat;                   /* Results of stat() call */

  /* A stat() call may fail for various reasons. If this happens, it is
  ** almost certain that an open() call on the same path will also fail.
  ** For this reason, if an error occurs in the stat() call here, it is
  ** ignored and -1 is returned. The caller will try to open a new file
  ** descriptor on the same path, fail, and return an error to SQLite.
  **
  ** Even if a subsequent open() call does succeed, the consequences of
  ** not searching for a resusable file descriptor are not dire.  */
  if( 0==stat(zPath, &sStat) ){
    struct unixOpenCnt *p;
    struct unixFileId id;
    id.dev = sStat.st_dev;
    id.ino = sStat.st_ino;

    unixEnterMutex();
    for(p=openList; p&& memcmp(&id, &p->fileId, sizeof(id)); p=p->pNext);
    if( p && p->aPending ){
      int i;
      struct PendingClose *aPending = p->aPending;
      for(i=0; i<p->nPending; i++){
        if( aPending[i].fd>=0 && flags==aPending[i].flags ){
          fd = aPending[i].fd;

          aPending[i].fd = -1;
          break;
        }
      }
    }
    unixLeaveMutex();
  }

  return fd;
}

/*
** Open the file zPath.
** 
** Previously, the SQLite OS layer used three functions in place of this
** one:







|
>
|
>
>
>
>
>
>











|





|
|
|
|
<
<
|
>
|
<
<




|
|







3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718


3719
3720
3721


3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
** Refer to comments in the unixClose() function and the lengthy comment
** describing "Posix Advisory Locking" at the start of this file for 
** further details. Also, ticket #4018.
**
** If a suitable file descriptor is found, then it is returned. If no
** such file descriptor is located, -1 is returned.
*/
static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
  UnixUnusedFd *pUnused = 0;

  /* Do not search for an unused file descriptor on vxworks. Not because
  ** vxworks would not benefit from the change (it might, we're not sure),
  ** but because no way to test it is currently available. It is better 
  ** not to risk breaking vxworks support for the sake of such an obscure 
  ** feature.  */
#if !OS_VXWORKS
  struct stat sStat;                   /* Results of stat() call */

  /* A stat() call may fail for various reasons. If this happens, it is
  ** almost certain that an open() call on the same path will also fail.
  ** For this reason, if an error occurs in the stat() call here, it is
  ** ignored and -1 is returned. The caller will try to open a new file
  ** descriptor on the same path, fail, and return an error to SQLite.
  **
  ** Even if a subsequent open() call does succeed, the consequences of
  ** not searching for a resusable file descriptor are not dire.  */
  if( 0==stat(zPath, &sStat) ){
    struct unixOpenCnt *pO;
    struct unixFileId id;
    id.dev = sStat.st_dev;
    id.ino = sStat.st_ino;

    unixEnterMutex();
    for(pO=openList; pO && memcmp(&id, &pO->fileId, sizeof(id)); pO=pO->pNext);
    if( pO ){
      UnixUnusedFd **pp;
      for(pp=&pO->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext));


      pUnused = *pp;
      if( pUnused ){
        *pp = pUnused->pNext;


      }
    }
    unixLeaveMutex();
  }
#endif    /* if !OS_VXWORKS */
  return pUnused;
}

/*
** Open the file zPath.
** 
** Previously, the SQLite OS layer used three functions in place of this
** one:
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806








3807
3808
3809
3810
3811
3812
3813
       || eType==SQLITE_OPEN_SUBJOURNAL   || eType==SQLITE_OPEN_MASTER_JOURNAL 
       || eType==SQLITE_OPEN_TRANSIENT_DB
  );

  memset(p, 0, sizeof(unixFile));

  if( eType==SQLITE_OPEN_MAIN_DB ){
    /* Try to find an unused file descriptor to reuse. This is not done
    ** for vxworks. Not because vxworks would not benefit from the change
    ** (it might, we're not sure), but because no way to test it is
    ** currently available. It is better not to risk breaking vxworks for 
    ** the sake of such an obscure feature.   */
#if !OS_VXWORKS
    fd = findReusableFd(zName, flags);
#endif








  }else if( !zName ){
    /* If zName is NULL, the upper layer is requesting a temp file. */
    assert(isDelete && !isOpenDirectory);
    rc = getTempname(MAX_PATHNAME+1, zTmpname);
    if( rc!=SQLITE_OK ){
      return rc;
    }







|
<
<
<
<
<
|
|
>
>
>
>
>
>
>
>







3808
3809
3810
3811
3812
3813
3814
3815





3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
       || eType==SQLITE_OPEN_SUBJOURNAL   || eType==SQLITE_OPEN_MASTER_JOURNAL 
       || eType==SQLITE_OPEN_TRANSIENT_DB
  );

  memset(p, 0, sizeof(unixFile));

  if( eType==SQLITE_OPEN_MAIN_DB ){
    UnixUnusedFd *pUnused;





    pUnused = findReusableFd(zName, flags);
    if( pUnused ){
      fd = pUnused->fd;
    }else{
      pUnused = sqlite3_malloc(sizeof(pUnused));
      if( !pUnused ){
        return SQLITE_NOMEM;
      }
    }
    p->pUnused = pUnused;
  }else if( !zName ){
    /* If zName is NULL, the upper layer is requesting a temp file. */
    assert(isDelete && !isOpenDirectory);
    rc = getTempname(MAX_PATHNAME+1, zTmpname);
    if( rc!=SQLITE_OK ){
      return rc;
    }
3821
3822
3823
3824
3825
3826
3827

3828
3829
3830
3831
3832

3833
3834

3835
3836
3837

3838
3839
3840
3841
3842
3843
3844





3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
  if( isReadonly )  openFlags |= O_RDONLY;
  if( isReadWrite ) openFlags |= O_RDWR;
  if( isCreate )    openFlags |= O_CREAT;
  if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
  openFlags |= (O_LARGEFILE|O_BINARY);

  if( fd<0 ){

    fd = open(zName, openFlags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
    OSTRACE4("OPENX   %-3d %s 0%o\n", fd, zName, openFlags);
    if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
      /* Failed to open the file for read/write access. Try read-only. */
      flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);

      flags |= SQLITE_OPEN_READONLY;
      return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);

    }
    if( fd<0 ){
      return SQLITE_CANTOPEN;

    }
  }
  assert( fd>=0 );
  p->flags = flags;
  if( pOutFlags ){
    *pOutFlags = flags;
  }






  if( isDelete ){
#if OS_VXWORKS
    zPath = zName;
#else
    unlink(zName);
#endif
  }
#if SQLITE_ENABLE_LOCKING_STYLE
  else{
    p->openFlags = openFlags;
  }
#endif

  if( isOpenDirectory ){
    rc = openDirectory(zPath, &dirfd);
    if( rc!=SQLITE_OK ){
      /* It is safe to close fd at this point, because it is guaranteed not
      ** to be open on a database file. If it were open on a database file,
      ** it would not be safe to close as this would cause any locks held
      ** on the file by this process to be released.  */
      assert( eType!=SQLITE_OPEN_MAIN_DB );
      close(fd);             /* silently leak if fail, already in error */
      return rc;
    }
  }

#ifdef FD_CLOEXEC
  fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
#endif








>
|




>

|
>


|
>



<



>
>
>
>
>



















|
|


|







3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863

3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
  if( isReadonly )  openFlags |= O_RDONLY;
  if( isReadWrite ) openFlags |= O_RDWR;
  if( isCreate )    openFlags |= O_CREAT;
  if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
  openFlags |= (O_LARGEFILE|O_BINARY);

  if( fd<0 ){
    mode_t openMode = (isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
    fd = open(zName, openFlags, openMode);
    OSTRACE4("OPENX   %-3d %s 0%o\n", fd, zName, openFlags);
    if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
      /* Failed to open the file for read/write access. Try read-only. */
      flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
      openFlags &= ~(O_RDWR|O_CREAT);
      flags |= SQLITE_OPEN_READONLY;
      openFlags |= O_RDONLY;
      fd = open(zName, openFlags, openMode);
    }
    if( fd<0 ){
      rc = SQLITE_CANTOPEN;
      goto open_finished;
    }
  }
  assert( fd>=0 );

  if( pOutFlags ){
    *pOutFlags = flags;
  }

  if( p->pUnused ){
    p->pUnused->fd = fd;
    p->pUnused->flags = flags;
  }

  if( isDelete ){
#if OS_VXWORKS
    zPath = zName;
#else
    unlink(zName);
#endif
  }
#if SQLITE_ENABLE_LOCKING_STYLE
  else{
    p->openFlags = openFlags;
  }
#endif

  if( isOpenDirectory ){
    rc = openDirectory(zPath, &dirfd);
    if( rc!=SQLITE_OK ){
      /* It is safe to close fd at this point, because it is guaranteed not
      ** to be open on a database file. If it were open on a database file,
      ** it would not be safe to close as this would release any locks held
      ** on the file by this process.  */
      assert( eType!=SQLITE_OPEN_MAIN_DB );
      close(fd);             /* silently leak if fail, already in error */
      goto open_finished;
    }
  }

#ifdef FD_CLOEXEC
  fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
#endif

3883
3884
3885
3886
3887
3888
3889







3890

3891

3892
3893

3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907



3908



3909
3910
3911
3912
3913
3914
3915
    /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 
    ** never use proxy, NULL means use proxy for non-local files only.  */
    if( envforce!=NULL ){
      useProxy = atoi(envforce)>0;
    }else{
      struct statfs fsInfo;
      if( statfs(zPath, &fsInfo) == -1 ){







        ((unixFile*)pFile)->lastErrno = errno;

        if( dirfd>=0 ) close(dirfd); /* silently leak if fail, in error */

        close(fd); /* silently leak if fail, in error */
        return SQLITE_IOERR_ACCESS;

      }
      useProxy = !(fsInfo.f_flags&MNT_LOCAL);
    }
    if( useProxy ){
      rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, isDelete);
      if( rc==SQLITE_OK ){
        rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");
      }
      return rc;
    }
  }
#endif
  
  return fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, isDelete);



}




/*
** Delete the file at zPath. If the dirSync argument is true, fsync()
** the directory after deleting the file.
*/
static int unixDelete(
  sqlite3_vfs *NotUsed,     /* VFS containing this as the xDelete method */







>
>
>
>
>
>
>
|
>
|
>

|
>








|




|
>
>
>
|
>
>
>







3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
    /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 
    ** never use proxy, NULL means use proxy for non-local files only.  */
    if( envforce!=NULL ){
      useProxy = atoi(envforce)>0;
    }else{
      struct statfs fsInfo;
      if( statfs(zPath, &fsInfo) == -1 ){
        /* In theory, the close(fd) call is sub-optimal. If the file opened
        ** with fd is a database file, and there are other connections open
        ** on that file that are currently holding advisory locks on it,
        ** then the call to close() will cancel those locks. In practice,
        ** we're assuming that statfs() doesn't fail very often. At least
        ** not while other file descriptors opened by the same process on
        ** the same file are working.  */
        p->lastErrno = errno;
        if( dirfd>=0 ){
          close(dirfd); /* silently leak if fail, in error */
        }
        close(fd); /* silently leak if fail, in error */
        rc = SQLITE_IOERR_ACCESS;
        goto open_finished;
      }
      useProxy = !(fsInfo.f_flags&MNT_LOCAL);
    }
    if( useProxy ){
      rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, isDelete);
      if( rc==SQLITE_OK ){
        rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");
      }
      goto open_finished;
    }
  }
#endif
  
  rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, isDelete);
open_finished:
  if( rc!=SQLITE_OK ){
    sqlite3_free(p->pUnused);
  }
  return rc;
}


/*
** Delete the file at zPath. If the dirSync argument is true, fsync()
** the directory after deleting the file.
*/
static int unixDelete(
  sqlite3_vfs *NotUsed,     /* VFS containing this as the xDelete method */
Changes to test/tkt4018.test.
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
    CREATE TABLE t1(a, b);
    BEGIN;
    SELECT * FROM t1;
  }
} {}

# The database is locked by connection [db]. Open and close a second
# connection to test.db 20000 times. If file-descriptors are not being
# reused, then the process will quickly exceed its maximum number of
# file descriptors (1024 by default on linux).
do_test tkt4018-1.2 {
  for {set i 0} {$i < 10000} {incr i} {
    sqlite3 db2 test.db
    db2 close
  }







|







39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
    CREATE TABLE t1(a, b);
    BEGIN;
    SELECT * FROM t1;
  }
} {}

# The database is locked by connection [db]. Open and close a second
# connection to test.db 10000 times. If file-descriptors are not being
# reused, then the process will quickly exceed its maximum number of
# file descriptors (1024 by default on linux).
do_test tkt4018-1.2 {
  for {set i 0} {$i < 10000} {incr i} {
    sqlite3 db2 test.db
    db2 close
  }