SQLite

Check-in [9b4d9ab62d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:When a database file is opened, try to find an unused file descriptor to reuse. This change affects unix (and other systems that use os_unix.c) only. Fix for cvstrac ticket #4018.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 9b4d9ab62d687289837b13b07885e72cc3abe8a9
User & Date: dan 2009-08-21 17:18:03.000
Context
2009-08-24
01:35
Run the column cache in a new context when generating code for trigger programs. Fix for ticket [efc02f9779]. (check-in: dee1b8eb40 user: drh tags: trunk)
2009-08-22
11:39
Fix a problem in os_unix.c where a malloc failure could lead to a leaked file descriptor. (check-in: aa6acfa8ca user: dan tags: trunk)
2009-08-21
17:18
When a database file is opened, try to find an unused file descriptor to reuse. This change affects unix (and other systems that use os_unix.c) only. Fix for cvstrac ticket #4018. (check-in: 9b4d9ab62d user: dan tags: trunk)
13:22
Change the expression code generator to account for the fact that the new sqlite3AtoF() never returns NaN. Also, clarification of a comment in where.c. (check-in: 75f596a04a user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/os_unix.c.
177
178
179
180
181
182
183

184
185
186
187
188
189
190
  struct unixOpenCnt *pOpen;       /* Info about all open fd's on this inode */
  struct unixLockInfo *pLock;      /* Info about locks on this inode */
  int h;                           /* The file descriptor */
  int dirfd;                       /* File descriptor for the directory */
  unsigned char locktype;          /* The type of lock held on this fd */
  int lastErrno;                   /* The unix errno from the last I/O error */
  void *lockingContext;            /* Locking style specific state */

#if SQLITE_ENABLE_LOCKING_STYLE
  int openFlags;                   /* The flags specified at open() */
#endif
#if SQLITE_THREADSAFE && defined(__linux__)
  pthread_t tid;                   /* The thread that "owns" this unixFile */
#endif
#if OS_VXWORKS







>







177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
  struct unixOpenCnt *pOpen;       /* Info about all open fd's on this inode */
  struct unixLockInfo *pLock;      /* Info about locks on this inode */
  int h;                           /* The file descriptor */
  int dirfd;                       /* File descriptor for the directory */
  unsigned char locktype;          /* The type of lock held on this fd */
  int lastErrno;                   /* The unix errno from the last I/O error */
  void *lockingContext;            /* Locking style specific state */
  int flags;                       /* Flags value returned by xOpen() */
#if SQLITE_ENABLE_LOCKING_STYLE
  int openFlags;                   /* The flags specified at open() */
#endif
#if SQLITE_THREADSAFE && defined(__linux__)
  pthread_t tid;                   /* The thread that "owns" this unixFile */
#endif
#if OS_VXWORKS
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
  ** occur if a file is updated without also updating the transaction
  ** counter.  This test is made to avoid new problems similar to the
  ** one described by ticket #3584. 
  */
  unsigned char transCntrChng;   /* True if the transaction counter changed */
  unsigned char dbUpdate;        /* True if any part of database file changed */
  unsigned char inNormalWrite;   /* True if in a normal write operation */

  /* If true, that means we are dealing with a database file that has
  ** a range of locking bytes from PENDING_BYTE through PENDING_BYTE+511
  ** which should never be read or written.  Asserts() will verify this */
  unsigned char isLockable;      /* True if file might be locked */
#endif
#ifdef SQLITE_TEST
  /* In test mode, increase the size of this structure a bit so that 
  ** it is larger than the struct CrashFile defined in test6.c.
  */
  char aPadding[32];
#endif







<
<
<
<
<







199
200
201
202
203
204
205





206
207
208
209
210
211
212
  ** occur if a file is updated without also updating the transaction
  ** counter.  This test is made to avoid new problems similar to the
  ** one described by ticket #3584. 
  */
  unsigned char transCntrChng;   /* True if the transaction counter changed */
  unsigned char dbUpdate;        /* True if any part of database file changed */
  unsigned char inNormalWrite;   /* True if in a normal write operation */





#endif
#ifdef SQLITE_TEST
  /* In test mode, increase the size of this structure a bit so that 
  ** it is larger than the struct CrashFile defined in test6.c.
  */
  char aPadding[32];
#endif
749
750
751
752
753
754
755



756
757
758
759
760
761
762
763
** using the file closes.
*/
struct unixOpenCnt {
  struct unixFileId fileId;   /* The lookup key */
  int nRef;                   /* Number of pointers to this structure */
  int nLock;                  /* Number of outstanding locks */
  int nPending;               /* Number of pending close() operations */



  int *aPending;              /* Malloced space holding fds awaiting close() */
#if OS_VXWORKS
  sem_t *pSem;                     /* Named POSIX semaphore */
  char aSemName[MAX_PATHNAME+1];   /* Name of that semaphore */
#endif
  struct unixOpenCnt *pNext, *pPrev;   /* List of all unixOpenCnt objects */
};








>
>
>
|







745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
** using the file closes.
*/
struct unixOpenCnt {
  struct unixFileId fileId;   /* The lookup key */
  int nRef;                   /* Number of pointers to this structure */
  int nLock;                  /* Number of outstanding locks */
  int nPending;               /* Number of pending close() operations */
  struct PendingClose {
    int fd;                   /* File descriptor to close */
    int flags;                /* Flags this file descriptor was opened with */
  } *aPending;                /* Malloced space holding fds awaiting close() */
#if OS_VXWORKS
  sem_t *pSem;                     /* Named POSIX semaphore */
  char aSemName[MAX_PATHNAME+1];   /* Name of that semaphore */
#endif
  struct unixOpenCnt *pNext, *pPrev;   /* List of all unixOpenCnt objects */
};

1400
1401
1402
1403
1404
1405
1406






















































1407
1408
1409
1410
1411
1412
1413

end_lock:
  unixLeaveMutex();
  OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
      rc==SQLITE_OK ? "ok" : "failed");
  return rc;
}























































/*
** Lower the locking level on file descriptor pFile to locktype.  locktype
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466

end_lock:
  unixLeaveMutex();
  OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
      rc==SQLITE_OK ? "ok" : "failed");
  return rc;
}

/*
** Close all file descriptors accumuated in the p->aPending[] array. If
** all such file descriptors are closed without error, the aPending[] 
** array is deleted and SQLITE_OK returned.
**
** Otherwise, if an error occurs, then successfully closed file descriptor
** entries in the aPending[] array are set to -1, the aPending[] array
** not deleted and SQLITE_IOERR_CLOSE returned.
*/ 
static int closePendingFds(unixFile *pFile){
  struct unixOpenCnt *pOpen = pFile->pOpen;
  struct PendingClose *aPending = pOpen->aPending;
  int i;
  int rc = SQLITE_OK;
  assert( unixMutexHeld() );
  for(i=0; i<pOpen->nPending; i++){
    if( aPending[i].fd>=0 ){
      if( close(aPending[i].fd) ){
        pFile->lastErrno = errno;
        rc = SQLITE_IOERR_CLOSE;
      }else{
        aPending[i].fd = -1;
      }
    }
  }
  if( rc==SQLITE_OK ){
    sqlite3_free(aPending);
    pOpen->nPending = 0;
    pOpen->aPending = 0;
  }
  return rc;
}

/*
** Add the file descriptor used by file handle pFile to the corresponding
** aPending[] array to be closed after some other connection releases
** a lock.
*/
static void setPendingFd(unixFile *pFile){
  struct PendingClose *aNew;
  struct unixOpenCnt *pOpen = pFile->pOpen;
  int nByte = (pOpen->nPending+1)*sizeof(pOpen->aPending[0]);
  aNew = sqlite3_realloc(pOpen->aPending, nByte);
  if( aNew==0 ){
    /* If a malloc fails, just leak the file descriptor */
  }else{
    pOpen->aPending = aNew;
    pOpen->aPending[pOpen->nPending].fd = pFile->h;
    pOpen->aPending[pOpen->nPending].flags = pFile->flags;
    pOpen->nPending++;
    pFile->h = -1;
  }
}

/*
** Lower the locking level on file descriptor pFile to locktype.  locktype
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
        pFile->lastErrno = tErrno;
      }
      goto end_unlock;
    }
  }
  if( locktype==NO_LOCK ){
    struct unixOpenCnt *pOpen;
    int rc2 = SQLITE_OK;

    /* Decrement the shared lock counter.  Release the lock using an
    ** OS call only when all threads in this same process have released
    ** the lock.
    */
    pLock->cnt--;
    if( pLock->cnt==0 ){







<







1536
1537
1538
1539
1540
1541
1542

1543
1544
1545
1546
1547
1548
1549
        pFile->lastErrno = tErrno;
      }
      goto end_unlock;
    }
  }
  if( locktype==NO_LOCK ){
    struct unixOpenCnt *pOpen;


    /* Decrement the shared lock counter.  Release the lock using an
    ** OS call only when all threads in this same process have released
    ** the lock.
    */
    pLock->cnt--;
    if( pLock->cnt==0 ){
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545

1546
1547
1548
1549
1550
1551
1552
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    pOpen = pFile->pOpen;
    pOpen->nLock--;
    assert( pOpen->nLock>=0 );
    if( pOpen->nLock==0 && pOpen->nPending>0 ){
      int i;
      for(i=0; i<pOpen->nPending; i++){
        /* close pending fds, but if closing fails don't free the array
        ** assign -1 to the successfully closed descriptors and record the
        ** error.  The next attempt to unlock will try again. */
        if( pOpen->aPending[i] < 0 ) continue;
        if( close(pOpen->aPending[i]) ){
          pFile->lastErrno = errno;
          rc2 = SQLITE_IOERR_CLOSE;
        }else{
          pOpen->aPending[i] = -1;
        }
      }
      if( rc2==SQLITE_OK ){
        sqlite3_free(pOpen->aPending);
        pOpen->nPending = 0;
        pOpen->aPending = 0;
      }
    }
    if( rc==SQLITE_OK ){
      rc = rc2;

    }
  }
	
end_unlock:
  unixLeaveMutex();
  if( rc==SQLITE_OK ) pFile->locktype = locktype;
  return rc;







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
|
|
>







1570
1571
1572
1573
1574
1575
1576















1577



1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    pOpen = pFile->pOpen;
    pOpen->nLock--;
    assert( pOpen->nLock>=0 );
    if( pOpen->nLock==0 && pOpen->nPending>0 ){















      int rc2 = closePendingFds(pFile);



      if( rc==SQLITE_OK ){
        rc = rc2;
      }
    }
  }
	
end_unlock:
  unixLeaveMutex();
  if( rc==SQLITE_OK ) pFile->locktype = locktype;
  return rc;
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
    unixEnterMutex();
    if( pFile->pOpen && pFile->pOpen->nLock ){
      /* If there are outstanding locks, do not actually close the file just
      ** yet because that would clear those locks.  Instead, add the file
      ** descriptor to pOpen->aPending.  It will be automatically closed when
      ** the last lock is cleared.
      */
      int *aNew;
      struct unixOpenCnt *pOpen = pFile->pOpen;
      aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
      if( aNew==0 ){
        /* If a malloc fails, just leak the file descriptor */
      }else{
        pOpen->aPending = aNew;
        pOpen->aPending[pOpen->nPending] = pFile->h;
        pOpen->nPending++;
        pFile->h = -1;
      }
    }
    releaseLockInfo(pFile->pLock);
    releaseOpenCnt(pFile->pOpen);
    rc = closeUnixFile(id);
    unixLeaveMutex();
  }
  return rc;







<
<
<
<
<
<
<
<
|
<
<







1643
1644
1645
1646
1647
1648
1649








1650


1651
1652
1653
1654
1655
1656
1657
    unixEnterMutex();
    if( pFile->pOpen && pFile->pOpen->nLock ){
      /* If there are outstanding locks, do not actually close the file just
      ** yet because that would clear those locks.  Instead, add the file
      ** descriptor to pOpen->aPending.  It will be automatically closed when
      ** the last lock is cleared.
      */








      setPendingFd(pFile);


    }
    releaseLockInfo(pFile->pLock);
    releaseOpenCnt(pFile->pOpen);
    rc = closeUnixFile(id);
    unixLeaveMutex();
  }
  return rc;
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614


2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649

  if( rc==SQLITE_OK ){
    if( locktype==NO_LOCK ){
      struct unixOpenCnt *pOpen = pFile->pOpen;
      pOpen->nLock--;
      assert( pOpen->nLock>=0 );
      if( pOpen->nLock==0 && pOpen->nPending>0 ){
        int i;
        for(i=0; i<pOpen->nPending; i++){
          if( pOpen->aPending[i] < 0 ) continue;
          if( close(pOpen->aPending[i]) ){
            pFile->lastErrno = errno;
            rc = SQLITE_IOERR_CLOSE;
          }else{
            pOpen->aPending[i] = -1;
          }
        }
        if( rc==SQLITE_OK ){
          sqlite3_free(pOpen->aPending);
          pOpen->nPending = 0;
          pOpen->aPending = 0;
        }
      }
    }
  }
  unixLeaveMutex();
  if( rc==SQLITE_OK ) pFile->locktype = locktype;


  return rc;
}

/*
** Close a file & cleanup AFP specific locking context 
*/
static int afpClose(sqlite3_file *id) {
  if( id ){
    unixFile *pFile = (unixFile*)id;
    afpUnlock(id, NO_LOCK);
    unixEnterMutex();
    if( pFile->pOpen && pFile->pOpen->nLock ){
      /* If there are outstanding locks, do not actually close the file just
      ** yet because that would clear those locks.  Instead, add the file
      ** descriptor to pOpen->aPending.  It will be automatically closed when
      ** the last lock is cleared.
      */
      int *aNew;
      struct unixOpenCnt *pOpen = pFile->pOpen;
      aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
      if( aNew==0 ){
        /* If a malloc fails, just leak the file descriptor */
      }else{
        pOpen->aPending = aNew;
        pOpen->aPending[pOpen->nPending] = pFile->h;
        pOpen->nPending++;
        pFile->h = -1;
      }
    }
    releaseOpenCnt(pFile->pOpen);
    sqlite3_free(pFile->lockingContext);
    closeUnixFile(id);
    unixLeaveMutex();
  }
  return SQLITE_OK;







<
<
<
<
<
<
<
<
<
<
<
<
|
<
<




|
>
>

















<
<
<
<
<
<
<
<
|
<
<







2613
2614
2615
2616
2617
2618
2619












2620


2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644








2645


2646
2647
2648
2649
2650
2651
2652

  if( rc==SQLITE_OK ){
    if( locktype==NO_LOCK ){
      struct unixOpenCnt *pOpen = pFile->pOpen;
      pOpen->nLock--;
      assert( pOpen->nLock>=0 );
      if( pOpen->nLock==0 && pOpen->nPending>0 ){












        rc = closePendingFds(pFile);


      }
    }
  }
  unixLeaveMutex();
  if( rc==SQLITE_OK ){
    pFile->locktype = locktype;
  }
  return rc;
}

/*
** Close a file & cleanup AFP specific locking context 
*/
static int afpClose(sqlite3_file *id) {
  if( id ){
    unixFile *pFile = (unixFile*)id;
    afpUnlock(id, NO_LOCK);
    unixEnterMutex();
    if( pFile->pOpen && pFile->pOpen->nLock ){
      /* If there are outstanding locks, do not actually close the file just
      ** yet because that would clear those locks.  Instead, add the file
      ** descriptor to pOpen->aPending.  It will be automatically closed when
      ** the last lock is cleared.
      */








      setPendingFd(pFile);


    }
    releaseOpenCnt(pFile->pOpen);
    sqlite3_free(pFile->lockingContext);
    closeUnixFile(id);
    unixLeaveMutex();
  }
  return SQLITE_OK;
2721
2722
2723
2724
2725
2726
2727

2728
2729
2730

2731
2732
2733
2734

2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
*/
static int unixRead(
  sqlite3_file *id, 
  void *pBuf, 
  int amt,
  sqlite3_int64 offset
){

  int got;
  assert( id );


  /* Never read or write any of the bytes in the locking range */
  assert( ((unixFile*)id)->isLockable==0
          || offset>=PENDING_BYTE+512
          || offset+amt<=PENDING_BYTE );


  got = seekAndRead((unixFile*)id, offset, pBuf, amt);
  if( got==amt ){
    return SQLITE_OK;
  }else if( got<0 ){
    /* lastErrno set by seekAndRead */
    return SQLITE_IOERR_READ;
  }else{
    ((unixFile*)id)->lastErrno = 0; /* not a system error */
    /* Unread parts of the buffer must be zero-filled */
    memset(&((char*)pBuf)[got], 0, amt-got);
    return SQLITE_IOERR_SHORT_READ;
  }
}

/*







>



>
|
|
|
|
>

|






|







2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
*/
static int unixRead(
  sqlite3_file *id, 
  void *pBuf, 
  int amt,
  sqlite3_int64 offset
){
  unixFile *pFile = (unixFile *)id;
  int got;
  assert( id );

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
  assert( (pFile->flags&SQLITE_OPEN_MAIN_DB)==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );

  got = seekAndRead(pFile, offset, pBuf, amt);
  if( got==amt ){
    return SQLITE_OK;
  }else if( got<0 ){
    /* lastErrno set by seekAndRead */
    return SQLITE_IOERR_READ;
  }else{
    pFile->lastErrno = 0; /* not a system error */
    /* Unread parts of the buffer must be zero-filled */
    memset(&((char*)pBuf)[got], 0, amt-got);
    return SQLITE_IOERR_SHORT_READ;
  }
}

/*
2790
2791
2792
2793
2794
2795
2796

2797
2798
2799
2800

2801
2802
2803
2804

2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
*/
static int unixWrite(
  sqlite3_file *id, 
  const void *pBuf, 
  int amt,
  sqlite3_int64 offset 
){

  int wrote = 0;
  assert( id );
  assert( amt>0 );


  /* Never read or write any of the bytes in the locking range */
  assert( ((unixFile*)id)->isLockable==0
          || offset>=PENDING_BYTE+512
          || offset+amt<=PENDING_BYTE );


#ifndef NDEBUG
  /* If we are doing a normal write to a database file (as opposed to
  ** doing a hot-journal rollback or a write to some file other than a
  ** normal database file) then record the fact that the database
  ** has changed.  If the transaction counter is modified, record that
  ** fact too.
  */
  if( ((unixFile*)id)->inNormalWrite ){
    unixFile *pFile = (unixFile*)id;
    pFile->dbUpdate = 1;  /* The database has been modified */
    if( offset<=24 && offset+amt>=27 ){
      int rc;
      char oldCntr[4];
      SimulateIOErrorBenign(1);
      rc = seekAndRead(pFile, 24, oldCntr, 4);
      SimulateIOErrorBenign(0);
      if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){
        pFile->transCntrChng = 1;  /* The transaction counter has changed */
      }
    }
  }
#endif

  while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
    amt -= wrote;
    offset += wrote;
    pBuf = &((char*)pBuf)[wrote];
  }
  SimulateIOError(( wrote=(-1), amt=1 ));
  SimulateDiskfullError(( wrote=0, amt=1 ));
  if( amt>0 ){
    if( wrote<0 ){
      /* lastErrno set by seekAndWrite */
      return SQLITE_IOERR_WRITE;
    }else{
      ((unixFile*)id)->lastErrno = 0; /* not a system error */
      return SQLITE_FULL;
    }
  }
  return SQLITE_OK;
}

#ifdef SQLITE_TEST







>




>
|
|
|
|
>








|
<














|











|







2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822

2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
*/
static int unixWrite(
  sqlite3_file *id, 
  const void *pBuf, 
  int amt,
  sqlite3_int64 offset 
){
  unixFile *pFile = (unixFile*)id;
  int wrote = 0;
  assert( id );
  assert( amt>0 );

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
  assert( (pFile->flags&SQLITE_OPEN_MAIN_DB)==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );

#ifndef NDEBUG
  /* If we are doing a normal write to a database file (as opposed to
  ** doing a hot-journal rollback or a write to some file other than a
  ** normal database file) then record the fact that the database
  ** has changed.  If the transaction counter is modified, record that
  ** fact too.
  */
  if( pFile->inNormalWrite ){

    pFile->dbUpdate = 1;  /* The database has been modified */
    if( offset<=24 && offset+amt>=27 ){
      int rc;
      char oldCntr[4];
      SimulateIOErrorBenign(1);
      rc = seekAndRead(pFile, 24, oldCntr, 4);
      SimulateIOErrorBenign(0);
      if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){
        pFile->transCntrChng = 1;  /* The transaction counter has changed */
      }
    }
  }
#endif

  while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
    amt -= wrote;
    offset += wrote;
    pBuf = &((char*)pBuf)[wrote];
  }
  SimulateIOError(( wrote=(-1), amt=1 ));
  SimulateDiskfullError(( wrote=0, amt=1 ));
  if( amt>0 ){
    if( wrote<0 ){
      /* lastErrno set by seekAndWrite */
      return SQLITE_IOERR_WRITE;
    }else{
      pFile->lastErrno = 0; /* not a system error */
      return SQLITE_FULL;
    }
  }
  return SQLITE_OK;
}

#ifdef SQLITE_TEST
3646
3647
3648
3649
3650
3651
3652




















































3653
3654
3655
3656
3657
3658
3659
** Routine to transform a unixFile into a proxy-locking unixFile.
** Implementation in the proxy-lock division, but used by unixOpen()
** if SQLITE_PREFER_PROXY_LOCKING is defined.
*/
static int proxyTransformUnixFile(unixFile*, const char*);
#endif






















































/*
** Open the file zPath.
** 
** Previously, the SQLite OS layer used three functions in place of this
** one:
**







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
** Routine to transform a unixFile into a proxy-locking unixFile.
** Implementation in the proxy-lock division, but used by unixOpen()
** if SQLITE_PREFER_PROXY_LOCKING is defined.
*/
static int proxyTransformUnixFile(unixFile*, const char*);
#endif

/*
** Search for an unused file descriptor that was opened on the database 
** file (not a journal or master-journal file) identified by pathname
** zPath with SQLITE_OPEN_XXX flags matching those passed as the second
** argument to this function.
**
** Such a file descriptor may exist if a database connection was closed
** but the associated file descriptor could not be closed because some
** other file descriptor open on the same file is holding a file-lock.
** Refer to comments in the unixClose() function and the lengthy comment
** describing "Posix Advisory Locking" at the start of this file for 
** further details. Also, ticket #4018.
**
** If a suitable file descriptor is found, then it is returned. If no
** such file descriptor is located, -1 is returned.
*/
static int findReusableFd(const char *zPath, int flags){
  int fd = -1;                         /* Return value */
  struct stat sStat;                   /* Results of stat() call */

  /* A stat() call may fail for various reasons. If this happens, it is
  ** almost certain that an open() call on the same path will also fail.
  ** For this reason, if an error occurs in the stat() call here, it is
  ** ignored and -1 is returned. The caller will try to open a new file
  ** descriptor on the same path, fail, and return an error to SQLite.
  **
  ** Even if a subsequent open() call does succeed, the consequences of
  ** not searching for a resusable file descriptor are not dire.  */
  if( 0==stat(zPath, &sStat) ){
    struct unixOpenCnt *p;
    struct unixFileId id;
    id.dev = sStat.st_dev;
    id.ino = sStat.st_ino;

    unixEnterMutex();
    for(p=openList; p&& memcmp(&id, &p->fileId, sizeof(id)); p=p->pNext);
    if( p && p->aPending ){
      int i;
      struct PendingClose *aPending = p->aPending;
      for(i=0; i<p->nPending; i++){
        if( aPending[i].fd>=0 && flags==aPending[i].flags ){
          fd = aPending[i].fd;
          aPending[i].fd = -1;
          break;
        }
      }
    }
    unixLeaveMutex();
  }

  return fd;
}

/*
** Open the file zPath.
** 
** Previously, the SQLite OS layer used three functions in place of this
** one:
**
3676
3677
3678
3679
3680
3681
3682

3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
static int unixOpen(
  sqlite3_vfs *pVfs,           /* The VFS for which this is the xOpen method */
  const char *zPath,           /* Pathname of file to be opened */
  sqlite3_file *pFile,         /* The file descriptor to be filled in */
  int flags,                   /* Input flags to control the opening */
  int *pOutFlags               /* Output flags returned to SQLite core */
){

  int fd = -1;                    /* File descriptor returned by open() */
  int dirfd = -1;                /* Directory file descriptor */
  int openFlags = 0;             /* Flags to pass to open() */
  int eType = flags&0xFFFFFF00;  /* Type of file to open */
  int noLock;                    /* True to omit locking primitives */
  int rc = SQLITE_OK;

  int isExclusive  = (flags & SQLITE_OPEN_EXCLUSIVE);
  int isDelete     = (flags & SQLITE_OPEN_DELETEONCLOSE);
  int isCreate     = (flags & SQLITE_OPEN_CREATE);
  int isReadonly   = (flags & SQLITE_OPEN_READONLY);
  int isReadWrite  = (flags & SQLITE_OPEN_READWRITE);








>
|




|







3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
static int unixOpen(
  sqlite3_vfs *pVfs,           /* The VFS for which this is the xOpen method */
  const char *zPath,           /* Pathname of file to be opened */
  sqlite3_file *pFile,         /* The file descriptor to be filled in */
  int flags,                   /* Input flags to control the opening */
  int *pOutFlags               /* Output flags returned to SQLite core */
){
  unixFile *p = (unixFile *)pFile;
  int fd = -1;                   /* File descriptor returned by open() */
  int dirfd = -1;                /* Directory file descriptor */
  int openFlags = 0;             /* Flags to pass to open() */
  int eType = flags&0xFFFFFF00;  /* Type of file to open */
  int noLock;                    /* True to omit locking primitives */
  int rc = SQLITE_OK;            /* Function Return Code */

  int isExclusive  = (flags & SQLITE_OPEN_EXCLUSIVE);
  int isDelete     = (flags & SQLITE_OPEN_DELETEONCLOSE);
  int isCreate     = (flags & SQLITE_OPEN_CREATE);
  int isReadonly   = (flags & SQLITE_OPEN_READONLY);
  int isReadWrite  = (flags & SQLITE_OPEN_READWRITE);

3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737









3738

3739
3740
3741
3742
3743
3744
3745
3746




3747
3748
3749
3750
3751
3752

3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763







3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789





3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
  */
  assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
  assert(isCreate==0 || isReadWrite);
  assert(isExclusive==0 || isCreate);
  assert(isDelete==0 || isCreate);

  /* The main DB, main journal, and master journal are never automatically
  ** deleted
  */
  assert( eType!=SQLITE_OPEN_MAIN_DB || !isDelete );
  assert( eType!=SQLITE_OPEN_MAIN_JOURNAL || !isDelete );
  assert( eType!=SQLITE_OPEN_MASTER_JOURNAL || !isDelete );

  /* Assert that the upper layer has set one of the "file-type" flags. */
  assert( eType==SQLITE_OPEN_MAIN_DB      || eType==SQLITE_OPEN_TEMP_DB 
       || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 
       || eType==SQLITE_OPEN_SUBJOURNAL   || eType==SQLITE_OPEN_MASTER_JOURNAL 
       || eType==SQLITE_OPEN_TRANSIENT_DB
  );

  memset(pFile, 0, sizeof(unixFile));










  if( !zName ){

    assert(isDelete && !isOpenDirectory);
    rc = getTempname(MAX_PATHNAME+1, zTmpname);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    zName = zTmpname;
  }





  if( isReadonly )  openFlags |= O_RDONLY;
  if( isReadWrite ) openFlags |= O_RDWR;
  if( isCreate )    openFlags |= O_CREAT;
  if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
  openFlags |= (O_LARGEFILE|O_BINARY);


  fd = open(zName, openFlags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
  OSTRACE4("OPENX   %-3d %s 0%o\n", fd, zName, openFlags);
  if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
    /* Failed to open the file for read/write access. Try read-only. */
    flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
    flags |= SQLITE_OPEN_READONLY;
    return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);
  }
  if( fd<0 ){
    return SQLITE_CANTOPEN;
  }







  if( isDelete ){
#if OS_VXWORKS
    zPath = zName;
#else
    unlink(zName);
#endif
  }
#if SQLITE_ENABLE_LOCKING_STYLE
  else{
    ((unixFile*)pFile)->openFlags = openFlags;
  }
#endif
  if( pOutFlags ){
    *pOutFlags = flags;
  }

#ifndef NDEBUG
  if( (flags & SQLITE_OPEN_MAIN_DB)!=0 ){
    ((unixFile*)pFile)->isLockable = 1;
  }
#endif

  assert( fd>=0 );
  if( isOpenDirectory ){
    rc = openDirectory(zPath, &dirfd);
    if( rc!=SQLITE_OK ){





      close(fd); /* silently leak if fail, already in error */
      return rc;
    }
  }

#ifdef FD_CLOEXEC
  fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
#endif

  noLock = eType!=SQLITE_OPEN_MAIN_DB;

#if SQLITE_PREFER_PROXY_LOCKING
  if( zPath!=NULL && !noLock ){
    char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING");
    int useProxy = 0;

    /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 
    ** 0 means never use proxy, NULL means use proxy for non-local files only
    */
    if( envforce!=NULL ){
      useProxy = atoi(envforce)>0;
    }else{
      struct statfs fsInfo;

      if( statfs(zPath, &fsInfo) == -1 ){
				((unixFile*)pFile)->lastErrno = errno;
        if( dirfd>=0 ) close(dirfd); /* silently leak if fail, in error */
        close(fd); /* silently leak if fail, in error */
        return SQLITE_IOERR_ACCESS;
      }
      useProxy = !(fsInfo.f_flags&MNT_LOCAL);
    }
    if( useProxy ){







|
<
|
|
|








|

>
>
>
>
>
>
>
>
>
|
>








>
>
>
>






>
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>









|
<
<
<
<
<
<
<
<
<



<



>
>
>
>
>
|















|
|
<




<

|







3777
3778
3779
3780
3781
3782
3783
3784

3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855









3856
3857
3858

3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884

3885
3886
3887
3888

3889
3890
3891
3892
3893
3894
3895
3896
3897
  */
  assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
  assert(isCreate==0 || isReadWrite);
  assert(isExclusive==0 || isCreate);
  assert(isDelete==0 || isCreate);

  /* The main DB, main journal, and master journal are never automatically
  ** deleted. Nor are they ever temporary files.  */

  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB );
  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL );
  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL );

  /* Assert that the upper layer has set one of the "file-type" flags. */
  assert( eType==SQLITE_OPEN_MAIN_DB      || eType==SQLITE_OPEN_TEMP_DB 
       || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 
       || eType==SQLITE_OPEN_SUBJOURNAL   || eType==SQLITE_OPEN_MASTER_JOURNAL 
       || eType==SQLITE_OPEN_TRANSIENT_DB
  );

  memset(p, 0, sizeof(unixFile));

  if( eType==SQLITE_OPEN_MAIN_DB ){
    /* Try to find an unused file descriptor to reuse. This is not done
    ** for vxworks. Not because vxworks would not benefit from the change
    ** (it might, we're not sure), but because no way to test it is
    ** currently available. It is better not to risk breaking vxworks for 
    ** the sake of such an obscure feature.   */
#if !OS_VXWORKS
    fd = findReusableFd(zName, flags);
#endif
  }else if( !zName ){
    /* If zName is NULL, the upper layer is requesting a temp file. */
    assert(isDelete && !isOpenDirectory);
    rc = getTempname(MAX_PATHNAME+1, zTmpname);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    zName = zTmpname;
  }

  /* Determine the value of the flags parameter passed to POSIX function
  ** open(). These must be calculated even if open() is not called, as
  ** they may be stored as part of the file handle and used by the 
  ** 'conch file' locking functions later on.  */
  if( isReadonly )  openFlags |= O_RDONLY;
  if( isReadWrite ) openFlags |= O_RDWR;
  if( isCreate )    openFlags |= O_CREAT;
  if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
  openFlags |= (O_LARGEFILE|O_BINARY);

  if( fd<0 ){
    fd = open(zName, openFlags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
    OSTRACE4("OPENX   %-3d %s 0%o\n", fd, zName, openFlags);
    if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
      /* Failed to open the file for read/write access. Try read-only. */
      flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
      flags |= SQLITE_OPEN_READONLY;
      return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);
    }
    if( fd<0 ){
      return SQLITE_CANTOPEN;
    }
  }
  assert( fd>=0 );
  p->flags = flags;
  if( pOutFlags ){
    *pOutFlags = flags;
  }

  if( isDelete ){
#if OS_VXWORKS
    zPath = zName;
#else
    unlink(zName);
#endif
  }
#if SQLITE_ENABLE_LOCKING_STYLE
  else{
    p->openFlags = openFlags;









  }
#endif


  if( isOpenDirectory ){
    rc = openDirectory(zPath, &dirfd);
    if( rc!=SQLITE_OK ){
      /* It is safe to close fd at this point, because it is guaranteed not
      ** to be open on a database file. If it were open on a database file,
      ** it would not be safe to close as this would cause any locks held
      ** on the file by this process to be released.  */
      assert( eType!=SQLITE_OPEN_MAIN_DB );
      close(fd);             /* silently leak if fail, already in error */
      return rc;
    }
  }

#ifdef FD_CLOEXEC
  fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
#endif

  noLock = eType!=SQLITE_OPEN_MAIN_DB;

#if SQLITE_PREFER_PROXY_LOCKING
  if( zPath!=NULL && !noLock ){
    char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING");
    int useProxy = 0;

    /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 
    ** never use proxy, NULL means use proxy for non-local files only.  */

    if( envforce!=NULL ){
      useProxy = atoi(envforce)>0;
    }else{
      struct statfs fsInfo;

      if( statfs(zPath, &fsInfo) == -1 ){
        ((unixFile*)pFile)->lastErrno = errno;
        if( dirfd>=0 ) close(dirfd); /* silently leak if fail, in error */
        close(fd); /* silently leak if fail, in error */
        return SQLITE_IOERR_ACCESS;
      }
      useProxy = !(fsInfo.f_flags&MNT_LOCAL);
    }
    if( useProxy ){
Added test/tkt4018.test.


















































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# 2009 August 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.
#
# This file implements tests to verify that ticket #4018 has been
# fixed.  
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

proc testsql {sql} {
  set fd [open tf_main.tcl w]
  puts $fd [subst -nocommands {
    sqlite3_test_control_pending_byte 0x0010000
    sqlite3 db test.db
    set rc [catch { db eval {$sql} } msg]
    puts -nonewline "[set rc] {[set msg]}"
    flush stdout
    exit
  }]
  close $fd
  set fd [open "| [info nameofexec] ./tf_main.tcl" r] 
  set res [read $fd]
  close $fd
  return $res
}

do_test tkt4018-1.1 {
  execsql {
    CREATE TABLE t1(a, b);
    BEGIN;
    SELECT * FROM t1;
  }
} {}

# The database is locked by connection [db]. Open and close a second
# connection to test.db 20000 times. If file-descriptors are not being
# reused, then the process will quickly exceed its maximum number of
# file descriptors (1024 by default on linux).
do_test tkt4018-1.2 {
  for {set i 0} {$i < 10000} {incr i} {
    sqlite3 db2 test.db
    db2 close
  }
} {}

# Now check that connection [db] is still holding a SHARED lock by
# having a second process try to write the db.
do_test tkt4018-1.3 {
  testsql {INSERT INTO t1 VALUES(3, 4)}
} {1 {database is locked}}

# Sanity checking. Have [db] release the lock and then retry the
# INSERT from the previous test case.
do_test tkt4018-1.4 {
  db eval COMMIT
  testsql {INSERT INTO t1 VALUES(3, 4)}
} {0 {}}

# Check that reusing a file descriptor cannot change a read-only 
# connection into a read-write connection.
do_test tkt4018-2.1 {
  sqlite3 db2 test.db
  execsql {INSERT INTO t1 VALUES(1, 2)} db2
} {}
do_test tkt4018-2.2 {
  execsql {
    BEGIN;
    SELECT * FROM t1 ORDER BY a;
  }
} {1 2 3 4}
do_test tkt4018-2.3 {
  db2 close
  sqlite3 db2 test.db -readonly 1
  execsql COMMIT
  catchsql {INSERT INTO t1 VALUES(5, 6)} db2
} {1 {attempt to write a readonly database}}
db2 close

finish_test