/ Check-in [148f8dec]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Initial attempt to get SQLite working with OFD locks on Linux. The code here does not function correctly. This is an incremental check-in for a work in progress.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | ofd-locks
Files: files | file ages | folders
SHA3-256:148f8dec9a26f11d343bfeb558fd12ba18d7f5d4d69da58fc8aa22f88e13c408
User & Date: drh 2018-06-19 13:45:48
Context
2018-06-19
17:19
Miscellaneous cleanup of OFD logic. Add an #if 0 to disable the use of OFD logic, temporarily, until I can get it to actually work. check-in: d849ade3 user: drh tags: ofd-locks
13:45
Initial attempt to get SQLite working with OFD locks on Linux. The code here does not function correctly. This is an incremental check-in for a work in progress. check-in: 148f8dec user: drh tags: ofd-locks
11:15
Minor change to the input grammar to make the parser tables slightly smaller. check-in: 320fa69e user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

174
175
176
177
178
179
180











181
182
183
184
185
186
187
...
210
211
212
213
214
215
216

217
218
219
220
221
222
223
...
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
...
768
769
770
771
772
773
774


775

776
777
778
779
780
781
782
...
990
991
992
993
994
995
996
997
998
999
1000

1001
1002
1003
1004
1005
1006
1007
1008
1009
1010

1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
....
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
....
1074
1075
1076
1077
1078
1079
1080




















1081
1082
1083
1084
1085
1086
1087
....
1448
1449
1450
1451
1452
1453
1454

1455
1456
1457
1458
1459
1460
1461
1462
....
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488

1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
....
1533
1534
1535
1536
1537
1538
1539

1540
1541
1542
1543
1544
1545
1546
1547

1548
1549
1550
1551
1552
1553
1554
1555
....
1660
1661
1662
1663
1664
1665
1666
1667

1668

1669
1670
1671
1672
1673
1674
1675
....
3907
3908
3909
3910
3911
3912
3913









3914
3915
3916
3917
3918
3919
3920
....
4226
4227
4228
4229
4230
4231
4232

4233
4234
4235
4236
4237
4238
4239
4240
....
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
....
5408
5409
5410
5411
5412
5413
5414
















5415
5416
5417
5418
5419
5420
5421

/*
** Only set the lastErrno if the error code is a real error and not 
** a normal expected return code of SQLITE_BUSY or SQLITE_OK
**
** IS_LOCK_ERROR(x) is true when x is neither SQLITE_OK nor SQLITE_BUSY.
** The argument is parenthesized so that an expression argument is
** compared as a whole.  It is evaluated twice, so it must not have
** side effects.
*/
#define IS_LOCK_ERROR(x)  (((x) != SQLITE_OK) && ((x) != SQLITE_BUSY))












/* Forward references */
typedef struct unixShm unixShm;               /* Connection shared memory */
typedef struct unixShmNode unixShmNode;       /* Shared memory instance */
typedef struct unixInodeInfo unixInodeInfo;   /* An i-node */
typedef struct UnixUnusedFd UnixUnusedFd;     /* An unused file descriptor */

/*
................................................................................
  unsigned short int ctrlFlags;       /* Behavioral bits.  UNIXFILE_* flags */
  int lastErrno;                      /* The unix errno from last I/O error */
  void *lockingContext;               /* Locking style specific state */
  UnixUnusedFd *pPreallocatedUnused;  /* Pre-allocated UnixUnusedFd */
  const char *zPath;                  /* Name of the file */
  unixShm *pShm;                      /* Shared memory segment information */
  int szChunk;                        /* Configured by FCNTL_CHUNK_SIZE */

#if SQLITE_MAX_MMAP_SIZE>0
  int nFetchOut;                      /* Number of outstanding xFetch refs */
  sqlite3_int64 mmapSize;             /* Usable size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeActual;       /* Actual size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeMax;          /* Configured FCNTL_MMAP_SIZE value */
  void *pMapRegion;                   /* Memory mapped region */
#endif
................................................................................
** command-line option on the compiler.  This code is normally
** turned off.
*/
static int lockTrace(int fd, int op, struct flock *p){
  char *zOpName, *zType;
  int s;
  int savedErrno;
  if( op==F_GETLK ){
    zOpName = "GETLK";
  }else if( op==F_SETLK ){
    zOpName = "SETLK";
  }else{
    s = osFcntl(fd, op, p);
    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
    return s;
  }
  if( p->l_type==F_RDLCK ){
................................................................................
  }
  assert( p->l_whence==SEEK_SET );
  s = osFcntl(fd, op, p);
  savedErrno = errno;
  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
     (int)p->l_pid, s);


  if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){

    struct flock l2;
    l2 = *p;
    osFcntl(fd, F_GETLK, &l2);
    if( l2.l_type==F_RDLCK ){
      zType = "RDLCK";
    }else if( l2.l_type==F_WRLCK ){
      zType = "WRLCK";
................................................................................
******************************************************************************/


/******************************************************************************
*************************** Posix Advisory Locking ****************************
**
** POSIX advisory locks are broken by design.  ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 specify that when a process
** sets or clears a lock, that operation overrides any prior locks set
** by the same process.  It does not explicitly say so, but this implies
** that it overrides locks set by the same process using a different

** file descriptor.  Consider this test case:
**
**       int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
**       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
**
** Suppose ./file1 and ./file2 are really the same file (because
** one is a hard or symbolic link to the other) then if you set
** an exclusive lock on fd1, then try to get an exclusive lock
** on fd2, it works.  I would have expected the second lock to
** fail since there was already a lock on the file due to fd1.

** But not so.  Since both locks came from the same process, the
** second overrides the first, even though they were on different
** file descriptors opened on different file names.
**
** This means that we cannot use POSIX locks to synchronize file access
** among competing threads of the same process.  POSIX locks will work fine
** to synchronize access for threads in separate processes, but not
** threads within the same process.
**
** To work around the problem, SQLite has to manage file locks internally
................................................................................
** locks to see if another thread has previously set a lock on that same
** inode.
**
** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.
** For VxWorks, we have to use the alternative unique ID system based on
** canonical filename and implemented in the previous division.)
**
** The sqlite3_file structure for POSIX is no longer just an integer file
** descriptor.  It is now a structure that holds the integer file
** descriptor and a pointer to a structure that describes the internal
** locks on the corresponding inode.  There is one locking structure
** per inode, so if the same inode is opened twice, both unixFile structures
** point to the same locking structure.  The locking structure keeps
** a reference count (so we will know when to delete it) and a "cnt"
** field that tells us its internal lock status.  cnt==0 means the
** file is unlocked.  cnt==-1 means the file has an exclusive lock.
** cnt>0 means there are cnt shared locks on the file.
**
** Any attempt to lock or unlock a file first checks the locking
** structure.  The fcntl() system call is only invoked to set a 
** POSIX lock if the internal lock structure transitions between
** a locked and an unlocked state.
**
** But wait:  there are yet more problems with POSIX advisory locks.
**
** If you close a file descriptor that points to a file that has locks,
** all locks on that file that are owned by the current process are
** released.  To work around this problem, each unixInodeInfo object
** maintains a count of the number of pending locks on that inode.
** When an attempt is made to close an unixFile, if there are
** other unixFile open on the same inode that are holding locks, the call
** to close() the file descriptor is deferred until all of the locks clear.
** The unixInodeInfo structure keeps a list of file descriptors that need to
** be closed and that list is walked (and cleared) when the last lock
** clears.
**
** Yet another problem:  LinuxThreads do not play well with posix locks.
**
** Many older versions of linux use the LinuxThreads library which is
** not posix compliant.  Under LinuxThreads, a lock created by thread
** A cannot be modified or overridden by a different thread B.
** Only thread A can modify the lock.  Locking behavior is correct
** if the application uses the newer Native Posix Thread Library (NPTL)
** on linux - with NPTL a lock created by thread A can override locks
................................................................................
** current process.
**
** SQLite used to support LinuxThreads.  But support for LinuxThreads
** was dropped beginning with version 3.7.0.  SQLite will still work with
** LinuxThreads provided that (1) there is no more than one connection 
** per database file in the same process and (2) database connections
** do not move across threads.




















*/

/*
** An instance of the following structure serves as the key used
** to locate a particular unixInodeInfo object.
*/
struct unixFileId {
................................................................................
#ifndef __DJGPP__
  if( !reserved && !pFile->pInode->bProcessLock ){
    struct flock lock;
    lock.l_whence = SEEK_SET;
    lock.l_start = RESERVED_BYTE;
    lock.l_len = 1;
    lock.l_type = F_WRLCK;

    if( osFcntl(pFile->h, F_GETLK, &lock) ){
      rc = SQLITE_IOERR_CHECKRESERVEDLOCK;
      storeLastErrno(pFile, errno);
    } else if( lock.l_type!=F_UNLCK ){
      reserved = 1;
    }
  }
#endif
................................................................................
** failing the lock.  The iBusyTimeout value is always reset back to
** zero on each call.
**
** If SQLITE_ENABLE_SETLK_TIMEOUT is not defined, then do a non-blocking
** attempt to set the lock.
*/
#ifndef SQLITE_ENABLE_SETLK_TIMEOUT
/* Without timeout support, setting a lock is a single F_SETLK fcntl()
** call and the final (unixFile) argument is ignored. */
# define osSetPosixAdvisoryLock(h,x,t) osFcntl(h,F_SETLK,x)
#else
/*
** Try to set the lock described by pLock on file descriptor h using
** F_SETLK.  While the attempt fails and pFile->iBusyTimeout is still
** positive, sleep for one millisecond, decrement pFile->iBusyTimeout,
** and try again.  Return the result of the final fcntl() call.
*/
static int osSetPosixAdvisoryLock(
  int h,                /* The file descriptor on which to take the lock */
  struct flock *pLock,  /* The description of the lock */
  unixFile *pFile       /* Structure holding timeout value */
){
  int rc;
  for(;;){
    rc = osFcntl(h, F_SETLK, pLock);
    if( rc>=0 || pFile->iBusyTimeout<=0 ) break;
    /* Generic posix has no blocking file lock with a timeout, so poll:
    ** wait one millisecond between attempts until either the lock is
    ** obtained or the timeout budget is used up.  On systems that do
    ** support such a blocking lock, this is the place to invoke it. */
    usleep(1000);
    pFile->iBusyTimeout--;
  }
  return rc;
}
#endif /* SQLITE_ENABLE_SETLK_TIMEOUT */


................................................................................
    if( pInode->bProcessLock==0 ){
      struct flock lock;
      assert( pInode->nLock==0 );
      lock.l_whence = SEEK_SET;
      lock.l_start = SHARED_FIRST;
      lock.l_len = SHARED_SIZE;
      lock.l_type = F_WRLCK;

      rc = osSetPosixAdvisoryLock(pFile->h, &lock, pFile);
      if( rc<0 ) return rc;
      pInode->bProcessLock = 1;
      pInode->nLock++;
    }else{
      rc = 0;
    }
  }else{

    rc = osSetPosixAdvisoryLock(pFile->h, pLock, pFile);
  }
  return rc;
}

/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
................................................................................
    goto end_lock;
  }

  /* If a SHARED lock is requested, and some thread using this PID already
  ** has a SHARED or RESERVED lock, then increment reference counts and
  ** return SQLITE_OK.
  */
  if( eFileLock==SHARED_LOCK && 

      (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){

    assert( eFileLock==SHARED_LOCK );
    assert( pFile->eFileLock==0 );
    assert( pInode->nShared>0 );
    pFile->eFileLock = SHARED_LOCK;
    pInode->nShared++;
    pInode->nLock++;
    goto end_lock;
................................................................................
    }
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
    case SQLITE_FCNTL_LOCK_TIMEOUT: {
      pFile->iBusyTimeout = *(int*)pArg;
      return SQLITE_OK;
    }
#endif









#if SQLITE_MAX_MMAP_SIZE>0
    case SQLITE_FCNTL_MMAP_SIZE: {
      i64 newLimit = *(i64*)pArg;
      int rc = SQLITE_OK;
      if( newLimit>sqlite3GlobalConfig.mxMmap ){
        newLimit = sqlite3GlobalConfig.mxMmap;
      }
................................................................................

  if( pShmNode->h>=0 ){
    /* Initialize the locking parameters */
    f.l_type = lockType;
    f.l_whence = SEEK_SET;
    f.l_start = ofst;
    f.l_len = n;

    rc = osSetPosixAdvisoryLock(pShmNode->h, &f, pFile);
    rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;
  }

  /* Update the global lock state and do debug tracing */
#ifdef SQLITE_DEBUG
  { u16 mask;
  OSTRACE(("SHM-LOCK "));
................................................................................
  ** process might open and use the *-shm file without truncating it.
  ** And if the *-shm file has been corrupted by a power failure or
  ** system crash, the database itself may also become corrupt.  */
  lock.l_whence = SEEK_SET;
  lock.l_start = UNIX_SHM_DMS;
  lock.l_len = 1;
  lock.l_type = F_WRLCK;
  if( osFcntl(pShmNode->h, F_GETLK, &lock)!=0 ) {
    rc = SQLITE_IOERR_LOCK;
  }else if( lock.l_type==F_UNLCK ){
    if( pShmNode->isReadonly ){
      pShmNode->isUnlocked = 1;
      rc = SQLITE_READONLY_CANTINIT;
    }else{
      rc = unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1);
................................................................................
  assert( pNew->pInode==NULL );

  /* No locking occurs in temporary files */
  assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 );

  OSTRACE(("OPEN    %-3d %s\n", h, zFilename));
  pNew->h = h;
















  pNew->pVfs = pVfs;
  pNew->zPath = zFilename;
  pNew->ctrlFlags = (u8)ctrlFlags;
#if SQLITE_MAX_MMAP_SIZE>0
  pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap;
#endif
  if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),







>
>
>
>
>
>
>
>
>
>
>







 







>







 







|

|







 







>
>
|
>







 







|

|
|
>
|







|

>
|
|
|







 







|
|
|
|
|
|
|
<
<
<

|
|
<
|







|
|





|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
|







 







|

|

>



|







|







 







>
|







>
|







 







|
>
|
>







 







>
>
>
>
>
>
>
>
>







 







>
|







 







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
...
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
...
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
...
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
....
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
....
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056



1057
1058
1059

1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
....
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
....
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
....
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
....
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
....
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
....
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
....
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
....
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
....
5457
5458
5459
5460
5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
5485
5486

/*
** Only set the lastErrno if the error code is a real error and not 
** a normal expected return code of SQLITE_BUSY or SQLITE_OK
**
** IS_LOCK_ERROR(x) is true when x is neither SQLITE_OK nor SQLITE_BUSY.
** The argument is parenthesized so that an expression argument is
** compared as a whole.  It is evaluated twice, so it must not have
** side effects.
*/
#define IS_LOCK_ERROR(x)  (((x) != SQLITE_OK) && ((x) != SQLITE_BUSY))

/*
** Are OFD locks supported?
**
** HAVE_OFD_LOCKS is 1 only when the system headers define both
** F_OFD_SETLK and F_OFD_GETLK.  Otherwise it is 0 and both identifiers
** are given a fake value of 0 so that code mentioning them still
** compiles.  Any partial definition (only one of the two present) is
** removed with #undef first, so that the fake definitions below never
** conflict with a pre-existing macro of a different value.
*/
#if defined(F_OFD_SETLK) && defined(F_OFD_GETLK)
# define HAVE_OFD_LOCKS 1
#else
# define HAVE_OFD_LOCKS 0
# undef F_OFD_SETLK
# undef F_OFD_GETLK
# define F_OFD_SETLK 0    /* Fake value so we can use the identifier */
# define F_OFD_GETLK 0    /* Fake value so we can use the identifier */
#endif

/* Forward references */
typedef struct unixShm unixShm;               /* Connection shared memory */
typedef struct unixShmNode unixShmNode;       /* Shared memory instance */
typedef struct unixInodeInfo unixInodeInfo;   /* An i-node */
typedef struct UnixUnusedFd UnixUnusedFd;     /* An unused file descriptor */

/*
................................................................................
  unsigned short int ctrlFlags;       /* Behavioral bits.  UNIXFILE_* flags */
  int lastErrno;                      /* The unix errno from last I/O error */
  void *lockingContext;               /* Locking style specific state */
  UnixUnusedFd *pPreallocatedUnused;  /* Pre-allocated UnixUnusedFd */
  const char *zPath;                  /* Name of the file */
  unixShm *pShm;                      /* Shared memory segment information */
  int szChunk;                        /* Configured by FCNTL_CHUNK_SIZE */
  int eGetLk, eSetLk;
#if SQLITE_MAX_MMAP_SIZE>0
  int nFetchOut;                      /* Number of outstanding xFetch refs */
  sqlite3_int64 mmapSize;             /* Usable size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeActual;       /* Actual size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeMax;          /* Configured FCNTL_MMAP_SIZE value */
  void *pMapRegion;                   /* Memory mapped region */
#endif
................................................................................
** command-line option on the compiler.  This code is normally
** turned off.
*/
static int lockTrace(int fd, int op, struct flock *p){
  char *zOpName, *zType;
  int s;
  int savedErrno;
  if( op==F_GETLK || op==F_OFD_GETLK ){
    zOpName = "GETLK";
  }else if( op==F_SETLK || op==F_OFD_SETLK ){
    zOpName = "SETLK";
  }else{
    s = osFcntl(fd, op, p);
    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
    return s;
  }
  if( p->l_type==F_RDLCK ){
................................................................................
  }
  assert( p->l_whence==SEEK_SET );
  s = osFcntl(fd, op, p);
  savedErrno = errno;
  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
     (int)p->l_pid, s);
  if( s==(-1)
   && (op==F_SETLK || op==F_OFD_SETLK)
   && (p->l_type==F_RDLCK || p->l_type==F_WRLCK)
  ){
    struct flock l2;
    l2 = *p;
    osFcntl(fd, F_GETLK, &l2);
    if( l2.l_type==F_RDLCK ){
      zType = "RDLCK";
    }else if( l2.l_type==F_WRLCK ){
      zType = "WRLCK";
................................................................................
******************************************************************************/


/******************************************************************************
*************************** Posix Advisory Locking ****************************
**
** POSIX advisory locks are broken by design.  ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 says that when a process
** sets or clears a lock, that operation overrides any prior locks set
** by the *same process*.  That means that if two different threads
** open the same file using different file descriptors, then POSIX
** advisory locking will not work to coordinate access between those two
** threads.  Consider this test case:
**
**       int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
**       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
**
** Suppose ./file1 and ./file2 are really the same file (because
** one is a hard or symbolic link to the other) then if you set
** an exclusive lock on fd1, then try to get an exclusive lock
** on fd2, it works.  In a reasonable system,  the second lock would
** fail since there was already a lock on the file due to fd1.
** But this is not the case in POSIX advisory locking.  Since both
** locks came from the same process, the second overrides the first,
** even though they were on different file descriptors opened on
** different file names.
**
** This means that we cannot use POSIX locks to synchronize file access
** among competing threads of the same process.  POSIX locks will work fine
** to synchronize access for threads in separate processes, but not
** threads within the same process.
**
** To work around the problem, SQLite has to manage file locks internally
................................................................................
** locks to see if another thread has previously set a lock on that same
** inode.
**
** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.
** For VxWorks, we have to use the alternative unique ID system based on
** canonical filename and implemented in the previous division.)
**
** The sqlite3_file object for POSIX (a.k.a. the unixFile object) is more
** than just an integer file descriptor.  It also holds a pointer to
** another object (unixInodeInfo) that describes the internal locks on
** the corresponding inode.  There is one unixInodeInfo object per inode,
** so if the same inode is opened twice, both unixFile objects point to
** the same unixInodeInfo.  The unixInodeInfo keeps a reference count
** (nRef) so we will know when to delete it.



**
** Any attempt to lock or unlock a unixFile first checks the unixInodeInfo.
** The fcntl() system call is only invoked to set a POSIX lock if the

** unixInodeInfo transitions between a locked and an unlocked state.
**
** But wait:  there are yet more problems with POSIX advisory locks.
**
** If you close a file descriptor that points to a file that has locks,
** all locks on that file that are owned by the current process are
** released.  To work around this problem, each unixInodeInfo object
** maintains a count of the number of pending locks on that inode.
** When an attempt is made to close an unixFile, if there are other
** unixFile objects open on the same inode that are holding locks, the call
** to close() the file descriptor is deferred until all of the locks clear.
** The unixInodeInfo structure keeps a list of file descriptors that need to
** be closed and that list is walked (and cleared) when the last lock
** clears.
**
** LinuxThreads:
**
** Many older versions of linux use the LinuxThreads library which is
** not posix compliant.  Under LinuxThreads, a lock created by thread
** A cannot be modified or overridden by a different thread B.
** Only thread A can modify the lock.  Locking behavior is correct
** if the application uses the newer Native Posix Thread Library (NPTL)
** on linux - with NPTL a lock created by thread A can override locks
................................................................................
** current process.
**
** SQLite used to support LinuxThreads.  But support for LinuxThreads
** was dropped beginning with version 3.7.0.  SQLite will still work with
** LinuxThreads provided that (1) there is no more than one connection 
** per database file in the same process and (2) database connections
** do not move across threads.
**
** OFD Locks:
**
** Recent unix-like OSes have added support for Open File Description or "OFD"
** locks.  (This is not a typo: the name is "Open File Description" not
** "Open File Descriptor".  "-ion" not "-or".  There is a subtle difference
** between "Description" and "Descriptor" which is described on the Linux
** fcntl manpage and will not be repeated here.)  The main difference
** between OFD locks and POSIX locks is that OFD locks are associated
** with a single open() system call and do not interfere with file
** descriptors obtained from different open() system calls in the same
** process.  In other words, OFD locks fix the brokenness of POSIX locks.
**
** As of 2018-06-19, SQLite will use OFD locks if they are available.
** But the older work-arounds for POSIX locks are still here in the code
** since SQLite also needs to work on systems that do not support
** OFD locks.  Someday, perhaps, all unix systems will have reliable
** support for OFD locks, and at that time we can omit the unixInodeInfo
** object and all of its associated complication.  But for now we still
** have to support the older POSIX lock work-around hack.
*/

/*
** An instance of the following structure serves as the key used
** to locate a particular unixInodeInfo object.
*/
struct unixFileId {
................................................................................
#ifndef __DJGPP__
  if( !reserved && !pFile->pInode->bProcessLock ){
    struct flock lock;
    lock.l_whence = SEEK_SET;
    lock.l_start = RESERVED_BYTE;
    lock.l_len = 1;
    lock.l_type = F_WRLCK;
    lock.l_pid = 0;
    if( osFcntl(pFile->h, pFile->eGetLk, &lock) ){
      rc = SQLITE_IOERR_CHECKRESERVEDLOCK;
      storeLastErrno(pFile, errno);
    } else if( lock.l_type!=F_UNLCK ){
      reserved = 1;
    }
  }
#endif
................................................................................
** failing the lock.  The iBusyTimeout value is always reset back to
** zero on each call.
**
** If SQLITE_ENABLE_SETLK_TIMEOUT is not defined, then do a non-blocking
** attempt to set the lock.
*/
#ifndef SQLITE_ENABLE_SETLK_TIMEOUT
/* Without timeout support, setting a lock is a single fcntl() call with
** command e, and the final (unixFile) argument is ignored. */
# define osSetAdvisoryLock(h,e,x,t) osFcntl(h,e,x)
#else
/*
** Try to set the lock described by pLock on file descriptor h using the
** fcntl() command eSetLk.  While the attempt fails and
** pFile->iBusyTimeout is still positive, sleep for one millisecond,
** decrement pFile->iBusyTimeout, and try again.  Return the result of
** the final fcntl() call.
*/
static int osSetAdvisoryLock(
  int h,                /* The file descriptor on which to take the lock */
  int eSetLk,           /* fcntl() command used to set the lock */
  struct flock *pLock,  /* The description of the lock */
  unixFile *pFile       /* Structure holding timeout value */
){
  int rc;
  for(;;){
    rc = osFcntl(h, eSetLk, pLock);
    if( rc>=0 || pFile->iBusyTimeout<=0 ) break;
    /* Generic posix has no blocking file lock with a timeout, so poll:
    ** wait one millisecond between attempts until either the lock is
    ** obtained or the timeout budget is used up.  On systems that do
    ** support such a blocking lock, this is the place to invoke it. */
    usleep(1000);
    pFile->iBusyTimeout--;
  }
  return rc;
}
#endif /* SQLITE_ENABLE_SETLK_TIMEOUT */


................................................................................
    if( pInode->bProcessLock==0 ){
      struct flock lock;
      assert( pInode->nLock==0 );
      lock.l_whence = SEEK_SET;
      lock.l_start = SHARED_FIRST;
      lock.l_len = SHARED_SIZE;
      lock.l_type = F_WRLCK;
      lock.l_pid = 0;
      rc = osSetAdvisoryLock(pFile->h, pFile->eSetLk, &lock, pFile);
      if( rc<0 ) return rc;
      pInode->bProcessLock = 1;
      pInode->nLock++;
    }else{
      rc = 0;
    }
  }else{
    pLock->l_pid = 0;
    rc = osSetAdvisoryLock(pFile->h, pFile->eSetLk, pLock, pFile);
  }
  return rc;
}

/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
................................................................................
    goto end_lock;
  }

  /* If a SHARED lock is requested, and some thread using this PID already
  ** has a SHARED or RESERVED lock, then increment reference counts and
  ** return SQLITE_OK.
  */
  if( eFileLock==SHARED_LOCK
   && pFile->eGetLk==F_GETLK
   && (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK)
  ){
    assert( eFileLock==SHARED_LOCK );
    assert( pFile->eFileLock==0 );
    assert( pInode->nShared>0 );
    pFile->eFileLock = SHARED_LOCK;
    pInode->nShared++;
    pInode->nLock++;
    goto end_lock;
................................................................................
    }
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
    case SQLITE_FCNTL_LOCK_TIMEOUT: {
      pFile->iBusyTimeout = *(int*)pArg;
      return SQLITE_OK;
    }
#endif
    case SQLITE_FCNTL_OFD_LOCKS: {
      int x = *(int*)pArg;
      if( x==0 ){
        pFile->eSetLk = F_SETLK;
        pFile->eGetLk = F_GETLK;
      }
      *(int*)pArg = pFile->eSetLk==F_OFD_SETLK;
      return SQLITE_OK;
    }
#if SQLITE_MAX_MMAP_SIZE>0
    case SQLITE_FCNTL_MMAP_SIZE: {
      i64 newLimit = *(i64*)pArg;
      int rc = SQLITE_OK;
      if( newLimit>sqlite3GlobalConfig.mxMmap ){
        newLimit = sqlite3GlobalConfig.mxMmap;
      }
................................................................................

  if( pShmNode->h>=0 ){
    /* Initialize the locking parameters */
    f.l_type = lockType;
    f.l_whence = SEEK_SET;
    f.l_start = ofst;
    f.l_len = n;
    f.l_pid = 0;
    rc = osSetAdvisoryLock(pShmNode->h, pFile->eSetLk, &f, pFile);
    rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;
  }

  /* Update the global lock state and do debug tracing */
#ifdef SQLITE_DEBUG
  { u16 mask;
  OSTRACE(("SHM-LOCK "));
................................................................................
  ** process might open and use the *-shm file without truncating it.
  ** And if the *-shm file has been corrupted by a power failure or
  ** system crash, the database itself may also become corrupt.  */
  lock.l_whence = SEEK_SET;
  lock.l_start = UNIX_SHM_DMS;
  lock.l_len = 1;
  lock.l_type = F_WRLCK;
  if( osFcntl(pShmNode->h, pDbFd->eGetLk, &lock)!=0 ) {
    rc = SQLITE_IOERR_LOCK;
  }else if( lock.l_type==F_UNLCK ){
    if( pShmNode->isReadonly ){
      pShmNode->isUnlocked = 1;
      rc = SQLITE_READONLY_CANTINIT;
    }else{
      rc = unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1);
................................................................................
  assert( pNew->pInode==NULL );

  /* No locking occurs in temporary files */
  assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 );

  OSTRACE(("OPEN    %-3d %s\n", h, zFilename));
  pNew->h = h;
  pNew->eSetLk = F_SETLK;
  pNew->eGetLk = F_GETLK;
#if HAVE_OFD_LOCKS
  {
    struct flock lock;
    lock.l_whence = SEEK_SET;
    lock.l_start = RESERVED_BYTE;
    lock.l_len = 1;
    lock.l_type = F_WRLCK;
    lock.l_pid = 0;
    if( osFcntl(h, F_OFD_GETLK, &lock)==0 ){
      pNew->eSetLk = F_OFD_SETLK;
      pNew->eGetLk = F_OFD_GETLK;
    }
  }
#endif
  pNew->pVfs = pVfs;
  pNew->zPath = zFilename;
  pNew->ctrlFlags = (u8)ctrlFlags;
#if SQLITE_MAX_MMAP_SIZE>0
  pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap;
#endif
  if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),

Changes to src/sqlite.h.in.

1069
1070
1071
1072
1073
1074
1075











1076
1077
1078
1079
1080
1081
1082
....
1104
1105
1106
1107
1108
1109
1110

1111
1112
1113
1114
1115
1116
1117
** a prior successful call to [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE].
**
** <li>[[SQLITE_FCNTL_LOCK_TIMEOUT]]
** The [SQLITE_FCNTL_LOCK_TIMEOUT] opcode causes attempts to obtain
** a file lock using the xLock or xShmLock methods of the VFS to wait
** for up to M milliseconds before failing, where M is the single 
** unsigned integer parameter.











** </ul>
*/
#define SQLITE_FCNTL_LOCKSTATE               1
#define SQLITE_FCNTL_GET_LOCKPROXYFILE       2
#define SQLITE_FCNTL_SET_LOCKPROXYFILE       3
#define SQLITE_FCNTL_LAST_ERRNO              4
#define SQLITE_FCNTL_SIZE_HINT               5
................................................................................
#define SQLITE_FCNTL_JOURNAL_POINTER        28
#define SQLITE_FCNTL_WIN32_GET_HANDLE       29
#define SQLITE_FCNTL_PDB                    30
#define SQLITE_FCNTL_BEGIN_ATOMIC_WRITE     31
#define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE    32
#define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE  33
#define SQLITE_FCNTL_LOCK_TIMEOUT           34


/* deprecated names */
#define SQLITE_GET_LOCKPROXYFILE      SQLITE_FCNTL_GET_LOCKPROXYFILE
#define SQLITE_SET_LOCKPROXYFILE      SQLITE_FCNTL_SET_LOCKPROXYFILE
#define SQLITE_LAST_ERRNO             SQLITE_FCNTL_LAST_ERRNO









>
>
>
>
>
>
>
>
>
>
>







 







>







1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
....
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
** a prior successful call to [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE].
**
** <li>[[SQLITE_FCNTL_LOCK_TIMEOUT]]
** The [SQLITE_FCNTL_LOCK_TIMEOUT] opcode causes attempts to obtain
** a file lock using the xLock or xShmLock methods of the VFS to wait
** for up to M milliseconds before failing, where M is the single 
** unsigned integer parameter.
**
** <li>[[SQLITE_FCNTL_OFD_LOCKS]]
** The [SQLITE_FCNTL_OFD_LOCKS] opcode will query whether or not OFD
** locking is currently being used for an open file, or disable the use
** of OFD locking on the file.  The argument is a pointer to an integer
** in the caller's context.  If that integer is initially -1, then it is
** set to 1 or 0 if the system is or is not using OFD locks for the file.
** If the integer is initially 0, then OFD locks are disabled for the file.
** This file-control is intended for testing and validation use only.
** Applications that strive for correctness and error-free operation should
** not mess with this file-control.
** </ul>
*/
#define SQLITE_FCNTL_LOCKSTATE               1
#define SQLITE_FCNTL_GET_LOCKPROXYFILE       2
#define SQLITE_FCNTL_SET_LOCKPROXYFILE       3
#define SQLITE_FCNTL_LAST_ERRNO              4
#define SQLITE_FCNTL_SIZE_HINT               5
................................................................................
#define SQLITE_FCNTL_JOURNAL_POINTER        28
#define SQLITE_FCNTL_WIN32_GET_HANDLE       29
#define SQLITE_FCNTL_PDB                    30
#define SQLITE_FCNTL_BEGIN_ATOMIC_WRITE     31
#define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE    32
#define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE  33
#define SQLITE_FCNTL_LOCK_TIMEOUT           34
#define SQLITE_FCNTL_OFD_LOCKS              35

/* deprecated names */
#define SQLITE_GET_LOCKPROXYFILE      SQLITE_FCNTL_GET_LOCKPROXYFILE
#define SQLITE_SET_LOCKPROXYFILE      SQLITE_FCNTL_SET_LOCKPROXYFILE
#define SQLITE_LAST_ERRNO             SQLITE_FCNTL_LAST_ERRNO


Changes to src/test1.c.

5958
5959
5960
5961
5962
5963
5964


































5965
5966
5967
5968
5969
5970
5971
....
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
....
7701
7702
7703
7704
7705
7706
7707

7708
7709
7710
7711
7712
7713
7714
  }
  if( Tcl_GetIntFromObj(interp, objv[2], &bPersist) ) return TCL_ERROR;
  rc = sqlite3_file_control(db, NULL, SQLITE_FCNTL_PERSIST_WAL, (void*)&bPersist);
  sqlite3_snprintf(sizeof(z), z, "%d %d", rc, bPersist);
  Tcl_AppendResult(interp, z, (char*)0);
  return TCL_OK;  
}



































/*
** tclcmd:   file_control_powersafe_overwrite DB PSOW-FLAG
**
** This TCL command runs the sqlite3_file_control interface with
** the SQLITE_FCNTL_POWERSAFE_OVERWRITE opcode.
*/
................................................................................
  }
  if( Tcl_GetIntFromObj(interp, objv[2], &b) ) return TCL_ERROR;
  rc = sqlite3_file_control(db,NULL,SQLITE_FCNTL_POWERSAFE_OVERWRITE,(void*)&b);
  sqlite3_snprintf(sizeof(z), z, "%d %d", rc, b);
  Tcl_AppendResult(interp, z, (char*)0);
  return TCL_OK;  
}


/*
** tclcmd:   file_control_vfsname DB ?AUXDB?
**
** Return a string that describes the stack of VFSes.
*/
static int SQLITE_TCLAPI file_control_vfsname(
................................................................................
     { "file_control_sizehint_test",  file_control_sizehint_test,   0   },
#if SQLITE_OS_WIN
     { "file_control_win32_av_retry", file_control_win32_av_retry,  0   },
     { "file_control_win32_get_handle", file_control_win32_get_handle, 0  },
     { "file_control_win32_set_handle", file_control_win32_set_handle, 0  },
#endif
     { "file_control_persist_wal",    file_control_persist_wal,     0   },

     { "file_control_powersafe_overwrite",file_control_powersafe_overwrite,0},
     { "file_control_vfsname",        file_control_vfsname,         0   },
     { "file_control_tempfilename",   file_control_tempfilename,    0   },
     { "sqlite3_vfs_list",           vfs_list,     0   },
     { "sqlite3_create_function_v2", test_create_function_v2, 0 },

     /* Functions from os.h */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<







 







>







5958
5959
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
5975
5976
5977
5978
5979
5980
5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
....
6024
6025
6026
6027
6028
6029
6030

6031
6032
6033
6034
6035
6036
6037
....
7734
7735
7736
7737
7738
7739
7740
7741
7742
7743
7744
7745
7746
7747
7748
  }
  if( Tcl_GetIntFromObj(interp, objv[2], &bPersist) ) return TCL_ERROR;
  rc = sqlite3_file_control(db, NULL, SQLITE_FCNTL_PERSIST_WAL, (void*)&bPersist);
  sqlite3_snprintf(sizeof(z), z, "%d %d", rc, bPersist);
  Tcl_AppendResult(interp, z, (char*)0);
  return TCL_OK;  
}

/*
** tclcmd:   file_control_ofd_locks DB ?DISABLE?
**
** Run sqlite3_file_control() with the SQLITE_FCNTL_OFD_LOCKS opcode on DB.
** The command result is "1" if the file-control returns SQLITE_OK and
** leaves a non-zero value in its integer argument, and "0" otherwise.
**
** If the DISABLE argument is omitted or zero, the file-control is invoked
** with -1, which is a query.  If DISABLE is non-zero, the file-control is
** invoked with 0, which asks for OFD locking to be disabled on the file.
*/
static int SQLITE_TCLAPI file_control_ofd_locks(
  ClientData clientData, /* Not used */
  Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
  int objc,              /* Number of arguments */
  Tcl_Obj *CONST objv[]  /* Command arguments */
){
  sqlite3 *db;           /* Database connection named by objv[1] */
  int rc;                /* Result code from sqlite3_file_control() */
  int b = 0;             /* DISABLE flag on input; file-control arg after */

  if( objc!=2 && objc!=3 ){
    /* The variadic argument list must be terminated by a pointer-typed
    ** NULL.  A bare integer 0 is not a valid sentinel on platforms where
    ** int and pointers have different sizes. */
    Tcl_AppendResult(interp, "wrong # args: should be \"",
        Tcl_GetStringFromObj(objv[0], 0), " DB ?DISABLE?", (char*)0);
    return TCL_ERROR;
  }
  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){
    return TCL_ERROR;
  }
  if( objc==3 && Tcl_GetIntFromObj(interp, objv[2], &b) ) return TCL_ERROR;

  /* Translate the TCL-level flag into the file-control argument:
  ** non-zero DISABLE -> 0 (disable), otherwise -> -1 (query only). */
  b = b ? 0 : -1;
  rc = sqlite3_file_control(db,NULL,SQLITE_FCNTL_OFD_LOCKS,(void*)&b);
  Tcl_AppendResult(interp, (rc==SQLITE_OK && b) ? "1" : "0", (char*)0);
  return TCL_OK;
}

/*
** tclcmd:   file_control_powersafe_overwrite DB PSOW-FLAG
**
** This TCL command runs the sqlite3_file_control interface with
** the SQLITE_FCNTL_POWERSAFE_OVERWRITE opcode.
*/
................................................................................
  }
  if( Tcl_GetIntFromObj(interp, objv[2], &b) ) return TCL_ERROR;
  rc = sqlite3_file_control(db,NULL,SQLITE_FCNTL_POWERSAFE_OVERWRITE,(void*)&b);
  sqlite3_snprintf(sizeof(z), z, "%d %d", rc, b);
  Tcl_AppendResult(interp, z, (char*)0);
  return TCL_OK;  
}


/*
** tclcmd:   file_control_vfsname DB ?AUXDB?
**
** Return a string that describes the stack of VFSes.
*/
static int SQLITE_TCLAPI file_control_vfsname(
................................................................................
     { "file_control_sizehint_test",  file_control_sizehint_test,   0   },
#if SQLITE_OS_WIN
     { "file_control_win32_av_retry", file_control_win32_av_retry,  0   },
     { "file_control_win32_get_handle", file_control_win32_get_handle, 0  },
     { "file_control_win32_set_handle", file_control_win32_set_handle, 0  },
#endif
     { "file_control_persist_wal",    file_control_persist_wal,     0   },
     { "file_control_ofd_locks",      file_control_ofd_locks,       0   },
     { "file_control_powersafe_overwrite",file_control_powersafe_overwrite,0},
     { "file_control_vfsname",        file_control_vfsname,         0   },
     { "file_control_tempfilename",   file_control_tempfilename,    0   },
     { "sqlite3_vfs_list",           vfs_list,     0   },
     { "sqlite3_create_function_v2", test_create_function_v2, 0 },

     /* Functions from os.h */