/ Artifact Content
Login

Artifact c230a7a24766320d8414afd087edcd43e499fb45e86361f6f4f464f343d965a9:


     1  /*
     2  ** 2004 May 22
     3  **
     4  ** The author disclaims copyright to this source code.  In place of
     5  ** a legal notice, here is a blessing:
     6  **
     7  **    May you do good and not evil.
     8  **    May you find forgiveness for yourself and forgive others.
     9  **    May you share freely, never taking more than you give.
    10  **
    11  ******************************************************************************
    12  **
    13  ** This file contains the VFS implementation for unix-like operating systems
    14  ** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.
    15  **
    16  ** There are actually several different VFS implementations in this file.
    17  ** The differences are in the way that file locking is done.  The default
    18  ** implementation uses Posix Advisory Locks.  Alternative implementations
    19  ** use flock(), dot-files, various proprietary locking schemas, or simply
    20  ** skip locking all together.
    21  **
    22  ** This source file is organized into divisions where the logic for various
    23  ** subfunctions is contained within the appropriate division.  PLEASE
    24  ** KEEP THE STRUCTURE OF THIS FILE INTACT.  New code should be placed
    25  ** in the correct division and should be clearly labeled.
    26  **
    27  ** The layout of divisions is as follows:
    28  **
    29  **   *  General-purpose declarations and utility functions.
    30  **   *  Unique file ID logic used by VxWorks.
    31  **   *  Various locking primitive implementations (all except proxy locking):
    32  **      + for Posix Advisory Locks
    33  **      + for no-op locks
    34  **      + for dot-file locks
    35  **      + for flock() locking
    36  **      + for named semaphore locks (VxWorks only)
    37  **      + for AFP filesystem locks (MacOSX only)
    38  **   *  sqlite3_file methods not associated with locking.
    39  **   *  Definitions of sqlite3_io_methods objects for all locking
    40  **      methods plus "finder" functions for each locking method.
    41  **   *  sqlite3_vfs method implementations.
    42  **   *  Locking primitives for the proxy uber-locking-method. (MacOSX only)
    43  **   *  Definitions of sqlite3_vfs objects for all locking methods
    44  **      plus implementations of sqlite3_os_init() and sqlite3_os_end().
    45  */
    46  #include "sqliteInt.h"
    47  #if SQLITE_OS_UNIX              /* This file is used on unix only */
    48  
/*
** There are various methods for file locking used for concurrency
** control:
**
**   1. POSIX locking (the default),
**   2. No locking,
**   3. Dot-file locking,
**   4. flock() locking,
**   5. AFP locking (OSX only),
**   6. Named POSIX semaphores (VXWorks only),
**   7. proxy locking. (OSX only)
**
** Styles 4, 5, and 7 are only available if SQLITE_ENABLE_LOCKING_STYLE
** is defined to 1.  SQLITE_ENABLE_LOCKING_STYLE also enables automatic
** selection of the appropriate locking style based on the filesystem
** where the database is located.
**
** If the build does not define SQLITE_ENABLE_LOCKING_STYLE itself, the
** block below defaults it to 1 when __APPLE__ is defined and to 0
** otherwise.
*/
#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
#  if defined(__APPLE__)
#    define SQLITE_ENABLE_LOCKING_STYLE 1
#  else
#    define SQLITE_ENABLE_LOCKING_STYLE 0
#  endif
#endif
    73  
/* Use pread() and pwrite() if they are available.
**
** When __APPLE__ is defined, HAVE_PREAD and HAVE_PWRITE are forced on.
** The selection that follows prefers the 64-bit pair: if both
** HAVE_PREAD64 and HAVE_PWRITE64 are defined then USE_PREAD64 is defined
** (and USE_PREAD is undefined); otherwise, if both HAVE_PREAD and
** HAVE_PWRITE are defined, USE_PREAD is defined (and USE_PREAD64 is
** undefined).  If neither pair is available this block leaves both
** USE_* macros untouched.
*/
#if defined(__APPLE__)
# define HAVE_PREAD 1
# define HAVE_PWRITE 1
#endif
#if defined(HAVE_PREAD64) && defined(HAVE_PWRITE64)
# undef USE_PREAD
# define USE_PREAD64 1
#elif defined(HAVE_PREAD) && defined(HAVE_PWRITE)
# undef USE_PREAD64
# define USE_PREAD 1
#endif
    86  
    87  /*
    88  ** standard include files.
    89  */
    90  #include <sys/types.h>
    91  #include <sys/stat.h>
    92  #include <fcntl.h>
    93  #include <sys/ioctl.h>
    94  #include <unistd.h>
    95  #include <time.h>
    96  #include <sys/time.h>
    97  #include <errno.h>
    98  #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
    99  # include <sys/mman.h>
   100  #endif
   101  
   102  #if SQLITE_ENABLE_LOCKING_STYLE
   103  # include <sys/ioctl.h>
   104  # include <sys/file.h>
   105  # include <sys/param.h>
   106  #endif /* SQLITE_ENABLE_LOCKING_STYLE */
   107  
/*
** Decide whether HAVE_GETHOSTUUID is defined.
**
** The macro is only considered on Apple builds whose minimum-required
** OS version macro exceeds the thresholds tested below.  Within such
** builds it is defined unless TARGET_OS_EMBEDDED or
** TARGET_IPHONE_SIMULATOR is defined to a non-zero value, in which case
** a compile-time warning is emitted instead and the macro stays
** undefined.
*/
#if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \
                           (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000))
#  if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \
       && (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0))
#    define HAVE_GETHOSTUUID 1
#  else
#    warning "gethostuuid() is disabled."
#  endif
#endif
   117  
   118  
   119  #if OS_VXWORKS
   120  # include <sys/ioctl.h>
   121  # include <semaphore.h>
   122  # include <limits.h>
   123  #endif /* OS_VXWORKS */
   124  
   125  #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
   126  # include <sys/mount.h>
   127  #endif
   128  
   129  #ifdef HAVE_UTIME
   130  # include <utime.h>
   131  #endif
   132  
/*
** Allowed values of unixFile.fsFlags
*/
#define SQLITE_FSFLAGS_IS_MSDOS     0x1

/*
** If we are to be thread-safe, include the pthreads header and define
** the SQLITE_UNIX_THREADS macro.
*/
#if SQLITE_THREADSAFE
# include <pthread.h>
# define SQLITE_UNIX_THREADS 1
#endif

/*
** Default permissions when creating a new file.  The value is an octal
** mode and may be overridden at compile time.
*/
#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
#endif

/*
** Default permissions when creating auto proxy dir.  The value is an
** octal mode and may be overridden at compile time.
*/
#ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
# define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755
#endif

/*
** Maximum supported path-length.
*/
#define MAX_PATHNAME 512

/*
** Maximum supported symbolic links
*/
#define SQLITE_MAX_SYMLINKS 100

/* Always cast the getpid() return type for compatibility with
** kernel modules in VxWorks.
**
** The macro argument X is accepted but ignored; the expansion always
** calls getpid() directly.
*/
#define osGetpid(X) (pid_t)getpid()
   174  
   175  /*
   176  ** Only set the lastErrno if the error code is a real error and not 
   177  ** a normal expected return code of SQLITE_BUSY or SQLITE_OK
   178  */
   179  #define IS_LOCK_ERROR(x)  ((x != SQLITE_OK) && (x != SQLITE_BUSY))
   180  
/* Forward references.  Only the typedef names are introduced here; apart
** from UnixUnusedFd, the structures themselves are defined elsewhere. */
typedef struct unixShm unixShm;               /* Connection shared memory */
typedef struct unixShmNode unixShmNode;       /* Shared memory instance */
typedef struct unixInodeInfo unixInodeInfo;   /* An i-node */
typedef struct UnixUnusedFd UnixUnusedFd;     /* An unused file descriptor */

/*
** Sometimes, after a file handle is closed by SQLite, the file descriptor
** cannot be closed immediately. In these cases, instances of the following
** structure are used to store the file descriptor while waiting for an
** opportunity to either close or reuse it.
**
** Instances are chained through pNext into a singly linked list of
** descriptors that refer to the same file.
*/
struct UnixUnusedFd {
  int fd;                   /* File descriptor to close */
  int flags;                /* Flags this file descriptor was opened with */
  UnixUnusedFd *pNext;      /* Next unused file descriptor on same file */
};
   198  
/*
** The unixFile structure is a subclass of sqlite3_file specific to the
** unix VFS implementations.
**
** pMethod must remain the first member (see its field comment).  Several
** members exist only under particular compile-time options, so the size
** and layout of this structure vary between builds.
*/
typedef struct unixFile unixFile;
struct unixFile {
  sqlite3_io_methods const *pMethod;  /* Always the first entry */
  sqlite3_vfs *pVfs;                  /* The VFS that created this unixFile */
  unixInodeInfo *pInode;              /* Info about locks on this inode */
  int h;                              /* The file descriptor */
  unsigned char eFileLock;            /* The type of lock held on this fd */
  unsigned short int ctrlFlags;       /* Behavioral bits.  UNIXFILE_* flags */
  int lastErrno;                      /* The unix errno from last I/O error */
  void *lockingContext;               /* Locking style specific state */
  UnixUnusedFd *pPreallocatedUnused;  /* Pre-allocated UnixUnusedFd */
  const char *zPath;                  /* Name of the file */
  unixShm *pShm;                      /* Shared memory segment information */
  int szChunk;                        /* Configured by FCNTL_CHUNK_SIZE */
#if SQLITE_MAX_MMAP_SIZE>0
  /* Memory-mapped I/O state; present only when mmap support is compiled in */
  int nFetchOut;                      /* Number of outstanding xFetch refs */
  sqlite3_int64 mmapSize;             /* Usable size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeActual;       /* Actual size of mapping at pMapRegion */
  sqlite3_int64 mmapSizeMax;          /* Configured FCNTL_MMAP_SIZE value */
  void *pMapRegion;                   /* Memory mapped region */
#endif
  int sectorSize;                     /* Device sector size */
  int deviceCharacteristics;          /* Precomputed device characteristics */
#if SQLITE_ENABLE_LOCKING_STYLE
  int openFlags;                      /* The flags specified at open() */
#endif
#if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__)
  unsigned fsFlags;                   /* cached details from statfs();
                                      ** SQLITE_FSFLAGS_* values */
#endif
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
  unsigned iBusyTimeout;              /* Wait this many millisec on locks */
#endif
#if OS_VXWORKS
  struct vxworksFileId *pId;          /* Unique file ID */
#endif
#ifdef SQLITE_DEBUG
  /* The next group of variables are used to track whether or not the
  ** transaction counter in bytes 24-27 of database files are updated
  ** whenever any part of the database changes.  An assertion fault will
  ** occur if a file is updated without also updating the transaction
  ** counter.  This test is made to avoid new problems similar to the
  ** one described by ticket #3584.
  */
  unsigned char transCntrChng;   /* True if the transaction counter changed */
  unsigned char dbUpdate;        /* True if any part of database file changed */
  unsigned char inNormalWrite;   /* True if in a normal write operation */

#endif

#ifdef SQLITE_TEST
  /* In test mode, increase the size of this structure a bit so that
  ** it is larger than the struct CrashFile defined in test6.c.
  */
  char aPadding[32];
#endif
};
   259  
/* This variable holds the process id (pid) from when the xRandomness()
** method was called.  If xOpen() is called from a different process id,
** indicating that a fork() has occurred, the PRNG will be reset.
**
** The value starts out as 0.
*/
static pid_t randomnessPid = 0;
   265  
/*
** Allowed values for the unixFile.ctrlFlags bitmask.
**
** Each flag occupies a distinct bit.  When SQLITE_DISABLE_DIRSYNC is
** defined, UNIXFILE_DIRSYNC is 0x00, so OR-ing it into ctrlFlags has no
** effect and testing for it is always false.
*/
#define UNIXFILE_EXCL        0x01     /* Connections from one process only */
#define UNIXFILE_RDONLY      0x02     /* Connection is read only */
#define UNIXFILE_PERSIST_WAL 0x04     /* Persistent WAL mode */
#ifndef SQLITE_DISABLE_DIRSYNC
# define UNIXFILE_DIRSYNC    0x08     /* Directory sync needed */
#else
# define UNIXFILE_DIRSYNC    0x00
#endif
#define UNIXFILE_PSOW        0x10     /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
#define UNIXFILE_DELETE      0x20     /* Delete on close */
#define UNIXFILE_URI         0x40     /* Filename might have query parameters */
#define UNIXFILE_NOLOCK      0x80     /* Do no file locking */
   281  
   282  /*
   283  ** Include code that is common to all os_*.c files
   284  */
   285  #include "os_common.h"
   286  
/*
** Define various macros that are missing from some systems.
**
** Each missing open() flag is defined as 0 so that it can still be OR-ed
** into a flag word without effect.  SQLITE_DISABLE_LFS forces
** O_LARGEFILE to 0 even on systems that provide it.
*/
#ifndef O_LARGEFILE
# define O_LARGEFILE 0
#endif
#ifdef SQLITE_DISABLE_LFS
# undef O_LARGEFILE
# define O_LARGEFILE 0
#endif
#ifndef O_NOFOLLOW
# define O_NOFOLLOW 0
#endif
#ifndef O_BINARY
# define O_BINARY 0
#endif

/*
** The threadid macro resolves to the thread-id or to 0.  Used for
** testing and debugging only.
*/
#if SQLITE_THREADSAFE
#define threadid pthread_self()
#else
#define threadid 0
#endif

/*
** Unless the build defines HAVE_MREMAP itself, it defaults to true when
** both __linux__ and _GNU_SOURCE are defined and to false everywhere
** else.
*/
#if !defined(HAVE_MREMAP)
# if defined(__linux__) && defined(_GNU_SOURCE)
#  define HAVE_MREMAP 1
# else
#  define HAVE_MREMAP 0
# endif
#endif

/*
** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek()
** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined.
*/
#ifdef __ANDROID__
# define lseek lseek64
#endif

#ifdef __linux__
/*
** Linux-specific IOCTL magic numbers used for controlling F2FS.
** These are defined locally rather than taken from a system header.
*/
#define F2FS_IOCTL_MAGIC        0xf5
#define F2FS_IOC_START_ATOMIC_WRITE     _IO(F2FS_IOCTL_MAGIC, 1)
#define F2FS_IOC_COMMIT_ATOMIC_WRITE    _IO(F2FS_IOCTL_MAGIC, 2)
#define F2FS_IOC_START_VOLATILE_WRITE   _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_ABORT_VOLATILE_WRITE   _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GET_FEATURES           _IOR(F2FS_IOCTL_MAGIC, 12, u32)
#define F2FS_FEATURE_ATOMIC_WRITE 0x0004
#endif /* __linux__ */
   345  
   346  
   347  /*
   348  ** Different Unix systems declare open() in different ways.  Same use
   349  ** open(const char*,int,mode_t).  Others use open(const char*,int,...).
   350  ** The difference is important when using a pointer to the function.
   351  **
   352  ** The safest way to deal with the problem is to always use this wrapper
   353  ** which always has the same well-defined interface.
   354  */
   355  static int posixOpen(const char *zFile, int flags, int mode){
   356    return open(zFile, flags, mode);
   357  }
   358  
   359  /* Forward reference */
   360  static int openDirectory(const char*, int*);
   361  static int unixGetpagesize(void);
   362  
/*
** Many system calls are accessed through pointer-to-functions so that
** they may be overridden at runtime to facilitate fault injection during
** testing and sandboxing.  The following array holds the names and pointers
** to all overrideable system calls.
**
** Each table entry is immediately followed by an osXxx macro that casts
** the entry's pCurrent pointer back to the real function type.  The
** macros address the table by hard-coded index, so the order of entries
** must stay in sync with those indices: every entry is present in every
** build configuration (with a NULL pointer when the call is unavailable)
** so that the indices never shift.
*/
static struct unix_syscall {
  const char *zName;            /* Name of the system call */
  sqlite3_syscall_ptr pCurrent; /* Current value of the system call */
  sqlite3_syscall_ptr pDefault; /* Default value; 0 until first overridden */
} aSyscall[] = {
  { "open",         (sqlite3_syscall_ptr)posixOpen,  0  },
#define osOpen      ((int(*)(const char*,int,int))aSyscall[0].pCurrent)

  { "close",        (sqlite3_syscall_ptr)close,      0  },
#define osClose     ((int(*)(int))aSyscall[1].pCurrent)

  { "access",       (sqlite3_syscall_ptr)access,     0  },
#define osAccess    ((int(*)(const char*,int))aSyscall[2].pCurrent)

  { "getcwd",       (sqlite3_syscall_ptr)getcwd,     0  },
#define osGetcwd    ((char*(*)(char*,size_t))aSyscall[3].pCurrent)

  { "stat",         (sqlite3_syscall_ptr)stat,       0  },
#define osStat      ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent)

/*
** The DJGPP compiler environment looks mostly like Unix, but it
** lacks the fcntl() system call.  So redefine fcntl() to be something
** that always succeeds.  This means that locking does not occur under
** DJGPP.  But it is DOS - what did you expect?
**
** NOTE(review): the text above talks about fcntl(), but the entry that is
** actually stubbed out below is "fstat" (osFstat becomes a macro that
** evaluates to 0).  Confirm which call was intended.
*/
#ifdef __DJGPP__
  { "fstat",        0,                 0  },
#define osFstat(a,b,c)    0
#else     
  { "fstat",        (sqlite3_syscall_ptr)fstat,      0  },
#define osFstat     ((int(*)(int,struct stat*))aSyscall[5].pCurrent)
#endif

  { "ftruncate",    (sqlite3_syscall_ptr)ftruncate,  0  },
#define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent)

  { "fcntl",        (sqlite3_syscall_ptr)fcntl,      0  },
#define osFcntl     ((int(*)(int,int,...))aSyscall[7].pCurrent)

  { "read",         (sqlite3_syscall_ptr)read,       0  },
#define osRead      ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent)

#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
  { "pread",        (sqlite3_syscall_ptr)pread,      0  },
#else
  { "pread",        (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPread     ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent)

#if defined(USE_PREAD64)
  { "pread64",      (sqlite3_syscall_ptr)pread64,    0  },
#else
  { "pread64",      (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPread64 ((ssize_t(*)(int,void*,size_t,off64_t))aSyscall[10].pCurrent)

  { "write",        (sqlite3_syscall_ptr)write,      0  },
#define osWrite     ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent)

#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
  { "pwrite",       (sqlite3_syscall_ptr)pwrite,     0  },
#else
  { "pwrite",       (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPwrite    ((ssize_t(*)(int,const void*,size_t,off_t))\
                    aSyscall[12].pCurrent)

#if defined(USE_PREAD64)
  { "pwrite64",     (sqlite3_syscall_ptr)pwrite64,   0  },
#else
  { "pwrite64",     (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPwrite64  ((ssize_t(*)(int,const void*,size_t,off64_t))\
                    aSyscall[13].pCurrent)

  { "fchmod",       (sqlite3_syscall_ptr)fchmod,          0  },
#define osFchmod    ((int(*)(int,mode_t))aSyscall[14].pCurrent)

  /* The "fallocate" slot is backed by posix_fallocate() when available */
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
  { "fallocate",    (sqlite3_syscall_ptr)posix_fallocate,  0 },
#else
  { "fallocate",    (sqlite3_syscall_ptr)0,                0 },
#endif
#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)

  { "unlink",       (sqlite3_syscall_ptr)unlink,           0 },
#define osUnlink    ((int(*)(const char*))aSyscall[16].pCurrent)

  /* openDirectory is a function of this file, not a system call, but it
  ** is made overrideable through the same table */
  { "openDirectory",    (sqlite3_syscall_ptr)openDirectory,      0 },
#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)

  { "mkdir",        (sqlite3_syscall_ptr)mkdir,           0 },
#define osMkdir     ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)

  { "rmdir",        (sqlite3_syscall_ptr)rmdir,           0 },
#define osRmdir     ((int(*)(const char*))aSyscall[19].pCurrent)

#if defined(HAVE_FCHOWN)
  { "fchown",       (sqlite3_syscall_ptr)fchown,          0 },
#else
  { "fchown",       (sqlite3_syscall_ptr)0,               0 },
#endif
#define osFchown    ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)

  /* geteuid is gated on HAVE_FCHOWN as well: within the part of the file
  ** visible here it is only called from robustFchown() */
#if defined(HAVE_FCHOWN)
  { "geteuid",      (sqlite3_syscall_ptr)geteuid,         0 },
#else
  { "geteuid",      (sqlite3_syscall_ptr)0,               0 },
#endif
#define osGeteuid   ((uid_t(*)(void))aSyscall[21].pCurrent)

#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
  { "mmap",         (sqlite3_syscall_ptr)mmap,            0 },
#else
  { "mmap",         (sqlite3_syscall_ptr)0,               0 },
#endif
#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent)

#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
  { "munmap",       (sqlite3_syscall_ptr)munmap,          0 },
#else
  { "munmap",       (sqlite3_syscall_ptr)0,               0 },
#endif
#define osMunmap ((int(*)(void*,size_t))aSyscall[23].pCurrent)

#if HAVE_MREMAP && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)
  { "mremap",       (sqlite3_syscall_ptr)mremap,          0 },
#else
  { "mremap",       (sqlite3_syscall_ptr)0,               0 },
#endif
#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent)

  /* The "getpagesize" slot is backed by this file's unixGetpagesize() */
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
  { "getpagesize",  (sqlite3_syscall_ptr)unixGetpagesize, 0 },
#else
  { "getpagesize",  (sqlite3_syscall_ptr)0,               0 },
#endif
#define osGetpagesize ((int(*)(void))aSyscall[25].pCurrent)

#if defined(HAVE_READLINK)
  { "readlink",     (sqlite3_syscall_ptr)readlink,        0 },
#else
  { "readlink",     (sqlite3_syscall_ptr)0,               0 },
#endif
#define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent)

#if defined(HAVE_LSTAT)
  { "lstat",         (sqlite3_syscall_ptr)lstat,          0 },
#else
  { "lstat",         (sqlite3_syscall_ptr)0,              0 },
#endif
#define osLstat      ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent)

#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
  { "ioctl",         (sqlite3_syscall_ptr)ioctl,          0 },
#else
  { "ioctl",         (sqlite3_syscall_ptr)0,              0 },
#endif
#define osIoctl ((int(*)(int,int,...))aSyscall[28].pCurrent)

}; /* End of the overrideable system calls */
   531  
   532  
   533  /*
   534  ** On some systems, calls to fchown() will trigger a message in a security
   535  ** log if they come from non-root processes.  So avoid calling fchown() if
   536  ** we are not running as root.
   537  */
   538  static int robustFchown(int fd, uid_t uid, gid_t gid){
   539  #if defined(HAVE_FCHOWN)
   540    return osGeteuid() ? 0 : osFchown(fd,uid,gid);
   541  #else
   542    return 0;
   543  #endif
   544  }
   545  
   546  /*
   547  ** This is the xSetSystemCall() method of sqlite3_vfs for all of the
   548  ** "unix" VFSes.  Return SQLITE_OK opon successfully updating the
   549  ** system call pointer, or SQLITE_NOTFOUND if there is no configurable
   550  ** system call named zName.
   551  */
   552  static int unixSetSystemCall(
   553    sqlite3_vfs *pNotUsed,        /* The VFS pointer.  Not used */
   554    const char *zName,            /* Name of system call to override */
   555    sqlite3_syscall_ptr pNewFunc  /* Pointer to new system call value */
   556  ){
   557    unsigned int i;
   558    int rc = SQLITE_NOTFOUND;
   559  
   560    UNUSED_PARAMETER(pNotUsed);
   561    if( zName==0 ){
   562      /* If no zName is given, restore all system calls to their default
   563      ** settings and return NULL
   564      */
   565      rc = SQLITE_OK;
   566      for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
   567        if( aSyscall[i].pDefault ){
   568          aSyscall[i].pCurrent = aSyscall[i].pDefault;
   569        }
   570      }
   571    }else{
   572      /* If zName is specified, operate on only the one system call
   573      ** specified.
   574      */
   575      for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
   576        if( strcmp(zName, aSyscall[i].zName)==0 ){
   577          if( aSyscall[i].pDefault==0 ){
   578            aSyscall[i].pDefault = aSyscall[i].pCurrent;
   579          }
   580          rc = SQLITE_OK;
   581          if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault;
   582          aSyscall[i].pCurrent = pNewFunc;
   583          break;
   584        }
   585      }
   586    }
   587    return rc;
   588  }
   589  
   590  /*
   591  ** Return the value of a system call.  Return NULL if zName is not a
   592  ** recognized system call name.  NULL is also returned if the system call
   593  ** is currently undefined.
   594  */
   595  static sqlite3_syscall_ptr unixGetSystemCall(
   596    sqlite3_vfs *pNotUsed,
   597    const char *zName
   598  ){
   599    unsigned int i;
   600  
   601    UNUSED_PARAMETER(pNotUsed);
   602    for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
   603      if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent;
   604    }
   605    return 0;
   606  }
   607  
   608  /*
   609  ** Return the name of the first system call after zName.  If zName==NULL
   610  ** then return the name of the first system call.  Return NULL if zName
   611  ** is the last system call or if zName is not the name of a valid
   612  ** system call.
   613  */
   614  static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){
   615    int i = -1;
   616  
   617    UNUSED_PARAMETER(p);
   618    if( zName ){
   619      for(i=0; i<ArraySize(aSyscall)-1; i++){
   620        if( strcmp(zName, aSyscall[i].zName)==0 ) break;
   621      }
   622    }
   623    for(i++; i<ArraySize(aSyscall); i++){
   624      if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName;
   625    }
   626    return 0;
   627  }
   628  
/*
** Do not accept any file descriptor less than this value, in order to avoid
** opening database files using file descriptors that are commonly used for
** standard input, output, and error.
**
** The default of 3 may be overridden at compile time.
*/
#ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR
# define SQLITE_MINIMUM_FILE_DESCRIPTOR 3
#endif
   637  
   638  /*
   639  ** Invoke open().  Do so multiple times, until it either succeeds or
   640  ** fails for some reason other than EINTR.
   641  **
   642  ** If the file creation mode "m" is 0 then set it to the default for
   643  ** SQLite.  The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally
   644  ** 0644) as modified by the system umask.  If m is not 0, then
   645  ** make the file creation mode be exactly m ignoring the umask.
   646  **
   647  ** The m parameter will be non-zero only when creating -wal, -journal,
   648  ** and -shm files.  We want those files to have *exactly* the same
   649  ** permissions as their original database, unadulterated by the umask.
   650  ** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a
   651  ** transaction crashes and leaves behind hot journals, then any
   652  ** process that is able to write to the database will also be able to
   653  ** recover the hot journals.
   654  */
   655  static int robust_open(const char *z, int f, mode_t m){
   656    int fd;
   657    mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS;
   658    while(1){
   659  #if defined(O_CLOEXEC)
   660      fd = osOpen(z,f|O_CLOEXEC,m2);
   661  #else
   662      fd = osOpen(z,f,m2);
   663  #endif
   664      if( fd<0 ){
   665        if( errno==EINTR ) continue;
   666        break;
   667      }
   668      if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break;
   669      osClose(fd);
   670      sqlite3_log(SQLITE_WARNING, 
   671                  "attempt to open \"%s\" as file descriptor %d", z, fd);
   672      fd = -1;
   673      if( osOpen("/dev/null", f, m)<0 ) break;
   674    }
   675    if( fd>=0 ){
   676      if( m!=0 ){
   677        struct stat statbuf;
   678        if( osFstat(fd, &statbuf)==0 
   679         && statbuf.st_size==0
   680         && (statbuf.st_mode&0777)!=m 
   681        ){
   682          osFchmod(fd, m);
   683        }
   684      }
   685  #if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0)
   686      osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
   687  #endif
   688    }
   689    return fd;
   690  }
   691  
   692  /*
   693  ** Helper functions to obtain and relinquish the global mutex. The
   694  ** global mutex is used to protect the unixInodeInfo and
   695  ** vxworksFileId objects used by this file, all of which may be 
   696  ** shared by multiple threads.
   697  **
   698  ** Function unixMutexHeld() is used to assert() that the global mutex 
   699  ** is held when required. This function is only used as part of assert() 
   700  ** statements. e.g.
   701  **
   702  **   unixEnterMutex()
   703  **     assert( unixMutexHeld() );
   704  **   unixEnterLeave()
   705  */
   706  static sqlite3_mutex *unixBigLock = 0;
   707  static void unixEnterMutex(void){
   708    sqlite3_mutex_enter(unixBigLock);
   709  }
   710  static void unixLeaveMutex(void){
   711    sqlite3_mutex_leave(unixBigLock);
   712  }
   713  #ifdef SQLITE_DEBUG
   714  static int unixMutexHeld(void) {
   715    return sqlite3_mutex_held(unixBigLock);
   716  }
   717  #endif
   718  
   719  
   720  #ifdef SQLITE_HAVE_OS_TRACE
   721  /*
   722  ** Helper function for printing out trace information from debugging
   723  ** binaries. This returns the string representation of the supplied
   724  ** integer lock-type.
   725  */
   726  static const char *azFileLock(int eFileLock){
   727    switch( eFileLock ){
   728      case NO_LOCK: return "NONE";
   729      case SHARED_LOCK: return "SHARED";
   730      case RESERVED_LOCK: return "RESERVED";
   731      case PENDING_LOCK: return "PENDING";
   732      case EXCLUSIVE_LOCK: return "EXCLUSIVE";
   733    }
   734    return "ERROR";
   735  }
   736  #endif
   737  
   738  #ifdef SQLITE_LOCK_TRACE
   739  /*
   740  ** Print out information about all locking operations.
   741  **
   742  ** This routine is used for troubleshooting locks on multithreaded
   743  ** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
   744  ** command-line option on the compiler.  This code is normally
   745  ** turned off.
   746  */
   747  static int lockTrace(int fd, int op, struct flock *p){
   748    char *zOpName, *zType;
   749    int s;
   750    int savedErrno;
   751    if( op==F_GETLK ){
   752      zOpName = "GETLK";
   753    }else if( op==F_SETLK ){
   754      zOpName = "SETLK";
   755    }else{
   756      s = osFcntl(fd, op, p);
   757      sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
   758      return s;
   759    }
   760    if( p->l_type==F_RDLCK ){
   761      zType = "RDLCK";
   762    }else if( p->l_type==F_WRLCK ){
   763      zType = "WRLCK";
   764    }else if( p->l_type==F_UNLCK ){
   765      zType = "UNLCK";
   766    }else{
   767      assert( 0 );
   768    }
   769    assert( p->l_whence==SEEK_SET );
   770    s = osFcntl(fd, op, p);
   771    savedErrno = errno;
   772    sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
   773       threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
   774       (int)p->l_pid, s);
   775    if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
   776      struct flock l2;
   777      l2 = *p;
   778      osFcntl(fd, F_GETLK, &l2);
   779      if( l2.l_type==F_RDLCK ){
   780        zType = "RDLCK";
   781      }else if( l2.l_type==F_WRLCK ){
   782        zType = "WRLCK";
   783      }else if( l2.l_type==F_UNLCK ){
   784        zType = "UNLCK";
   785      }else{
   786        assert( 0 );
   787      }
   788      sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
   789         zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
   790    }
   791    errno = savedErrno;
   792    return s;
   793  }
   794  #undef osFcntl
   795  #define osFcntl lockTrace
   796  #endif /* SQLITE_LOCK_TRACE */
   797  
   798  /*
   799  ** Retry ftruncate() calls that fail due to EINTR
   800  **
   801  ** All calls to ftruncate() within this file should be made through
   802  ** this wrapper.  On the Android platform, bypassing the logic below
   803  ** could lead to a corrupt database.
   804  */
   805  static int robust_ftruncate(int h, sqlite3_int64 sz){
   806    int rc;
   807  #ifdef __ANDROID__
   808    /* On Android, ftruncate() always uses 32-bit offsets, even if 
   809    ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to
   810    ** truncate a file to any size larger than 2GiB. Silently ignore any
   811    ** such attempts.  */
   812    if( sz>(sqlite3_int64)0x7FFFFFFF ){
   813      rc = SQLITE_OK;
   814    }else
   815  #endif
   816    do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR );
   817    return rc;
   818  }
   819  
   820  /*
   821  ** This routine translates a standard POSIX errno code into something
   822  ** useful to the clients of the sqlite3 functions.  Specifically, it is
   823  ** intended to translate a variety of "try again" errors into SQLITE_BUSY
   824  ** and a variety of "please close the file descriptor NOW" errors into 
   825  ** SQLITE_IOERR
   826  ** 
   827  ** Errors during initialization of locks, or file system support for locks,
   828  ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
   829  */
   830  static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
   831    assert( (sqliteIOErr == SQLITE_IOERR_LOCK) || 
   832            (sqliteIOErr == SQLITE_IOERR_UNLOCK) || 
   833            (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
   834            (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) );
   835    switch (posixError) {
   836    case EACCES: 
   837    case EAGAIN:
   838    case ETIMEDOUT:
   839    case EBUSY:
   840    case EINTR:
   841    case ENOLCK:  
   842      /* random NFS retry error, unless during file system support 
   843       * introspection, in which it actually means what it says */
   844      return SQLITE_BUSY;
   845      
   846    case EPERM: 
   847      return SQLITE_PERM;
   848      
   849    default: 
   850      return sqliteIOErr;
   851    }
   852  }
   853  
   854  
   855  /******************************************************************************
   856  ****************** Begin Unique File ID Utility Used By VxWorks ***************
   857  **
   858  ** On most versions of unix, we can get a unique ID for a file by concatenating
   859  ** the device number and the inode number.  But this does not work on VxWorks.
   860  ** On VxWorks, a unique file id must be based on the canonical filename.
   861  **
   862  ** A pointer to an instance of the following structure can be used as a
   863  ** unique file ID in VxWorks.  Each instance of this structure contains
   864  ** a copy of the canonical filename.  There is also a reference count.  
   865  ** The structure is reclaimed when the number of pointers to it drops to
   866  ** zero.
   867  **
   868  ** There are never very many files open at one time and lookups are not
   869  ** a performance-critical path, so it is sufficient to put these
   870  ** structures on a linked list.
   871  */
struct vxworksFileId {
  struct vxworksFileId *pNext;  /* Next in a list of them all */
  int nRef;                     /* Number of references to this one; the
                                ** object is unlinked and freed when
                                ** vxworksReleaseFileId() drops it to 0 */
  int nName;                    /* Length of the zCanonicalName[] string,
                                ** not counting the nul terminator */
  char *zCanonicalName;         /* Canonical filename.  vxworksFindFileId()
                                ** points this at the bytes that follow the
                                ** structure in the same allocation */
};
   878  
   879  #if OS_VXWORKS
/* 
** All unique filenames are held on a linked list headed by this
** variable.  vxworksFindFileId() pushes new entries onto the front of the
** list and vxworksReleaseFileId() unlinks them; both touch the list only
** between unixEnterMutex() and unixLeaveMutex().
*/
static struct vxworksFileId *vxworksFileList = 0;
   885  
   886  /*
   887  ** Simplify a filename into its canonical form
   888  ** by making the following changes:
   889  **
   890  **  * removing any trailing and duplicate /
   891  **  * convert /./ into just /
   892  **  * convert /A/../ where A is any simple name into just /
   893  **
   894  ** Changes are made in-place.  Return the new name length.
   895  **
   896  ** The original filename is in z[0..n-1].  Return the number of
   897  ** characters in the simplified name.
   898  */
   899  static int vxworksSimplifyName(char *z, int n){
   900    int i, j;
   901    while( n>1 && z[n-1]=='/' ){ n--; }
   902    for(i=j=0; i<n; i++){
   903      if( z[i]=='/' ){
   904        if( z[i+1]=='/' ) continue;
   905        if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
   906          i += 1;
   907          continue;
   908        }
   909        if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
   910          while( j>0 && z[j-1]!='/' ){ j--; }
   911          if( j>0 ){ j--; }
   912          i += 2;
   913          continue;
   914        }
   915      }
   916      z[j++] = z[i];
   917    }
   918    z[j] = 0;
   919    return j;
   920  }
   921  
   922  /*
   923  ** Find a unique file ID for the given absolute pathname.  Return
   924  ** a pointer to the vxworksFileId object.  This pointer is the unique
   925  ** file ID.
   926  **
   927  ** The nRef field of the vxworksFileId object is incremented before
   928  ** the object is returned.  A new vxworksFileId object is created
   929  ** and added to the global list if necessary.
   930  **
   931  ** If a memory allocation error occurs, return NULL.
   932  */
   933  static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
   934    struct vxworksFileId *pNew;         /* search key and new file ID */
   935    struct vxworksFileId *pCandidate;   /* For looping over existing file IDs */
   936    int n;                              /* Length of zAbsoluteName string */
   937  
   938    assert( zAbsoluteName[0]=='/' );
   939    n = (int)strlen(zAbsoluteName);
   940    pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) );
   941    if( pNew==0 ) return 0;
   942    pNew->zCanonicalName = (char*)&pNew[1];
   943    memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);
   944    n = vxworksSimplifyName(pNew->zCanonicalName, n);
   945  
   946    /* Search for an existing entry that matching the canonical name.
   947    ** If found, increment the reference count and return a pointer to
   948    ** the existing file ID.
   949    */
   950    unixEnterMutex();
   951    for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){
   952      if( pCandidate->nName==n 
   953       && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0
   954      ){
   955         sqlite3_free(pNew);
   956         pCandidate->nRef++;
   957         unixLeaveMutex();
   958         return pCandidate;
   959      }
   960    }
   961  
   962    /* No match was found.  We will make a new file ID */
   963    pNew->nRef = 1;
   964    pNew->nName = n;
   965    pNew->pNext = vxworksFileList;
   966    vxworksFileList = pNew;
   967    unixLeaveMutex();
   968    return pNew;
   969  }
   970  
   971  /*
   972  ** Decrement the reference count on a vxworksFileId object.  Free
   973  ** the object when the reference count reaches zero.
   974  */
   975  static void vxworksReleaseFileId(struct vxworksFileId *pId){
   976    unixEnterMutex();
   977    assert( pId->nRef>0 );
   978    pId->nRef--;
   979    if( pId->nRef==0 ){
   980      struct vxworksFileId **pp;
   981      for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){}
   982      assert( *pp==pId );
   983      *pp = pId->pNext;
   984      sqlite3_free(pId);
   985    }
   986    unixLeaveMutex();
   987  }
   988  #endif /* OS_VXWORKS */
   989  /*************** End of Unique File ID Utility Used By VxWorks ****************
   990  ******************************************************************************/
   991  
   992  
/******************************************************************************
*************************** Posix Advisory Locking ****************************
**
** POSIX advisory locks are broken by design.  ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 specify that when a process
** sets or clears a lock, that operation overrides any prior locks set
** by the same process.  It does not explicitly say so, but this implies
** that it overrides locks set by the same process using a different
** file descriptor.  Consider this test case:
**
**       int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
**       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
**
** Suppose ./file1 and ./file2 are really the same file (because
** one is a hard or symbolic link to the other) then if you set
** an exclusive lock on fd1, then try to get an exclusive lock
** on fd2, it works.  I would have expected the second lock to
** fail since there was already a lock on the file due to fd1.
** But not so.  Since both locks came from the same process, the
** second overrides the first, even though they were on different
** file descriptors opened on different file names.
**
** This means that we cannot use POSIX locks to synchronize file access
** among competing threads of the same process.  POSIX locks will work fine
** to synchronize access for threads in separate processes, but not
** threads within the same process.
**
** To work around the problem, SQLite has to manage file locks internally
** on its own.  Whenever a new database is opened, we have to find the
** specific inode of the database file (the inode is determined by the
** st_dev and st_ino fields of the stat structure that fstat() fills in)
** and check for locks already existing on that inode.  When locks are
** created or removed, we have to look at our own internal record of the
** locks to see if another thread has previously set a lock on that same
** inode.
**
** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.
** For VxWorks, we have to use the alternative unique ID system based on
** canonical filename and implemented in the previous division.)
**
** The sqlite3_file structure for POSIX is no longer just an integer file
** descriptor.  It is now a structure that holds the integer file
** descriptor and a pointer to a structure that describes the internal
** locks on the corresponding inode.  There is one locking structure
** per inode, so if the same inode is opened twice, both unixFile structures
** point to the same locking structure.  The locking structure keeps
** a reference count (so we will know when to delete it) and a "cnt"
** field that tells us its internal lock status.  cnt==0 means the
** file is unlocked.  cnt==-1 means the file has an exclusive lock.
** cnt>0 means there are cnt shared locks on the file.
**
** Any attempt to lock or unlock a file first checks the locking
** structure.  The fcntl() system call is only invoked to set a
** POSIX lock if the internal lock structure transitions between
** a locked and an unlocked state.
**
** But wait:  there are yet more problems with POSIX advisory locks.
**
** If you close a file descriptor that points to a file that has locks,
** all locks on that file that are owned by the current process are
** released.  To work around this problem, each unixInodeInfo object
** maintains a count of the number of pending locks on that inode.
** When an attempt is made to close a unixFile, if there are
** other unixFile objects open on the same inode that are holding locks,
** the call to close() the file descriptor is deferred until all of the
** locks clear.  The unixInodeInfo structure keeps a list of file
** descriptors that need to be closed and that list is walked (and
** cleared) when the last lock clears.
**
** Yet another problem:  LinuxThreads do not play well with posix locks.
**
** Many older versions of linux use the LinuxThreads library which is
** not posix compliant.  Under LinuxThreads, a lock created by thread
** A cannot be modified or overridden by a different thread B.
** Only thread A can modify the lock.  Locking behavior is correct
** if the application uses the newer Native Posix Thread Library (NPTL)
** on linux - with NPTL a lock created by thread A can override locks
** in thread B.  But there is no way to know at compile-time which
** threading library is being used.  So there is no way to know at
** compile-time whether or not thread A can override locks on thread B.
** One has to do a run-time check to discover the behavior of the
** current process.
**
** SQLite used to support LinuxThreads.  But support for LinuxThreads
** was dropped beginning with version 3.7.0.  SQLite will still work with
** LinuxThreads provided that (1) there is no more than one connection
** per database file in the same process and (2) database connections
** do not move across threads.
*/

/*
** An instance of the following structure serves as the key used
** to locate a particular unixInodeInfo object.
*/
struct unixFileId {
  dev_t dev;                  /* Device number */
#if OS_VXWORKS
  struct vxworksFileId *pId;  /* Unique file ID for vxworks. */
#else
  /* We are told that some versions of Android contain a bug that
  ** sizes ino_t at only 32-bits instead of 64-bits. (See
  ** https://android-review.googlesource.com/#/c/115351/3/dist/sqlite3.c)
  ** To work around this, always allocate 64-bits for the inode number.
  ** On small machines that only have 32-bit inodes, this wastes 4 bytes,
  ** but that should not be a big deal. */
  /* WAS:  ino_t ino;   */
  u64 ino;                   /* Inode number */
#endif
};

/*
** An instance of the following structure is allocated for each open
** inode.  Or, on LinuxThreads, there is one of these structures for
** each inode opened by each thread.
**
** A single inode can have multiple file descriptors, so each unixFile
** structure contains a pointer to an instance of this object and this
** object keeps a count of the number of unixFile pointing to it.
*/
struct unixInodeInfo {
  struct unixFileId fileId;       /* The lookup key */
  int nShared;                    /* Number of SHARED locks held */
  unsigned char eFileLock;        /* One of SHARED_LOCK, RESERVED_LOCK etc. */
  unsigned char bProcessLock;     /* An exclusive process lock is held */
  int nRef;                       /* Number of pointers to this structure */
  unixShmNode *pShmNode;          /* Shared memory associated with this inode */
  int nLock;                      /* Number of outstanding file locks */
  UnixUnusedFd *pUnused;          /* Unused file descriptors to close */
  unixInodeInfo *pNext;           /* List of all unixInodeInfo objects */
  unixInodeInfo *pPrev;           /*    .... doubly linked */
#if SQLITE_ENABLE_LOCKING_STYLE
  unsigned long long sharedByte;  /* for AFP simulated shared lock */
#endif
#if OS_VXWORKS
  sem_t *pSem;                    /* Named POSIX semaphore */
  char aSemName[MAX_PATHNAME+2];  /* Name of that semaphore */
#endif
};

/*
** A list of all unixInodeInfo objects, together with a running count of
** the file descriptors parked on their pUnused lists.
*/
static unixInodeInfo *inodeList = 0;  /* All unixInodeInfo objects */
static unsigned int nUnusedFd = 0;    /* Total unused file descriptors */

/*
**
** This function - unixLogErrorAtLine(), is only ever called via the macro
** unixLogError().
**
** It is invoked after an error occurs in an OS function and errno has been
** set. It logs a message using sqlite3_log() containing the current value of
** errno and, if possible, the human-readable equivalent from strerror() or
** strerror_r().
**
** The first argument passed to the macro should be the error code that
** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN).
** The two subsequent arguments should be the name of the OS function that
** failed (e.g. "unlink", "open") and the associated file-system path,
** if any.
**
** The return value is always errcode, so callers can log and return in
** one statement.
*/
#define unixLogError(a,b,c)     unixLogErrorAtLine(a,b,c,__LINE__)
static int unixLogErrorAtLine(
  int errcode,                    /* SQLite error code */
  const char *zFunc,              /* Name of OS function that failed */
  const char *zPath,              /* File path associated with error */
  int iLine                       /* Source line number where error occurred */
){
  char *zErr;                     /* Message from strerror() or equivalent */
  int iErrno = errno;             /* Saved syscall error number */

  /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use
  ** the strerror() function to obtain the human-readable error message
  ** equivalent to errno. Otherwise, use strerror_r().
  */
#if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R)
  char aErr[80];
  memset(aErr, 0, sizeof(aErr));
  zErr = aErr;

  /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined,
  ** assume that the system provides the GNU version of strerror_r() that
  ** returns a pointer to a buffer containing the error message. That pointer
  ** may point to aErr[], or it may point to some static storage somewhere.
  ** Otherwise, assume that the system provides the POSIX version of
  ** strerror_r(), which always writes an error message into aErr[].
  **
  ** If the code incorrectly assumes that it is the POSIX version that is
  ** available, the error message will often be an empty string. Not a
  ** huge problem. Incorrectly concluding that the GNU version is available
  ** could lead to a segfault though.
  */
#if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU)
  zErr =
# endif
  strerror_r(iErrno, aErr, sizeof(aErr)-1);

#elif SQLITE_THREADSAFE
  /* This is a threadsafe build, but strerror_r() is not available. */
  zErr = "";
#else
  /* Non-threadsafe build, use strerror(). */
  zErr = strerror(iErrno);
#endif

  /* A NULL path is logged as an empty string. */
  if( zPath==0 ) zPath = "";
  sqlite3_log(errcode,
      "os_unix.c:%d: (%d) %s(%s) - %s",
      iLine, iErrno, zFunc, zPath, zErr
  );

  return errcode;
}

/*
** Close a file descriptor.
**
** We assume that close() almost always works, since it is only in a
** very sick application or on a very sick platform that it might fail.
** If it does fail, simply leak the file descriptor, but do log the
** error.
**
** Note that it is not safe to retry close() after EINTR since the
** file descriptor might have already been reused by another thread.
** So we don't even try to recover from an EINTR.  Just log the error
** and move on.
**
** pFile may be NULL, in which case the error is logged without a path.
** lineno is the source line reported in the log message.
*/
static void robust_close(unixFile *pFile, int h, int lineno){
  if( osClose(h) ){
    unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close",
                       pFile ? pFile->zPath : 0, lineno);
  }
}

/*
** Set the pFile->lastErrno.  Do this in a subroutine as that provides
** a convenient place to set a breakpoint.
*/
static void storeLastErrno(unixFile *pFile, int error){
  pFile->lastErrno = error;
}

/*
** Close all file descriptors accumulated in the unixInodeInfo->pUnused list
** of the inode that pFile refers to.  Each list entry is freed and the
** global nUnusedFd counter is decremented once per entry.
*/
static void closePendingFds(unixFile *pFile){
  unixInodeInfo *pInode = pFile->pInode;
  UnixUnusedFd *p;
  UnixUnusedFd *pNext;
  for(p=pInode->pUnused; p; p=pNext){
    pNext = p->pNext;              /* Read before p is freed */
    robust_close(pFile, p->fd, __LINE__);
    sqlite3_free(p);
    nUnusedFd--;
  }
  pInode->pUnused = 0;
}

/*
** Release a unixInodeInfo structure previously allocated by findInodeInfo().
**
** The reference count is decremented.  When it reaches zero, any deferred
** file descriptors are closed, the object is unlinked from inodeList and
** its memory is freed.
**
** The mutex entered using the unixEnterMutex() function must be held
** when this function is called.
*/
static void releaseInodeInfo(unixFile *pFile){
  unixInodeInfo *pInode = pFile->pInode;
  assert( unixMutexHeld() );
  if( ALWAYS(pInode) ){
    pInode->nRef--;
    if( pInode->nRef==0 ){
      assert( pInode->pShmNode==0 );
      closePendingFds(pFile);
      /* Splice pInode out of the doubly linked list.  An entry with no
      ** predecessor must be the list head. */
      if( pInode->pPrev ){
        assert( pInode->pPrev->pNext==pInode );
        pInode->pPrev->pNext = pInode->pNext;
      }else{
        assert( inodeList==pInode );
        inodeList = pInode->pNext;
      }
      if( pInode->pNext ){
        assert( pInode->pNext->pPrev==pInode );
        pInode->pNext->pPrev = pInode->pPrev;
      }
      sqlite3_free(pInode);
    }
  }
  assert( inodeList!=0 || nUnusedFd==0 );
}

/*
** Given a file descriptor, locate the unixInodeInfo object that
** describes that file descriptor.  Create a new one if necessary.  The
** return value might be uninitialized if an error occurs.
**
** The mutex entered using the unixEnterMutex() function must be held
** when this function is called.
**
** Return an appropriate error code.  *ppInode is written only when
** SQLITE_OK is returned; on that path the object's nRef has been set to 1
** (new object) or incremented (existing object).
*/
static int findInodeInfo(
  unixFile *pFile,               /* Unix file with file desc used in the key */
  unixInodeInfo **ppInode        /* Return the unixInodeInfo object here */
){
  int rc;                        /* System call return code */
  int fd;                        /* The file descriptor for pFile */
  struct unixFileId fileId;      /* Lookup key for the unixInodeInfo */
  struct stat statbuf;           /* Low-level file information */
  unixInodeInfo *pInode = 0;     /* Candidate unixInodeInfo object */

  assert( unixMutexHeld() );

  /* Get low-level information about the file that we can use to
  ** create a unique name for the file.
  */
  fd = pFile->h;
  rc = osFstat(fd, &statbuf);
  if( rc!=0 ){
    storeLastErrno(pFile, errno);
#if defined(EOVERFLOW) && defined(SQLITE_DISABLE_LFS)
    if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
#endif
    return SQLITE_IOERR;
  }

#ifdef __APPLE__
  /* On OS X on an msdos filesystem, the inode number is reported
  ** incorrectly for zero-size files.  See ticket #3260.  To work
  ** around this problem (we consider it a bug in OS X, not SQLite)
  ** we always increase the file size to 1 by writing a single byte
  ** prior to accessing the inode number.  The one byte written is
  ** an ASCII 'S' character which also happens to be the first byte
  ** in the header of every SQLite database.  In this way, if there
  ** is a race condition such that another thread has already populated
  ** the first page of the database, no damage is done.
  */
  if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){
    do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR );
    if( rc!=1 ){
      storeLastErrno(pFile, errno);
      return SQLITE_IOERR;
    }
    rc = osFstat(fd, &statbuf);
    if( rc!=0 ){
      storeLastErrno(pFile, errno);
      return SQLITE_IOERR;
    }
  }
#endif

  /* The key is zeroed first because it is compared with memcmp() below,
  ** which also compares any padding bytes in the structure. */
  memset(&fileId, 0, sizeof(fileId));
  fileId.dev = statbuf.st_dev;
#if OS_VXWORKS
  fileId.pId = pFile->pId;
#else
  fileId.ino = (u64)statbuf.st_ino;
#endif
  assert( inodeList!=0 || nUnusedFd==0 );
  pInode = inodeList;
  while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){
    pInode = pInode->pNext;
  }
  if( pInode==0 ){
    /* No existing entry: allocate one and push it on the front of the list */
    pInode = sqlite3_malloc64( sizeof(*pInode) );
    if( pInode==0 ){
      return SQLITE_NOMEM_BKPT;
    }
    memset(pInode, 0, sizeof(*pInode));
    memcpy(&pInode->fileId, &fileId, sizeof(fileId));
    pInode->nRef = 1;
    pInode->pNext = inodeList;
    pInode->pPrev = 0;
    if( inodeList ) inodeList->pPrev = pInode;
    inodeList = pInode;
  }else{
    pInode->nRef++;
  }
  *ppInode = pInode;
  return SQLITE_OK;
}

/*
** Return TRUE if pFile has been renamed or unlinked since it was first opened.
**
** On non-VxWorks builds this is detected by calling stat() on pFile->zPath
** and comparing the inode number with the one recorded in pFile->pInode;
** a failing stat() also counts as "moved".  Only the inode number is
** compared, not the device number.  A file with no pInode is never
** reported as moved.
*/
static int fileHasMoved(unixFile *pFile){
#if OS_VXWORKS
  return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId;
#else
  struct stat buf;
  return pFile->pInode!=0 &&
      (osStat(pFile->zPath, &buf)!=0
         || (u64)buf.st_ino!=pFile->pInode->fileId.ino);
#endif
}


/*
** Check a unixFile that is a database.  Verify the following:
**
** (1) There is exactly one hard link on the file
** (2) The file is not a symbolic link
** (3) The file has not been renamed or unlinked
**
** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.
**
** At most one warning is issued per call: the checks run in order and the
** function returns after the first one that fails.
*/
static void verifyDbFile(unixFile *pFile){
  struct stat buf;
  int rc;

  /* These verifications occur for the main database only */
  if( pFile->ctrlFlags & UNIXFILE_NOLOCK ) return;

  rc = osFstat(pFile->h, &buf);
  if( rc!=0 ){
    sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);
    return;
  }
  if( buf.st_nlink==0 ){
    sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);
    return;
  }
  if( buf.st_nlink>1 ){
    sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);
    return;
  }
  if( fileHasMoved(pFile) ){
    sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath);
    return;
  }
}


/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero.  The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
  int rc = SQLITE_OK;
  int reserved = 0;
  unixFile *pFile = (unixFile*)id;

  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

  assert( pFile );
  assert( pFile->eFileLock<=SHARED_LOCK );
  unixEnterMutex(); /* Because pFile->pInode is shared across threads */

  /* Check if a thread in this process holds such a lock */
  if( pFile->pInode->eFileLock>SHARED_LOCK ){
    reserved = 1;
  }

  /* Otherwise see if some other process holds it.  The probe is an
  ** F_GETLK for a write-lock on the reserved byte: if the kernel leaves
  ** l_type as anything other than F_UNLCK, a conflicting lock exists.
  ** The probe is skipped when this process holds the exclusive process
  ** lock (bProcessLock).
  */
#ifndef __DJGPP__
  if( !reserved && !pFile->pInode->bProcessLock ){
    struct flock lock;
    lock.l_whence = SEEK_SET;
    lock.l_start = RESERVED_BYTE;
    lock.l_len = 1;
    lock.l_type = F_WRLCK;
    if( osFcntl(pFile->h, F_GETLK, &lock) ){
      rc = SQLITE_IOERR_CHECKRESERVEDLOCK;
      storeLastErrno(pFile, errno);
    } else if( lock.l_type!=F_UNLCK ){
      reserved = 1;
    }
  }
#endif

  unixLeaveMutex();
  OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved));

  *pResOut = reserved;
  return rc;
}

/*
** Set a posix-advisory-lock.
**
** There are two versions of this routine.  If compiled with
** SQLITE_ENABLE_SETLK_TIMEOUT then the routine has an extra parameter
** which is a pointer to a unixFile.  If the unixFile->iBusyTimeout
** value is set, then it is the number of milliseconds to wait before
** failing the lock.  The iBusyTimeout value is always reset back to
** zero on each call.
**
** NOTE(review): the loop below decrements iBusyTimeout once per failed
** retry, so it only reaches zero when the lock is never obtained.  If the
** lock succeeds earlier the remaining count is left in place - confirm
** whether callers rely on the "reset back to zero" statement above.
**
** If SQLITE_ENABLE_SETLK_TIMEOUT is not defined, then do a non-blocking
** attempt to set the lock.
*/
#ifndef SQLITE_ENABLE_SETLK_TIMEOUT
# define osSetPosixAdvisoryLock(h,x,t) osFcntl(h,F_SETLK,x)
#else
static int osSetPosixAdvisoryLock(
  int h,                /* The file descriptor on which to take the lock */
  struct flock *pLock,  /* The description of the lock */
  unixFile *pFile       /* Structure holding timeout value */
){
  int rc = osFcntl(h,F_SETLK,pLock);
  while( rc<0 && pFile->iBusyTimeout>0 ){
    /* On systems that support some kind of blocking file lock with a timeout,
    ** make appropriate changes here to invoke that blocking file lock.  On
    ** generic posix, however, there is no such API.  So we simply try the
    ** lock once every millisecond until either the timeout expires, or until
    ** the lock is obtained. */
    usleep(1000);
    rc = osFcntl(h,F_SETLK,pLock);
    pFile->iBusyTimeout--;
  }
  return rc;
}
#endif /* SQLITE_ENABLE_SETLK_TIMEOUT */


/*
** Attempt to set a system-lock on the file pFile.  The lock is
** described by pLock.
**
** If the pFile was opened read/write from unix-excl, then the only lock
** ever obtained is an exclusive lock, and it is obtained exactly once
** the first time any lock is attempted.  All subsequent system locking
** operations become no-ops.  Locking operations still happen internally,
** in order to coordinate access between separate database connections
** within this process, but all of that is handled in memory and the
** operating system does not participate.
**
** This function is a pass-through to fcntl(F_SETLK) if pFile is using
** any VFS other than "unix-excl" or if pFile is opened on "unix-excl"
** and is read-only.
**
** Zero is returned if the call completes successfully, or -1 if a call
** to fcntl() fails. In this case, errno is set appropriately (by fcntl()).
1526 */ 1527 static int unixFileLock(unixFile *pFile, struct flock *pLock){ 1528 int rc; 1529 unixInodeInfo *pInode = pFile->pInode; 1530 assert( unixMutexHeld() ); 1531 assert( pInode!=0 ); 1532 if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){ 1533 if( pInode->bProcessLock==0 ){ 1534 struct flock lock; 1535 assert( pInode->nLock==0 ); 1536 lock.l_whence = SEEK_SET; 1537 lock.l_start = SHARED_FIRST; 1538 lock.l_len = SHARED_SIZE; 1539 lock.l_type = F_WRLCK; 1540 rc = osSetPosixAdvisoryLock(pFile->h, &lock, pFile); 1541 if( rc<0 ) return rc; 1542 pInode->bProcessLock = 1; 1543 pInode->nLock++; 1544 }else{ 1545 rc = 0; 1546 } 1547 }else{ 1548 rc = osSetPosixAdvisoryLock(pFile->h, pLock, pFile); 1549 } 1550 return rc; 1551 } 1552 1553 /* 1554 ** Lock the file with the lock specified by parameter eFileLock - one 1555 ** of the following: 1556 ** 1557 ** (1) SHARED_LOCK 1558 ** (2) RESERVED_LOCK 1559 ** (3) PENDING_LOCK 1560 ** (4) EXCLUSIVE_LOCK 1561 ** 1562 ** Sometimes when requesting one lock state, additional lock states 1563 ** are inserted in between. The locking might fail on one of the later 1564 ** transitions leaving the lock state different from what it started but 1565 ** still short of its goal. The following chart shows the allowed 1566 ** transitions and the inserted intermediate states: 1567 ** 1568 ** UNLOCKED -> SHARED 1569 ** SHARED -> RESERVED 1570 ** SHARED -> (PENDING) -> EXCLUSIVE 1571 ** RESERVED -> (PENDING) -> EXCLUSIVE 1572 ** PENDING -> EXCLUSIVE 1573 ** 1574 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 1575 ** routine to lower a locking level. 1576 */ 1577 static int unixLock(sqlite3_file *id, int eFileLock){ 1578 /* The following describes the implementation of the various locks and 1579 ** lock transitions in terms of the POSIX advisory shared and exclusive 1580 ** lock primitives (called read-locks and write-locks below, to avoid 1581 ** confusion with SQLite lock names). 
The algorithms are complicated 1582 ** slightly in order to be compatible with Windows95 systems simultaneously 1583 ** accessing the same database file, in case that is ever required. 1584 ** 1585 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved 1586 ** byte', each single bytes at well known offsets, and the 'shared byte 1587 ** range', a range of 510 bytes at a well known offset. 1588 ** 1589 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending 1590 ** byte'. If this is successful, 'shared byte range' is read-locked 1591 ** and the lock on the 'pending byte' released. (Legacy note: When 1592 ** SQLite was first developed, Windows95 systems were still very common, 1593 ** and Widnows95 lacks a shared-lock capability. So on Windows95, a 1594 ** single randomly selected by from the 'shared byte range' is locked. 1595 ** Windows95 is now pretty much extinct, but this work-around for the 1596 ** lack of shared-locks on Windows95 lives on, for backwards 1597 ** compatibility.) 1598 ** 1599 ** A process may only obtain a RESERVED lock after it has a SHARED lock. 1600 ** A RESERVED lock is implemented by grabbing a write-lock on the 1601 ** 'reserved byte'. 1602 ** 1603 ** A process may only obtain a PENDING lock after it has obtained a 1604 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock 1605 ** on the 'pending byte'. This ensures that no new SHARED locks can be 1606 ** obtained, but existing SHARED locks are allowed to persist. A process 1607 ** does not have to obtain a RESERVED lock on the way to a PENDING lock. 1608 ** This property is used by the algorithm for rolling back a journal file 1609 ** after a crash. 1610 ** 1611 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is 1612 ** implemented by obtaining a write-lock on the entire 'shared byte 1613 ** range'. 
Since all other locks require a read-lock on one of the bytes 1614 ** within this range, this ensures that no other locks are held on the 1615 ** database. 1616 */ 1617 int rc = SQLITE_OK; 1618 unixFile *pFile = (unixFile*)id; 1619 unixInodeInfo *pInode; 1620 struct flock lock; 1621 int tErrno = 0; 1622 1623 assert( pFile ); 1624 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h, 1625 azFileLock(eFileLock), azFileLock(pFile->eFileLock), 1626 azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared, 1627 osGetpid(0))); 1628 1629 /* If there is already a lock of this type or more restrictive on the 1630 ** unixFile, do nothing. Don't use the end_lock: exit path, as 1631 ** unixEnterMutex() hasn't been called yet. 1632 */ 1633 if( pFile->eFileLock>=eFileLock ){ 1634 OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h, 1635 azFileLock(eFileLock))); 1636 return SQLITE_OK; 1637 } 1638 1639 /* Make sure the locking sequence is correct. 1640 ** (1) We never move from unlocked to anything higher than shared lock. 1641 ** (2) SQLite never explicitly requests a pendig lock. 1642 ** (3) A shared lock is always held when a reserve lock is requested. 1643 */ 1644 assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); 1645 assert( eFileLock!=PENDING_LOCK ); 1646 assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); 1647 1648 /* This mutex is needed because pFile->pInode is shared across threads 1649 */ 1650 unixEnterMutex(); 1651 pInode = pFile->pInode; 1652 1653 /* If some thread using this PID has a lock via a different unixFile* 1654 ** handle that precludes the requested lock, return BUSY. 
1655 */ 1656 if( (pFile->eFileLock!=pInode->eFileLock && 1657 (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) 1658 ){ 1659 rc = SQLITE_BUSY; 1660 goto end_lock; 1661 } 1662 1663 /* If a SHARED lock is requested, and some thread using this PID already 1664 ** has a SHARED or RESERVED lock, then increment reference counts and 1665 ** return SQLITE_OK. 1666 */ 1667 if( eFileLock==SHARED_LOCK && 1668 (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ 1669 assert( eFileLock==SHARED_LOCK ); 1670 assert( pFile->eFileLock==0 ); 1671 assert( pInode->nShared>0 ); 1672 pFile->eFileLock = SHARED_LOCK; 1673 pInode->nShared++; 1674 pInode->nLock++; 1675 goto end_lock; 1676 } 1677 1678 1679 /* A PENDING lock is needed before acquiring a SHARED lock and before 1680 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 1681 ** be released. 1682 */ 1683 lock.l_len = 1L; 1684 lock.l_whence = SEEK_SET; 1685 if( eFileLock==SHARED_LOCK 1686 || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) 1687 ){ 1688 lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK); 1689 lock.l_start = PENDING_BYTE; 1690 if( unixFileLock(pFile, &lock) ){ 1691 tErrno = errno; 1692 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 1693 if( rc!=SQLITE_BUSY ){ 1694 storeLastErrno(pFile, tErrno); 1695 } 1696 goto end_lock; 1697 } 1698 } 1699 1700 1701 /* If control gets to this point, then actually go ahead and make 1702 ** operating system calls for the specified lock. 
1703 */ 1704 if( eFileLock==SHARED_LOCK ){ 1705 assert( pInode->nShared==0 ); 1706 assert( pInode->eFileLock==0 ); 1707 assert( rc==SQLITE_OK ); 1708 1709 /* Now get the read-lock */ 1710 lock.l_start = SHARED_FIRST; 1711 lock.l_len = SHARED_SIZE; 1712 if( unixFileLock(pFile, &lock) ){ 1713 tErrno = errno; 1714 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 1715 } 1716 1717 /* Drop the temporary PENDING lock */ 1718 lock.l_start = PENDING_BYTE; 1719 lock.l_len = 1L; 1720 lock.l_type = F_UNLCK; 1721 if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){ 1722 /* This could happen with a network mount */ 1723 tErrno = errno; 1724 rc = SQLITE_IOERR_UNLOCK; 1725 } 1726 1727 if( rc ){ 1728 if( rc!=SQLITE_BUSY ){ 1729 storeLastErrno(pFile, tErrno); 1730 } 1731 goto end_lock; 1732 }else{ 1733 pFile->eFileLock = SHARED_LOCK; 1734 pInode->nLock++; 1735 pInode->nShared = 1; 1736 } 1737 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ 1738 /* We are trying for an exclusive lock but another thread in this 1739 ** same process is still holding a shared lock. */ 1740 rc = SQLITE_BUSY; 1741 }else{ 1742 /* The request was for a RESERVED or EXCLUSIVE lock. It is 1743 ** assumed that there is a SHARED or greater lock on the file 1744 ** already. 1745 */ 1746 assert( 0!=pFile->eFileLock ); 1747 lock.l_type = F_WRLCK; 1748 1749 assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK ); 1750 if( eFileLock==RESERVED_LOCK ){ 1751 lock.l_start = RESERVED_BYTE; 1752 lock.l_len = 1L; 1753 }else{ 1754 lock.l_start = SHARED_FIRST; 1755 lock.l_len = SHARED_SIZE; 1756 } 1757 1758 if( unixFileLock(pFile, &lock) ){ 1759 tErrno = errno; 1760 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 1761 if( rc!=SQLITE_BUSY ){ 1762 storeLastErrno(pFile, tErrno); 1763 } 1764 } 1765 } 1766 1767 1768 #ifdef SQLITE_DEBUG 1769 /* Set up the transaction-counter change checking flags when 1770 ** transitioning from a SHARED to a RESERVED lock. 
The change 1771 ** from SHARED to RESERVED marks the beginning of a normal 1772 ** write operation (not a hot journal rollback). 1773 */ 1774 if( rc==SQLITE_OK 1775 && pFile->eFileLock<=SHARED_LOCK 1776 && eFileLock==RESERVED_LOCK 1777 ){ 1778 pFile->transCntrChng = 0; 1779 pFile->dbUpdate = 0; 1780 pFile->inNormalWrite = 1; 1781 } 1782 #endif 1783 1784 1785 if( rc==SQLITE_OK ){ 1786 pFile->eFileLock = eFileLock; 1787 pInode->eFileLock = eFileLock; 1788 }else if( eFileLock==EXCLUSIVE_LOCK ){ 1789 pFile->eFileLock = PENDING_LOCK; 1790 pInode->eFileLock = PENDING_LOCK; 1791 } 1792 1793 end_lock: 1794 unixLeaveMutex(); 1795 OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock), 1796 rc==SQLITE_OK ? "ok" : "failed")); 1797 return rc; 1798 } 1799 1800 /* 1801 ** Add the file descriptor used by file handle pFile to the corresponding 1802 ** pUnused list. 1803 */ 1804 static void setPendingFd(unixFile *pFile){ 1805 unixInodeInfo *pInode = pFile->pInode; 1806 UnixUnusedFd *p = pFile->pPreallocatedUnused; 1807 p->pNext = pInode->pUnused; 1808 pInode->pUnused = p; 1809 pFile->h = -1; 1810 pFile->pPreallocatedUnused = 0; 1811 nUnusedFd++; 1812 } 1813 1814 /* 1815 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 1816 ** must be either NO_LOCK or SHARED_LOCK. 1817 ** 1818 ** If the locking level of the file descriptor is already at or below 1819 ** the requested locking level, this routine is a no-op. 1820 ** 1821 ** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED 1822 ** the byte range is divided into 2 parts and the first part is unlocked then 1823 ** set to a read lock, then the other part is simply unlocked. This works 1824 ** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to 1825 ** remove the write lock on a region when a read lock is set. 
1826 */ 1827 static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ 1828 unixFile *pFile = (unixFile*)id; 1829 unixInodeInfo *pInode; 1830 struct flock lock; 1831 int rc = SQLITE_OK; 1832 1833 assert( pFile ); 1834 OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock, 1835 pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, 1836 osGetpid(0))); 1837 1838 assert( eFileLock<=SHARED_LOCK ); 1839 if( pFile->eFileLock<=eFileLock ){ 1840 return SQLITE_OK; 1841 } 1842 unixEnterMutex(); 1843 pInode = pFile->pInode; 1844 assert( pInode->nShared!=0 ); 1845 if( pFile->eFileLock>SHARED_LOCK ){ 1846 assert( pInode->eFileLock==pFile->eFileLock ); 1847 1848 #ifdef SQLITE_DEBUG 1849 /* When reducing a lock such that other processes can start 1850 ** reading the database file again, make sure that the 1851 ** transaction counter was updated if any part of the database 1852 ** file changed. If the transaction counter is not updated, 1853 ** other connections to the same file might not realize that 1854 ** the file has changed and hence might not know to flush their 1855 ** cache. The use of a stale cache can lead to database corruption. 1856 */ 1857 pFile->inNormalWrite = 0; 1858 #endif 1859 1860 /* downgrading to a shared lock on NFS involves clearing the write lock 1861 ** before establishing the readlock - to avoid a race condition we downgrade 1862 ** the lock in 2 blocks, so that part of the range will be covered by a 1863 ** write lock until the rest is covered by a read lock: 1864 ** 1: [WWWWW] 1865 ** 2: [....W] 1866 ** 3: [RRRRW] 1867 ** 4: [RRRR.] 
1868 */ 1869 if( eFileLock==SHARED_LOCK ){ 1870 #if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE 1871 (void)handleNFSUnlock; 1872 assert( handleNFSUnlock==0 ); 1873 #endif 1874 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 1875 if( handleNFSUnlock ){ 1876 int tErrno; /* Error code from system call errors */ 1877 off_t divSize = SHARED_SIZE - 1; 1878 1879 lock.l_type = F_UNLCK; 1880 lock.l_whence = SEEK_SET; 1881 lock.l_start = SHARED_FIRST; 1882 lock.l_len = divSize; 1883 if( unixFileLock(pFile, &lock)==(-1) ){ 1884 tErrno = errno; 1885 rc = SQLITE_IOERR_UNLOCK; 1886 storeLastErrno(pFile, tErrno); 1887 goto end_unlock; 1888 } 1889 lock.l_type = F_RDLCK; 1890 lock.l_whence = SEEK_SET; 1891 lock.l_start = SHARED_FIRST; 1892 lock.l_len = divSize; 1893 if( unixFileLock(pFile, &lock)==(-1) ){ 1894 tErrno = errno; 1895 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK); 1896 if( IS_LOCK_ERROR(rc) ){ 1897 storeLastErrno(pFile, tErrno); 1898 } 1899 goto end_unlock; 1900 } 1901 lock.l_type = F_UNLCK; 1902 lock.l_whence = SEEK_SET; 1903 lock.l_start = SHARED_FIRST+divSize; 1904 lock.l_len = SHARED_SIZE-divSize; 1905 if( unixFileLock(pFile, &lock)==(-1) ){ 1906 tErrno = errno; 1907 rc = SQLITE_IOERR_UNLOCK; 1908 storeLastErrno(pFile, tErrno); 1909 goto end_unlock; 1910 } 1911 }else 1912 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 1913 { 1914 lock.l_type = F_RDLCK; 1915 lock.l_whence = SEEK_SET; 1916 lock.l_start = SHARED_FIRST; 1917 lock.l_len = SHARED_SIZE; 1918 if( unixFileLock(pFile, &lock) ){ 1919 /* In theory, the call to unixFileLock() cannot fail because another 1920 ** process is holding an incompatible lock. If it does, this 1921 ** indicates that the other process is not following the locking 1922 ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning 1923 ** SQLITE_BUSY would confuse the upper layer (in practice it causes 1924 ** an assert to fail). 
*/ 1925 rc = SQLITE_IOERR_RDLOCK; 1926 storeLastErrno(pFile, errno); 1927 goto end_unlock; 1928 } 1929 } 1930 } 1931 lock.l_type = F_UNLCK; 1932 lock.l_whence = SEEK_SET; 1933 lock.l_start = PENDING_BYTE; 1934 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE ); 1935 if( unixFileLock(pFile, &lock)==0 ){ 1936 pInode->eFileLock = SHARED_LOCK; 1937 }else{ 1938 rc = SQLITE_IOERR_UNLOCK; 1939 storeLastErrno(pFile, errno); 1940 goto end_unlock; 1941 } 1942 } 1943 if( eFileLock==NO_LOCK ){ 1944 /* Decrement the shared lock counter. Release the lock using an 1945 ** OS call only when all threads in this same process have released 1946 ** the lock. 1947 */ 1948 pInode->nShared--; 1949 if( pInode->nShared==0 ){ 1950 lock.l_type = F_UNLCK; 1951 lock.l_whence = SEEK_SET; 1952 lock.l_start = lock.l_len = 0L; 1953 if( unixFileLock(pFile, &lock)==0 ){ 1954 pInode->eFileLock = NO_LOCK; 1955 }else{ 1956 rc = SQLITE_IOERR_UNLOCK; 1957 storeLastErrno(pFile, errno); 1958 pInode->eFileLock = NO_LOCK; 1959 pFile->eFileLock = NO_LOCK; 1960 } 1961 } 1962 1963 /* Decrement the count of locks against this same file. When the 1964 ** count reaches zero, close any other file descriptors whose close 1965 ** was deferred because of outstanding locks. 1966 */ 1967 pInode->nLock--; 1968 assert( pInode->nLock>=0 ); 1969 if( pInode->nLock==0 ){ 1970 closePendingFds(pFile); 1971 } 1972 } 1973 1974 end_unlock: 1975 unixLeaveMutex(); 1976 if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; 1977 return rc; 1978 } 1979 1980 /* 1981 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 1982 ** must be either NO_LOCK or SHARED_LOCK. 1983 ** 1984 ** If the locking level of the file descriptor is already at or below 1985 ** the requested locking level, this routine is a no-op. 
1986 */ 1987 static int unixUnlock(sqlite3_file *id, int eFileLock){ 1988 #if SQLITE_MAX_MMAP_SIZE>0 1989 assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); 1990 #endif 1991 return posixUnlock(id, eFileLock, 0); 1992 } 1993 1994 #if SQLITE_MAX_MMAP_SIZE>0 1995 static int unixMapfile(unixFile *pFd, i64 nByte); 1996 static void unixUnmapfile(unixFile *pFd); 1997 #endif 1998 1999 /* 2000 ** This function performs the parts of the "close file" operation 2001 ** common to all locking schemes. It closes the directory and file 2002 ** handles, if they are valid, and sets all fields of the unixFile 2003 ** structure to 0. 2004 ** 2005 ** It is *not* necessary to hold the mutex when this routine is called, 2006 ** even on VxWorks. A mutex will be acquired on VxWorks by the 2007 ** vxworksReleaseFileId() routine. 2008 */ 2009 static int closeUnixFile(sqlite3_file *id){ 2010 unixFile *pFile = (unixFile*)id; 2011 #if SQLITE_MAX_MMAP_SIZE>0 2012 unixUnmapfile(pFile); 2013 #endif 2014 if( pFile->h>=0 ){ 2015 robust_close(pFile, pFile->h, __LINE__); 2016 pFile->h = -1; 2017 } 2018 #if OS_VXWORKS 2019 if( pFile->pId ){ 2020 if( pFile->ctrlFlags & UNIXFILE_DELETE ){ 2021 osUnlink(pFile->pId->zCanonicalName); 2022 } 2023 vxworksReleaseFileId(pFile->pId); 2024 pFile->pId = 0; 2025 } 2026 #endif 2027 #ifdef SQLITE_UNLINK_AFTER_CLOSE 2028 if( pFile->ctrlFlags & UNIXFILE_DELETE ){ 2029 osUnlink(pFile->zPath); 2030 sqlite3_free(*(char**)&pFile->zPath); 2031 pFile->zPath = 0; 2032 } 2033 #endif 2034 OSTRACE(("CLOSE %-3d\n", pFile->h)); 2035 OpenCounter(-1); 2036 sqlite3_free(pFile->pPreallocatedUnused); 2037 memset(pFile, 0, sizeof(unixFile)); 2038 return SQLITE_OK; 2039 } 2040 2041 /* 2042 ** Close a file. 2043 */ 2044 static int unixClose(sqlite3_file *id){ 2045 int rc = SQLITE_OK; 2046 unixFile *pFile = (unixFile *)id; 2047 verifyDbFile(pFile); 2048 unixUnlock(id, NO_LOCK); 2049 unixEnterMutex(); 2050 2051 /* unixFile.pInode is always valid here. 
Otherwise, a different close 2052 ** routine (e.g. nolockClose()) would be called instead. 2053 */ 2054 assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 ); 2055 if( ALWAYS(pFile->pInode) && pFile->pInode->nLock ){ 2056 /* If there are outstanding locks, do not actually close the file just 2057 ** yet because that would clear those locks. Instead, add the file 2058 ** descriptor to pInode->pUnused list. It will be automatically closed 2059 ** when the last lock is cleared. 2060 */ 2061 setPendingFd(pFile); 2062 } 2063 releaseInodeInfo(pFile); 2064 rc = closeUnixFile(id); 2065 unixLeaveMutex(); 2066 return rc; 2067 } 2068 2069 /************** End of the posix advisory lock implementation ***************** 2070 ******************************************************************************/ 2071 2072 /****************************************************************************** 2073 ****************************** No-op Locking ********************************** 2074 ** 2075 ** Of the various locking implementations available, this is by far the 2076 ** simplest: locking is ignored. No attempt is made to lock the database 2077 ** file for reading or writing. 2078 ** 2079 ** This locking mode is appropriate for use on read-only databases 2080 ** (ex: databases that are burned into CD-ROM, for example.) It can 2081 ** also be used if the application employs some external mechanism to 2082 ** prevent simultaneous access of the same database by two or more 2083 ** database connections. But there is a serious risk of database 2084 ** corruption if this locking mode is used in situations where multiple 2085 ** database connections are accessing the same database file at the same 2086 ** time and one or more of those connections are writing. 
2087 */ 2088 2089 static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){ 2090 UNUSED_PARAMETER(NotUsed); 2091 *pResOut = 0; 2092 return SQLITE_OK; 2093 } 2094 static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){ 2095 UNUSED_PARAMETER2(NotUsed, NotUsed2); 2096 return SQLITE_OK; 2097 } 2098 static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ 2099 UNUSED_PARAMETER2(NotUsed, NotUsed2); 2100 return SQLITE_OK; 2101 } 2102 2103 /* 2104 ** Close the file. 2105 */ 2106 static int nolockClose(sqlite3_file *id) { 2107 return closeUnixFile(id); 2108 } 2109 2110 /******************* End of the no-op lock implementation ********************* 2111 ******************************************************************************/ 2112 2113 /****************************************************************************** 2114 ************************* Begin dot-file Locking ****************************** 2115 ** 2116 ** The dotfile locking implementation uses the existence of separate lock 2117 ** files (really a directory) to control access to the database. This works 2118 ** on just about every filesystem imaginable. But there are serious downsides: 2119 ** 2120 ** (1) There is zero concurrency. A single reader blocks all other 2121 ** connections from reading or writing the database. 2122 ** 2123 ** (2) An application crash or power loss can leave stale lock files 2124 ** sitting around that need to be cleared manually. 2125 ** 2126 ** Nevertheless, a dotlock is an appropriate locking mode for use if no 2127 ** other locking strategy is available. 2128 ** 2129 ** Dotfile locking works by creating a subdirectory in the same directory as 2130 ** the database and with the same name but with a ".lock" extension added. 2131 ** The existence of a lock directory implies an EXCLUSIVE lock. All other 2132 ** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. 
2133 */ 2134 2135 /* 2136 ** The file suffix added to the data base filename in order to create the 2137 ** lock directory. 2138 */ 2139 #define DOTLOCK_SUFFIX ".lock" 2140 2141 /* 2142 ** This routine checks if there is a RESERVED lock held on the specified 2143 ** file by this or any other process. If such a lock is held, set *pResOut 2144 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2145 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2146 ** 2147 ** In dotfile locking, either a lock exists or it does not. So in this 2148 ** variation of CheckReservedLock(), *pResOut is set to true if any lock 2149 ** is held on the file and false if the file is unlocked. 2150 */ 2151 static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { 2152 int rc = SQLITE_OK; 2153 int reserved = 0; 2154 unixFile *pFile = (unixFile*)id; 2155 2156 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2157 2158 assert( pFile ); 2159 reserved = osAccess((const char*)pFile->lockingContext, 0)==0; 2160 OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved)); 2161 *pResOut = reserved; 2162 return rc; 2163 } 2164 2165 /* 2166 ** Lock the file with the lock specified by parameter eFileLock - one 2167 ** of the following: 2168 ** 2169 ** (1) SHARED_LOCK 2170 ** (2) RESERVED_LOCK 2171 ** (3) PENDING_LOCK 2172 ** (4) EXCLUSIVE_LOCK 2173 ** 2174 ** Sometimes when requesting one lock state, additional lock states 2175 ** are inserted in between. The locking might fail on one of the later 2176 ** transitions leaving the lock state different from what it started but 2177 ** still short of its goal. The following chart shows the allowed 2178 ** transitions and the inserted intermediate states: 2179 ** 2180 ** UNLOCKED -> SHARED 2181 ** SHARED -> RESERVED 2182 ** SHARED -> (PENDING) -> EXCLUSIVE 2183 ** RESERVED -> (PENDING) -> EXCLUSIVE 2184 ** PENDING -> EXCLUSIVE 2185 ** 2186 ** This routine will only increase a lock. 
Use the sqlite3OsUnlock() 2187 ** routine to lower a locking level. 2188 ** 2189 ** With dotfile locking, we really only support state (4): EXCLUSIVE. 2190 ** But we track the other locking levels internally. 2191 */ 2192 static int dotlockLock(sqlite3_file *id, int eFileLock) { 2193 unixFile *pFile = (unixFile*)id; 2194 char *zLockFile = (char *)pFile->lockingContext; 2195 int rc = SQLITE_OK; 2196 2197 2198 /* If we have any lock, then the lock file already exists. All we have 2199 ** to do is adjust our internal record of the lock level. 2200 */ 2201 if( pFile->eFileLock > NO_LOCK ){ 2202 pFile->eFileLock = eFileLock; 2203 /* Always update the timestamp on the old file */ 2204 #ifdef HAVE_UTIME 2205 utime(zLockFile, NULL); 2206 #else 2207 utimes(zLockFile, NULL); 2208 #endif 2209 return SQLITE_OK; 2210 } 2211 2212 /* grab an exclusive lock */ 2213 rc = osMkdir(zLockFile, 0777); 2214 if( rc<0 ){ 2215 /* failed to open/create the lock directory */ 2216 int tErrno = errno; 2217 if( EEXIST == tErrno ){ 2218 rc = SQLITE_BUSY; 2219 } else { 2220 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2221 if( rc!=SQLITE_BUSY ){ 2222 storeLastErrno(pFile, tErrno); 2223 } 2224 } 2225 return rc; 2226 } 2227 2228 /* got it, set the type and return ok */ 2229 pFile->eFileLock = eFileLock; 2230 return rc; 2231 } 2232 2233 /* 2234 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2235 ** must be either NO_LOCK or SHARED_LOCK. 2236 ** 2237 ** If the locking level of the file descriptor is already at or below 2238 ** the requested locking level, this routine is a no-op. 2239 ** 2240 ** When the locking level reaches NO_LOCK, delete the lock file. 
2241 */ 2242 static int dotlockUnlock(sqlite3_file *id, int eFileLock) { 2243 unixFile *pFile = (unixFile*)id; 2244 char *zLockFile = (char *)pFile->lockingContext; 2245 int rc; 2246 2247 assert( pFile ); 2248 OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, 2249 pFile->eFileLock, osGetpid(0))); 2250 assert( eFileLock<=SHARED_LOCK ); 2251 2252 /* no-op if possible */ 2253 if( pFile->eFileLock==eFileLock ){ 2254 return SQLITE_OK; 2255 } 2256 2257 /* To downgrade to shared, simply update our internal notion of the 2258 ** lock state. No need to mess with the file on disk. 2259 */ 2260 if( eFileLock==SHARED_LOCK ){ 2261 pFile->eFileLock = SHARED_LOCK; 2262 return SQLITE_OK; 2263 } 2264 2265 /* To fully unlock the database, delete the lock file */ 2266 assert( eFileLock==NO_LOCK ); 2267 rc = osRmdir(zLockFile); 2268 if( rc<0 ){ 2269 int tErrno = errno; 2270 if( tErrno==ENOENT ){ 2271 rc = SQLITE_OK; 2272 }else{ 2273 rc = SQLITE_IOERR_UNLOCK; 2274 storeLastErrno(pFile, tErrno); 2275 } 2276 return rc; 2277 } 2278 pFile->eFileLock = NO_LOCK; 2279 return SQLITE_OK; 2280 } 2281 2282 /* 2283 ** Close a file. Make sure the lock has been released before closing. 2284 */ 2285 static int dotlockClose(sqlite3_file *id) { 2286 unixFile *pFile = (unixFile*)id; 2287 assert( id!=0 ); 2288 dotlockUnlock(id, NO_LOCK); 2289 sqlite3_free(pFile->lockingContext); 2290 return closeUnixFile(id); 2291 } 2292 /****************** End of the dot-file lock implementation ******************* 2293 ******************************************************************************/ 2294 2295 /****************************************************************************** 2296 ************************** Begin flock Locking ******************************** 2297 ** 2298 ** Use the flock() system call to do file locking. 
2299 ** 2300 ** flock() locking is like dot-file locking in that the various 2301 ** fine-grain locking levels supported by SQLite are collapsed into 2302 ** a single exclusive lock. In other words, SHARED, RESERVED, and 2303 ** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite 2304 ** still works when you do this, but concurrency is reduced since 2305 ** only a single process can be reading the database at a time. 2306 ** 2307 ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off 2308 */ 2309 #if SQLITE_ENABLE_LOCKING_STYLE 2310 2311 /* 2312 ** Retry flock() calls that fail with EINTR 2313 */ 2314 #ifdef EINTR 2315 static int robust_flock(int fd, int op){ 2316 int rc; 2317 do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR ); 2318 return rc; 2319 } 2320 #else 2321 # define robust_flock(a,b) flock(a,b) 2322 #endif 2323 2324 2325 /* 2326 ** This routine checks if there is a RESERVED lock held on the specified 2327 ** file by this or any other process. If such a lock is held, set *pResOut 2328 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2329 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2330 */ 2331 static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ 2332 int rc = SQLITE_OK; 2333 int reserved = 0; 2334 unixFile *pFile = (unixFile*)id; 2335 2336 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2337 2338 assert( pFile ); 2339 2340 /* Check if a thread in this process holds such a lock */ 2341 if( pFile->eFileLock>SHARED_LOCK ){ 2342 reserved = 1; 2343 } 2344 2345 /* Otherwise see if some other process holds it. 
*/ 2346 if( !reserved ){ 2347 /* attempt to get the lock */ 2348 int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); 2349 if( !lrc ){ 2350 /* got the lock, unlock it */ 2351 lrc = robust_flock(pFile->h, LOCK_UN); 2352 if ( lrc ) { 2353 int tErrno = errno; 2354 /* unlock failed with an error */ 2355 lrc = SQLITE_IOERR_UNLOCK; 2356 storeLastErrno(pFile, tErrno); 2357 rc = lrc; 2358 } 2359 } else { 2360 int tErrno = errno; 2361 reserved = 1; 2362 /* someone else might have it reserved */ 2363 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2364 if( IS_LOCK_ERROR(lrc) ){ 2365 storeLastErrno(pFile, tErrno); 2366 rc = lrc; 2367 } 2368 } 2369 } 2370 OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); 2371 2372 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2373 if( (rc & 0xff) == SQLITE_IOERR ){ 2374 rc = SQLITE_OK; 2375 reserved=1; 2376 } 2377 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2378 *pResOut = reserved; 2379 return rc; 2380 } 2381 2382 /* 2383 ** Lock the file with the lock specified by parameter eFileLock - one 2384 ** of the following: 2385 ** 2386 ** (1) SHARED_LOCK 2387 ** (2) RESERVED_LOCK 2388 ** (3) PENDING_LOCK 2389 ** (4) EXCLUSIVE_LOCK 2390 ** 2391 ** Sometimes when requesting one lock state, additional lock states 2392 ** are inserted in between. The locking might fail on one of the later 2393 ** transitions leaving the lock state different from what it started but 2394 ** still short of its goal. The following chart shows the allowed 2395 ** transitions and the inserted intermediate states: 2396 ** 2397 ** UNLOCKED -> SHARED 2398 ** SHARED -> RESERVED 2399 ** SHARED -> (PENDING) -> EXCLUSIVE 2400 ** RESERVED -> (PENDING) -> EXCLUSIVE 2401 ** PENDING -> EXCLUSIVE 2402 ** 2403 ** flock() only really support EXCLUSIVE locks. We track intermediate 2404 ** lock states in the sqlite3_file structure, but all locks SHARED or 2405 ** above are really EXCLUSIVE locks and exclude all other processes from 2406 ** access the file. 
2407 ** 2408 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2409 ** routine to lower a locking level. 2410 */ 2411 static int flockLock(sqlite3_file *id, int eFileLock) { 2412 int rc = SQLITE_OK; 2413 unixFile *pFile = (unixFile*)id; 2414 2415 assert( pFile ); 2416 2417 /* if we already have a lock, it is exclusive. 2418 ** Just adjust level and punt on outta here. */ 2419 if (pFile->eFileLock > NO_LOCK) { 2420 pFile->eFileLock = eFileLock; 2421 return SQLITE_OK; 2422 } 2423 2424 /* grab an exclusive lock */ 2425 2426 if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { 2427 int tErrno = errno; 2428 /* didn't get, must be busy */ 2429 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 2430 if( IS_LOCK_ERROR(rc) ){ 2431 storeLastErrno(pFile, tErrno); 2432 } 2433 } else { 2434 /* got it, set the type and return ok */ 2435 pFile->eFileLock = eFileLock; 2436 } 2437 OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock), 2438 rc==SQLITE_OK ? "ok" : "failed")); 2439 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2440 if( (rc & 0xff) == SQLITE_IOERR ){ 2441 rc = SQLITE_BUSY; 2442 } 2443 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2444 return rc; 2445 } 2446 2447 2448 /* 2449 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2450 ** must be either NO_LOCK or SHARED_LOCK. 2451 ** 2452 ** If the locking level of the file descriptor is already at or below 2453 ** the requested locking level, this routine is a no-op. 
2454 */ 2455 static int flockUnlock(sqlite3_file *id, int eFileLock) { 2456 unixFile *pFile = (unixFile*)id; 2457 2458 assert( pFile ); 2459 OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock, 2460 pFile->eFileLock, osGetpid(0))); 2461 assert( eFileLock<=SHARED_LOCK ); 2462 2463 /* no-op if possible */ 2464 if( pFile->eFileLock==eFileLock ){ 2465 return SQLITE_OK; 2466 } 2467 2468 /* shared can just be set because we always have an exclusive */ 2469 if (eFileLock==SHARED_LOCK) { 2470 pFile->eFileLock = eFileLock; 2471 return SQLITE_OK; 2472 } 2473 2474 /* no, really, unlock. */ 2475 if( robust_flock(pFile->h, LOCK_UN) ){ 2476 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS 2477 return SQLITE_OK; 2478 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ 2479 return SQLITE_IOERR_UNLOCK; 2480 }else{ 2481 pFile->eFileLock = NO_LOCK; 2482 return SQLITE_OK; 2483 } 2484 } 2485 2486 /* 2487 ** Close a file. 2488 */ 2489 static int flockClose(sqlite3_file *id) { 2490 assert( id!=0 ); 2491 flockUnlock(id, NO_LOCK); 2492 return closeUnixFile(id); 2493 } 2494 2495 #endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */ 2496 2497 /******************* End of the flock lock implementation ********************* 2498 ******************************************************************************/ 2499 2500 /****************************************************************************** 2501 ************************ Begin Named Semaphore Locking ************************ 2502 ** 2503 ** Named semaphore locking is only supported on VxWorks. 2504 ** 2505 ** Semaphore locking is like dot-lock and flock in that it really only 2506 ** supports EXCLUSIVE locking. Only a single process can read or write 2507 ** the database file at a time. This reduces potential concurrency, but 2508 ** makes the lock implementation much easier. 2509 */ 2510 #if OS_VXWORKS 2511 2512 /* 2513 ** This routine checks if there is a RESERVED lock held on the specified 2514 ** file by this or any other process. 
If such a lock is held, set *pResOut 2515 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2516 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2517 */ 2518 static int semXCheckReservedLock(sqlite3_file *id, int *pResOut) { 2519 int rc = SQLITE_OK; 2520 int reserved = 0; 2521 unixFile *pFile = (unixFile*)id; 2522 2523 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2524 2525 assert( pFile ); 2526 2527 /* Check if a thread in this process holds such a lock */ 2528 if( pFile->eFileLock>SHARED_LOCK ){ 2529 reserved = 1; 2530 } 2531 2532 /* Otherwise see if some other process holds it. */ 2533 if( !reserved ){ 2534 sem_t *pSem = pFile->pInode->pSem; 2535 2536 if( sem_trywait(pSem)==-1 ){ 2537 int tErrno = errno; 2538 if( EAGAIN != tErrno ){ 2539 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK); 2540 storeLastErrno(pFile, tErrno); 2541 } else { 2542 /* someone else has the lock when we are in NO_LOCK */ 2543 reserved = (pFile->eFileLock < SHARED_LOCK); 2544 } 2545 }else{ 2546 /* we could have it if we want it */ 2547 sem_post(pSem); 2548 } 2549 } 2550 OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved)); 2551 2552 *pResOut = reserved; 2553 return rc; 2554 } 2555 2556 /* 2557 ** Lock the file with the lock specified by parameter eFileLock - one 2558 ** of the following: 2559 ** 2560 ** (1) SHARED_LOCK 2561 ** (2) RESERVED_LOCK 2562 ** (3) PENDING_LOCK 2563 ** (4) EXCLUSIVE_LOCK 2564 ** 2565 ** Sometimes when requesting one lock state, additional lock states 2566 ** are inserted in between. The locking might fail on one of the later 2567 ** transitions leaving the lock state different from what it started but 2568 ** still short of its goal. 
The following chart shows the allowed 2569 ** transitions and the inserted intermediate states: 2570 ** 2571 ** UNLOCKED -> SHARED 2572 ** SHARED -> RESERVED 2573 ** SHARED -> (PENDING) -> EXCLUSIVE 2574 ** RESERVED -> (PENDING) -> EXCLUSIVE 2575 ** PENDING -> EXCLUSIVE 2576 ** 2577 ** Semaphore locks only really support EXCLUSIVE locks. We track intermediate 2578 ** lock states in the sqlite3_file structure, but all locks SHARED or 2579 ** above are really EXCLUSIVE locks and exclude all other processes from 2580 ** access the file. 2581 ** 2582 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2583 ** routine to lower a locking level. 2584 */ 2585 static int semXLock(sqlite3_file *id, int eFileLock) { 2586 unixFile *pFile = (unixFile*)id; 2587 sem_t *pSem = pFile->pInode->pSem; 2588 int rc = SQLITE_OK; 2589 2590 /* if we already have a lock, it is exclusive. 2591 ** Just adjust level and punt on outta here. */ 2592 if (pFile->eFileLock > NO_LOCK) { 2593 pFile->eFileLock = eFileLock; 2594 rc = SQLITE_OK; 2595 goto sem_end_lock; 2596 } 2597 2598 /* lock semaphore now but bail out when already locked. */ 2599 if( sem_trywait(pSem)==-1 ){ 2600 rc = SQLITE_BUSY; 2601 goto sem_end_lock; 2602 } 2603 2604 /* got it, set the type and return ok */ 2605 pFile->eFileLock = eFileLock; 2606 2607 sem_end_lock: 2608 return rc; 2609 } 2610 2611 /* 2612 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 2613 ** must be either NO_LOCK or SHARED_LOCK. 2614 ** 2615 ** If the locking level of the file descriptor is already at or below 2616 ** the requested locking level, this routine is a no-op. 
2617 */ 2618 static int semXUnlock(sqlite3_file *id, int eFileLock) { 2619 unixFile *pFile = (unixFile*)id; 2620 sem_t *pSem = pFile->pInode->pSem; 2621 2622 assert( pFile ); 2623 assert( pSem ); 2624 OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock, 2625 pFile->eFileLock, osGetpid(0))); 2626 assert( eFileLock<=SHARED_LOCK ); 2627 2628 /* no-op if possible */ 2629 if( pFile->eFileLock==eFileLock ){ 2630 return SQLITE_OK; 2631 } 2632 2633 /* shared can just be set because we always have an exclusive */ 2634 if (eFileLock==SHARED_LOCK) { 2635 pFile->eFileLock = eFileLock; 2636 return SQLITE_OK; 2637 } 2638 2639 /* no, really unlock. */ 2640 if ( sem_post(pSem)==-1 ) { 2641 int rc, tErrno = errno; 2642 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 2643 if( IS_LOCK_ERROR(rc) ){ 2644 storeLastErrno(pFile, tErrno); 2645 } 2646 return rc; 2647 } 2648 pFile->eFileLock = NO_LOCK; 2649 return SQLITE_OK; 2650 } 2651 2652 /* 2653 ** Close a file. 2654 */ 2655 static int semXClose(sqlite3_file *id) { 2656 if( id ){ 2657 unixFile *pFile = (unixFile*)id; 2658 semXUnlock(id, NO_LOCK); 2659 assert( pFile ); 2660 unixEnterMutex(); 2661 releaseInodeInfo(pFile); 2662 unixLeaveMutex(); 2663 closeUnixFile(id); 2664 } 2665 return SQLITE_OK; 2666 } 2667 2668 #endif /* OS_VXWORKS */ 2669 /* 2670 ** Named semaphore locking is only available on VxWorks. 2671 ** 2672 *************** End of the named semaphore lock implementation **************** 2673 ******************************************************************************/ 2674 2675 2676 /****************************************************************************** 2677 *************************** Begin AFP Locking ********************************* 2678 ** 2679 ** AFP is the Apple Filing Protocol. AFP is a network filesystem found 2680 ** on Apple Macintosh computers - both OS9 and OSX. 2681 ** 2682 ** Third-party implementations of AFP are available. But this code here 2683 ** only works on OSX. 
2684 */ 2685 2686 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 2687 /* 2688 ** The afpLockingContext structure contains all afp lock specific state 2689 */ 2690 typedef struct afpLockingContext afpLockingContext; 2691 struct afpLockingContext { 2692 int reserved; 2693 const char *dbPath; /* Name of the open file */ 2694 }; 2695 2696 struct ByteRangeLockPB2 2697 { 2698 unsigned long long offset; /* offset to first byte to lock */ 2699 unsigned long long length; /* nbr of bytes to lock */ 2700 unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ 2701 unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ 2702 unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ 2703 int fd; /* file desc to assoc this lock with */ 2704 }; 2705 2706 #define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) 2707 2708 /* 2709 ** This is a utility for setting or clearing a bit-range lock on an 2710 ** AFP filesystem. 2711 ** 2712 ** Return SQLITE_OK on success, SQLITE_BUSY on failure. 2713 */ 2714 static int afpSetLock( 2715 const char *path, /* Name of the file to be locked or unlocked */ 2716 unixFile *pFile, /* Open file descriptor on path */ 2717 unsigned long long offset, /* First byte to be locked */ 2718 unsigned long long length, /* Number of bytes to lock */ 2719 int setLockFlag /* True to set lock. False to clear lock */ 2720 ){ 2721 struct ByteRangeLockPB2 pb; 2722 int err; 2723 2724 pb.unLockFlag = setLockFlag ? 
0 : 1; 2725 pb.startEndFlag = 0; 2726 pb.offset = offset; 2727 pb.length = length; 2728 pb.fd = pFile->h; 2729 2730 OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", 2731 (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), 2732 offset, length)); 2733 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); 2734 if ( err==-1 ) { 2735 int rc; 2736 int tErrno = errno; 2737 OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n", 2738 path, tErrno, strerror(tErrno))); 2739 #ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS 2740 rc = SQLITE_BUSY; 2741 #else 2742 rc = sqliteErrorFromPosixError(tErrno, 2743 setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); 2744 #endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ 2745 if( IS_LOCK_ERROR(rc) ){ 2746 storeLastErrno(pFile, tErrno); 2747 } 2748 return rc; 2749 } else { 2750 return SQLITE_OK; 2751 } 2752 } 2753 2754 /* 2755 ** This routine checks if there is a RESERVED lock held on the specified 2756 ** file by this or any other process. If such a lock is held, set *pResOut 2757 ** to a non-zero value otherwise *pResOut is set to zero. The return value 2758 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 2759 */ 2760 static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ 2761 int rc = SQLITE_OK; 2762 int reserved = 0; 2763 unixFile *pFile = (unixFile*)id; 2764 afpLockingContext *context; 2765 2766 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); 2767 2768 assert( pFile ); 2769 context = (afpLockingContext *) pFile->lockingContext; 2770 if( context->reserved ){ 2771 *pResOut = 1; 2772 return SQLITE_OK; 2773 } 2774 unixEnterMutex(); /* Because pFile->pInode is shared across threads */ 2775 2776 /* Check if a thread in this process holds such a lock */ 2777 if( pFile->pInode->eFileLock>SHARED_LOCK ){ 2778 reserved = 1; 2779 } 2780 2781 /* Otherwise see if some other process holds it. 
2782 */ 2783 if( !reserved ){ 2784 /* lock the RESERVED byte */ 2785 int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 2786 if( SQLITE_OK==lrc ){ 2787 /* if we succeeded in taking the reserved lock, unlock it to restore 2788 ** the original state */ 2789 lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 2790 } else { 2791 /* if we failed to get the lock then someone else must have it */ 2792 reserved = 1; 2793 } 2794 if( IS_LOCK_ERROR(lrc) ){ 2795 rc=lrc; 2796 } 2797 } 2798 2799 unixLeaveMutex(); 2800 OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); 2801 2802 *pResOut = reserved; 2803 return rc; 2804 } 2805 2806 /* 2807 ** Lock the file with the lock specified by parameter eFileLock - one 2808 ** of the following: 2809 ** 2810 ** (1) SHARED_LOCK 2811 ** (2) RESERVED_LOCK 2812 ** (3) PENDING_LOCK 2813 ** (4) EXCLUSIVE_LOCK 2814 ** 2815 ** Sometimes when requesting one lock state, additional lock states 2816 ** are inserted in between. The locking might fail on one of the later 2817 ** transitions leaving the lock state different from what it started but 2818 ** still short of its goal. The following chart shows the allowed 2819 ** transitions and the inserted intermediate states: 2820 ** 2821 ** UNLOCKED -> SHARED 2822 ** SHARED -> RESERVED 2823 ** SHARED -> (PENDING) -> EXCLUSIVE 2824 ** RESERVED -> (PENDING) -> EXCLUSIVE 2825 ** PENDING -> EXCLUSIVE 2826 ** 2827 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 2828 ** routine to lower a locking level. 
2829 */ 2830 static int afpLock(sqlite3_file *id, int eFileLock){ 2831 int rc = SQLITE_OK; 2832 unixFile *pFile = (unixFile*)id; 2833 unixInodeInfo *pInode = pFile->pInode; 2834 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 2835 2836 assert( pFile ); 2837 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h, 2838 azFileLock(eFileLock), azFileLock(pFile->eFileLock), 2839 azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0))); 2840 2841 /* If there is already a lock of this type or more restrictive on the 2842 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as 2843 ** unixEnterMutex() hasn't been called yet. 2844 */ 2845 if( pFile->eFileLock>=eFileLock ){ 2846 OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h, 2847 azFileLock(eFileLock))); 2848 return SQLITE_OK; 2849 } 2850 2851 /* Make sure the locking sequence is correct 2852 ** (1) We never move from unlocked to anything higher than shared lock. 2853 ** (2) SQLite never explicitly requests a pendig lock. 2854 ** (3) A shared lock is always held when a reserve lock is requested. 2855 */ 2856 assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); 2857 assert( eFileLock!=PENDING_LOCK ); 2858 assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); 2859 2860 /* This mutex is needed because pFile->pInode is shared across threads 2861 */ 2862 unixEnterMutex(); 2863 pInode = pFile->pInode; 2864 2865 /* If some thread using this PID has a lock via a different unixFile* 2866 ** handle that precludes the requested lock, return BUSY. 2867 */ 2868 if( (pFile->eFileLock!=pInode->eFileLock && 2869 (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) 2870 ){ 2871 rc = SQLITE_BUSY; 2872 goto afp_end_lock; 2873 } 2874 2875 /* If a SHARED lock is requested, and some thread using this PID already 2876 ** has a SHARED or RESERVED lock, then increment reference counts and 2877 ** return SQLITE_OK. 
2878 */ 2879 if( eFileLock==SHARED_LOCK && 2880 (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ 2881 assert( eFileLock==SHARED_LOCK ); 2882 assert( pFile->eFileLock==0 ); 2883 assert( pInode->nShared>0 ); 2884 pFile->eFileLock = SHARED_LOCK; 2885 pInode->nShared++; 2886 pInode->nLock++; 2887 goto afp_end_lock; 2888 } 2889 2890 /* A PENDING lock is needed before acquiring a SHARED lock and before 2891 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will 2892 ** be released. 2893 */ 2894 if( eFileLock==SHARED_LOCK 2895 || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) 2896 ){ 2897 int failed; 2898 failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1); 2899 if (failed) { 2900 rc = failed; 2901 goto afp_end_lock; 2902 } 2903 } 2904 2905 /* If control gets to this point, then actually go ahead and make 2906 ** operating system calls for the specified lock. 2907 */ 2908 if( eFileLock==SHARED_LOCK ){ 2909 int lrc1, lrc2, lrc1Errno = 0; 2910 long lk, mask; 2911 2912 assert( pInode->nShared==0 ); 2913 assert( pInode->eFileLock==0 ); 2914 2915 mask = (sizeof(long)==8) ? 
LARGEST_INT64 : 0x7fffffff; 2916 /* Now get the read-lock SHARED_LOCK */ 2917 /* note that the quality of the randomness doesn't matter that much */ 2918 lk = random(); 2919 pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1); 2920 lrc1 = afpSetLock(context->dbPath, pFile, 2921 SHARED_FIRST+pInode->sharedByte, 1, 1); 2922 if( IS_LOCK_ERROR(lrc1) ){ 2923 lrc1Errno = pFile->lastErrno; 2924 } 2925 /* Drop the temporary PENDING lock */ 2926 lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 2927 2928 if( IS_LOCK_ERROR(lrc1) ) { 2929 storeLastErrno(pFile, lrc1Errno); 2930 rc = lrc1; 2931 goto afp_end_lock; 2932 } else if( IS_LOCK_ERROR(lrc2) ){ 2933 rc = lrc2; 2934 goto afp_end_lock; 2935 } else if( lrc1 != SQLITE_OK ) { 2936 rc = lrc1; 2937 } else { 2938 pFile->eFileLock = SHARED_LOCK; 2939 pInode->nLock++; 2940 pInode->nShared = 1; 2941 } 2942 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ 2943 /* We are trying for an exclusive lock but another thread in this 2944 ** same process is still holding a shared lock. */ 2945 rc = SQLITE_BUSY; 2946 }else{ 2947 /* The request was for a RESERVED or EXCLUSIVE lock. It is 2948 ** assumed that there is a SHARED or greater lock on the file 2949 ** already. 2950 */ 2951 int failed = 0; 2952 assert( 0!=pFile->eFileLock ); 2953 if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) { 2954 /* Acquire a RESERVED lock */ 2955 failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); 2956 if( !failed ){ 2957 context->reserved = 1; 2958 } 2959 } 2960 if (!failed && eFileLock == EXCLUSIVE_LOCK) { 2961 /* Acquire an EXCLUSIVE lock */ 2962 2963 /* Remove the shared lock before trying the range. 
we'll need to 2964 ** reestablish the shared lock if we can't get the afpUnlock 2965 */ 2966 if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST + 2967 pInode->sharedByte, 1, 0)) ){ 2968 int failed2 = SQLITE_OK; 2969 /* now attemmpt to get the exclusive lock range */ 2970 failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, 2971 SHARED_SIZE, 1); 2972 if( failed && (failed2 = afpSetLock(context->dbPath, pFile, 2973 SHARED_FIRST + pInode->sharedByte, 1, 1)) ){ 2974 /* Can't reestablish the shared lock. Sqlite can't deal, this is 2975 ** a critical I/O error 2976 */ 2977 rc = ((failed & 0xff) == SQLITE_IOERR) ? failed2 : 2978 SQLITE_IOERR_LOCK; 2979 goto afp_end_lock; 2980 } 2981 }else{ 2982 rc = failed; 2983 } 2984 } 2985 if( failed ){ 2986 rc = failed; 2987 } 2988 } 2989 2990 if( rc==SQLITE_OK ){ 2991 pFile->eFileLock = eFileLock; 2992 pInode->eFileLock = eFileLock; 2993 }else if( eFileLock==EXCLUSIVE_LOCK ){ 2994 pFile->eFileLock = PENDING_LOCK; 2995 pInode->eFileLock = PENDING_LOCK; 2996 } 2997 2998 afp_end_lock: 2999 unixLeaveMutex(); 3000 OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), 3001 rc==SQLITE_OK ? "ok" : "failed")); 3002 return rc; 3003 } 3004 3005 /* 3006 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 3007 ** must be either NO_LOCK or SHARED_LOCK. 3008 ** 3009 ** If the locking level of the file descriptor is already at or below 3010 ** the requested locking level, this routine is a no-op. 
3011 */ 3012 static int afpUnlock(sqlite3_file *id, int eFileLock) { 3013 int rc = SQLITE_OK; 3014 unixFile *pFile = (unixFile*)id; 3015 unixInodeInfo *pInode; 3016 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; 3017 int skipShared = 0; 3018 #ifdef SQLITE_TEST 3019 int h = pFile->h; 3020 #endif 3021 3022 assert( pFile ); 3023 OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock, 3024 pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, 3025 osGetpid(0))); 3026 3027 assert( eFileLock<=SHARED_LOCK ); 3028 if( pFile->eFileLock<=eFileLock ){ 3029 return SQLITE_OK; 3030 } 3031 unixEnterMutex(); 3032 pInode = pFile->pInode; 3033 assert( pInode->nShared!=0 ); 3034 if( pFile->eFileLock>SHARED_LOCK ){ 3035 assert( pInode->eFileLock==pFile->eFileLock ); 3036 SimulateIOErrorBenign(1); 3037 SimulateIOError( h=(-1) ) 3038 SimulateIOErrorBenign(0); 3039 3040 #ifdef SQLITE_DEBUG 3041 /* When reducing a lock such that other processes can start 3042 ** reading the database file again, make sure that the 3043 ** transaction counter was updated if any part of the database 3044 ** file changed. If the transaction counter is not updated, 3045 ** other connections to the same file might not realize that 3046 ** the file has changed and hence might not know to flush their 3047 ** cache. The use of a stale cache can lead to database corruption. 
3048 */ 3049 assert( pFile->inNormalWrite==0 3050 || pFile->dbUpdate==0 3051 || pFile->transCntrChng==1 ); 3052 pFile->inNormalWrite = 0; 3053 #endif 3054 3055 if( pFile->eFileLock==EXCLUSIVE_LOCK ){ 3056 rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0); 3057 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){ 3058 /* only re-establish the shared lock if necessary */ 3059 int sharedLockByte = SHARED_FIRST+pInode->sharedByte; 3060 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1); 3061 } else { 3062 skipShared = 1; 3063 } 3064 } 3065 if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){ 3066 rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); 3067 } 3068 if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){ 3069 rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); 3070 if( !rc ){ 3071 context->reserved = 0; 3072 } 3073 } 3074 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){ 3075 pInode->eFileLock = SHARED_LOCK; 3076 } 3077 } 3078 if( rc==SQLITE_OK && eFileLock==NO_LOCK ){ 3079 3080 /* Decrement the shared lock counter. Release the lock using an 3081 ** OS call only when all threads in this same process have released 3082 ** the lock. 
3083 */ 3084 unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte; 3085 pInode->nShared--; 3086 if( pInode->nShared==0 ){ 3087 SimulateIOErrorBenign(1); 3088 SimulateIOError( h=(-1) ) 3089 SimulateIOErrorBenign(0); 3090 if( !skipShared ){ 3091 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0); 3092 } 3093 if( !rc ){ 3094 pInode->eFileLock = NO_LOCK; 3095 pFile->eFileLock = NO_LOCK; 3096 } 3097 } 3098 if( rc==SQLITE_OK ){ 3099 pInode->nLock--; 3100 assert( pInode->nLock>=0 ); 3101 if( pInode->nLock==0 ){ 3102 closePendingFds(pFile); 3103 } 3104 } 3105 } 3106 3107 unixLeaveMutex(); 3108 if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; 3109 return rc; 3110 } 3111 3112 /* 3113 ** Close a file & cleanup AFP specific locking context 3114 */ 3115 static int afpClose(sqlite3_file *id) { 3116 int rc = SQLITE_OK; 3117 unixFile *pFile = (unixFile*)id; 3118 assert( id!=0 ); 3119 afpUnlock(id, NO_LOCK); 3120 unixEnterMutex(); 3121 if( pFile->pInode && pFile->pInode->nLock ){ 3122 /* If there are outstanding locks, do not actually close the file just 3123 ** yet because that would clear those locks. Instead, add the file 3124 ** descriptor to pInode->aPending. It will be automatically closed when 3125 ** the last lock is cleared. 3126 */ 3127 setPendingFd(pFile); 3128 } 3129 releaseInodeInfo(pFile); 3130 sqlite3_free(pFile->lockingContext); 3131 rc = closeUnixFile(id); 3132 unixLeaveMutex(); 3133 return rc; 3134 } 3135 3136 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 3137 /* 3138 ** The code above is the AFP lock implementation. The code is specific 3139 ** to MacOSX and does not work on other unix platforms. No alternative 3140 ** is available. If you don't compile for a mac, then the "unix-afp" 3141 ** VFS is not available. 
3142 ** 3143 ********************* End of the AFP lock implementation ********************** 3144 ******************************************************************************/ 3145 3146 /****************************************************************************** 3147 *************************** Begin NFS Locking ********************************/ 3148 3149 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 3150 /* 3151 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock 3152 ** must be either NO_LOCK or SHARED_LOCK. 3153 ** 3154 ** If the locking level of the file descriptor is already at or below 3155 ** the requested locking level, this routine is a no-op. 3156 */ 3157 static int nfsUnlock(sqlite3_file *id, int eFileLock){ 3158 return posixUnlock(id, eFileLock, 1); 3159 } 3160 3161 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 3162 /* 3163 ** The code above is the NFS lock implementation. The code is specific 3164 ** to MacOSX and does not work on other unix platforms. No alternative 3165 ** is available. 3166 ** 3167 ********************* End of the NFS lock implementation ********************** 3168 ******************************************************************************/ 3169 3170 /****************************************************************************** 3171 **************** Non-locking sqlite3_file methods ***************************** 3172 ** 3173 ** The next division contains implementations for all methods of the 3174 ** sqlite3_file object other than the locking methods. The locking 3175 ** methods were defined in divisions above (one locking method per 3176 ** division). Those methods that are common to all locking modes 3177 ** are gather together into this division. 3178 */ 3179 3180 /* 3181 ** Seek to the offset passed as the second argument, then read cnt 3182 ** bytes into pBuf. Return the number of bytes actually read. 
3183 ** 3184 ** NB: If you define USE_PREAD or USE_PREAD64, then it might also 3185 ** be necessary to define _XOPEN_SOURCE to be 500. This varies from 3186 ** one system to another. Since SQLite does not define USE_PREAD 3187 ** in any form by default, we will not attempt to define _XOPEN_SOURCE. 3188 ** See tickets #2741 and #2681. 3189 ** 3190 ** To avoid stomping the errno value on a failed read the lastErrno value 3191 ** is set before returning. 3192 */ 3193 static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ 3194 int got; 3195 int prior = 0; 3196 #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) 3197 i64 newOffset; 3198 #endif 3199 TIMER_START; 3200 assert( cnt==(cnt&0x1ffff) ); 3201 assert( id->h>2 ); 3202 do{ 3203 #if defined(USE_PREAD) 3204 got = osPread(id->h, pBuf, cnt, offset); 3205 SimulateIOError( got = -1 ); 3206 #elif defined(USE_PREAD64) 3207 got = osPread64(id->h, pBuf, cnt, offset); 3208 SimulateIOError( got = -1 ); 3209 #else 3210 newOffset = lseek(id->h, offset, SEEK_SET); 3211 SimulateIOError( newOffset = -1 ); 3212 if( newOffset<0 ){ 3213 storeLastErrno((unixFile*)id, errno); 3214 return -1; 3215 } 3216 got = osRead(id->h, pBuf, cnt); 3217 #endif 3218 if( got==cnt ) break; 3219 if( got<0 ){ 3220 if( errno==EINTR ){ got = 1; continue; } 3221 prior = 0; 3222 storeLastErrno((unixFile*)id, errno); 3223 break; 3224 }else if( got>0 ){ 3225 cnt -= got; 3226 offset += got; 3227 prior += got; 3228 pBuf = (void*)(got + (char*)pBuf); 3229 } 3230 }while( got>0 ); 3231 TIMER_END; 3232 OSTRACE(("READ %-3d %5d %7lld %llu\n", 3233 id->h, got+prior, offset-prior, TIMER_ELAPSED)); 3234 return got+prior; 3235 } 3236 3237 /* 3238 ** Read data from a file into a buffer. Return SQLITE_OK if all 3239 ** bytes were read successfully and SQLITE_IOERR if anything goes 3240 ** wrong. 
3241 */ 3242 static int unixRead( 3243 sqlite3_file *id, 3244 void *pBuf, 3245 int amt, 3246 sqlite3_int64 offset 3247 ){ 3248 unixFile *pFile = (unixFile *)id; 3249 int got; 3250 assert( id ); 3251 assert( offset>=0 ); 3252 assert( amt>0 ); 3253 3254 /* If this is a database file (not a journal, master-journal or temp 3255 ** file), the bytes in the locking range should never be read or written. */ 3256 #if 0 3257 assert( pFile->pPreallocatedUnused==0 3258 || offset>=PENDING_BYTE+512 3259 || offset+amt<=PENDING_BYTE 3260 ); 3261 #endif 3262 3263 #if SQLITE_MAX_MMAP_SIZE>0 3264 /* Deal with as much of this read request as possible by transfering 3265 ** data from the memory mapping using memcpy(). */ 3266 if( offset<pFile->mmapSize ){ 3267 if( offset+amt <= pFile->mmapSize ){ 3268 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); 3269 return SQLITE_OK; 3270 }else{ 3271 int nCopy = pFile->mmapSize - offset; 3272 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); 3273 pBuf = &((u8 *)pBuf)[nCopy]; 3274 amt -= nCopy; 3275 offset += nCopy; 3276 } 3277 } 3278 #endif 3279 3280 got = seekAndRead(pFile, offset, pBuf, amt); 3281 if( got==amt ){ 3282 return SQLITE_OK; 3283 }else if( got<0 ){ 3284 /* lastErrno set by seekAndRead */ 3285 return SQLITE_IOERR_READ; 3286 }else{ 3287 storeLastErrno(pFile, 0); /* not a system error */ 3288 /* Unread parts of the buffer must be zero-filled */ 3289 memset(&((char*)pBuf)[got], 0, amt-got); 3290 return SQLITE_IOERR_SHORT_READ; 3291 } 3292 } 3293 3294 /* 3295 ** Attempt to seek the file-descriptor passed as the first argument to 3296 ** absolute offset iOff, then attempt to write nBuf bytes of data from 3297 ** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, 3298 ** return the actual number of bytes written (which may be less than 3299 ** nBuf). 
3300 */ 3301 static int seekAndWriteFd( 3302 int fd, /* File descriptor to write to */ 3303 i64 iOff, /* File offset to begin writing at */ 3304 const void *pBuf, /* Copy data from this buffer to the file */ 3305 int nBuf, /* Size of buffer pBuf in bytes */ 3306 int *piErrno /* OUT: Error number if error occurs */ 3307 ){ 3308 int rc = 0; /* Value returned by system call */ 3309 3310 assert( nBuf==(nBuf&0x1ffff) ); 3311 assert( fd>2 ); 3312 assert( piErrno!=0 ); 3313 nBuf &= 0x1ffff; 3314 TIMER_START; 3315 3316 #if defined(USE_PREAD) 3317 do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); 3318 #elif defined(USE_PREAD64) 3319 do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); 3320 #else 3321 do{ 3322 i64 iSeek = lseek(fd, iOff, SEEK_SET); 3323 SimulateIOError( iSeek = -1 ); 3324 if( iSeek<0 ){ 3325 rc = -1; 3326 break; 3327 } 3328 rc = osWrite(fd, pBuf, nBuf); 3329 }while( rc<0 && errno==EINTR ); 3330 #endif 3331 3332 TIMER_END; 3333 OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); 3334 3335 if( rc<0 ) *piErrno = errno; 3336 return rc; 3337 } 3338 3339 3340 /* 3341 ** Seek to the offset in id->offset then read cnt bytes into pBuf. 3342 ** Return the number of bytes actually read. Update the offset. 3343 ** 3344 ** To avoid stomping the errno value on a failed write the lastErrno value 3345 ** is set before returning. 3346 */ 3347 static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ 3348 return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); 3349 } 3350 3351 3352 /* 3353 ** Write data from a buffer into a file. Return SQLITE_OK on success 3354 ** or some other error code on failure. 
3355 */ 3356 static int unixWrite( 3357 sqlite3_file *id, 3358 const void *pBuf, 3359 int amt, 3360 sqlite3_int64 offset 3361 ){ 3362 unixFile *pFile = (unixFile*)id; 3363 int wrote = 0; 3364 assert( id ); 3365 assert( amt>0 ); 3366 3367 /* If this is a database file (not a journal, master-journal or temp 3368 ** file), the bytes in the locking range should never be read or written. */ 3369 #if 0 3370 assert( pFile->pPreallocatedUnused==0 3371 || offset>=PENDING_BYTE+512 3372 || offset+amt<=PENDING_BYTE 3373 ); 3374 #endif 3375 3376 #ifdef SQLITE_DEBUG 3377 /* If we are doing a normal write to a database file (as opposed to 3378 ** doing a hot-journal rollback or a write to some file other than a 3379 ** normal database file) then record the fact that the database 3380 ** has changed. If the transaction counter is modified, record that 3381 ** fact too. 3382 */ 3383 if( pFile->inNormalWrite ){ 3384 pFile->dbUpdate = 1; /* The database has been modified */ 3385 if( offset<=24 && offset+amt>=27 ){ 3386 int rc; 3387 char oldCntr[4]; 3388 SimulateIOErrorBenign(1); 3389 rc = seekAndRead(pFile, 24, oldCntr, 4); 3390 SimulateIOErrorBenign(0); 3391 if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){ 3392 pFile->transCntrChng = 1; /* The transaction counter has changed */ 3393 } 3394 } 3395 } 3396 #endif 3397 3398 #if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 3399 /* Deal with as much of this write request as possible by transfering 3400 ** data from the memory mapping using memcpy(). 
*/ 3401 if( offset<pFile->mmapSize ){ 3402 if( offset+amt <= pFile->mmapSize ){ 3403 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); 3404 return SQLITE_OK; 3405 }else{ 3406 int nCopy = pFile->mmapSize - offset; 3407 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); 3408 pBuf = &((u8 *)pBuf)[nCopy]; 3409 amt -= nCopy; 3410 offset += nCopy; 3411 } 3412 } 3413 #endif 3414 3415 while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))<amt && wrote>0 ){ 3416 amt -= wrote; 3417 offset += wrote; 3418 pBuf = &((char*)pBuf)[wrote]; 3419 } 3420 SimulateIOError(( wrote=(-1), amt=1 )); 3421 SimulateDiskfullError(( wrote=0, amt=1 )); 3422 3423 if( amt>wrote ){ 3424 if( wrote<0 && pFile->lastErrno!=ENOSPC ){ 3425 /* lastErrno set by seekAndWrite */ 3426 return SQLITE_IOERR_WRITE; 3427 }else{ 3428 storeLastErrno(pFile, 0); /* not a system error */ 3429 return SQLITE_FULL; 3430 } 3431 } 3432 3433 return SQLITE_OK; 3434 } 3435 3436 #ifdef SQLITE_TEST 3437 /* 3438 ** Count the number of fullsyncs and normal syncs. This is used to test 3439 ** that syncs and fullsyncs are occurring at the right times. 3440 */ 3441 int sqlite3_sync_count = 0; 3442 int sqlite3_fullsync_count = 0; 3443 #endif 3444 3445 /* 3446 ** We do not trust systems to provide a working fdatasync(). Some do. 3447 ** Others do no. To be safe, we will stick with the (slightly slower) 3448 ** fsync(). If you know that your system does support fdatasync() correctly, 3449 ** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC 3450 */ 3451 #if !defined(fdatasync) && !HAVE_FDATASYNC 3452 # define fdatasync fsync 3453 #endif 3454 3455 /* 3456 ** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not 3457 ** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently 3458 ** only available on Mac OS X. But that could change. 
3459 */ 3460 #ifdef F_FULLFSYNC 3461 # define HAVE_FULLFSYNC 1 3462 #else 3463 # define HAVE_FULLFSYNC 0 3464 #endif 3465 3466 3467 /* 3468 ** The fsync() system call does not work as advertised on many 3469 ** unix systems. The following procedure is an attempt to make 3470 ** it work better. 3471 ** 3472 ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful 3473 ** for testing when we want to run through the test suite quickly. 3474 ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC 3475 ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash 3476 ** or power failure will likely corrupt the database file. 3477 ** 3478 ** SQLite sets the dataOnly flag if the size of the file is unchanged. 3479 ** The idea behind dataOnly is that it should only write the file content 3480 ** to disk, not the inode. We only set dataOnly if the file size is 3481 ** unchanged since the file size is part of the inode. However, 3482 ** Ted Ts'o tells us that fdatasync() will also write the inode if the 3483 ** file size has changed. The only real difference between fdatasync() 3484 ** and fsync(), Ted tells us, is that fdatasync() will not flush the 3485 ** inode if the mtime or owner or other inode attributes have changed. 3486 ** We only care about the file size, not the other file attributes, so 3487 ** as far as SQLite is concerned, an fdatasync() is always adequate. 3488 ** So, we always use fdatasync() if it is available, regardless of 3489 ** the value of the dataOnly flag. 3490 */ 3491 static int full_fsync(int fd, int fullSync, int dataOnly){ 3492 int rc; 3493 3494 /* The following "ifdef/elif/else/" block has the same structure as 3495 ** the one below. It is replicated here solely to avoid cluttering 3496 ** up the real code with the UNUSED_PARAMETER() macros. 
3497 */ 3498 #ifdef SQLITE_NO_SYNC 3499 UNUSED_PARAMETER(fd); 3500 UNUSED_PARAMETER(fullSync); 3501 UNUSED_PARAMETER(dataOnly); 3502 #elif HAVE_FULLFSYNC 3503 UNUSED_PARAMETER(dataOnly); 3504 #else 3505 UNUSED_PARAMETER(fullSync); 3506 UNUSED_PARAMETER(dataOnly); 3507 #endif 3508 3509 /* Record the number of times that we do a normal fsync() and 3510 ** FULLSYNC. This is used during testing to verify that this procedure 3511 ** gets called with the correct arguments. 3512 */ 3513 #ifdef SQLITE_TEST 3514 if( fullSync ) sqlite3_fullsync_count++; 3515 sqlite3_sync_count++; 3516 #endif 3517 3518 /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a 3519 ** no-op. But go ahead and call fstat() to validate the file 3520 ** descriptor as we need a method to provoke a failure during 3521 ** coverate testing. 3522 */ 3523 #ifdef SQLITE_NO_SYNC 3524 { 3525 struct stat buf; 3526 rc = osFstat(fd, &buf); 3527 } 3528 #elif HAVE_FULLFSYNC 3529 if( fullSync ){ 3530 rc = osFcntl(fd, F_FULLFSYNC, 0); 3531 }else{ 3532 rc = 1; 3533 } 3534 /* If the FULLFSYNC failed, fall back to attempting an fsync(). 3535 ** It shouldn't be possible for fullfsync to fail on the local 3536 ** file system (on OSX), so failure indicates that FULLFSYNC 3537 ** isn't supported for this file system. So, attempt an fsync 3538 ** and (for now) ignore the overhead of a superfluous fcntl call. 3539 ** It'd be better to detect fullfsync support once and avoid 3540 ** the fcntl call every time sync is called. 
3541 */ 3542 if( rc ) rc = fsync(fd); 3543 3544 #elif defined(__APPLE__) 3545 /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly 3546 ** so currently we default to the macro that redefines fdatasync to fsync 3547 */ 3548 rc = fsync(fd); 3549 #else 3550 rc = fdatasync(fd); 3551 #if OS_VXWORKS 3552 if( rc==-1 && errno==ENOTSUP ){ 3553 rc = fsync(fd); 3554 } 3555 #endif /* OS_VXWORKS */ 3556 #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */ 3557 3558 if( OS_VXWORKS && rc!= -1 ){ 3559 rc = 0; 3560 } 3561 return rc; 3562 } 3563 3564 /* 3565 ** Open a file descriptor to the directory containing file zFilename. 3566 ** If successful, *pFd is set to the opened file descriptor and 3567 ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM 3568 ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined 3569 ** value. 3570 ** 3571 ** The directory file descriptor is used for only one thing - to 3572 ** fsync() a directory to make sure file creation and deletion events 3573 ** are flushed to disk. Such fsyncs are not needed on newer 3574 ** journaling filesystems, but are required on older filesystems. 3575 ** 3576 ** This routine can be overridden using the xSetSysCall interface. 3577 ** The ability to override this routine was added in support of the 3578 ** chromium sandbox. Opening a directory is a security risk (we are 3579 ** told) so making it overrideable allows the chromium sandbox to 3580 ** replace this routine with a harmless no-op. To make this routine 3581 ** a no-op, replace it with a stub that returns SQLITE_OK but leaves 3582 ** *pFd set to a negative number. 3583 ** 3584 ** If SQLITE_OK is returned, the caller is responsible for closing 3585 ** the file descriptor *pFd using close(). 
3586 */ 3587 static int openDirectory(const char *zFilename, int *pFd){ 3588 int ii; 3589 int fd = -1; 3590 char zDirname[MAX_PATHNAME+1]; 3591 3592 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); 3593 for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--); 3594 if( ii>0 ){ 3595 zDirname[ii] = '\0'; 3596 }else{ 3597 if( zDirname[0]!='/' ) zDirname[0] = '.'; 3598 zDirname[1] = 0; 3599 } 3600 fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); 3601 if( fd>=0 ){ 3602 OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); 3603 } 3604 *pFd = fd; 3605 if( fd>=0 ) return SQLITE_OK; 3606 return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory", zDirname); 3607 } 3608 3609 /* 3610 ** Make sure all writes to a particular file are committed to disk. 3611 ** 3612 ** If dataOnly==0 then both the file itself and its metadata (file 3613 ** size, access time, etc) are synced. If dataOnly!=0 then only the 3614 ** file data is synced. 3615 ** 3616 ** Under Unix, also make sure that the directory entry for the file 3617 ** has been created by fsync-ing the directory that contains the file. 3618 ** If we do not do this and we encounter a power failure, the directory 3619 ** entry for the journal might not exist after we reboot. The next 3620 ** SQLite to access the file will not know that the journal exists (because 3621 ** the directory entry for the journal was never created) and the transaction 3622 ** will not roll back - possibly leading to database corruption. 3623 */ 3624 static int unixSync(sqlite3_file *id, int flags){ 3625 int rc; 3626 unixFile *pFile = (unixFile*)id; 3627 3628 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); 3629 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; 3630 3631 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ 3632 assert((flags&0x0F)==SQLITE_SYNC_NORMAL 3633 || (flags&0x0F)==SQLITE_SYNC_FULL 3634 ); 3635 3636 /* Unix cannot, but some systems may return SQLITE_FULL from here. 
This 3637 ** line is to test that doing so does not cause any problems. 3638 */ 3639 SimulateDiskfullError( return SQLITE_FULL ); 3640 3641 assert( pFile ); 3642 OSTRACE(("SYNC %-3d\n", pFile->h)); 3643 rc = full_fsync(pFile->h, isFullsync, isDataOnly); 3644 SimulateIOError( rc=1 ); 3645 if( rc ){ 3646 storeLastErrno(pFile, errno); 3647 return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); 3648 } 3649 3650 /* Also fsync the directory containing the file if the DIRSYNC flag 3651 ** is set. This is a one-time occurrence. Many systems (examples: AIX) 3652 ** are unable to fsync a directory, so ignore errors on the fsync. 3653 */ 3654 if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ 3655 int dirfd; 3656 OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath, 3657 HAVE_FULLFSYNC, isFullsync)); 3658 rc = osOpenDirectory(pFile->zPath, &dirfd); 3659 if( rc==SQLITE_OK ){ 3660 full_fsync(dirfd, 0, 0); 3661 robust_close(pFile, dirfd, __LINE__); 3662 }else{ 3663 assert( rc==SQLITE_CANTOPEN ); 3664 rc = SQLITE_OK; 3665 } 3666 pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; 3667 } 3668 return rc; 3669 } 3670 3671 /* 3672 ** Truncate an open file to a specified size 3673 */ 3674 static int unixTruncate(sqlite3_file *id, i64 nByte){ 3675 unixFile *pFile = (unixFile *)id; 3676 int rc; 3677 assert( pFile ); 3678 SimulateIOError( return SQLITE_IOERR_TRUNCATE ); 3679 3680 /* If the user has configured a chunk-size for this file, truncate the 3681 ** file so that it consists of an integer number of chunks (i.e. the 3682 ** actual file size after the operation may be larger than the requested 3683 ** size). 
3684 */ 3685 if( pFile->szChunk>0 ){ 3686 nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; 3687 } 3688 3689 rc = robust_ftruncate(pFile->h, nByte); 3690 if( rc ){ 3691 storeLastErrno(pFile, errno); 3692 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3693 }else{ 3694 #ifdef SQLITE_DEBUG 3695 /* If we are doing a normal write to a database file (as opposed to 3696 ** doing a hot-journal rollback or a write to some file other than a 3697 ** normal database file) and we truncate the file to zero length, 3698 ** that effectively updates the change counter. This might happen 3699 ** when restoring a database using the backup API from a zero-length 3700 ** source. 3701 */ 3702 if( pFile->inNormalWrite && nByte==0 ){ 3703 pFile->transCntrChng = 1; 3704 } 3705 #endif 3706 3707 #if SQLITE_MAX_MMAP_SIZE>0 3708 /* If the file was just truncated to a size smaller than the currently 3709 ** mapped region, reduce the effective mapping size as well. SQLite will 3710 ** use read() and write() to access data beyond this point from now on. 3711 */ 3712 if( nByte<pFile->mmapSize ){ 3713 pFile->mmapSize = nByte; 3714 } 3715 #endif 3716 3717 return SQLITE_OK; 3718 } 3719 } 3720 3721 /* 3722 ** Determine the current size of a file in bytes 3723 */ 3724 static int unixFileSize(sqlite3_file *id, i64 *pSize){ 3725 int rc; 3726 struct stat buf; 3727 assert( id ); 3728 rc = osFstat(((unixFile*)id)->h, &buf); 3729 SimulateIOError( rc=1 ); 3730 if( rc!=0 ){ 3731 storeLastErrno((unixFile*)id, errno); 3732 return SQLITE_IOERR_FSTAT; 3733 } 3734 *pSize = buf.st_size; 3735 3736 /* When opening a zero-size database, the findInodeInfo() procedure 3737 ** writes a single byte into that file in order to work around a bug 3738 ** in the OS-X msdos filesystem. In order to avoid problems with upper 3739 ** layers, we need to report this file size as zero even though it is 3740 ** really 1. Ticket #3260. 
3741 */ 3742 if( *pSize==1 ) *pSize = 0; 3743 3744 3745 return SQLITE_OK; 3746 } 3747 3748 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 3749 /* 3750 ** Handler for proxy-locking file-control verbs. Defined below in the 3751 ** proxying locking division. 3752 */ 3753 static int proxyFileControl(sqlite3_file*,int,void*); 3754 #endif 3755 3756 /* 3757 ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT 3758 ** file-control operation. Enlarge the database to nBytes in size 3759 ** (rounded up to the next chunk-size). If the database is already 3760 ** nBytes or larger, this routine is a no-op. 3761 */ 3762 static int fcntlSizeHint(unixFile *pFile, i64 nByte){ 3763 if( pFile->szChunk>0 ){ 3764 i64 nSize; /* Required file size */ 3765 struct stat buf; /* Used to hold return values of fstat() */ 3766 3767 if( osFstat(pFile->h, &buf) ){ 3768 return SQLITE_IOERR_FSTAT; 3769 } 3770 3771 nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; 3772 if( nSize>(i64)buf.st_size ){ 3773 3774 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE 3775 /* The code below is handling the return value of osFallocate() 3776 ** correctly. posix_fallocate() is defined to "returns zero on success, 3777 ** or an error number on failure". See the manpage for details. */ 3778 int err; 3779 do{ 3780 err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size); 3781 }while( err==EINTR ); 3782 if( err && err!=EINVAL ) return SQLITE_IOERR_WRITE; 3783 #else 3784 /* If the OS does not have posix_fallocate(), fake it. Write a 3785 ** single byte to the last byte in each block that falls entirely 3786 ** within the extended region. Then, if required, a single byte 3787 ** at offset (nSize-1), to set the size of the file correctly. 3788 ** This is a similar technique to that used by glibc on systems 3789 ** that do not have a real fallocate() call. 
3790 */ 3791 int nBlk = buf.st_blksize; /* File-system block size */ 3792 int nWrite = 0; /* Number of bytes written by seekAndWrite */ 3793 i64 iWrite; /* Next offset to write to */ 3794 3795 iWrite = (buf.st_size/nBlk)*nBlk + nBlk - 1; 3796 assert( iWrite>=buf.st_size ); 3797 assert( ((iWrite+1)%nBlk)==0 ); 3798 for(/*no-op*/; iWrite<nSize+nBlk-1; iWrite+=nBlk ){ 3799 if( iWrite>=nSize ) iWrite = nSize - 1; 3800 nWrite = seekAndWrite(pFile, iWrite, "", 1); 3801 if( nWrite!=1 ) return SQLITE_IOERR_WRITE; 3802 } 3803 #endif 3804 } 3805 } 3806 3807 #if SQLITE_MAX_MMAP_SIZE>0 3808 if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ 3809 int rc; 3810 if( pFile->szChunk<=0 ){ 3811 if( robust_ftruncate(pFile->h, nByte) ){ 3812 storeLastErrno(pFile, errno); 3813 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); 3814 } 3815 } 3816 3817 rc = unixMapfile(pFile, nByte); 3818 return rc; 3819 } 3820 #endif 3821 3822 return SQLITE_OK; 3823 } 3824 3825 /* 3826 ** If *pArg is initially negative then this is a query. Set *pArg to 3827 ** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. 3828 ** 3829 ** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. 3830 */ 3831 static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ 3832 if( *pArg<0 ){ 3833 *pArg = (pFile->ctrlFlags & mask)!=0; 3834 }else if( (*pArg)==0 ){ 3835 pFile->ctrlFlags &= ~mask; 3836 }else{ 3837 pFile->ctrlFlags |= mask; 3838 } 3839 } 3840 3841 /* Forward declaration */ 3842 static int unixGetTempname(int nBuf, char *zBuf); 3843 3844 /* 3845 ** Information and control of an open file handle. 3846 */ 3847 static int unixFileControl(sqlite3_file *id, int op, void *pArg){ 3848 unixFile *pFile = (unixFile*)id; 3849 switch( op ){ 3850 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 3851 case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: { 3852 int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE); 3853 return rc ? 
SQLITE_IOERR_BEGIN_ATOMIC : SQLITE_OK; 3854 } 3855 case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: { 3856 int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE); 3857 return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK; 3858 } 3859 case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: { 3860 int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE); 3861 return rc ? SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK; 3862 } 3863 #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ 3864 3865 case SQLITE_FCNTL_LOCKSTATE: { 3866 *(int*)pArg = pFile->eFileLock; 3867 return SQLITE_OK; 3868 } 3869 case SQLITE_FCNTL_LAST_ERRNO: { 3870 *(int*)pArg = pFile->lastErrno; 3871 return SQLITE_OK; 3872 } 3873 case SQLITE_FCNTL_CHUNK_SIZE: { 3874 pFile->szChunk = *(int *)pArg; 3875 return SQLITE_OK; 3876 } 3877 case SQLITE_FCNTL_SIZE_HINT: { 3878 int rc; 3879 SimulateIOErrorBenign(1); 3880 rc = fcntlSizeHint(pFile, *(i64 *)pArg); 3881 SimulateIOErrorBenign(0); 3882 return rc; 3883 } 3884 case SQLITE_FCNTL_PERSIST_WAL: { 3885 unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); 3886 return SQLITE_OK; 3887 } 3888 case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { 3889 unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); 3890 return SQLITE_OK; 3891 } 3892 case SQLITE_FCNTL_VFSNAME: { 3893 *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); 3894 return SQLITE_OK; 3895 } 3896 case SQLITE_FCNTL_TEMPFILENAME: { 3897 char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname ); 3898 if( zTFile ){ 3899 unixGetTempname(pFile->pVfs->mxPathname, zTFile); 3900 *(char**)pArg = zTFile; 3901 } 3902 return SQLITE_OK; 3903 } 3904 case SQLITE_FCNTL_HAS_MOVED: { 3905 *(int*)pArg = fileHasMoved(pFile); 3906 return SQLITE_OK; 3907 } 3908 #ifdef SQLITE_ENABLE_SETLK_TIMEOUT 3909 case SQLITE_FCNTL_LOCK_TIMEOUT: { 3910 pFile->iBusyTimeout = *(int*)pArg; 3911 return SQLITE_OK; 3912 } 3913 #endif 3914 #if SQLITE_MAX_MMAP_SIZE>0 3915 case SQLITE_FCNTL_MMAP_SIZE: { 3916 i64 newLimit = *(i64*)pArg; 3917 int rc = SQLITE_OK; 3918 if( 
newLimit>sqlite3GlobalConfig.mxMmap ){ 3919 newLimit = sqlite3GlobalConfig.mxMmap; 3920 } 3921 3922 /* The value of newLimit may be eventually cast to (size_t) and passed 3923 ** to mmap(). Restrict its value to 2GB if (size_t) is not at least a 3924 ** 64-bit type. */ 3925 if( newLimit>0 && sizeof(size_t)<8 ){ 3926 newLimit = (newLimit & 0x7FFFFFFF); 3927 } 3928 3929 *(i64*)pArg = pFile->mmapSizeMax; 3930 if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ 3931 pFile->mmapSizeMax = newLimit; 3932 if( pFile->mmapSize>0 ){ 3933 unixUnmapfile(pFile); 3934 rc = unixMapfile(pFile, -1); 3935 } 3936 } 3937 return rc; 3938 } 3939 #endif 3940 #ifdef SQLITE_DEBUG 3941 /* The pager calls this method to signal that it has done 3942 ** a rollback and that the database is therefore unchanged and 3943 ** it hence it is OK for the transaction change counter to be 3944 ** unchanged. 3945 */ 3946 case SQLITE_FCNTL_DB_UNCHANGED: { 3947 ((unixFile*)id)->dbUpdate = 0; 3948 return SQLITE_OK; 3949 } 3950 #endif 3951 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 3952 case SQLITE_FCNTL_SET_LOCKPROXYFILE: 3953 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 3954 return proxyFileControl(id,op,pArg); 3955 } 3956 #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ 3957 } 3958 return SQLITE_NOTFOUND; 3959 } 3960 3961 /* 3962 ** If pFd->sectorSize is non-zero when this function is called, it is a 3963 ** no-op. Otherwise, the values of pFd->sectorSize and 3964 ** pFd->deviceCharacteristics are set according to the file-system 3965 ** characteristics. 3966 ** 3967 ** There are two versions of this function. One for QNX and one for all 3968 ** other systems. 
3969 */ 3970 #ifndef __QNXNTO__ 3971 static void setDeviceCharacteristics(unixFile *pFd){ 3972 assert( pFd->deviceCharacteristics==0 || pFd->sectorSize!=0 ); 3973 if( pFd->sectorSize==0 ){ 3974 #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) 3975 int res; 3976 u32 f = 0; 3977 3978 /* Check for support for F2FS atomic batch writes. */ 3979 res = osIoctl(pFd->h, F2FS_IOC_GET_FEATURES, &f); 3980 if( res==0 && (f & F2FS_FEATURE_ATOMIC_WRITE) ){ 3981 pFd->deviceCharacteristics = SQLITE_IOCAP_BATCH_ATOMIC; 3982 } 3983 #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ 3984 3985 /* Set the POWERSAFE_OVERWRITE flag if requested. */ 3986 if( pFd->ctrlFlags & UNIXFILE_PSOW ){ 3987 pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; 3988 } 3989 3990 pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 3991 } 3992 } 3993 #else 3994 #include <sys/dcmd_blk.h> 3995 #include <sys/statvfs.h> 3996 static void setDeviceCharacteristics(unixFile *pFile){ 3997 if( pFile->sectorSize == 0 ){ 3998 struct statvfs fsInfo; 3999 4000 /* Set defaults for non-supported filesystems */ 4001 pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4002 pFile->deviceCharacteristics = 0; 4003 if( fstatvfs(pFile->h, &fsInfo) == -1 ) { 4004 return; 4005 } 4006 4007 if( !strcmp(fsInfo.f_basetype, "tmp") ) { 4008 pFile->sectorSize = fsInfo.f_bsize; 4009 pFile->deviceCharacteristics = 4010 SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */ 4011 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4012 ** the write succeeds */ 4013 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4014 ** so it is ordered */ 4015 0; 4016 }else if( strstr(fsInfo.f_basetype, "etfs") ){ 4017 pFile->sectorSize = fsInfo.f_bsize; 4018 pFile->deviceCharacteristics = 4019 /* etfs cluster size writes are atomic */ 4020 (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) | 4021 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4022 ** the write 
succeeds */ 4023 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4024 ** so it is ordered */ 4025 0; 4026 }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){ 4027 pFile->sectorSize = fsInfo.f_bsize; 4028 pFile->deviceCharacteristics = 4029 SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */ 4030 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4031 ** the write succeeds */ 4032 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4033 ** so it is ordered */ 4034 0; 4035 }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){ 4036 pFile->sectorSize = fsInfo.f_bsize; 4037 pFile->deviceCharacteristics = 4038 /* full bitset of atomics from max sector size and smaller */ 4039 ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | 4040 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4041 ** so it is ordered */ 4042 0; 4043 }else if( strstr(fsInfo.f_basetype, "dos") ){ 4044 pFile->sectorSize = fsInfo.f_bsize; 4045 pFile->deviceCharacteristics = 4046 /* full bitset of atomics from max sector size and smaller */ 4047 ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | 4048 SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind 4049 ** so it is ordered */ 4050 0; 4051 }else{ 4052 pFile->deviceCharacteristics = 4053 SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */ 4054 SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until 4055 ** the write succeeds */ 4056 0; 4057 } 4058 } 4059 /* Last chance verification. If the sector size isn't a multiple of 512 4060 ** then it isn't valid.*/ 4061 if( pFile->sectorSize % 512 != 0 ){ 4062 pFile->deviceCharacteristics = 0; 4063 pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; 4064 } 4065 } 4066 #endif 4067 4068 /* 4069 ** Return the sector size in bytes of the underlying block device for 4070 ** the specified file. This is almost always 512 bytes, but may be 4071 ** larger for some devices. 
4072 ** 4073 ** SQLite code assumes this function cannot fail. It also assumes that 4074 ** if two files are created in the same file-system directory (i.e. 4075 ** a database and its journal file) that the sector size will be the 4076 ** same for both. 4077 */ 4078 static int unixSectorSize(sqlite3_file *id){ 4079 unixFile *pFd = (unixFile*)id; 4080 setDeviceCharacteristics(pFd); 4081 return pFd->sectorSize; 4082 } 4083 4084 /* 4085 ** Return the device characteristics for the file. 4086 ** 4087 ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. 4088 ** However, that choice is controversial since technically the underlying 4089 ** file system does not always provide powersafe overwrites. (In other 4090 ** words, after a power-loss event, parts of the file that were never 4091 ** written might end up being altered.) However, non-PSOW behavior is very, 4092 ** very rare. And asserting PSOW makes a large reduction in the amount 4093 ** of required I/O for journaling, since a lot of padding is eliminated. 4094 ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control 4095 ** available to turn it off and URI query parameter available to turn it off. 4096 */ 4097 static int unixDeviceCharacteristics(sqlite3_file *id){ 4098 unixFile *pFd = (unixFile*)id; 4099 setDeviceCharacteristics(pFd); 4100 return pFd->deviceCharacteristics; 4101 } 4102 4103 #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 4104 4105 /* 4106 ** Return the system page size. 4107 ** 4108 ** This function should not be called directly by other code in this file. 4109 ** Instead, it should be called via macro osGetpagesize(). 
4110 */ 4111 static int unixGetpagesize(void){ 4112 #if OS_VXWORKS 4113 return 1024; 4114 #elif defined(_BSD_SOURCE) 4115 return getpagesize(); 4116 #else 4117 return (int)sysconf(_SC_PAGESIZE); 4118 #endif 4119 } 4120 4121 #endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */ 4122 4123 #ifndef SQLITE_OMIT_WAL 4124 4125 /* 4126 ** Object used to represent an shared memory buffer. 4127 ** 4128 ** When multiple threads all reference the same wal-index, each thread 4129 ** has its own unixShm object, but they all point to a single instance 4130 ** of this unixShmNode object. In other words, each wal-index is opened 4131 ** only once per process. 4132 ** 4133 ** Each unixShmNode object is connected to a single unixInodeInfo object. 4134 ** We could coalesce this object into unixInodeInfo, but that would mean 4135 ** every open file that does not use shared memory (in other words, most 4136 ** open files) would have to carry around this extra information. So 4137 ** the unixInodeInfo object contains a pointer to this unixShmNode object 4138 ** and the unixShmNode object is created only when needed. 4139 ** 4140 ** unixMutexHeld() must be true when creating or destroying 4141 ** this object or while reading or writing the following fields: 4142 ** 4143 ** nRef 4144 ** 4145 ** The following fields are read-only after the object is created: 4146 ** 4147 ** fid 4148 ** zFilename 4149 ** 4150 ** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and 4151 ** unixMutexHeld() is true when reading or writing any other field 4152 ** in this structure. 
4153 */ 4154 struct unixShmNode { 4155 unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ 4156 sqlite3_mutex *mutex; /* Mutex to access this object */ 4157 char *zFilename; /* Name of the mmapped file */ 4158 int h; /* Open file descriptor */ 4159 int szRegion; /* Size of shared-memory regions */ 4160 u16 nRegion; /* Size of array apRegion */ 4161 u8 isReadonly; /* True if read-only */ 4162 u8 isUnlocked; /* True if no DMS lock held */ 4163 char **apRegion; /* Array of mapped shared-memory regions */ 4164 int nRef; /* Number of unixShm objects pointing to this */ 4165 unixShm *pFirst; /* All unixShm objects pointing to this */ 4166 #ifdef SQLITE_DEBUG 4167 u8 exclMask; /* Mask of exclusive locks held */ 4168 u8 sharedMask; /* Mask of shared locks held */ 4169 u8 nextShmId; /* Next available unixShm.id value */ 4170 #endif 4171 }; 4172 4173 /* 4174 ** Structure used internally by this VFS to record the state of an 4175 ** open shared memory connection. 4176 ** 4177 ** The following fields are initialized when this object is created and 4178 ** are read-only thereafter: 4179 ** 4180 ** unixShm.pFile 4181 ** unixShm.id 4182 ** 4183 ** All other fields are read/write. The unixShm.pFile->mutex must be held 4184 ** while accessing any read/write fields. 4185 */ 4186 struct unixShm { 4187 unixShmNode *pShmNode; /* The underlying unixShmNode object */ 4188 unixShm *pNext; /* Next unixShm with the same unixShmNode */ 4189 u8 hasMutex; /* True if holding the unixShmNode mutex */ 4190 u8 id; /* Id of this connection within its unixShmNode */ 4191 u16 sharedMask; /* Mask of shared locks held */ 4192 u16 exclMask; /* Mask of exclusive locks held */ 4193 }; 4194 4195 /* 4196 ** Constants used for locking 4197 */ 4198 #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ 4199 #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ 4200 4201 /* 4202 ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. 
4203 ** 4204 ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking 4205 ** otherwise. 4206 */ 4207 static int unixShmSystemLock( 4208 unixFile *pFile, /* Open connection to the WAL file */ 4209 int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ 4210 int ofst, /* First byte of the locking range */ 4211 int n /* Number of bytes to lock */ 4212 ){ 4213 unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ 4214 struct flock f; /* The posix advisory locking structure */ 4215 int rc = SQLITE_OK; /* Result code form fcntl() */ 4216 4217 /* Access to the unixShmNode object is serialized by the caller */ 4218 pShmNode = pFile->pInode->pShmNode; 4219 assert( pShmNode->nRef==0 || sqlite3_mutex_held(pShmNode->mutex) ); 4220 4221 /* Shared locks never span more than one byte */ 4222 assert( n==1 || lockType!=F_RDLCK ); 4223 4224 /* Locks are within range */ 4225 assert( n>=1 && n<=SQLITE_SHM_NLOCK ); 4226 4227 if( pShmNode->h>=0 ){ 4228 /* Initialize the locking parameters */ 4229 f.l_type = lockType; 4230 f.l_whence = SEEK_SET; 4231 f.l_start = ofst; 4232 f.l_len = n; 4233 rc = osSetPosixAdvisoryLock(pShmNode->h, &f, pFile); 4234 rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY; 4235 } 4236 4237 /* Update the global lock state and do debug tracing */ 4238 #ifdef SQLITE_DEBUG 4239 { u16 mask; 4240 OSTRACE(("SHM-LOCK ")); 4241 mask = ofst>31 ? 
0xffff : (1<<(ofst+n)) - (1<<ofst); 4242 if( rc==SQLITE_OK ){ 4243 if( lockType==F_UNLCK ){ 4244 OSTRACE(("unlock %d ok", ofst)); 4245 pShmNode->exclMask &= ~mask; 4246 pShmNode->sharedMask &= ~mask; 4247 }else if( lockType==F_RDLCK ){ 4248 OSTRACE(("read-lock %d ok", ofst)); 4249 pShmNode->exclMask &= ~mask; 4250 pShmNode->sharedMask |= mask; 4251 }else{ 4252 assert( lockType==F_WRLCK ); 4253 OSTRACE(("write-lock %d ok", ofst)); 4254 pShmNode->exclMask |= mask; 4255 pShmNode->sharedMask &= ~mask; 4256 } 4257 }else{ 4258 if( lockType==F_UNLCK ){ 4259 OSTRACE(("unlock %d failed", ofst)); 4260 }else if( lockType==F_RDLCK ){ 4261 OSTRACE(("read-lock failed")); 4262 }else{ 4263 assert( lockType==F_WRLCK ); 4264 OSTRACE(("write-lock %d failed", ofst)); 4265 } 4266 } 4267 OSTRACE((" - afterwards %03x,%03x\n", 4268 pShmNode->sharedMask, pShmNode->exclMask)); 4269 } 4270 #endif 4271 4272 return rc; 4273 } 4274 4275 /* 4276 ** Return the minimum number of 32KB shm regions that should be mapped at 4277 ** a time, assuming that each mapping must be an integer multiple of the 4278 ** current system page-size. 4279 ** 4280 ** Usually, this is 1. The exception seems to be systems that are configured 4281 ** to use 64KB pages - in this case each mapping must cover at least two 4282 ** shm regions. 4283 */ 4284 static int unixShmRegionPerMap(void){ 4285 int shmsz = 32*1024; /* SHM region size */ 4286 int pgsz = osGetpagesize(); /* System page size */ 4287 assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ 4288 if( pgsz<shmsz ) return 1; 4289 return pgsz/shmsz; 4290 } 4291 4292 /* 4293 ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. 4294 ** 4295 ** This is not a VFS shared-memory method; it is a utility function called 4296 ** by VFS shared-memory methods. 
4297 */ 4298 static void unixShmPurge(unixFile *pFd){ 4299 unixShmNode *p = pFd->pInode->pShmNode; 4300 assert( unixMutexHeld() ); 4301 if( p && ALWAYS(p->nRef==0) ){ 4302 int nShmPerMap = unixShmRegionPerMap(); 4303 int i; 4304 assert( p->pInode==pFd->pInode ); 4305 sqlite3_mutex_free(p->mutex); 4306 for(i=0; i<p->nRegion; i+=nShmPerMap){ 4307 if( p->h>=0 ){ 4308 osMunmap(p->apRegion[i], p->szRegion); 4309 }else{ 4310 sqlite3_free(p->apRegion[i]); 4311 } 4312 } 4313 sqlite3_free(p->apRegion); 4314 if( p->h>=0 ){ 4315 robust_close(pFd, p->h, __LINE__); 4316 p->h = -1; 4317 } 4318 p->pInode->pShmNode = 0; 4319 sqlite3_free(p); 4320 } 4321 } 4322 4323 /* 4324 ** The DMS lock has not yet been taken on shm file pShmNode. Attempt to 4325 ** take it now. Return SQLITE_OK if successful, or an SQLite error 4326 ** code otherwise. 4327 ** 4328 ** If the DMS cannot be locked because this is a readonly_shm=1 4329 ** connection and no other process already holds a lock, return 4330 ** SQLITE_READONLY_CANTINIT and set pShmNode->isUnlocked=1. 4331 */ 4332 static int unixLockSharedMemory(unixFile *pDbFd, unixShmNode *pShmNode){ 4333 struct flock lock; 4334 int rc = SQLITE_OK; 4335 4336 /* Use F_GETLK to determine the locks other processes are holding 4337 ** on the DMS byte. If it indicates that another process is holding 4338 ** a SHARED lock, then this process may also take a SHARED lock 4339 ** and proceed with opening the *-shm file. 4340 ** 4341 ** Or, if no other process is holding any lock, then this process 4342 ** is the first to open it. In this case take an EXCLUSIVE lock on the 4343 ** DMS byte and truncate the *-shm file to zero bytes in size. Then 4344 ** downgrade to a SHARED lock on the DMS byte. 4345 ** 4346 ** If another process is holding an EXCLUSIVE lock on the DMS byte, 4347 ** return SQLITE_BUSY to the caller (it will try again). An earlier 4348 ** version of this code attempted the SHARED lock at this point. 
But 4349 ** this introduced a subtle race condition: if the process holding 4350 ** EXCLUSIVE failed just before truncating the *-shm file, then this 4351 ** process might open and use the *-shm file without truncating it. 4352 ** And if the *-shm file has been corrupted by a power failure or 4353 ** system crash, the database itself may also become corrupt. */ 4354 lock.l_whence = SEEK_SET; 4355 lock.l_start = UNIX_SHM_DMS; 4356 lock.l_len = 1; 4357 lock.l_type = F_WRLCK; 4358 if( osFcntl(pShmNode->h, F_GETLK, &lock)!=0 ) { 4359 rc = SQLITE_IOERR_LOCK; 4360 }else if( lock.l_type==F_UNLCK ){ 4361 if( pShmNode->isReadonly ){ 4362 pShmNode->isUnlocked = 1; 4363 rc = SQLITE_READONLY_CANTINIT; 4364 }else{ 4365 rc = unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1); 4366 if( rc==SQLITE_OK && robust_ftruncate(pShmNode->h, 0) ){ 4367 rc = unixLogError(SQLITE_IOERR_SHMOPEN,"ftruncate",pShmNode->zFilename); 4368 } 4369 } 4370 }else if( lock.l_type==F_WRLCK ){ 4371 rc = SQLITE_BUSY; 4372 } 4373 4374 if( rc==SQLITE_OK ){ 4375 assert( lock.l_type==F_UNLCK || lock.l_type==F_RDLCK ); 4376 rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); 4377 } 4378 return rc; 4379 } 4380 4381 /* 4382 ** Open a shared-memory area associated with open database file pDbFd. 4383 ** This particular implementation uses mmapped files. 4384 ** 4385 ** The file used to implement shared-memory is in the same directory 4386 ** as the open database file and has the same name as the open database 4387 ** file with the "-shm" suffix added. For example, if the database file 4388 ** is "/home/user1/config.db" then the file that is created and mmapped 4389 ** for shared memory will be called "/home/user1/config.db-shm". 4390 ** 4391 ** Another approach to is to use files in /dev/shm or /dev/tmp or an 4392 ** some other tmpfs mount. 
But if a file in a different directory 4393 ** from the database file is used, then differing access permissions 4394 ** or a chroot() might cause two different processes on the same 4395 ** database to end up using different files for shared memory - 4396 ** meaning that their memory would not really be shared - resulting 4397 ** in database corruption. Nevertheless, this tmpfs file usage 4398 ** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm" 4399 ** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time 4400 ** option results in an incompatible build of SQLite; builds of SQLite 4401 ** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the 4402 ** same database file at the same time, database corruption will likely 4403 ** result. The SQLITE_SHM_DIRECTORY compile-time option is considered 4404 ** "unsupported" and may go away in a future SQLite release. 4405 ** 4406 ** When opening a new shared-memory file, if no other instances of that 4407 ** file are currently open, in this process or in other processes, then 4408 ** the file must be truncated to zero length or have its header cleared. 4409 ** 4410 ** If the original database file (pDbFd) is using the "unix-excl" VFS 4411 ** that means that an exclusive lock is held on the database file and 4412 ** that no other processes are able to read or write the database. In 4413 ** that case, we do not really need shared memory. No shared memory 4414 ** file is created. The shared memory will be simulated with heap memory. 
4415 */ 4416 static int unixOpenSharedMemory(unixFile *pDbFd){ 4417 struct unixShm *p = 0; /* The connection to be opened */ 4418 struct unixShmNode *pShmNode; /* The underlying mmapped file */ 4419 int rc = SQLITE_OK; /* Result code */ 4420 unixInodeInfo *pInode; /* The inode of fd */ 4421 char *zShm; /* Name of the file used for SHM */ 4422 int nShmFilename; /* Size of the SHM filename in bytes */ 4423 4424 /* Allocate space for the new unixShm object. */ 4425 p = sqlite3_malloc64( sizeof(*p) ); 4426 if( p==0 ) return SQLITE_NOMEM_BKPT; 4427 memset(p, 0, sizeof(*p)); 4428 assert( pDbFd->pShm==0 ); 4429 4430 /* Check to see if a unixShmNode object already exists. Reuse an existing 4431 ** one if present. Create a new one if necessary. 4432 */ 4433 unixEnterMutex(); 4434 pInode = pDbFd->pInode; 4435 pShmNode = pInode->pShmNode; 4436 if( pShmNode==0 ){ 4437 struct stat sStat; /* fstat() info for database file */ 4438 #ifndef SQLITE_SHM_DIRECTORY 4439 const char *zBasePath = pDbFd->zPath; 4440 #endif 4441 4442 /* Call fstat() to figure out the permissions on the database file. If 4443 ** a new *-shm file is created, an attempt will be made to create it 4444 ** with the same permissions. 
4445 */ 4446 if( osFstat(pDbFd->h, &sStat) ){ 4447 rc = SQLITE_IOERR_FSTAT; 4448 goto shm_open_err; 4449 } 4450 4451 #ifdef SQLITE_SHM_DIRECTORY 4452 nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; 4453 #else 4454 nShmFilename = 6 + (int)strlen(zBasePath); 4455 #endif 4456 pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename ); 4457 if( pShmNode==0 ){ 4458 rc = SQLITE_NOMEM_BKPT; 4459 goto shm_open_err; 4460 } 4461 memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename); 4462 zShm = pShmNode->zFilename = (char*)&pShmNode[1]; 4463 #ifdef SQLITE_SHM_DIRECTORY 4464 sqlite3_snprintf(nShmFilename, zShm, 4465 SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x", 4466 (u32)sStat.st_ino, (u32)sStat.st_dev); 4467 #else 4468 sqlite3_snprintf(nShmFilename, zShm, "%s-shm", zBasePath); 4469 sqlite3FileSuffix3(pDbFd->zPath, zShm); 4470 #endif 4471 pShmNode->h = -1; 4472 pDbFd->pInode->pShmNode = pShmNode; 4473 pShmNode->pInode = pDbFd->pInode; 4474 if( sqlite3GlobalConfig.bCoreMutex ){ 4475 pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); 4476 if( pShmNode->mutex==0 ){ 4477 rc = SQLITE_NOMEM_BKPT; 4478 goto shm_open_err; 4479 } 4480 } 4481 4482 if( pInode->bProcessLock==0 ){ 4483 if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){ 4484 pShmNode->h = robust_open(zShm, O_RDWR|O_CREAT, (sStat.st_mode&0777)); 4485 } 4486 if( pShmNode->h<0 ){ 4487 pShmNode->h = robust_open(zShm, O_RDONLY, (sStat.st_mode&0777)); 4488 if( pShmNode->h<0 ){ 4489 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShm); 4490 goto shm_open_err; 4491 } 4492 pShmNode->isReadonly = 1; 4493 } 4494 4495 /* If this process is running as root, make sure that the SHM file 4496 ** is owned by the same user that owns the original database. Otherwise, 4497 ** the original owner will not be able to connect. 
4498 */ 4499 robustFchown(pShmNode->h, sStat.st_uid, sStat.st_gid); 4500 4501 rc = unixLockSharedMemory(pDbFd, pShmNode); 4502 if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err; 4503 } 4504 } 4505 4506 /* Make the new connection a child of the unixShmNode */ 4507 p->pShmNode = pShmNode; 4508 #ifdef SQLITE_DEBUG 4509 p->id = pShmNode->nextShmId++; 4510 #endif 4511 pShmNode->nRef++; 4512 pDbFd->pShm = p; 4513 unixLeaveMutex(); 4514 4515 /* The reference count on pShmNode has already been incremented under 4516 ** the cover of the unixEnterMutex() mutex and the pointer from the 4517 ** new (struct unixShm) object to the pShmNode has been set. All that is 4518 ** left to do is to link the new object into the linked list starting 4519 ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex 4520 ** mutex. 4521 */ 4522 sqlite3_mutex_enter(pShmNode->mutex); 4523 p->pNext = pShmNode->pFirst; 4524 pShmNode->pFirst = p; 4525 sqlite3_mutex_leave(pShmNode->mutex); 4526 return rc; 4527 4528 /* Jump here on any error */ 4529 shm_open_err: 4530 unixShmPurge(pDbFd); /* This call frees pShmNode if required */ 4531 sqlite3_free(p); 4532 unixLeaveMutex(); 4533 return rc; 4534 } 4535 4536 /* 4537 ** This function is called to obtain a pointer to region iRegion of the 4538 ** shared-memory associated with the database file fd. Shared-memory regions 4539 ** are numbered starting from zero. Each shared-memory region is szRegion 4540 ** bytes in size. 4541 ** 4542 ** If an error occurs, an error code is returned and *pp is set to NULL. 4543 ** 4544 ** Otherwise, if the bExtend parameter is 0 and the requested shared-memory 4545 ** region has not been allocated (by any client, including one running in a 4546 ** separate process), then *pp is set to NULL and SQLITE_OK returned. If 4547 ** bExtend is non-zero and the requested shared-memory region has not yet 4548 ** been allocated, it is allocated by this function. 
4549 ** 4550 ** If the shared-memory region has already been allocated or is allocated by 4551 ** this call as described above, then it is mapped into this processes 4552 ** address space (if it is not already), *pp is set to point to the mapped 4553 ** memory and SQLITE_OK returned. 4554 */ 4555 static int unixShmMap( 4556 sqlite3_file *fd, /* Handle open on database file */ 4557 int iRegion, /* Region to retrieve */ 4558 int szRegion, /* Size of regions */ 4559 int bExtend, /* True to extend file if necessary */ 4560 void volatile **pp /* OUT: Mapped memory */ 4561 ){ 4562 unixFile *pDbFd = (unixFile*)fd; 4563 unixShm *p; 4564 unixShmNode *pShmNode; 4565 int rc = SQLITE_OK; 4566 int nShmPerMap = unixShmRegionPerMap(); 4567 int nReqRegion; 4568 4569 /* If the shared-memory file has not yet been opened, open it now. */ 4570 if( pDbFd->pShm==0 ){ 4571 rc = unixOpenSharedMemory(pDbFd); 4572 if( rc!=SQLITE_OK ) return rc; 4573 } 4574 4575 p = pDbFd->pShm; 4576 pShmNode = p->pShmNode; 4577 sqlite3_mutex_enter(pShmNode->mutex); 4578 if( pShmNode->isUnlocked ){ 4579 rc = unixLockSharedMemory(pDbFd, pShmNode); 4580 if( rc!=SQLITE_OK ) goto shmpage_out; 4581 pShmNode->isUnlocked = 0; 4582 } 4583 assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); 4584 assert( pShmNode->pInode==pDbFd->pInode ); 4585 assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); 4586 assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); 4587 4588 /* Minimum number of regions required to be mapped. */ 4589 nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; 4590 4591 if( pShmNode->nRegion<nReqRegion ){ 4592 char **apNew; /* New apRegion[] array */ 4593 int nByte = nReqRegion*szRegion; /* Minimum required file size */ 4594 struct stat sStat; /* Used by fstat() */ 4595 4596 pShmNode->szRegion = szRegion; 4597 4598 if( pShmNode->h>=0 ){ 4599 /* The requested region is not mapped into this processes address space. 4600 ** Check to see if it has been allocated (i.e. 
if the wal-index file is 4601 ** large enough to contain the requested region). 4602 */ 4603 if( osFstat(pShmNode->h, &sStat) ){ 4604 rc = SQLITE_IOERR_SHMSIZE; 4605 goto shmpage_out; 4606 } 4607 4608 if( sStat.st_size<nByte ){ 4609 /* The requested memory region does not exist. If bExtend is set to 4610 ** false, exit early. *pp will be set to NULL and SQLITE_OK returned. 4611 */ 4612 if( !bExtend ){ 4613 goto shmpage_out; 4614 } 4615 4616 /* Alternatively, if bExtend is true, extend the file. Do this by 4617 ** writing a single byte to the end of each (OS) page being 4618 ** allocated or extended. Technically, we need only write to the 4619 ** last page in order to extend the file. But writing to all new 4620 ** pages forces the OS to allocate them immediately, which reduces 4621 ** the chances of SIGBUS while accessing the mapped region later on. 4622 */ 4623 else{ 4624 static const int pgsz = 4096; 4625 int iPg; 4626 4627 /* Write to the last byte of each newly allocated or extended page */ 4628 assert( (nByte % pgsz)==0 ); 4629 for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){ 4630 int x = 0; 4631 if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, &x)!=1 ){ 4632 const char *zFile = pShmNode->zFilename; 4633 rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile); 4634 goto shmpage_out; 4635 } 4636 } 4637 } 4638 } 4639 } 4640 4641 /* Map the requested memory region into this processes address space. */ 4642 apNew = (char **)sqlite3_realloc( 4643 pShmNode->apRegion, nReqRegion*sizeof(char *) 4644 ); 4645 if( !apNew ){ 4646 rc = SQLITE_IOERR_NOMEM_BKPT; 4647 goto shmpage_out; 4648 } 4649 pShmNode->apRegion = apNew; 4650 while( pShmNode->nRegion<nReqRegion ){ 4651 int nMap = szRegion*nShmPerMap; 4652 int i; 4653 void *pMem; 4654 if( pShmNode->h>=0 ){ 4655 pMem = osMmap(0, nMap, 4656 pShmNode->isReadonly ? 
PROT_READ : PROT_READ|PROT_WRITE, 4657 MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion 4658 ); 4659 if( pMem==MAP_FAILED ){ 4660 rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); 4661 goto shmpage_out; 4662 } 4663 }else{ 4664 pMem = sqlite3_malloc64(szRegion); 4665 if( pMem==0 ){ 4666 rc = SQLITE_NOMEM_BKPT; 4667 goto shmpage_out; 4668 } 4669 memset(pMem, 0, szRegion); 4670 } 4671 4672 for(i=0; i<nShmPerMap; i++){ 4673 pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; 4674 } 4675 pShmNode->nRegion += nShmPerMap; 4676 } 4677 } 4678 4679 shmpage_out: 4680 if( pShmNode->nRegion>iRegion ){ 4681 *pp = pShmNode->apRegion[iRegion]; 4682 }else{ 4683 *pp = 0; 4684 } 4685 if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; 4686 sqlite3_mutex_leave(pShmNode->mutex); 4687 return rc; 4688 } 4689 4690 /* 4691 ** Change the lock state for a shared-memory segment. 4692 ** 4693 ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little 4694 ** different here than in posix. In xShmLock(), one can go from unlocked 4695 ** to shared and back or from unlocked to exclusive and back. But one may 4696 ** not go from shared to exclusive or from exclusive to shared. 
4697 */ 4698 static int unixShmLock( 4699 sqlite3_file *fd, /* Database file holding the shared memory */ 4700 int ofst, /* First lock to acquire or release */ 4701 int n, /* Number of locks to acquire or release */ 4702 int flags /* What to do with the lock */ 4703 ){ 4704 unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */ 4705 unixShm *p = pDbFd->pShm; /* The shared memory being locked */ 4706 unixShm *pX; /* For looping over all siblings */ 4707 unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ 4708 int rc = SQLITE_OK; /* Result code */ 4709 u16 mask; /* Mask of locks to take or release */ 4710 4711 assert( pShmNode==pDbFd->pInode->pShmNode ); 4712 assert( pShmNode->pInode==pDbFd->pInode ); 4713 assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); 4714 assert( n>=1 ); 4715 assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) 4716 || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) 4717 || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) 4718 || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); 4719 assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); 4720 assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); 4721 assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); 4722 4723 mask = (1<<(ofst+n)) - (1<<ofst); 4724 assert( n>1 || mask==(1<<ofst) ); 4725 sqlite3_mutex_enter(pShmNode->mutex); 4726 if( flags & SQLITE_SHM_UNLOCK ){ 4727 u16 allMask = 0; /* Mask of locks held by siblings */ 4728 4729 /* See if any siblings hold this same lock */ 4730 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 4731 if( pX==p ) continue; 4732 assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); 4733 allMask |= pX->sharedMask; 4734 } 4735 4736 /* Unlock the system-level locks */ 4737 if( (mask & allMask)==0 ){ 4738 rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n); 4739 }else{ 4740 rc = SQLITE_OK; 4741 } 4742 4743 /* Undo the local locks */ 4744 if( rc==SQLITE_OK ){ 4745 p->exclMask &= ~mask; 4746 p->sharedMask &= ~mask; 
4747 } 4748 }else if( flags & SQLITE_SHM_SHARED ){ 4749 u16 allShared = 0; /* Union of locks held by connections other than "p" */ 4750 4751 /* Find out which shared locks are already held by sibling connections. 4752 ** If any sibling already holds an exclusive lock, go ahead and return 4753 ** SQLITE_BUSY. 4754 */ 4755 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 4756 if( (pX->exclMask & mask)!=0 ){ 4757 rc = SQLITE_BUSY; 4758 break; 4759 } 4760 allShared |= pX->sharedMask; 4761 } 4762 4763 /* Get shared locks at the system level, if necessary */ 4764 if( rc==SQLITE_OK ){ 4765 if( (allShared & mask)==0 ){ 4766 rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n); 4767 }else{ 4768 rc = SQLITE_OK; 4769 } 4770 } 4771 4772 /* Get the local shared locks */ 4773 if( rc==SQLITE_OK ){ 4774 p->sharedMask |= mask; 4775 } 4776 }else{ 4777 /* Make sure no sibling connections hold locks that will block this 4778 ** lock. If any do, return SQLITE_BUSY right away. 4779 */ 4780 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ 4781 if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){ 4782 rc = SQLITE_BUSY; 4783 break; 4784 } 4785 } 4786 4787 /* Get the exclusive locks at the system level. Then if successful 4788 ** also mark the local connection as being locked. 4789 */ 4790 if( rc==SQLITE_OK ){ 4791 rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n); 4792 if( rc==SQLITE_OK ){ 4793 assert( (p->sharedMask & mask)==0 ); 4794 p->exclMask |= mask; 4795 } 4796 } 4797 } 4798 sqlite3_mutex_leave(pShmNode->mutex); 4799 OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n", 4800 p->id, osGetpid(0), p->sharedMask, p->exclMask)); 4801 return rc; 4802 } 4803 4804 /* 4805 ** Implement a memory barrier or memory fence on shared memory. 4806 ** 4807 ** All loads and stores begun before the barrier must complete before 4808 ** any load or store begun after the barrier. 
4809 */ 4810 static void unixShmBarrier( 4811 sqlite3_file *fd /* Database file holding the shared memory */ 4812 ){ 4813 UNUSED_PARAMETER(fd); 4814 sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ 4815 unixEnterMutex(); /* Also mutex, for redundancy */ 4816 unixLeaveMutex(); 4817 } 4818 4819 /* 4820 ** Close a connection to shared-memory. Delete the underlying 4821 ** storage if deleteFlag is true. 4822 ** 4823 ** If there is no shared memory associated with the connection then this 4824 ** routine is a harmless no-op. 4825 */ 4826 static int unixShmUnmap( 4827 sqlite3_file *fd, /* The underlying database file */ 4828 int deleteFlag /* Delete shared-memory if true */ 4829 ){ 4830 unixShm *p; /* The connection to be closed */ 4831 unixShmNode *pShmNode; /* The underlying shared-memory file */ 4832 unixShm **pp; /* For looping over sibling connections */ 4833 unixFile *pDbFd; /* The underlying database file */ 4834 4835 pDbFd = (unixFile*)fd; 4836 p = pDbFd->pShm; 4837 if( p==0 ) return SQLITE_OK; 4838 pShmNode = p->pShmNode; 4839 4840 assert( pShmNode==pDbFd->pInode->pShmNode ); 4841 assert( pShmNode->pInode==pDbFd->pInode ); 4842 4843 /* Remove connection p from the set of connections associated 4844 ** with pShmNode */ 4845 sqlite3_mutex_enter(pShmNode->mutex); 4846 for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} 4847 *pp = p->pNext; 4848 4849 /* Free the connection p */ 4850 sqlite3_free(p); 4851 pDbFd->pShm = 0; 4852 sqlite3_mutex_leave(pShmNode->mutex); 4853 4854 /* If pShmNode->nRef has reached 0, then close the underlying 4855 ** shared-memory file, too */ 4856 unixEnterMutex(); 4857 assert( pShmNode->nRef>0 ); 4858 pShmNode->nRef--; 4859 if( pShmNode->nRef==0 ){ 4860 if( deleteFlag && pShmNode->h>=0 ){ 4861 osUnlink(pShmNode->zFilename); 4862 } 4863 unixShmPurge(pDbFd); 4864 } 4865 unixLeaveMutex(); 4866 4867 return SQLITE_OK; 4868 } 4869 4870 4871 #else 4872 # define unixShmMap 0 4873 # define unixShmLock 0 4874 # define 
unixShmBarrier 0 4875 # define unixShmUnmap 0 4876 #endif /* #ifndef SQLITE_OMIT_WAL */ 4877 4878 #if SQLITE_MAX_MMAP_SIZE>0 4879 /* 4880 ** If it is currently memory mapped, unmap file pFd. 4881 */ 4882 static void unixUnmapfile(unixFile *pFd){ 4883 assert( pFd->nFetchOut==0 ); 4884 if( pFd->pMapRegion ){ 4885 osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); 4886 pFd->pMapRegion = 0; 4887 pFd->mmapSize = 0; 4888 pFd->mmapSizeActual = 0; 4889 } 4890 } 4891 4892 /* 4893 ** Attempt to set the size of the memory mapping maintained by file 4894 ** descriptor pFd to nNew bytes. Any existing mapping is discarded. 4895 ** 4896 ** If successful, this function sets the following variables: 4897 ** 4898 ** unixFile.pMapRegion 4899 ** unixFile.mmapSize 4900 ** unixFile.mmapSizeActual 4901 ** 4902 ** If unsuccessful, an error message is logged via sqlite3_log() and 4903 ** the three variables above are zeroed. In this case SQLite should 4904 ** continue accessing the database using the xRead() and xWrite() 4905 ** methods. 
4906 */ 4907 static void unixRemapfile( 4908 unixFile *pFd, /* File descriptor object */ 4909 i64 nNew /* Required mapping size */ 4910 ){ 4911 const char *zErr = "mmap"; 4912 int h = pFd->h; /* File descriptor open on db file */ 4913 u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ 4914 i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ 4915 u8 *pNew = 0; /* Location of new mapping */ 4916 int flags = PROT_READ; /* Flags to pass to mmap() */ 4917 4918 assert( pFd->nFetchOut==0 ); 4919 assert( nNew>pFd->mmapSize ); 4920 assert( nNew<=pFd->mmapSizeMax ); 4921 assert( nNew>0 ); 4922 assert( pFd->mmapSizeActual>=pFd->mmapSize ); 4923 assert( MAP_FAILED!=0 ); 4924 4925 #ifdef SQLITE_MMAP_READWRITE 4926 if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; 4927 #endif 4928 4929 if( pOrig ){ 4930 #if HAVE_MREMAP 4931 i64 nReuse = pFd->mmapSize; 4932 #else 4933 const int szSyspage = osGetpagesize(); 4934 i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); 4935 #endif 4936 u8 *pReq = &pOrig[nReuse]; 4937 4938 /* Unmap any pages of the existing mapping that cannot be reused. */ 4939 if( nReuse!=nOrig ){ 4940 osMunmap(pReq, nOrig-nReuse); 4941 } 4942 4943 #if HAVE_MREMAP 4944 pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); 4945 zErr = "mremap"; 4946 #else 4947 pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); 4948 if( pNew!=MAP_FAILED ){ 4949 if( pNew!=pReq ){ 4950 osMunmap(pNew, nNew - nReuse); 4951 pNew = 0; 4952 }else{ 4953 pNew = pOrig; 4954 } 4955 } 4956 #endif 4957 4958 /* The attempt to extend the existing mapping failed. Free it. */ 4959 if( pNew==MAP_FAILED || pNew==0 ){ 4960 osMunmap(pOrig, nReuse); 4961 } 4962 } 4963 4964 /* If pNew is still NULL, try to create an entirely new mapping. 
*/ 4965 if( pNew==0 ){ 4966 pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); 4967 } 4968 4969 if( pNew==MAP_FAILED ){ 4970 pNew = 0; 4971 nNew = 0; 4972 unixLogError(SQLITE_OK, zErr, pFd->zPath); 4973 4974 /* If the mmap() above failed, assume that all subsequent mmap() calls 4975 ** will probably fail too. Fall back to using xRead/xWrite exclusively 4976 ** in this case. */ 4977 pFd->mmapSizeMax = 0; 4978 } 4979 pFd->pMapRegion = (void *)pNew; 4980 pFd->mmapSize = pFd->mmapSizeActual = nNew; 4981 } 4982 4983 /* 4984 ** Memory map or remap the file opened by file-descriptor pFd (if the file 4985 ** is already mapped, the existing mapping is replaced by the new). Or, if 4986 ** there already exists a mapping for this file, and there are still 4987 ** outstanding xFetch() references to it, this function is a no-op. 4988 ** 4989 ** If parameter nByte is non-negative, then it is the requested size of 4990 ** the mapping to create. Otherwise, if nByte is less than zero, then the 4991 ** requested size is the size of the file on disk. The actual size of the 4992 ** created mapping is either the requested size or the value configured 4993 ** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. 4994 ** 4995 ** SQLITE_OK is returned if no error occurs (even if the mapping is not 4996 ** recreated as a result of outstanding references) or an SQLite error 4997 ** code otherwise. 
4998 */ 4999 static int unixMapfile(unixFile *pFd, i64 nMap){ 5000 assert( nMap>=0 || pFd->nFetchOut==0 ); 5001 assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); 5002 if( pFd->nFetchOut>0 ) return SQLITE_OK; 5003 5004 if( nMap<0 ){ 5005 struct stat statbuf; /* Low-level file information */ 5006 if( osFstat(pFd->h, &statbuf) ){ 5007 return SQLITE_IOERR_FSTAT; 5008 } 5009 nMap = statbuf.st_size; 5010 } 5011 if( nMap>pFd->mmapSizeMax ){ 5012 nMap = pFd->mmapSizeMax; 5013 } 5014 5015 assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); 5016 if( nMap!=pFd->mmapSize ){ 5017 unixRemapfile(pFd, nMap); 5018 } 5019 5020 return SQLITE_OK; 5021 } 5022 #endif /* SQLITE_MAX_MMAP_SIZE>0 */ 5023 5024 /* 5025 ** If possible, return a pointer to a mapping of file fd starting at offset 5026 ** iOff. The mapping must be valid for at least nAmt bytes. 5027 ** 5028 ** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. 5029 ** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. 5030 ** Finally, if an error does occur, return an SQLite error code. The final 5031 ** value of *pp is undefined in this case. 5032 ** 5033 ** If this function does return a pointer, the caller must eventually 5034 ** release the reference by calling unixUnfetch(). 
5035 */ 5036 static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ 5037 #if SQLITE_MAX_MMAP_SIZE>0 5038 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ 5039 #endif 5040 *pp = 0; 5041 5042 #if SQLITE_MAX_MMAP_SIZE>0 5043 if( pFd->mmapSizeMax>0 ){ 5044 if( pFd->pMapRegion==0 ){ 5045 int rc = unixMapfile(pFd, -1); 5046 if( rc!=SQLITE_OK ) return rc; 5047 } 5048 if( pFd->mmapSize >= iOff+nAmt ){ 5049 *pp = &((u8 *)pFd->pMapRegion)[iOff]; 5050 pFd->nFetchOut++; 5051 } 5052 } 5053 #endif 5054 return SQLITE_OK; 5055 } 5056 5057 /* 5058 ** If the third argument is non-NULL, then this function releases a 5059 ** reference obtained by an earlier call to unixFetch(). The second 5060 ** argument passed to this function must be the same as the corresponding 5061 ** argument that was passed to the unixFetch() invocation. 5062 ** 5063 ** Or, if the third argument is NULL, then this function is being called 5064 ** to inform the VFS layer that, according to POSIX, any existing mapping 5065 ** may now be invalid and should be unmapped. 5066 */ 5067 static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ 5068 #if SQLITE_MAX_MMAP_SIZE>0 5069 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ 5070 UNUSED_PARAMETER(iOff); 5071 5072 /* If p==0 (unmap the entire file) then there must be no outstanding 5073 ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), 5074 ** then there must be at least one outstanding. */ 5075 assert( (p==0)==(pFd->nFetchOut==0) ); 5076 5077 /* If p!=0, it must match the iOff value. */ 5078 assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); 5079 5080 if( p ){ 5081 pFd->nFetchOut--; 5082 }else{ 5083 unixUnmapfile(pFd); 5084 } 5085 5086 assert( pFd->nFetchOut>=0 ); 5087 #else 5088 UNUSED_PARAMETER(fd); 5089 UNUSED_PARAMETER(p); 5090 UNUSED_PARAMETER(iOff); 5091 #endif 5092 return SQLITE_OK; 5093 } 5094 5095 /* 5096 ** Here ends the implementation of all sqlite3_file methods. 
**
********************** End sqlite3_file Methods *******************************
******************************************************************************/

/*
** This division contains definitions of sqlite3_io_methods objects that
** implement various file locking strategies.  It also contains definitions
** of "finder" functions.  A finder-function is used to locate the appropriate
** sqlite3_io_methods object for a particular database file.  The pAppData
** field of each sqlite3_vfs VFS object is initialized to be a pointer to
** the correct finder-function for that VFS.
**
** Most finder functions return a pointer to a fixed sqlite3_io_methods
** object.  The only interesting finder-function is autolockIoFinder, which
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
** For finder-function F, two objects are created:
**
**    (1) The real finder-function named "FImpl()".
**
**    (2) A constant pointer to this function named just "F".
**
**
** A pointer to the F pointer is used as the pAppData value for VFS
** objects.  We have to do this instead of letting pAppData point
** directly at the finder-function since C90 rules prevent a void*
** from being cast into a function pointer.
**
**
** Each instance of the IOMETHODS macro below generates two objects:
**
**   *  A constant sqlite3_io_methods object called METHOD that has locking
**      methods CLOSE, LOCK, UNLOCK, CKLOCK.
**
**   *  An I/O method finder function called FINDER that returns a pointer
**      to the METHOD object in the previous bullet.
5134 */ 5135 #define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP) \ 5136 static const sqlite3_io_methods METHOD = { \ 5137 VERSION, /* iVersion */ \ 5138 CLOSE, /* xClose */ \ 5139 unixRead, /* xRead */ \ 5140 unixWrite, /* xWrite */ \ 5141 unixTruncate, /* xTruncate */ \ 5142 unixSync, /* xSync */ \ 5143 unixFileSize, /* xFileSize */ \ 5144 LOCK, /* xLock */ \ 5145 UNLOCK, /* xUnlock */ \ 5146 CKLOCK, /* xCheckReservedLock */ \ 5147 unixFileControl, /* xFileControl */ \ 5148 unixSectorSize, /* xSectorSize */ \ 5149 unixDeviceCharacteristics, /* xDeviceCapabilities */ \ 5150 SHMMAP, /* xShmMap */ \ 5151 unixShmLock, /* xShmLock */ \ 5152 unixShmBarrier, /* xShmBarrier */ \ 5153 unixShmUnmap, /* xShmUnmap */ \ 5154 unixFetch, /* xFetch */ \ 5155 unixUnfetch, /* xUnfetch */ \ 5156 }; \ 5157 static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ 5158 UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ 5159 return &METHOD; \ 5160 } \ 5161 static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ 5162 = FINDER##Impl; 5163 5164 /* 5165 ** Here are all of the sqlite3_io_methods objects for each of the 5166 ** locking strategies. Functions that return pointers to these methods 5167 ** are also created. 
5168 */ 5169 IOMETHODS( 5170 posixIoFinder, /* Finder function name */ 5171 posixIoMethods, /* sqlite3_io_methods object name */ 5172 3, /* shared memory and mmap are enabled */ 5173 unixClose, /* xClose method */ 5174 unixLock, /* xLock method */ 5175 unixUnlock, /* xUnlock method */ 5176 unixCheckReservedLock, /* xCheckReservedLock method */ 5177 unixShmMap /* xShmMap method */ 5178 ) 5179 IOMETHODS( 5180 nolockIoFinder, /* Finder function name */ 5181 nolockIoMethods, /* sqlite3_io_methods object name */ 5182 3, /* shared memory is disabled */ 5183 nolockClose, /* xClose method */ 5184 nolockLock, /* xLock method */ 5185 nolockUnlock, /* xUnlock method */ 5186 nolockCheckReservedLock, /* xCheckReservedLock method */ 5187 0 /* xShmMap method */ 5188 ) 5189 IOMETHODS( 5190 dotlockIoFinder, /* Finder function name */ 5191 dotlockIoMethods, /* sqlite3_io_methods object name */ 5192 1, /* shared memory is disabled */ 5193 dotlockClose, /* xClose method */ 5194 dotlockLock, /* xLock method */ 5195 dotlockUnlock, /* xUnlock method */ 5196 dotlockCheckReservedLock, /* xCheckReservedLock method */ 5197 0 /* xShmMap method */ 5198 ) 5199 5200 #if SQLITE_ENABLE_LOCKING_STYLE 5201 IOMETHODS( 5202 flockIoFinder, /* Finder function name */ 5203 flockIoMethods, /* sqlite3_io_methods object name */ 5204 1, /* shared memory is disabled */ 5205 flockClose, /* xClose method */ 5206 flockLock, /* xLock method */ 5207 flockUnlock, /* xUnlock method */ 5208 flockCheckReservedLock, /* xCheckReservedLock method */ 5209 0 /* xShmMap method */ 5210 ) 5211 #endif 5212 5213 #if OS_VXWORKS 5214 IOMETHODS( 5215 semIoFinder, /* Finder function name */ 5216 semIoMethods, /* sqlite3_io_methods object name */ 5217 1, /* shared memory is disabled */ 5218 semXClose, /* xClose method */ 5219 semXLock, /* xLock method */ 5220 semXUnlock, /* xUnlock method */ 5221 semXCheckReservedLock, /* xCheckReservedLock method */ 5222 0 /* xShmMap method */ 5223 ) 5224 #endif 5225 5226 #if defined(__APPLE__) && 
SQLITE_ENABLE_LOCKING_STYLE 5227 IOMETHODS( 5228 afpIoFinder, /* Finder function name */ 5229 afpIoMethods, /* sqlite3_io_methods object name */ 5230 1, /* shared memory is disabled */ 5231 afpClose, /* xClose method */ 5232 afpLock, /* xLock method */ 5233 afpUnlock, /* xUnlock method */ 5234 afpCheckReservedLock, /* xCheckReservedLock method */ 5235 0 /* xShmMap method */ 5236 ) 5237 #endif 5238 5239 /* 5240 ** The proxy locking method is a "super-method" in the sense that it 5241 ** opens secondary file descriptors for the conch and lock files and 5242 ** it uses proxy, dot-file, AFP, and flock() locking methods on those 5243 ** secondary files. For this reason, the division that implements 5244 ** proxy locking is located much further down in the file. But we need 5245 ** to go ahead and define the sqlite3_io_methods and finder function 5246 ** for proxy locking here. So we forward declare the I/O methods. 5247 */ 5248 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5249 static int proxyClose(sqlite3_file*); 5250 static int proxyLock(sqlite3_file*, int); 5251 static int proxyUnlock(sqlite3_file*, int); 5252 static int proxyCheckReservedLock(sqlite3_file*, int*); 5253 IOMETHODS( 5254 proxyIoFinder, /* Finder function name */ 5255 proxyIoMethods, /* sqlite3_io_methods object name */ 5256 1, /* shared memory is disabled */ 5257 proxyClose, /* xClose method */ 5258 proxyLock, /* xLock method */ 5259 proxyUnlock, /* xUnlock method */ 5260 proxyCheckReservedLock, /* xCheckReservedLock method */ 5261 0 /* xShmMap method */ 5262 ) 5263 #endif 5264 5265 /* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */ 5266 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5267 IOMETHODS( 5268 nfsIoFinder, /* Finder function name */ 5269 nfsIoMethods, /* sqlite3_io_methods object name */ 5270 1, /* shared memory is disabled */ 5271 unixClose, /* xClose method */ 5272 unixLock, /* xLock method */ 5273 nfsUnlock, /* xUnlock method */ 5274 
unixCheckReservedLock, /* xCheckReservedLock method */ 5275 0 /* xShmMap method */ 5276 ) 5277 #endif 5278 5279 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5280 /* 5281 ** This "finder" function attempts to determine the best locking strategy 5282 ** for the database file "filePath". It then returns the sqlite3_io_methods 5283 ** object that implements that strategy. 5284 ** 5285 ** This is for MacOSX only. 5286 */ 5287 static const sqlite3_io_methods *autolockIoFinderImpl( 5288 const char *filePath, /* name of the database file */ 5289 unixFile *pNew /* open file object for the database file */ 5290 ){ 5291 static const struct Mapping { 5292 const char *zFilesystem; /* Filesystem type name */ 5293 const sqlite3_io_methods *pMethods; /* Appropriate locking method */ 5294 } aMap[] = { 5295 { "hfs", &posixIoMethods }, 5296 { "ufs", &posixIoMethods }, 5297 { "afpfs", &afpIoMethods }, 5298 { "smbfs", &afpIoMethods }, 5299 { "webdav", &nolockIoMethods }, 5300 { 0, 0 } 5301 }; 5302 int i; 5303 struct statfs fsInfo; 5304 struct flock lockInfo; 5305 5306 if( !filePath ){ 5307 /* If filePath==NULL that means we are dealing with a transient file 5308 ** that does not need to be locked. */ 5309 return &nolockIoMethods; 5310 } 5311 if( statfs(filePath, &fsInfo) != -1 ){ 5312 if( fsInfo.f_flags & MNT_RDONLY ){ 5313 return &nolockIoMethods; 5314 } 5315 for(i=0; aMap[i].zFilesystem; i++){ 5316 if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){ 5317 return aMap[i].pMethods; 5318 } 5319 } 5320 } 5321 5322 /* Default case. Handles, amongst others, "nfs". 5323 ** Test byte-range lock using fcntl(). If the call succeeds, 5324 ** assume that the file-system supports POSIX style locks. 
5325 */ 5326 lockInfo.l_len = 1; 5327 lockInfo.l_start = 0; 5328 lockInfo.l_whence = SEEK_SET; 5329 lockInfo.l_type = F_RDLCK; 5330 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 5331 if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){ 5332 return &nfsIoMethods; 5333 } else { 5334 return &posixIoMethods; 5335 } 5336 }else{ 5337 return &dotlockIoMethods; 5338 } 5339 } 5340 static const sqlite3_io_methods 5341 *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; 5342 5343 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 5344 5345 #if OS_VXWORKS 5346 /* 5347 ** This "finder" function for VxWorks checks to see if posix advisory 5348 ** locking works. If it does, then that is what is used. If it does not 5349 ** work, then fallback to named semaphore locking. 5350 */ 5351 static const sqlite3_io_methods *vxworksIoFinderImpl( 5352 const char *filePath, /* name of the database file */ 5353 unixFile *pNew /* the open file object */ 5354 ){ 5355 struct flock lockInfo; 5356 5357 if( !filePath ){ 5358 /* If filePath==NULL that means we are dealing with a transient file 5359 ** that does not need to be locked. */ 5360 return &nolockIoMethods; 5361 } 5362 5363 /* Test if fcntl() is supported and use POSIX style locks. 5364 ** Otherwise fall back to the named semaphore method. 
5365 */ 5366 lockInfo.l_len = 1; 5367 lockInfo.l_start = 0; 5368 lockInfo.l_whence = SEEK_SET; 5369 lockInfo.l_type = F_RDLCK; 5370 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { 5371 return &posixIoMethods; 5372 }else{ 5373 return &semIoMethods; 5374 } 5375 } 5376 static const sqlite3_io_methods 5377 *(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl; 5378 5379 #endif /* OS_VXWORKS */ 5380 5381 /* 5382 ** An abstract type for a pointer to an IO method finder function: 5383 */ 5384 typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); 5385 5386 5387 /**************************************************************************** 5388 **************************** sqlite3_vfs methods **************************** 5389 ** 5390 ** This division contains the implementation of methods on the 5391 ** sqlite3_vfs object. 5392 */ 5393 5394 /* 5395 ** Initialize the contents of the unixFile structure pointed to by pId. 5396 */ 5397 static int fillInUnixFile( 5398 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 5399 int h, /* Open file descriptor of file being opened */ 5400 sqlite3_file *pId, /* Write to the unixFile structure here */ 5401 const char *zFilename, /* Name of the file being opened */ 5402 int ctrlFlags /* Zero or more UNIXFILE_* values */ 5403 ){ 5404 const sqlite3_io_methods *pLockingStyle; 5405 unixFile *pNew = (unixFile *)pId; 5406 int rc = SQLITE_OK; 5407 5408 assert( pNew->pInode==NULL ); 5409 5410 /* No locking occurs in temporary files */ 5411 assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); 5412 5413 OSTRACE(("OPEN %-3d %s\n", h, zFilename)); 5414 pNew->h = h; 5415 pNew->pVfs = pVfs; 5416 pNew->zPath = zFilename; 5417 pNew->ctrlFlags = (u8)ctrlFlags; 5418 #if SQLITE_MAX_MMAP_SIZE>0 5419 pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; 5420 #endif 5421 if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? 
zFilename : 0), 5422 "psow", SQLITE_POWERSAFE_OVERWRITE) ){ 5423 pNew->ctrlFlags |= UNIXFILE_PSOW; 5424 } 5425 if( strcmp(pVfs->zName,"unix-excl")==0 ){ 5426 pNew->ctrlFlags |= UNIXFILE_EXCL; 5427 } 5428 5429 #if OS_VXWORKS 5430 pNew->pId = vxworksFindFileId(zFilename); 5431 if( pNew->pId==0 ){ 5432 ctrlFlags |= UNIXFILE_NOLOCK; 5433 rc = SQLITE_NOMEM_BKPT; 5434 } 5435 #endif 5436 5437 if( ctrlFlags & UNIXFILE_NOLOCK ){ 5438 pLockingStyle = &nolockIoMethods; 5439 }else{ 5440 pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); 5441 #if SQLITE_ENABLE_LOCKING_STYLE 5442 /* Cache zFilename in the locking context (AFP and dotlock override) for 5443 ** proxyLock activation is possible (remote proxy is based on db name) 5444 ** zFilename remains valid until file is closed, to support */ 5445 pNew->lockingContext = (void*)zFilename; 5446 #endif 5447 } 5448 5449 if( pLockingStyle == &posixIoMethods 5450 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 5451 || pLockingStyle == &nfsIoMethods 5452 #endif 5453 ){ 5454 unixEnterMutex(); 5455 rc = findInodeInfo(pNew, &pNew->pInode); 5456 if( rc!=SQLITE_OK ){ 5457 /* If an error occurred in findInodeInfo(), close the file descriptor 5458 ** immediately, before releasing the mutex. findInodeInfo() may fail 5459 ** in two scenarios: 5460 ** 5461 ** (a) A call to fstat() failed. 5462 ** (b) A malloc failed. 5463 ** 5464 ** Scenario (b) may only occur if the process is holding no other 5465 ** file descriptors open on the same file. If there were other file 5466 ** descriptors on this file, then no malloc would be required by 5467 ** findInodeInfo(). If this is the case, it is quite safe to close 5468 ** handle h - as it is guaranteed that no posix locks will be released 5469 ** by doing so. 5470 ** 5471 ** If scenario (a) caused the error then things are not so safe. The 5472 ** implicit assumption here is that if fstat() fails, things are in 5473 ** such bad shape that dropping a lock or two doesn't matter much. 
5474 */ 5475 robust_close(pNew, h, __LINE__); 5476 h = -1; 5477 } 5478 unixLeaveMutex(); 5479 } 5480 5481 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 5482 else if( pLockingStyle == &afpIoMethods ){ 5483 /* AFP locking uses the file path so it needs to be included in 5484 ** the afpLockingContext. 5485 */ 5486 afpLockingContext *pCtx; 5487 pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) ); 5488 if( pCtx==0 ){ 5489 rc = SQLITE_NOMEM_BKPT; 5490 }else{ 5491 /* NB: zFilename exists and remains valid until the file is closed 5492 ** according to requirement F11141. So we do not need to make a 5493 ** copy of the filename. */ 5494 pCtx->dbPath = zFilename; 5495 pCtx->reserved = 0; 5496 srandomdev(); 5497 unixEnterMutex(); 5498 rc = findInodeInfo(pNew, &pNew->pInode); 5499 if( rc!=SQLITE_OK ){ 5500 sqlite3_free(pNew->lockingContext); 5501 robust_close(pNew, h, __LINE__); 5502 h = -1; 5503 } 5504 unixLeaveMutex(); 5505 } 5506 } 5507 #endif 5508 5509 else if( pLockingStyle == &dotlockIoMethods ){ 5510 /* Dotfile locking uses the file path so it needs to be included in 5511 ** the dotlockLockingContext 5512 */ 5513 char *zLockFile; 5514 int nFilename; 5515 assert( zFilename!=0 ); 5516 nFilename = (int)strlen(zFilename) + 6; 5517 zLockFile = (char *)sqlite3_malloc64(nFilename); 5518 if( zLockFile==0 ){ 5519 rc = SQLITE_NOMEM_BKPT; 5520 }else{ 5521 sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename); 5522 } 5523 pNew->lockingContext = zLockFile; 5524 } 5525 5526 #if OS_VXWORKS 5527 else if( pLockingStyle == &semIoMethods ){ 5528 /* Named semaphore locking uses the file path so it needs to be 5529 ** included in the semLockingContext 5530 */ 5531 unixEnterMutex(); 5532 rc = findInodeInfo(pNew, &pNew->pInode); 5533 if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){ 5534 char *zSemName = pNew->pInode->aSemName; 5535 int n; 5536 sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem", 5537 pNew->pId->zCanonicalName); 5538 for( n=1; 
zSemName[n]; n++ ) 5539 if( zSemName[n]=='/' ) zSemName[n] = '_'; 5540 pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1); 5541 if( pNew->pInode->pSem == SEM_FAILED ){ 5542 rc = SQLITE_NOMEM_BKPT; 5543 pNew->pInode->aSemName[0] = '\0'; 5544 } 5545 } 5546 unixLeaveMutex(); 5547 } 5548 #endif 5549 5550 storeLastErrno(pNew, 0); 5551 #if OS_VXWORKS 5552 if( rc!=SQLITE_OK ){ 5553 if( h>=0 ) robust_close(pNew, h, __LINE__); 5554 h = -1; 5555 osUnlink(zFilename); 5556 pNew->ctrlFlags |= UNIXFILE_DELETE; 5557 } 5558 #endif 5559 if( rc!=SQLITE_OK ){ 5560 if( h>=0 ) robust_close(pNew, h, __LINE__); 5561 }else{ 5562 pNew->pMethod = pLockingStyle; 5563 OpenCounter(+1); 5564 verifyDbFile(pNew); 5565 } 5566 return rc; 5567 } 5568 5569 /* 5570 ** Return the name of a directory in which to put temporary files. 5571 ** If no suitable temporary file directory can be found, return NULL. 5572 */ 5573 static const char *unixTempFileDir(void){ 5574 static const char *azDirs[] = { 5575 0, 5576 0, 5577 "/var/tmp", 5578 "/usr/tmp", 5579 "/tmp", 5580 "." 5581 }; 5582 unsigned int i = 0; 5583 struct stat buf; 5584 const char *zDir = sqlite3_temp_directory; 5585 5586 if( !azDirs[0] ) azDirs[0] = getenv("SQLITE_TMPDIR"); 5587 if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR"); 5588 while(1){ 5589 if( zDir!=0 5590 && osStat(zDir, &buf)==0 5591 && S_ISDIR(buf.st_mode) 5592 && osAccess(zDir, 03)==0 5593 ){ 5594 return zDir; 5595 } 5596 if( i>=sizeof(azDirs)/sizeof(azDirs[0]) ) break; 5597 zDir = azDirs[i++]; 5598 } 5599 return 0; 5600 } 5601 5602 /* 5603 ** Create a temporary file name in zBuf. zBuf must be allocated 5604 ** by the calling process and must be big enough to hold at least 5605 ** pVfs->mxPathname bytes. 
5606 */ 5607 static int unixGetTempname(int nBuf, char *zBuf){ 5608 const char *zDir; 5609 int iLimit = 0; 5610 5611 /* It's odd to simulate an io-error here, but really this is just 5612 ** using the io-error infrastructure to test that SQLite handles this 5613 ** function failing. 5614 */ 5615 zBuf[0] = 0; 5616 SimulateIOError( return SQLITE_IOERR ); 5617 5618 zDir = unixTempFileDir(); 5619 if( zDir==0 ) return SQLITE_IOERR_GETTEMPPATH; 5620 do{ 5621 u64 r; 5622 sqlite3_randomness(sizeof(r), &r); 5623 assert( nBuf>2 ); 5624 zBuf[nBuf-2] = 0; 5625 sqlite3_snprintf(nBuf, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX"%llx%c", 5626 zDir, r, 0); 5627 if( zBuf[nBuf-2]!=0 || (iLimit++)>10 ) return SQLITE_ERROR; 5628 }while( osAccess(zBuf,0)==0 ); 5629 return SQLITE_OK; 5630 } 5631 5632 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 5633 /* 5634 ** Routine to transform a unixFile into a proxy-locking unixFile. 5635 ** Implementation in the proxy-lock division, but used by unixOpen() 5636 ** if SQLITE_PREFER_PROXY_LOCKING is defined. 5637 */ 5638 static int proxyTransformUnixFile(unixFile*, const char*); 5639 #endif 5640 5641 /* 5642 ** Search for an unused file descriptor that was opened on the database 5643 ** file (not a journal or master-journal file) identified by pathname 5644 ** zPath with SQLITE_OPEN_XXX flags matching those passed as the second 5645 ** argument to this function. 5646 ** 5647 ** Such a file descriptor may exist if a database connection was closed 5648 ** but the associated file descriptor could not be closed because some 5649 ** other file descriptor open on the same file is holding a file-lock. 5650 ** Refer to comments in the unixClose() function and the lengthy comment 5651 ** describing "Posix Advisory Locking" at the start of this file for 5652 ** further details. Also, ticket #4018. 5653 ** 5654 ** If a suitable file descriptor is found, then it is returned. If no 5655 ** such file descriptor is located, -1 is returned. 
5656 */ 5657 static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ 5658 UnixUnusedFd *pUnused = 0; 5659 5660 /* Do not search for an unused file descriptor on vxworks. Not because 5661 ** vxworks would not benefit from the change (it might, we're not sure), 5662 ** but because no way to test it is currently available. It is better 5663 ** not to risk breaking vxworks support for the sake of such an obscure 5664 ** feature. */ 5665 #if !OS_VXWORKS 5666 struct stat sStat; /* Results of stat() call */ 5667 5668 unixEnterMutex(); 5669 5670 /* A stat() call may fail for various reasons. If this happens, it is 5671 ** almost certain that an open() call on the same path will also fail. 5672 ** For this reason, if an error occurs in the stat() call here, it is 5673 ** ignored and -1 is returned. The caller will try to open a new file 5674 ** descriptor on the same path, fail, and return an error to SQLite. 5675 ** 5676 ** Even if a subsequent open() call does succeed, the consequences of 5677 ** not searching for a reusable file descriptor are not dire. */ 5678 if( nUnusedFd>0 && 0==osStat(zPath, &sStat) ){ 5679 unixInodeInfo *pInode; 5680 5681 pInode = inodeList; 5682 while( pInode && (pInode->fileId.dev!=sStat.st_dev 5683 || pInode->fileId.ino!=(u64)sStat.st_ino) ){ 5684 pInode = pInode->pNext; 5685 } 5686 if( pInode ){ 5687 UnixUnusedFd **pp; 5688 for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); 5689 pUnused = *pp; 5690 if( pUnused ){ 5691 nUnusedFd--; 5692 *pp = pUnused->pNext; 5693 } 5694 } 5695 } 5696 unixLeaveMutex(); 5697 #endif /* if !OS_VXWORKS */ 5698 return pUnused; 5699 } 5700 5701 /* 5702 ** Find the mode, uid and gid of file zFile. 5703 */ 5704 static int getFileMode( 5705 const char *zFile, /* File name */ 5706 mode_t *pMode, /* OUT: Permissions of zFile */ 5707 uid_t *pUid, /* OUT: uid of zFile. */ 5708 gid_t *pGid /* OUT: gid of zFile. 
*/ 5709 ){ 5710 struct stat sStat; /* Output of stat() on database file */ 5711 int rc = SQLITE_OK; 5712 if( 0==osStat(zFile, &sStat) ){ 5713 *pMode = sStat.st_mode & 0777; 5714 *pUid = sStat.st_uid; 5715 *pGid = sStat.st_gid; 5716 }else{ 5717 rc = SQLITE_IOERR_FSTAT; 5718 } 5719 return rc; 5720 } 5721 5722 /* 5723 ** This function is called by unixOpen() to determine the unix permissions 5724 ** to create new files with. If no error occurs, then SQLITE_OK is returned 5725 ** and a value suitable for passing as the third argument to open(2) is 5726 ** written to *pMode. If an IO error occurs, an SQLite error code is 5727 ** returned and the value of *pMode is not modified. 5728 ** 5729 ** In most cases, this routine sets *pMode to 0, which will become 5730 ** an indication to robust_open() to create the file using 5731 ** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. 5732 ** But if the file being opened is a WAL or regular journal file, then 5733 ** this function queries the file-system for the permissions on the 5734 ** corresponding database file and sets *pMode to this value. Whenever 5735 ** possible, WAL and journal files are created using the same permissions 5736 ** as the associated database file. 5737 ** 5738 ** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the 5739 ** original filename is unavailable. But 8_3_NAMES is only used for 5740 ** FAT filesystems and permissions do not matter there, so just use 5741 ** the default permissions. 
5742 */ 5743 static int findCreateFileMode( 5744 const char *zPath, /* Path of file (possibly) being created */ 5745 int flags, /* Flags passed as 4th argument to xOpen() */ 5746 mode_t *pMode, /* OUT: Permissions to open file with */ 5747 uid_t *pUid, /* OUT: uid to set on the file */ 5748 gid_t *pGid /* OUT: gid to set on the file */ 5749 ){ 5750 int rc = SQLITE_OK; /* Return Code */ 5751 *pMode = 0; 5752 *pUid = 0; 5753 *pGid = 0; 5754 if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ 5755 char zDb[MAX_PATHNAME+1]; /* Database file path */ 5756 int nDb; /* Number of valid bytes in zDb */ 5757 5758 /* zPath is a path to a WAL or journal file. The following block derives 5759 ** the path to the associated database file from zPath. This block handles 5760 ** the following naming conventions: 5761 ** 5762 ** "<path to db>-journal" 5763 ** "<path to db>-wal" 5764 ** "<path to db>-journalNN" 5765 ** "<path to db>-walNN" 5766 ** 5767 ** where NN is a decimal number. The NN naming schemes are 5768 ** used by the test_multiplex.c module. 5769 */ 5770 nDb = sqlite3Strlen30(zPath) - 1; 5771 while( zPath[nDb]!='-' ){ 5772 /* In normal operation, the journal file name will always contain 5773 ** a '-' character. However in 8+3 filename mode, or if a corrupt 5774 ** rollback journal specifies a master journal with a goofy name, then 5775 ** the '-' might be missing. */ 5776 if( nDb==0 || zPath[nDb]=='.' ) return SQLITE_OK; 5777 nDb--; 5778 } 5779 memcpy(zDb, zPath, nDb); 5780 zDb[nDb] = '\0'; 5781 5782 rc = getFileMode(zDb, pMode, pUid, pGid); 5783 }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ 5784 *pMode = 0600; 5785 }else if( flags & SQLITE_OPEN_URI ){ 5786 /* If this is a main database file and the file was opened using a URI 5787 ** filename, check for the "modeof" parameter. If present, interpret 5788 ** its value as a filename and try to copy the mode, uid and gid from 5789 ** that file. 
*/ 5790 const char *z = sqlite3_uri_parameter(zPath, "modeof"); 5791 if( z ){ 5792 rc = getFileMode(z, pMode, pUid, pGid); 5793 } 5794 } 5795 return rc; 5796 } 5797 5798 /* 5799 ** Open the file zPath. 5800 ** 5801 ** Previously, the SQLite OS layer used three functions in place of this 5802 ** one: 5803 ** 5804 ** sqlite3OsOpenReadWrite(); 5805 ** sqlite3OsOpenReadOnly(); 5806 ** sqlite3OsOpenExclusive(); 5807 ** 5808 ** These calls correspond to the following combinations of flags: 5809 ** 5810 ** ReadWrite() -> (READWRITE | CREATE) 5811 ** ReadOnly() -> (READONLY) 5812 ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) 5813 ** 5814 ** The old OpenExclusive() accepted a boolean argument - "delFlag". If 5815 ** true, the file was configured to be automatically deleted when the 5816 ** file handle closed. To achieve the same effect using this new 5817 ** interface, add the DELETEONCLOSE flag to those specified above for 5818 ** OpenExclusive(). 5819 */ 5820 static int unixOpen( 5821 sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */ 5822 const char *zPath, /* Pathname of file to be opened */ 5823 sqlite3_file *pFile, /* The file descriptor to be filled in */ 5824 int flags, /* Input flags to control the opening */ 5825 int *pOutFlags /* Output flags returned to SQLite core */ 5826 ){ 5827 unixFile *p = (unixFile *)pFile; 5828 int fd = -1; /* File descriptor returned by open() */ 5829 int openFlags = 0; /* Flags to pass to open() */ 5830 int eType = flags&0xFFFFFF00; /* Type of file to open */ 5831 int noLock; /* True to omit locking primitives */ 5832 int rc = SQLITE_OK; /* Function Return Code */ 5833 int ctrlFlags = 0; /* UNIXFILE_* flags */ 5834 5835 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); 5836 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); 5837 int isCreate = (flags & SQLITE_OPEN_CREATE); 5838 int isReadonly = (flags & SQLITE_OPEN_READONLY); 5839 int isReadWrite = (flags & SQLITE_OPEN_READWRITE); 5840 #if 
SQLITE_ENABLE_LOCKING_STYLE 5841 int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); 5842 #endif 5843 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 5844 struct statfs fsInfo; 5845 #endif 5846 5847 /* If creating a master or main-file journal, this function will open 5848 ** a file-descriptor on the directory too. The first time unixSync() 5849 ** is called the directory file descriptor will be fsync()ed and close()d. 5850 */ 5851 int isNewJrnl = (isCreate && ( 5852 eType==SQLITE_OPEN_MASTER_JOURNAL 5853 || eType==SQLITE_OPEN_MAIN_JOURNAL 5854 || eType==SQLITE_OPEN_WAL 5855 )); 5856 5857 /* If argument zPath is a NULL pointer, this function is required to open 5858 ** a temporary file. Use this buffer to store the file name in. 5859 */ 5860 char zTmpname[MAX_PATHNAME+2]; 5861 const char *zName = zPath; 5862 5863 /* Check the following statements are true: 5864 ** 5865 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and 5866 ** (b) if CREATE is set, then READWRITE must also be set, and 5867 ** (c) if EXCLUSIVE is set, then CREATE must also be set. 5868 ** (d) if DELETEONCLOSE is set, then CREATE must also be set. 5869 */ 5870 assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); 5871 assert(isCreate==0 || isReadWrite); 5872 assert(isExclusive==0 || isCreate); 5873 assert(isDelete==0 || isCreate); 5874 5875 /* The main DB, main journal, WAL file and master journal are never 5876 ** automatically deleted. Nor are they ever temporary files. */ 5877 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); 5878 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); 5879 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL ); 5880 assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); 5881 5882 /* Assert that the upper layer has set one of the "file-type" flags. 
*/ 5883 assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB 5884 || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 5885 || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL 5886 || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL 5887 ); 5888 5889 /* Detect a pid change and reset the PRNG. There is a race condition 5890 ** here such that two or more threads all trying to open databases at 5891 ** the same instant might all reset the PRNG. But multiple resets 5892 ** are harmless. 5893 */ 5894 if( randomnessPid!=osGetpid(0) ){ 5895 randomnessPid = osGetpid(0); 5896 sqlite3_randomness(0,0); 5897 } 5898 memset(p, 0, sizeof(unixFile)); 5899 5900 if( eType==SQLITE_OPEN_MAIN_DB ){ 5901 UnixUnusedFd *pUnused; 5902 pUnused = findReusableFd(zName, flags); 5903 if( pUnused ){ 5904 fd = pUnused->fd; 5905 }else{ 5906 pUnused = sqlite3_malloc64(sizeof(*pUnused)); 5907 if( !pUnused ){ 5908 return SQLITE_NOMEM_BKPT; 5909 } 5910 } 5911 p->pPreallocatedUnused = pUnused; 5912 5913 /* Database filenames are double-zero terminated if they are not 5914 ** URIs with parameters. Hence, they can always be passed into 5915 ** sqlite3_uri_parameter(). */ 5916 assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); 5917 5918 }else if( !zName ){ 5919 /* If zName is NULL, the upper layer is requesting a temp file. */ 5920 assert(isDelete && !isNewJrnl); 5921 rc = unixGetTempname(pVfs->mxPathname, zTmpname); 5922 if( rc!=SQLITE_OK ){ 5923 return rc; 5924 } 5925 zName = zTmpname; 5926 5927 /* Generated temporary filenames are always double-zero terminated 5928 ** for use by sqlite3_uri_parameter(). */ 5929 assert( zName[strlen(zName)+1]==0 ); 5930 } 5931 5932 /* Determine the value of the flags parameter passed to POSIX function 5933 ** open(). These must be calculated even if open() is not called, as 5934 ** they may be stored as part of the file handle and used by the 5935 ** 'conch file' locking functions later on. 
*/ 5936 if( isReadonly ) openFlags |= O_RDONLY; 5937 if( isReadWrite ) openFlags |= O_RDWR; 5938 if( isCreate ) openFlags |= O_CREAT; 5939 if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); 5940 openFlags |= (O_LARGEFILE|O_BINARY); 5941 5942 if( fd<0 ){ 5943 mode_t openMode; /* Permissions to create file with */ 5944 uid_t uid; /* Userid for the file */ 5945 gid_t gid; /* Groupid for the file */ 5946 rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); 5947 if( rc!=SQLITE_OK ){ 5948 assert( !p->pPreallocatedUnused ); 5949 assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); 5950 return rc; 5951 } 5952 fd = robust_open(zName, openFlags, openMode); 5953 OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags)); 5954 assert( !isExclusive || (openFlags & O_CREAT)!=0 ); 5955 if( fd<0 ){ 5956 if( isNewJrnl && errno==EACCES && osAccess(zName, F_OK) ){ 5957 /* If unable to create a journal because the directory is not 5958 ** writable, change the error code to indicate that. */ 5959 rc = SQLITE_READONLY_DIRECTORY; 5960 }else if( errno!=EISDIR && isReadWrite ){ 5961 /* Failed to open the file for read/write access. Try read-only. */ 5962 flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); 5963 openFlags &= ~(O_RDWR|O_CREAT); 5964 flags |= SQLITE_OPEN_READONLY; 5965 openFlags |= O_RDONLY; 5966 isReadonly = 1; 5967 fd = robust_open(zName, openFlags, openMode); 5968 } 5969 } 5970 if( fd<0 ){ 5971 int rc2 = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); 5972 if( rc==SQLITE_OK ) rc = rc2; 5973 goto open_finished; 5974 } 5975 5976 /* If this process is running as root and if creating a new rollback 5977 ** journal or WAL file, set the ownership of the journal or WAL to be 5978 ** the same as the original database. 
5979 */ 5980 if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ 5981 robustFchown(fd, uid, gid); 5982 } 5983 } 5984 assert( fd>=0 ); 5985 if( pOutFlags ){ 5986 *pOutFlags = flags; 5987 } 5988 5989 if( p->pPreallocatedUnused ){ 5990 p->pPreallocatedUnused->fd = fd; 5991 p->pPreallocatedUnused->flags = flags; 5992 } 5993 5994 if( isDelete ){ 5995 #if OS_VXWORKS 5996 zPath = zName; 5997 #elif defined(SQLITE_UNLINK_AFTER_CLOSE) 5998 zPath = sqlite3_mprintf("%s", zName); 5999 if( zPath==0 ){ 6000 robust_close(p, fd, __LINE__); 6001 return SQLITE_NOMEM_BKPT; 6002 } 6003 #else 6004 osUnlink(zName); 6005 #endif 6006 } 6007 #if SQLITE_ENABLE_LOCKING_STYLE 6008 else{ 6009 p->openFlags = openFlags; 6010 } 6011 #endif 6012 6013 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE 6014 if( fstatfs(fd, &fsInfo) == -1 ){ 6015 storeLastErrno(p, errno); 6016 robust_close(p, fd, __LINE__); 6017 return SQLITE_IOERR_ACCESS; 6018 } 6019 if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) { 6020 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 6021 } 6022 if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) { 6023 ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; 6024 } 6025 #endif 6026 6027 /* Set up appropriate ctrlFlags */ 6028 if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; 6029 if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; 6030 noLock = eType!=SQLITE_OPEN_MAIN_DB; 6031 if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; 6032 if( isNewJrnl ) ctrlFlags |= UNIXFILE_DIRSYNC; 6033 if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; 6034 6035 #if SQLITE_ENABLE_LOCKING_STYLE 6036 #if SQLITE_PREFER_PROXY_LOCKING 6037 isAutoProxy = 1; 6038 #endif 6039 if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){ 6040 char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING"); 6041 int useProxy = 0; 6042 6043 /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 6044 ** never use proxy, NULL means use proxy for non-local files only. 
*/ 6045 if( envforce!=NULL ){ 6046 useProxy = atoi(envforce)>0; 6047 }else{ 6048 useProxy = !(fsInfo.f_flags&MNT_LOCAL); 6049 } 6050 if( useProxy ){ 6051 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); 6052 if( rc==SQLITE_OK ){ 6053 rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); 6054 if( rc!=SQLITE_OK ){ 6055 /* Use unixClose to clean up the resources added in fillInUnixFile 6056 ** and clear all the structure's references. Specifically, 6057 ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op 6058 */ 6059 unixClose(pFile); 6060 return rc; 6061 } 6062 } 6063 goto open_finished; 6064 } 6065 } 6066 #endif 6067 6068 assert( zPath==0 || zPath[0]=='/' 6069 || eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL 6070 ); 6071 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); 6072 6073 open_finished: 6074 if( rc!=SQLITE_OK ){ 6075 sqlite3_free(p->pPreallocatedUnused); 6076 } 6077 return rc; 6078 } 6079 6080 6081 /* 6082 ** Delete the file at zPath. If the dirSync argument is true, fsync() 6083 ** the directory after deleting the file. 
6084 */ 6085 static int unixDelete( 6086 sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */ 6087 const char *zPath, /* Name of file to be deleted */ 6088 int dirSync /* If true, fsync() directory after deleting file */ 6089 ){ 6090 int rc = SQLITE_OK; 6091 UNUSED_PARAMETER(NotUsed); 6092 SimulateIOError(return SQLITE_IOERR_DELETE); 6093 if( osUnlink(zPath)==(-1) ){ 6094 if( errno==ENOENT 6095 #if OS_VXWORKS 6096 || osAccess(zPath,0)!=0 6097 #endif 6098 ){ 6099 rc = SQLITE_IOERR_DELETE_NOENT; 6100 }else{ 6101 rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); 6102 } 6103 return rc; 6104 } 6105 #ifndef SQLITE_DISABLE_DIRSYNC 6106 if( (dirSync & 1)!=0 ){ 6107 int fd; 6108 rc = osOpenDirectory(zPath, &fd); 6109 if( rc==SQLITE_OK ){ 6110 if( full_fsync(fd,0,0) ){ 6111 rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); 6112 } 6113 robust_close(0, fd, __LINE__); 6114 }else{ 6115 assert( rc==SQLITE_CANTOPEN ); 6116 rc = SQLITE_OK; 6117 } 6118 } 6119 #endif 6120 return rc; 6121 } 6122 6123 /* 6124 ** Test the existence of or access permissions of file zPath. The 6125 ** test performed depends on the value of flags: 6126 ** 6127 ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists 6128 ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. 6129 ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. 6130 ** 6131 ** Otherwise return 0. 6132 */ 6133 static int unixAccess( 6134 sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */ 6135 const char *zPath, /* Path of the file to examine */ 6136 int flags, /* What do we want to learn about the zPath file? */ 6137 int *pResOut /* Write result boolean here */ 6138 ){ 6139 UNUSED_PARAMETER(NotUsed); 6140 SimulateIOError( return SQLITE_IOERR_ACCESS; ); 6141 assert( pResOut!=0 ); 6142 6143 /* The spec says there are three possible values for flags. 
But only 6144 ** two of them are actually used */ 6145 assert( flags==SQLITE_ACCESS_EXISTS || flags==SQLITE_ACCESS_READWRITE ); 6146 6147 if( flags==SQLITE_ACCESS_EXISTS ){ 6148 struct stat buf; 6149 *pResOut = (0==osStat(zPath, &buf) && buf.st_size>0); 6150 }else{ 6151 *pResOut = osAccess(zPath, W_OK|R_OK)==0; 6152 } 6153 return SQLITE_OK; 6154 } 6155 6156 /* 6157 ** 6158 */ 6159 static int mkFullPathname( 6160 const char *zPath, /* Input path */ 6161 char *zOut, /* Output buffer */ 6162 int nOut /* Allocated size of buffer zOut */ 6163 ){ 6164 int nPath = sqlite3Strlen30(zPath); 6165 int iOff = 0; 6166 if( zPath[0]!='/' ){ 6167 if( osGetcwd(zOut, nOut-2)==0 ){ 6168 return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath); 6169 } 6170 iOff = sqlite3Strlen30(zOut); 6171 zOut[iOff++] = '/'; 6172 } 6173 if( (iOff+nPath+1)>nOut ){ 6174 /* SQLite assumes that xFullPathname() nul-terminates the output buffer 6175 ** even if it returns an error. */ 6176 zOut[iOff] = '\0'; 6177 return SQLITE_CANTOPEN_BKPT; 6178 } 6179 sqlite3_snprintf(nOut-iOff, &zOut[iOff], "%s", zPath); 6180 return SQLITE_OK; 6181 } 6182 6183 /* 6184 ** Turn a relative pathname into a full pathname. The relative path 6185 ** is stored as a nul-terminated string in the buffer pointed to by 6186 ** zPath. 6187 ** 6188 ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes 6189 ** (in this case, MAX_PATHNAME bytes). The full-path is written to 6190 ** this buffer before returning. 
6191 */ 6192 static int unixFullPathname( 6193 sqlite3_vfs *pVfs, /* Pointer to vfs object */ 6194 const char *zPath, /* Possibly relative input path */ 6195 int nOut, /* Size of output buffer in bytes */ 6196 char *zOut /* Output buffer */ 6197 ){ 6198 #if !defined(HAVE_READLINK) || !defined(HAVE_LSTAT) 6199 return mkFullPathname(zPath, zOut, nOut); 6200 #else 6201 int rc = SQLITE_OK; 6202 int nByte; 6203 int nLink = 1; /* Number of symbolic links followed so far */ 6204 const char *zIn = zPath; /* Input path for each iteration of loop */ 6205 char *zDel = 0; 6206 6207 assert( pVfs->mxPathname==MAX_PATHNAME ); 6208 UNUSED_PARAMETER(pVfs); 6209 6210 /* It's odd to simulate an io-error here, but really this is just 6211 ** using the io-error infrastructure to test that SQLite handles this 6212 ** function failing. This function could fail if, for example, the 6213 ** current working directory has been unlinked. 6214 */ 6215 SimulateIOError( return SQLITE_ERROR ); 6216 6217 do { 6218 6219 /* Call stat() on path zIn. Set bLink to true if the path is a symbolic 6220 ** link, or false otherwise. 
*/ 6221 int bLink = 0; 6222 struct stat buf; 6223 if( osLstat(zIn, &buf)!=0 ){ 6224 if( errno!=ENOENT ){ 6225 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat", zIn); 6226 } 6227 }else{ 6228 bLink = S_ISLNK(buf.st_mode); 6229 } 6230 6231 if( bLink ){ 6232 if( zDel==0 ){ 6233 zDel = sqlite3_malloc(nOut); 6234 if( zDel==0 ) rc = SQLITE_NOMEM_BKPT; 6235 }else if( ++nLink>SQLITE_MAX_SYMLINKS ){ 6236 rc = SQLITE_CANTOPEN_BKPT; 6237 } 6238 6239 if( rc==SQLITE_OK ){ 6240 nByte = osReadlink(zIn, zDel, nOut-1); 6241 if( nByte<0 ){ 6242 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "readlink", zIn); 6243 }else{ 6244 if( zDel[0]!='/' ){ 6245 int n; 6246 for(n = sqlite3Strlen30(zIn); n>0 && zIn[n-1]!='/'; n--); 6247 if( nByte+n+1>nOut ){ 6248 rc = SQLITE_CANTOPEN_BKPT; 6249 }else{ 6250 memmove(&zDel[n], zDel, nByte+1); 6251 memcpy(zDel, zIn, n); 6252 nByte += n; 6253 } 6254 } 6255 zDel[nByte] = '\0'; 6256 } 6257 } 6258 6259 zIn = zDel; 6260 } 6261 6262 assert( rc!=SQLITE_OK || zIn!=zOut || zIn[0]=='/' ); 6263 if( rc==SQLITE_OK && zIn!=zOut ){ 6264 rc = mkFullPathname(zIn, zOut, nOut); 6265 } 6266 if( bLink==0 ) break; 6267 zIn = zOut; 6268 }while( rc==SQLITE_OK ); 6269 6270 sqlite3_free(zDel); 6271 return rc; 6272 #endif /* HAVE_READLINK && HAVE_LSTAT */ 6273 } 6274 6275 6276 #ifndef SQLITE_OMIT_LOAD_EXTENSION 6277 /* 6278 ** Interfaces for opening a shared library, finding entry points 6279 ** within the shared library, and closing the shared library. 6280 */ 6281 #include <dlfcn.h> 6282 static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){ 6283 UNUSED_PARAMETER(NotUsed); 6284 return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); 6285 } 6286 6287 /* 6288 ** SQLite calls this function immediately after a call to unixDlSym() or 6289 ** unixDlOpen() fails (returns a null pointer). If a more detailed error 6290 ** message is available, it is written to zBufOut. 
If no error message 6291 ** is available, zBufOut is left unmodified and SQLite uses a default 6292 ** error message. 6293 */ 6294 static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){ 6295 const char *zErr; 6296 UNUSED_PARAMETER(NotUsed); 6297 unixEnterMutex(); 6298 zErr = dlerror(); 6299 if( zErr ){ 6300 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr); 6301 } 6302 unixLeaveMutex(); 6303 } 6304 static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){ 6305 /* 6306 ** GCC with -pedantic-errors says that C90 does not allow a void* to be 6307 ** cast into a pointer to a function. And yet the library dlsym() routine 6308 ** returns a void* which is really a pointer to a function. So how do we 6309 ** use dlsym() with -pedantic-errors? 6310 ** 6311 ** Variable x below is defined to be a pointer to a function taking 6312 ** parameters void* and const char* and returning a pointer to a function. 6313 ** We initialize x by assigning it a pointer to the dlsym() function. 6314 ** (That assignment requires a cast.) Then we call the function that 6315 ** x points to. 6316 ** 6317 ** This work-around is unlikely to work correctly on any system where 6318 ** you really cannot cast a function pointer into void*. But then, on the 6319 ** other hand, dlsym() will not work on such a system either, so we have 6320 ** not really lost anything. 6321 */ 6322 void (*(*x)(void*,const char*))(void); 6323 UNUSED_PARAMETER(NotUsed); 6324 x = (void(*(*)(void*,const char*))(void))dlsym; 6325 return (*x)(p, zSym); 6326 } 6327 static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){ 6328 UNUSED_PARAMETER(NotUsed); 6329 dlclose(pHandle); 6330 } 6331 #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */ 6332 #define unixDlOpen 0 6333 #define unixDlError 0 6334 #define unixDlSym 0 6335 #define unixDlClose 0 6336 #endif 6337 6338 /* 6339 ** Write nBuf bytes of random data to the supplied buffer zBuf. 
6340 */ 6341 static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ 6342 UNUSED_PARAMETER(NotUsed); 6343 assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int))); 6344 6345 /* We have to initialize zBuf to prevent valgrind from reporting 6346 ** errors. The reports issued by valgrind are incorrect - we would 6347 ** prefer that the randomness be increased by making use of the 6348 ** uninitialized space in zBuf - but valgrind errors tend to worry 6349 ** some users. Rather than argue, it seems easier just to initialize 6350 ** the whole array and silence valgrind, even if that means less randomness 6351 ** in the random seed. 6352 ** 6353 ** When testing, initializing zBuf[] to zero is all we do. That means 6354 ** that we always use the same random number sequence. This makes the 6355 ** tests repeatable. 6356 */ 6357 memset(zBuf, 0, nBuf); 6358 randomnessPid = osGetpid(0); 6359 #if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) 6360 { 6361 int fd, got; 6362 fd = robust_open("/dev/urandom", O_RDONLY, 0); 6363 if( fd<0 ){ 6364 time_t t; 6365 time(&t); 6366 memcpy(zBuf, &t, sizeof(t)); 6367 memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid)); 6368 assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf ); 6369 nBuf = sizeof(t) + sizeof(randomnessPid); 6370 }else{ 6371 do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); 6372 robust_close(0, fd, __LINE__); 6373 } 6374 } 6375 #endif 6376 return nBuf; 6377 } 6378 6379 6380 /* 6381 ** Sleep for a little while. Return the amount of time slept. 6382 ** The argument is the number of microseconds we want to sleep. 6383 ** The return value is the number of microseconds of sleep actually 6384 ** requested from the underlying operating system, a number which 6385 ** might be greater than or equal to the argument, but not less 6386 ** than the argument. 
6387 */ 6388 static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ 6389 #if OS_VXWORKS 6390 struct timespec sp; 6391 6392 sp.tv_sec = microseconds / 1000000; 6393 sp.tv_nsec = (microseconds % 1000000) * 1000; 6394 nanosleep(&sp, NULL); 6395 UNUSED_PARAMETER(NotUsed); 6396 return microseconds; 6397 #elif defined(HAVE_USLEEP) && HAVE_USLEEP 6398 usleep(microseconds); 6399 UNUSED_PARAMETER(NotUsed); 6400 return microseconds; 6401 #else 6402 int seconds = (microseconds+999999)/1000000; 6403 sleep(seconds); 6404 UNUSED_PARAMETER(NotUsed); 6405 return seconds*1000000; 6406 #endif 6407 } 6408 6409 /* 6410 ** The following variable, if set to a non-zero value, is interpreted as 6411 ** the number of seconds since 1970 and is used to set the result of 6412 ** sqlite3OsCurrentTime() during testing. 6413 */ 6414 #ifdef SQLITE_TEST 6415 int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ 6416 #endif 6417 6418 /* 6419 ** Find the current time (in Universal Coordinated Time). Write into *piNow 6420 ** the current time and date as a Julian Day number times 86_400_000. In 6421 ** other words, write into *piNow the number of milliseconds since the Julian 6422 ** epoch of noon in Greenwich on November 24, 4714 B.C according to the 6423 ** proleptic Gregorian calendar. 6424 ** 6425 ** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date 6426 ** cannot be found. 
6427 */ 6428 static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ 6429 static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; 6430 int rc = SQLITE_OK; 6431 #if defined(NO_GETTOD) 6432 time_t t; 6433 time(&t); 6434 *piNow = ((sqlite3_int64)t)*1000 + unixEpoch; 6435 #elif OS_VXWORKS 6436 struct timespec sNow; 6437 clock_gettime(CLOCK_REALTIME, &sNow); 6438 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; 6439 #else 6440 struct timeval sNow; 6441 (void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */ 6442 *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; 6443 #endif 6444 6445 #ifdef SQLITE_TEST 6446 if( sqlite3_current_time ){ 6447 *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; 6448 } 6449 #endif 6450 UNUSED_PARAMETER(NotUsed); 6451 return rc; 6452 } 6453 6454 #ifndef SQLITE_OMIT_DEPRECATED 6455 /* 6456 ** Find the current time (in Universal Coordinated Time). Write the 6457 ** current time and date as a Julian Day number into *prNow and 6458 ** return 0. Return 1 if the time and date cannot be found. 6459 */ 6460 static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ 6461 sqlite3_int64 i = 0; 6462 int rc; 6463 UNUSED_PARAMETER(NotUsed); 6464 rc = unixCurrentTimeInt64(0, &i); 6465 *prNow = i/86400000.0; 6466 return rc; 6467 } 6468 #else 6469 # define unixCurrentTime 0 6470 #endif 6471 6472 /* 6473 ** The xGetLastError() method is designed to return a better 6474 ** low-level error message when operating-system problems come up 6475 ** during SQLite operation. Only the integer return code is currently 6476 ** used. 
6477 */ 6478 static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ 6479 UNUSED_PARAMETER(NotUsed); 6480 UNUSED_PARAMETER(NotUsed2); 6481 UNUSED_PARAMETER(NotUsed3); 6482 return errno; 6483 } 6484 6485 6486 /* 6487 ************************ End of sqlite3_vfs methods *************************** 6488 ******************************************************************************/ 6489 6490 /****************************************************************************** 6491 ************************** Begin Proxy Locking ******************************** 6492 ** 6493 ** Proxy locking is a "uber-locking-method" in this sense: It uses the 6494 ** other locking methods on secondary lock files. Proxy locking is a 6495 ** meta-layer over top of the primitive locking implemented above. For 6496 ** this reason, the division that implements of proxy locking is deferred 6497 ** until late in the file (here) after all of the other I/O methods have 6498 ** been defined - so that the primitive locking methods are available 6499 ** as services to help with the implementation of proxy locking. 6500 ** 6501 **** 6502 ** 6503 ** The default locking schemes in SQLite use byte-range locks on the 6504 ** database file to coordinate safe, concurrent access by multiple readers 6505 ** and writers [http://sqlite.org/lockingv3.html]. The five file locking 6506 ** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented 6507 ** as POSIX read & write locks over fixed set of locations (via fsctl), 6508 ** on AFP and SMB only exclusive byte-range locks are available via fsctl 6509 ** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states. 
6510 ** To simulate a F_RDLCK on the shared range, on AFP a randomly selected 6511 ** address in the shared range is taken for a SHARED lock, the entire 6512 ** shared range is taken for an EXCLUSIVE lock): 6513 ** 6514 ** PENDING_BYTE 0x40000000 6515 ** RESERVED_BYTE 0x40000001 6516 ** SHARED_RANGE 0x40000002 -> 0x40000200 6517 ** 6518 ** This works well on the local file system, but shows a nearly 100x 6519 ** slowdown in read performance on AFP because the AFP client disables 6520 ** the read cache when byte-range locks are present. Enabling the read 6521 ** cache exposes a cache coherency problem that is present on all OS X 6522 ** supported network file systems. NFS and AFP both observe the 6523 ** close-to-open semantics for ensuring cache coherency 6524 ** [http://nfs.sourceforge.net/#faq_a8], which does not effectively 6525 ** address the requirements for concurrent database access by multiple 6526 ** readers and writers 6527 ** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html]. 6528 ** 6529 ** To address the performance and cache coherency issues, proxy file locking 6530 ** changes the way database access is controlled by limiting access to a 6531 ** single host at a time and moving file locks off of the database file 6532 ** and onto a proxy file on the local file system. 
**
**
** Using proxy locks
** -----------------
**
** C APIs
**
**  sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE,
**                       <proxy_path> | ":auto:");
**  sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE,
**                       &<proxy_path>);
**
**
** SQL pragmas
**
**  PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
**  PRAGMA [database.]lock_proxy_file
**
** Specifying ":auto:" means that if there is a conch file with a matching
** host ID in it, the proxy path in the conch file will be used, otherwise
** a proxy path based on the user's temp dir
** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
** actual proxy file name is generated from the name and path of the
** database file.  For example:
**
**       For database path "/Users/me/foo.db"
**       The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:"
**
** Once a lock proxy is configured for a database connection, it cannot
** be removed; however, it may be switched to a different proxy path via
** the above APIs (assuming the conch file is not being held by another
** connection or process).
**
**
** How proxy locking works
** -----------------------
**
** Proxy file locking relies primarily on two new supporting files:
**
**   *  conch file to limit access to the database file to a single host
**      at a time
**
**   *  proxy file to act as a proxy for the advisory locks normally
**      taken on the database
**
** The conch file - to use a proxy file, sqlite must first "hold the conch"
** by taking an sqlite-style shared lock on the conch file, reading the
** contents and comparing the host's unique host ID (see below) and lock
** proxy path against the values stored in the conch.
The conch file is
** stored in the same directory as the database file and the file name
** is patterned after the database file name as ".<databasename>-conch".
** If the conch file does not exist, or its contents do not match the
** host ID and/or proxy path, then the lock is escalated to an exclusive
** lock and the conch file contents are updated with the host ID and proxy
** path and the lock is downgraded to a shared lock again.  If the conch
** is held by another process (with a shared lock), the exclusive lock
** will fail and SQLITE_BUSY is returned.
**
** The proxy file - a single-byte file used for all advisory file locks
** normally taken on the database file.   This allows for safe sharing
** of the database file for multiple readers and writers on the same
** host (the conch ensures that they all use the same local lock file).
**
** Requesting the lock proxy does not immediately take the conch; it is
** only taken when the first request to lock the database file is made.
** This matches the semantics of the traditional locking behavior, where
** opening a connection to a database file does not take a lock on it.
** The shared lock and an open file descriptor are maintained until
** the connection to the database is closed.
**
** The proxy file and the lock file are never deleted so they only need
** to be created the first time they are used.
**
** Configuration options
** ---------------------
**
**  SQLITE_PREFER_PROXY_LOCKING
**
**       Database files accessed on non-local file systems are
**       automatically configured for proxy locking; lock files are
**       named automatically using the same logic as
**       PRAGMA lock_proxy_file=":auto:"
**
**  SQLITE_PROXY_DEBUG
**
**       Enables the logging of error messages during host id file
**       retrieval and creation
**
**  LOCKPROXYDIR
**
**       Overrides the default directory used for lock proxy files that
**       are named automatically via the ":auto:" setting
**
**  SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
**
**       Permissions to use when creating a directory for storing the
**       lock proxy files, only used when LOCKPROXYDIR is not set.
**
**
** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
** force proxy locking to be used for every database file opened, and 0
** will force automatic proxy locking to be disabled for all database
** files (an explicit PRAGMA lock_proxy_file, or a sqlite3_file_control()
** call with SQLITE_FCNTL_SET_LOCKPROXYFILE, is not affected by
** SQLITE_FORCE_PROXY_LOCKING).
6638 */ 6639 6640 /* 6641 ** Proxy locking is only available on MacOSX 6642 */ 6643 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE 6644 6645 /* 6646 ** The proxyLockingContext has the path and file structures for the remote 6647 ** and local proxy files in it 6648 */ 6649 typedef struct proxyLockingContext proxyLockingContext; 6650 struct proxyLockingContext { 6651 unixFile *conchFile; /* Open conch file */ 6652 char *conchFilePath; /* Name of the conch file */ 6653 unixFile *lockProxy; /* Open proxy lock file */ 6654 char *lockProxyPath; /* Name of the proxy lock file */ 6655 char *dbPath; /* Name of the open file */ 6656 int conchHeld; /* 1 if the conch is held, -1 if lockless */ 6657 int nFails; /* Number of conch taking failures */ 6658 void *oldLockingContext; /* Original lockingcontext to restore on close */ 6659 sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */ 6660 }; 6661 6662 /* 6663 ** The proxy lock file path for the database at dbPath is written into lPath, 6664 ** which must point to valid, writable memory large enough for a maxLen length 6665 ** file path. 
6666 */ 6667 static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){ 6668 int len; 6669 int dbLen; 6670 int i; 6671 6672 #ifdef LOCKPROXYDIR 6673 len = strlcpy(lPath, LOCKPROXYDIR, maxLen); 6674 #else 6675 # ifdef _CS_DARWIN_USER_TEMP_DIR 6676 { 6677 if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){ 6678 OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n", 6679 lPath, errno, osGetpid(0))); 6680 return SQLITE_IOERR_LOCK; 6681 } 6682 len = strlcat(lPath, "sqliteplocks", maxLen); 6683 } 6684 # else 6685 len = strlcpy(lPath, "/tmp/", maxLen); 6686 # endif 6687 #endif 6688 6689 if( lPath[len-1]!='/' ){ 6690 len = strlcat(lPath, "/", maxLen); 6691 } 6692 6693 /* transform the db path to a unique cache name */ 6694 dbLen = (int)strlen(dbPath); 6695 for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){ 6696 char c = dbPath[i]; 6697 lPath[i+len] = (c=='/')?'_':c; 6698 } 6699 lPath[i+len]='\0'; 6700 strlcat(lPath, ":auto:", maxLen); 6701 OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, osGetpid(0))); 6702 return SQLITE_OK; 6703 } 6704 6705 /* 6706 ** Creates the lock file and any missing directories in lockPath 6707 */ 6708 static int proxyCreateLockPath(const char *lockPath){ 6709 int i, len; 6710 char buf[MAXPATHLEN]; 6711 int start = 0; 6712 6713 assert(lockPath!=NULL); 6714 /* try to create all the intermediate directories */ 6715 len = (int)strlen(lockPath); 6716 buf[0] = lockPath[0]; 6717 for( i=1; i<len; i++ ){ 6718 if( lockPath[i] == '/' && (i - start > 0) ){ 6719 /* only mkdir if leaf dir != "." or "/" or ".." */ 6720 if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') 6721 || (i-start==2 && buf[start] != '.' 
&& buf[start+1] != '.') ){ 6722 buf[i]='\0'; 6723 if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ 6724 int err=errno; 6725 if( err!=EEXIST ) { 6726 OSTRACE(("CREATELOCKPATH FAILED creating %s, " 6727 "'%s' proxy lock path=%s pid=%d\n", 6728 buf, strerror(err), lockPath, osGetpid(0))); 6729 return err; 6730 } 6731 } 6732 } 6733 start=i+1; 6734 } 6735 buf[i] = lockPath[i]; 6736 } 6737 OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n",lockPath,osGetpid(0))); 6738 return 0; 6739 } 6740 6741 /* 6742 ** Create a new VFS file descriptor (stored in memory obtained from 6743 ** sqlite3_malloc) and open the file named "path" in the file descriptor. 6744 ** 6745 ** The caller is responsible not only for closing the file descriptor 6746 ** but also for freeing the memory associated with the file descriptor. 6747 */ 6748 static int proxyCreateUnixFile( 6749 const char *path, /* path for the new unixFile */ 6750 unixFile **ppFile, /* unixFile created and returned by ref */ 6751 int islockfile /* if non zero missing dirs will be created */ 6752 ) { 6753 int fd = -1; 6754 unixFile *pNew; 6755 int rc = SQLITE_OK; 6756 int openFlags = O_RDWR | O_CREAT; 6757 sqlite3_vfs dummyVfs; 6758 int terrno = 0; 6759 UnixUnusedFd *pUnused = NULL; 6760 6761 /* 1. first try to open/create the file 6762 ** 2. if that fails, and this is a lock file (not-conch), try creating 6763 ** the parent directories and then try again. 6764 ** 3. 
if that fails, try to open the file read-only 6765 ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file 6766 */ 6767 pUnused = findReusableFd(path, openFlags); 6768 if( pUnused ){ 6769 fd = pUnused->fd; 6770 }else{ 6771 pUnused = sqlite3_malloc64(sizeof(*pUnused)); 6772 if( !pUnused ){ 6773 return SQLITE_NOMEM_BKPT; 6774 } 6775 } 6776 if( fd<0 ){ 6777 fd = robust_open(path, openFlags, 0); 6778 terrno = errno; 6779 if( fd<0 && errno==ENOENT && islockfile ){ 6780 if( proxyCreateLockPath(path) == SQLITE_OK ){ 6781 fd = robust_open(path, openFlags, 0); 6782 } 6783 } 6784 } 6785 if( fd<0 ){ 6786 openFlags = O_RDONLY; 6787 fd = robust_open(path, openFlags, 0); 6788 terrno = errno; 6789 } 6790 if( fd<0 ){ 6791 if( islockfile ){ 6792 return SQLITE_BUSY; 6793 } 6794 switch (terrno) { 6795 case EACCES: 6796 return SQLITE_PERM; 6797 case EIO: 6798 return SQLITE_IOERR_LOCK; /* even though it is the conch */ 6799 default: 6800 return SQLITE_CANTOPEN_BKPT; 6801 } 6802 } 6803 6804 pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew)); 6805 if( pNew==NULL ){ 6806 rc = SQLITE_NOMEM_BKPT; 6807 goto end_create_proxy; 6808 } 6809 memset(pNew, 0, sizeof(unixFile)); 6810 pNew->openFlags = openFlags; 6811 memset(&dummyVfs, 0, sizeof(dummyVfs)); 6812 dummyVfs.pAppData = (void*)&autolockIoFinder; 6813 dummyVfs.zName = "dummy"; 6814 pUnused->fd = fd; 6815 pUnused->flags = openFlags; 6816 pNew->pPreallocatedUnused = pUnused; 6817 6818 rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); 6819 if( rc==SQLITE_OK ){ 6820 *ppFile = pNew; 6821 return SQLITE_OK; 6822 } 6823 end_create_proxy: 6824 robust_close(pNew, fd, __LINE__); 6825 sqlite3_free(pNew); 6826 sqlite3_free(pUnused); 6827 return rc; 6828 } 6829 6830 #ifdef SQLITE_TEST 6831 /* simulate multiple hosts by creating unique hostid file paths */ 6832 int sqlite3_hostid_num = 0; 6833 #endif 6834 6835 #define PROXY_HOSTIDLEN 16 /* conch file host id length */ 6836 6837 #ifdef HAVE_GETHOSTUUID 6838 /* Not always 
defined in the headers as it ought to be */ 6839 extern int gethostuuid(uuid_t id, const struct timespec *wait); 6840 #endif 6841 6842 /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN 6843 ** bytes of writable memory. 6844 */ 6845 static int proxyGetHostID(unsigned char *pHostID, int *pError){ 6846 assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); 6847 memset(pHostID, 0, PROXY_HOSTIDLEN); 6848 #ifdef HAVE_GETHOSTUUID 6849 { 6850 struct timespec timeout = {1, 0}; /* 1 sec timeout */ 6851 if( gethostuuid(pHostID, &timeout) ){ 6852 int err = errno; 6853 if( pError ){ 6854 *pError = err; 6855 } 6856 return SQLITE_IOERR; 6857 } 6858 } 6859 #else 6860 UNUSED_PARAMETER(pError); 6861 #endif 6862 #ifdef SQLITE_TEST 6863 /* simulate multiple hosts by creating unique hostid file paths */ 6864 if( sqlite3_hostid_num != 0){ 6865 pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); 6866 } 6867 #endif 6868 6869 return SQLITE_OK; 6870 } 6871 6872 /* The conch file contains the header, host id and lock file path 6873 */ 6874 #define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */ 6875 #define PROXY_HEADERLEN 1 /* conch file header length */ 6876 #define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN) 6877 #define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN) 6878 6879 /* 6880 ** Takes an open conch file, copies the contents to a new path and then moves 6881 ** it back. The newly created file's file descriptor is assigned to the 6882 ** conch file structure and finally the original conch file descriptor is 6883 ** closed. Returns zero if successful. 
6884 */ 6885 static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ 6886 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 6887 unixFile *conchFile = pCtx->conchFile; 6888 char tPath[MAXPATHLEN]; 6889 char buf[PROXY_MAXCONCHLEN]; 6890 char *cPath = pCtx->conchFilePath; 6891 size_t readLen = 0; 6892 size_t pathLen = 0; 6893 char errmsg[64] = ""; 6894 int fd = -1; 6895 int rc = -1; 6896 UNUSED_PARAMETER(myHostID); 6897 6898 /* create a new path by replace the trailing '-conch' with '-break' */ 6899 pathLen = strlcpy(tPath, cPath, MAXPATHLEN); 6900 if( pathLen>MAXPATHLEN || pathLen<6 || 6901 (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){ 6902 sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen); 6903 goto end_breaklock; 6904 } 6905 /* read the conch content */ 6906 readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); 6907 if( readLen<PROXY_PATHINDEX ){ 6908 sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen); 6909 goto end_breaklock; 6910 } 6911 /* write it out to the temporary break file */ 6912 fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 0); 6913 if( fd<0 ){ 6914 sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno); 6915 goto end_breaklock; 6916 } 6917 if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){ 6918 sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno); 6919 goto end_breaklock; 6920 } 6921 if( rename(tPath, cPath) ){ 6922 sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno); 6923 goto end_breaklock; 6924 } 6925 rc = 0; 6926 fprintf(stderr, "broke stale lock on %s\n", cPath); 6927 robust_close(pFile, conchFile->h, __LINE__); 6928 conchFile->h = fd; 6929 conchFile->openFlags = O_RDWR | O_CREAT; 6930 6931 end_breaklock: 6932 if( rc ){ 6933 if( fd>=0 ){ 6934 osUnlink(tPath); 6935 robust_close(pFile, fd, __LINE__); 6936 } 6937 fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg); 6938 } 6939 
return rc; 6940 } 6941 6942 /* Take the requested lock on the conch file and break a stale lock if the 6943 ** host id matches. 6944 */ 6945 static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ 6946 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 6947 unixFile *conchFile = pCtx->conchFile; 6948 int rc = SQLITE_OK; 6949 int nTries = 0; 6950 struct timespec conchModTime; 6951 6952 memset(&conchModTime, 0, sizeof(conchModTime)); 6953 do { 6954 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); 6955 nTries ++; 6956 if( rc==SQLITE_BUSY ){ 6957 /* If the lock failed (busy): 6958 * 1st try: get the mod time of the conch, wait 0.5s and try again. 6959 * 2nd try: fail if the mod time changed or host id is different, wait 6960 * 10 sec and try again 6961 * 3rd try: break the lock unless the mod time has changed. 6962 */ 6963 struct stat buf; 6964 if( osFstat(conchFile->h, &buf) ){ 6965 storeLastErrno(pFile, errno); 6966 return SQLITE_IOERR_LOCK; 6967 } 6968 6969 if( nTries==1 ){ 6970 conchModTime = buf.st_mtimespec; 6971 usleep(500000); /* wait 0.5 sec and try the lock again*/ 6972 continue; 6973 } 6974 6975 assert( nTries>1 ); 6976 if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || 6977 conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){ 6978 return SQLITE_BUSY; 6979 } 6980 6981 if( nTries==2 ){ 6982 char tBuf[PROXY_MAXCONCHLEN]; 6983 int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0); 6984 if( len<0 ){ 6985 storeLastErrno(pFile, errno); 6986 return SQLITE_IOERR_LOCK; 6987 } 6988 if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){ 6989 /* don't break the lock if the host id doesn't match */ 6990 if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){ 6991 return SQLITE_BUSY; 6992 } 6993 }else{ 6994 /* don't break the lock on short read or a version mismatch */ 6995 return SQLITE_BUSY; 6996 } 6997 usleep(10000000); /* wait 10 sec and try the lock again */ 6998 continue; 6999 } 
7000 7001 assert( nTries==3 ); 7002 if( 0==proxyBreakConchLock(pFile, myHostID) ){ 7003 rc = SQLITE_OK; 7004 if( lockType==EXCLUSIVE_LOCK ){ 7005 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK); 7006 } 7007 if( !rc ){ 7008 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); 7009 } 7010 } 7011 } 7012 } while( rc==SQLITE_BUSY && nTries<3 ); 7013 7014 return rc; 7015 } 7016 7017 /* Takes the conch by taking a shared lock and read the contents conch, if 7018 ** lockPath is non-NULL, the host ID and lock file path must match. A NULL 7019 ** lockPath means that the lockPath in the conch file will be used if the 7020 ** host IDs match, or a new lock path will be generated automatically 7021 ** and written to the conch file. 7022 */ 7023 static int proxyTakeConch(unixFile *pFile){ 7024 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7025 7026 if( pCtx->conchHeld!=0 ){ 7027 return SQLITE_OK; 7028 }else{ 7029 unixFile *conchFile = pCtx->conchFile; 7030 uuid_t myHostID; 7031 int pError = 0; 7032 char readBuf[PROXY_MAXCONCHLEN]; 7033 char lockPath[MAXPATHLEN]; 7034 char *tempLockPath = NULL; 7035 int rc = SQLITE_OK; 7036 int createConch = 0; 7037 int hostIdMatch = 0; 7038 int readLen = 0; 7039 int tryOldLockPath = 0; 7040 int forceNewLockPath = 0; 7041 7042 OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h, 7043 (pCtx->lockProxyPath ? 
pCtx->lockProxyPath : ":auto:"), 7044 osGetpid(0))); 7045 7046 rc = proxyGetHostID(myHostID, &pError); 7047 if( (rc&0xff)==SQLITE_IOERR ){ 7048 storeLastErrno(pFile, pError); 7049 goto end_takeconch; 7050 } 7051 rc = proxyConchLock(pFile, myHostID, SHARED_LOCK); 7052 if( rc!=SQLITE_OK ){ 7053 goto end_takeconch; 7054 } 7055 /* read the existing conch file */ 7056 readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN); 7057 if( readLen<0 ){ 7058 /* I/O error: lastErrno set by seekAndRead */ 7059 storeLastErrno(pFile, conchFile->lastErrno); 7060 rc = SQLITE_IOERR_READ; 7061 goto end_takeconch; 7062 }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) || 7063 readBuf[0]!=(char)PROXY_CONCHVERSION ){ 7064 /* a short read or version format mismatch means we need to create a new 7065 ** conch file. 7066 */ 7067 createConch = 1; 7068 } 7069 /* if the host id matches and the lock path already exists in the conch 7070 ** we'll try to use the path there, if we can't open that path, we'll 7071 ** retry with a new auto-generated path 7072 */ 7073 do { /* in case we need to try again for an :auto: named lock file */ 7074 7075 if( !createConch && !forceNewLockPath ){ 7076 hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID, 7077 PROXY_HOSTIDLEN); 7078 /* if the conch has data compare the contents */ 7079 if( !pCtx->lockProxyPath ){ 7080 /* for auto-named local lock file, just check the host ID and we'll 7081 ** use the local lock file path that's already in there 7082 */ 7083 if( hostIdMatch ){ 7084 size_t pathLen = (readLen - PROXY_PATHINDEX); 7085 7086 if( pathLen>=MAXPATHLEN ){ 7087 pathLen=MAXPATHLEN-1; 7088 } 7089 memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen); 7090 lockPath[pathLen] = 0; 7091 tempLockPath = lockPath; 7092 tryOldLockPath = 1; 7093 /* create a copy of the lock path if the conch is taken */ 7094 goto end_takeconch; 7095 } 7096 }else if( hostIdMatch 7097 && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX], 7098 
readLen-PROXY_PATHINDEX) 7099 ){ 7100 /* conch host and lock path match */ 7101 goto end_takeconch; 7102 } 7103 } 7104 7105 /* if the conch isn't writable and doesn't match, we can't take it */ 7106 if( (conchFile->openFlags&O_RDWR) == 0 ){ 7107 rc = SQLITE_BUSY; 7108 goto end_takeconch; 7109 } 7110 7111 /* either the conch didn't match or we need to create a new one */ 7112 if( !pCtx->lockProxyPath ){ 7113 proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN); 7114 tempLockPath = lockPath; 7115 /* create a copy of the lock path _only_ if the conch is taken */ 7116 } 7117 7118 /* update conch with host and path (this will fail if other process 7119 ** has a shared lock already), if the host id matches, use the big 7120 ** stick. 7121 */ 7122 futimes(conchFile->h, NULL); 7123 if( hostIdMatch && !createConch ){ 7124 if( conchFile->pInode && conchFile->pInode->nShared>1 ){ 7125 /* We are trying for an exclusive lock but another thread in this 7126 ** same process is still holding a shared lock. 
*/ 7127 rc = SQLITE_BUSY; 7128 } else { 7129 rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); 7130 } 7131 }else{ 7132 rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); 7133 } 7134 if( rc==SQLITE_OK ){ 7135 char writeBuffer[PROXY_MAXCONCHLEN]; 7136 int writeSize = 0; 7137 7138 writeBuffer[0] = (char)PROXY_CONCHVERSION; 7139 memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN); 7140 if( pCtx->lockProxyPath!=NULL ){ 7141 strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, 7142 MAXPATHLEN); 7143 }else{ 7144 strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN); 7145 } 7146 writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]); 7147 robust_ftruncate(conchFile->h, writeSize); 7148 rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0); 7149 full_fsync(conchFile->h,0,0); 7150 /* If we created a new conch file (not just updated the contents of a 7151 ** valid conch file), try to match the permissions of the database 7152 */ 7153 if( rc==SQLITE_OK && createConch ){ 7154 struct stat buf; 7155 int err = osFstat(pFile->h, &buf); 7156 if( err==0 ){ 7157 mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | 7158 S_IROTH|S_IWOTH); 7159 /* try to match the database file R/W permissions, ignore failure */ 7160 #ifndef SQLITE_PROXY_DEBUG 7161 osFchmod(conchFile->h, cmode); 7162 #else 7163 do{ 7164 rc = osFchmod(conchFile->h, cmode); 7165 }while( rc==(-1) && errno==EINTR ); 7166 if( rc!=0 ){ 7167 int code = errno; 7168 fprintf(stderr, "fchmod %o FAILED with %d %s\n", 7169 cmode, code, strerror(code)); 7170 } else { 7171 fprintf(stderr, "fchmod %o SUCCEDED\n",cmode); 7172 } 7173 }else{ 7174 int code = errno; 7175 fprintf(stderr, "STAT FAILED[%d] with %d %s\n", 7176 err, code, strerror(code)); 7177 #endif 7178 } 7179 } 7180 } 7181 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK); 7182 7183 end_takeconch: 7184 OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h)); 7185 if( rc==SQLITE_OK && 
pFile->openFlags ){ 7186 int fd; 7187 if( pFile->h>=0 ){ 7188 robust_close(pFile, pFile->h, __LINE__); 7189 } 7190 pFile->h = -1; 7191 fd = robust_open(pCtx->dbPath, pFile->openFlags, 0); 7192 OSTRACE(("TRANSPROXY: OPEN %d\n", fd)); 7193 if( fd>=0 ){ 7194 pFile->h = fd; 7195 }else{ 7196 rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called 7197 during locking */ 7198 } 7199 } 7200 if( rc==SQLITE_OK && !pCtx->lockProxy ){ 7201 char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath; 7202 rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1); 7203 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){ 7204 /* we couldn't create the proxy lock file with the old lock file path 7205 ** so try again via auto-naming 7206 */ 7207 forceNewLockPath = 1; 7208 tryOldLockPath = 0; 7209 continue; /* go back to the do {} while start point, try again */ 7210 } 7211 } 7212 if( rc==SQLITE_OK ){ 7213 /* Need to make a copy of path if we extracted the value 7214 ** from the conch file or the path was allocated on the stack 7215 */ 7216 if( tempLockPath ){ 7217 pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath); 7218 if( !pCtx->lockProxyPath ){ 7219 rc = SQLITE_NOMEM_BKPT; 7220 } 7221 } 7222 } 7223 if( rc==SQLITE_OK ){ 7224 pCtx->conchHeld = 1; 7225 7226 if( pCtx->lockProxy->pMethod == &afpIoMethods ){ 7227 afpLockingContext *afpCtx; 7228 afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext; 7229 afpCtx->dbPath = pCtx->lockProxyPath; 7230 } 7231 } else { 7232 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); 7233 } 7234 OSTRACE(("TAKECONCH %d %s\n", conchFile->h, 7235 rc==SQLITE_OK?"ok":"failed")); 7236 return rc; 7237 } while (1); /* in case we need to retry the :auto: lock file - 7238 ** we should never get here except via the 'continue' call. */ 7239 } 7240 } 7241 7242 /* 7243 ** If pFile holds a lock on a conch file, then release that lock. 
7244 */ 7245 static int proxyReleaseConch(unixFile *pFile){ 7246 int rc = SQLITE_OK; /* Subroutine return code */ 7247 proxyLockingContext *pCtx; /* The locking context for the proxy lock */ 7248 unixFile *conchFile; /* Name of the conch file */ 7249 7250 pCtx = (proxyLockingContext *)pFile->lockingContext; 7251 conchFile = pCtx->conchFile; 7252 OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h, 7253 (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), 7254 osGetpid(0))); 7255 if( pCtx->conchHeld>0 ){ 7256 rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); 7257 } 7258 pCtx->conchHeld = 0; 7259 OSTRACE(("RELEASECONCH %d %s\n", conchFile->h, 7260 (rc==SQLITE_OK ? "ok" : "failed"))); 7261 return rc; 7262 } 7263 7264 /* 7265 ** Given the name of a database file, compute the name of its conch file. 7266 ** Store the conch filename in memory obtained from sqlite3_malloc64(). 7267 ** Make *pConchPath point to the new name. Return SQLITE_OK on success 7268 ** or SQLITE_NOMEM if unable to obtain memory. 7269 ** 7270 ** The caller is responsible for ensuring that the allocated memory 7271 ** space is eventually freed. 7272 ** 7273 ** *pConchPath is set to NULL if a memory allocation error occurs. 7274 */ 7275 static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ 7276 int i; /* Loop counter */ 7277 int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ 7278 char *conchPath; /* buffer in which to construct conch name */ 7279 7280 /* Allocate space for the conch filename and initialize the name to 7281 ** the name of the original database file. */ 7282 *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); 7283 if( conchPath==0 ){ 7284 return SQLITE_NOMEM_BKPT; 7285 } 7286 memcpy(conchPath, dbPath, len+1); 7287 7288 /* now insert a "." 
before the last / character */ 7289 for( i=(len-1); i>=0; i-- ){ 7290 if( conchPath[i]=='/' ){ 7291 i++; 7292 break; 7293 } 7294 } 7295 conchPath[i]='.'; 7296 while ( i<len ){ 7297 conchPath[i+1]=dbPath[i]; 7298 i++; 7299 } 7300 7301 /* append the "-conch" suffix to the file */ 7302 memcpy(&conchPath[i+1], "-conch", 7); 7303 assert( (int)strlen(conchPath) == len+7 ); 7304 7305 return SQLITE_OK; 7306 } 7307 7308 7309 /* Takes a fully configured proxy locking-style unix file and switches 7310 ** the local lock file path 7311 */ 7312 static int switchLockProxyPath(unixFile *pFile, const char *path) { 7313 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7314 char *oldPath = pCtx->lockProxyPath; 7315 int rc = SQLITE_OK; 7316 7317 if( pFile->eFileLock!=NO_LOCK ){ 7318 return SQLITE_BUSY; 7319 } 7320 7321 /* nothing to do if the path is NULL, :auto: or matches the existing path */ 7322 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") || 7323 (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){ 7324 return SQLITE_OK; 7325 }else{ 7326 unixFile *lockProxy = pCtx->lockProxy; 7327 pCtx->lockProxy=NULL; 7328 pCtx->conchHeld = 0; 7329 if( lockProxy!=NULL ){ 7330 rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy); 7331 if( rc ) return rc; 7332 sqlite3_free(lockProxy); 7333 } 7334 sqlite3_free(oldPath); 7335 pCtx->lockProxyPath = sqlite3DbStrDup(0, path); 7336 } 7337 7338 return rc; 7339 } 7340 7341 /* 7342 ** pFile is a file that has been opened by a prior xOpen call. dbPath 7343 ** is a string buffer at least MAXPATHLEN+1 characters in size. 7344 ** 7345 ** This routine find the filename associated with pFile and writes it 7346 ** int dbPath. 
7347 */ 7348 static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){ 7349 #if defined(__APPLE__) 7350 if( pFile->pMethod == &afpIoMethods ){ 7351 /* afp style keeps a reference to the db path in the filePath field 7352 ** of the struct */ 7353 assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 7354 strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, 7355 MAXPATHLEN); 7356 } else 7357 #endif 7358 if( pFile->pMethod == &dotlockIoMethods ){ 7359 /* dot lock style uses the locking context to store the dot lock 7360 ** file path */ 7361 int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX); 7362 memcpy(dbPath, (char *)pFile->lockingContext, len + 1); 7363 }else{ 7364 /* all other styles use the locking context to store the db file path */ 7365 assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); 7366 strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); 7367 } 7368 return SQLITE_OK; 7369 } 7370 7371 /* 7372 ** Takes an already filled in unix file and alters it so all file locking 7373 ** will be performed on the local proxy lock file. The following fields 7374 ** are preserved in the locking context so that they can be restored and 7375 ** the unix structure properly cleaned up at close time: 7376 ** ->lockingContext 7377 ** ->pMethod 7378 */ 7379 static int proxyTransformUnixFile(unixFile *pFile, const char *path) { 7380 proxyLockingContext *pCtx; 7381 char dbPath[MAXPATHLEN+1]; /* Name of the database file */ 7382 char *lockPath=NULL; 7383 int rc = SQLITE_OK; 7384 7385 if( pFile->eFileLock!=NO_LOCK ){ 7386 return SQLITE_BUSY; 7387 } 7388 proxyGetDbPathForUnixFile(pFile, dbPath); 7389 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){ 7390 lockPath=NULL; 7391 }else{ 7392 lockPath=(char *)path; 7393 } 7394 7395 OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h, 7396 (lockPath ? 
lockPath : ":auto:"), osGetpid(0))); 7397 7398 pCtx = sqlite3_malloc64( sizeof(*pCtx) ); 7399 if( pCtx==0 ){ 7400 return SQLITE_NOMEM_BKPT; 7401 } 7402 memset(pCtx, 0, sizeof(*pCtx)); 7403 7404 rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); 7405 if( rc==SQLITE_OK ){ 7406 rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0); 7407 if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){ 7408 /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and 7409 ** (c) the file system is read-only, then enable no-locking access. 7410 ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts 7411 ** that openFlags will have only one of O_RDONLY or O_RDWR. 7412 */ 7413 struct statfs fsInfo; 7414 struct stat conchInfo; 7415 int goLockless = 0; 7416 7417 if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) { 7418 int err = errno; 7419 if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){ 7420 goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY; 7421 } 7422 } 7423 if( goLockless ){ 7424 pCtx->conchHeld = -1; /* read only FS/ lockless */ 7425 rc = SQLITE_OK; 7426 } 7427 } 7428 } 7429 if( rc==SQLITE_OK && lockPath ){ 7430 pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); 7431 } 7432 7433 if( rc==SQLITE_OK ){ 7434 pCtx->dbPath = sqlite3DbStrDup(0, dbPath); 7435 if( pCtx->dbPath==NULL ){ 7436 rc = SQLITE_NOMEM_BKPT; 7437 } 7438 } 7439 if( rc==SQLITE_OK ){ 7440 /* all memory is allocated, proxys are created and assigned, 7441 ** switch the locking context and pMethod then return. 
7442 */ 7443 pCtx->oldLockingContext = pFile->lockingContext; 7444 pFile->lockingContext = pCtx; 7445 pCtx->pOldMethod = pFile->pMethod; 7446 pFile->pMethod = &proxyIoMethods; 7447 }else{ 7448 if( pCtx->conchFile ){ 7449 pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile); 7450 sqlite3_free(pCtx->conchFile); 7451 } 7452 sqlite3DbFree(0, pCtx->lockProxyPath); 7453 sqlite3_free(pCtx->conchFilePath); 7454 sqlite3_free(pCtx); 7455 } 7456 OSTRACE(("TRANSPROXY %d %s\n", pFile->h, 7457 (rc==SQLITE_OK ? "ok" : "failed"))); 7458 return rc; 7459 } 7460 7461 7462 /* 7463 ** This routine handles sqlite3_file_control() calls that are specific 7464 ** to proxy locking. 7465 */ 7466 static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ 7467 switch( op ){ 7468 case SQLITE_FCNTL_GET_LOCKPROXYFILE: { 7469 unixFile *pFile = (unixFile*)id; 7470 if( pFile->pMethod == &proxyIoMethods ){ 7471 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; 7472 proxyTakeConch(pFile); 7473 if( pCtx->lockProxyPath ){ 7474 *(const char **)pArg = pCtx->lockProxyPath; 7475 }else{ 7476 *(const char **)pArg = ":auto: (not held)"; 7477 } 7478 } else { 7479 *(const char **)pArg = NULL; 7480 } 7481 return SQLITE_OK; 7482 } 7483 case SQLITE_FCNTL_SET_LOCKPROXYFILE: { 7484 unixFile *pFile = (unixFile*)id; 7485 int rc = SQLITE_OK; 7486 int isProxyStyle = (pFile->pMethod == &proxyIoMethods); 7487 if( pArg==NULL || (const char *)pArg==0 ){ 7488 if( isProxyStyle ){ 7489 /* turn off proxy locking - not supported. If support is added for 7490 ** switching proxy locking mode off then it will need to fail if 7491 ** the journal mode is WAL mode. 7492 */ 7493 rc = SQLITE_ERROR /*SQLITE_PROTOCOL? 
SQLITE_MISUSE?*/; 7494 }else{ 7495 /* turn off proxy locking - already off - NOOP */ 7496 rc = SQLITE_OK; 7497 } 7498 }else{ 7499 const char *proxyPath = (const char *)pArg; 7500 if( isProxyStyle ){ 7501 proxyLockingContext *pCtx = 7502 (proxyLockingContext*)pFile->lockingContext; 7503 if( !strcmp(pArg, ":auto:") 7504 || (pCtx->lockProxyPath && 7505 !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN)) 7506 ){ 7507 rc = SQLITE_OK; 7508 }else{ 7509 rc = switchLockProxyPath(pFile, proxyPath); 7510 } 7511 }else{ 7512 /* turn on proxy file locking */ 7513 rc = proxyTransformUnixFile(pFile, proxyPath); 7514 } 7515 } 7516 return rc; 7517 } 7518 default: { 7519 assert( 0 ); /* The call assures that only valid opcodes are sent */ 7520 } 7521 } 7522 /*NOTREACHED*/ 7523 return SQLITE_ERROR; 7524 } 7525 7526 /* 7527 ** Within this division (the proxying locking implementation) the procedures 7528 ** above this point are all utilities. The lock-related methods of the 7529 ** proxy-locking sqlite3_io_method object follow. 7530 */ 7531 7532 7533 /* 7534 ** This routine checks if there is a RESERVED lock held on the specified 7535 ** file by this or any other process. If such a lock is held, set *pResOut 7536 ** to a non-zero value otherwise *pResOut is set to zero. The return value 7537 ** is set to SQLITE_OK unless an I/O error occurs during lock checking. 
7538 */ 7539 static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) { 7540 unixFile *pFile = (unixFile*)id; 7541 int rc = proxyTakeConch(pFile); 7542 if( rc==SQLITE_OK ){ 7543 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7544 if( pCtx->conchHeld>0 ){ 7545 unixFile *proxy = pCtx->lockProxy; 7546 return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut); 7547 }else{ /* conchHeld < 0 is lockless */ 7548 pResOut=0; 7549 } 7550 } 7551 return rc; 7552 } 7553 7554 /* 7555 ** Lock the file with the lock specified by parameter eFileLock - one 7556 ** of the following: 7557 ** 7558 ** (1) SHARED_LOCK 7559 ** (2) RESERVED_LOCK 7560 ** (3) PENDING_LOCK 7561 ** (4) EXCLUSIVE_LOCK 7562 ** 7563 ** Sometimes when requesting one lock state, additional lock states 7564 ** are inserted in between. The locking might fail on one of the later 7565 ** transitions leaving the lock state different from what it started but 7566 ** still short of its goal. The following chart shows the allowed 7567 ** transitions and the inserted intermediate states: 7568 ** 7569 ** UNLOCKED -> SHARED 7570 ** SHARED -> RESERVED 7571 ** SHARED -> (PENDING) -> EXCLUSIVE 7572 ** RESERVED -> (PENDING) -> EXCLUSIVE 7573 ** PENDING -> EXCLUSIVE 7574 ** 7575 ** This routine will only increase a lock. Use the sqlite3OsUnlock() 7576 ** routine to lower a locking level. 7577 */ 7578 static int proxyLock(sqlite3_file *id, int eFileLock) { 7579 unixFile *pFile = (unixFile*)id; 7580 int rc = proxyTakeConch(pFile); 7581 if( rc==SQLITE_OK ){ 7582 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7583 if( pCtx->conchHeld>0 ){ 7584 unixFile *proxy = pCtx->lockProxy; 7585 rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock); 7586 pFile->eFileLock = proxy->eFileLock; 7587 }else{ 7588 /* conchHeld < 0 is lockless */ 7589 } 7590 } 7591 return rc; 7592 } 7593 7594 7595 /* 7596 ** Lower the locking level on file descriptor pFile to eFileLock. 
eFileLock 7597 ** must be either NO_LOCK or SHARED_LOCK. 7598 ** 7599 ** If the locking level of the file descriptor is already at or below 7600 ** the requested locking level, this routine is a no-op. 7601 */ 7602 static int proxyUnlock(sqlite3_file *id, int eFileLock) { 7603 unixFile *pFile = (unixFile*)id; 7604 int rc = proxyTakeConch(pFile); 7605 if( rc==SQLITE_OK ){ 7606 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7607 if( pCtx->conchHeld>0 ){ 7608 unixFile *proxy = pCtx->lockProxy; 7609 rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock); 7610 pFile->eFileLock = proxy->eFileLock; 7611 }else{ 7612 /* conchHeld < 0 is lockless */ 7613 } 7614 } 7615 return rc; 7616 } 7617 7618 /* 7619 ** Close a file that uses proxy locks. 7620 */ 7621 static int proxyClose(sqlite3_file *id) { 7622 if( ALWAYS(id) ){ 7623 unixFile *pFile = (unixFile*)id; 7624 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 7625 unixFile *lockProxy = pCtx->lockProxy; 7626 unixFile *conchFile = pCtx->conchFile; 7627 int rc = SQLITE_OK; 7628 7629 if( lockProxy ){ 7630 rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK); 7631 if( rc ) return rc; 7632 rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy); 7633 if( rc ) return rc; 7634 sqlite3_free(lockProxy); 7635 pCtx->lockProxy = 0; 7636 } 7637 if( conchFile ){ 7638 if( pCtx->conchHeld ){ 7639 rc = proxyReleaseConch(pFile); 7640 if( rc ) return rc; 7641 } 7642 rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile); 7643 if( rc ) return rc; 7644 sqlite3_free(conchFile); 7645 } 7646 sqlite3DbFree(0, pCtx->lockProxyPath); 7647 sqlite3_free(pCtx->conchFilePath); 7648 sqlite3DbFree(0, pCtx->dbPath); 7649 /* restore the original locking context and pMethod then close it */ 7650 pFile->lockingContext = pCtx->oldLockingContext; 7651 pFile->pMethod = pCtx->pOldMethod; 7652 sqlite3_free(pCtx); 7653 return pFile->pMethod->xClose(id); 7654 } 7655 return SQLITE_OK; 7656 } 
7657 7658 7659 7660 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ 7661 /* 7662 ** The proxy locking style is intended for use with AFP filesystems. 7663 ** And since AFP is only supported on MacOSX, the proxy locking is also 7664 ** restricted to MacOSX. 7665 ** 7666 ** 7667 ******************* End of the proxy lock implementation ********************** 7668 ******************************************************************************/ 7669 7670 /* 7671 ** Initialize the operating system interface. 7672 ** 7673 ** This routine registers all VFS implementations for unix-like operating 7674 ** systems. This routine, and the sqlite3_os_end() routine that follows, 7675 ** should be the only routines in this file that are visible from other 7676 ** files. 7677 ** 7678 ** This routine is called once during SQLite initialization and by a 7679 ** single thread. The memory allocation and mutex subsystems have not 7680 ** necessarily been initialized when this routine is called, and so they 7681 ** should not be used. 7682 */ 7683 int sqlite3_os_init(void){ 7684 /* 7685 ** The following macro defines an initializer for an sqlite3_vfs object. 7686 ** The name of the VFS is NAME. The pAppData is a pointer to a pointer 7687 ** to the "finder" function. (pAppData is a pointer to a pointer because 7688 ** silly C90 rules prohibit a void* from being cast to a function pointer 7689 ** and so we have to go through the intermediate pointer to avoid problems 7690 ** when compiling with -pedantic-errors on GCC.) 7691 ** 7692 ** The FINDER parameter to this macro is the name of the pointer to the 7693 ** finder-function. The finder-function returns a pointer to the 7694 ** sqlite_io_methods object that implements the desired locking 7695 ** behaviors. See the division above that contains the IOMETHODS 7696 ** macro for addition information on finder-functions. 7697 ** 7698 ** Most finders simply return a pointer to a fixed sqlite3_io_methods 7699 ** object. 
But the "autolockIoFinder" available on MacOSX does a little 7700 ** more than that; it looks at the filesystem type that hosts the 7701 ** database file and tries to choose an locking method appropriate for 7702 ** that filesystem time. 7703 */ 7704 #define UNIXVFS(VFSNAME, FINDER) { \ 7705 3, /* iVersion */ \ 7706 sizeof(unixFile), /* szOsFile */ \ 7707 MAX_PATHNAME, /* mxPathname */ \ 7708 0, /* pNext */ \ 7709 VFSNAME, /* zName */ \ 7710 (void*)&FINDER, /* pAppData */ \ 7711 unixOpen, /* xOpen */ \ 7712 unixDelete, /* xDelete */ \ 7713 unixAccess, /* xAccess */ \ 7714 unixFullPathname, /* xFullPathname */ \ 7715 unixDlOpen, /* xDlOpen */ \ 7716 unixDlError, /* xDlError */ \ 7717 unixDlSym, /* xDlSym */ \ 7718 unixDlClose, /* xDlClose */ \ 7719 unixRandomness, /* xRandomness */ \ 7720 unixSleep, /* xSleep */ \ 7721 unixCurrentTime, /* xCurrentTime */ \ 7722 unixGetLastError, /* xGetLastError */ \ 7723 unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \ 7724 unixSetSystemCall, /* xSetSystemCall */ \ 7725 unixGetSystemCall, /* xGetSystemCall */ \ 7726 unixNextSystemCall, /* xNextSystemCall */ \ 7727 } 7728 7729 /* 7730 ** All default VFSes for unix are contained in the following array. 7731 ** 7732 ** Note that the sqlite3_vfs.pNext field of the VFS object is modified 7733 ** by the SQLite core when the VFS is registered. So the following 7734 ** array cannot be const. 
7735 */ 7736 static sqlite3_vfs aVfs[] = { 7737 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 7738 UNIXVFS("unix", autolockIoFinder ), 7739 #elif OS_VXWORKS 7740 UNIXVFS("unix", vxworksIoFinder ), 7741 #else 7742 UNIXVFS("unix", posixIoFinder ), 7743 #endif 7744 UNIXVFS("unix-none", nolockIoFinder ), 7745 UNIXVFS("unix-dotfile", dotlockIoFinder ), 7746 UNIXVFS("unix-excl", posixIoFinder ), 7747 #if OS_VXWORKS 7748 UNIXVFS("unix-namedsem", semIoFinder ), 7749 #endif 7750 #if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS 7751 UNIXVFS("unix-posix", posixIoFinder ), 7752 #endif 7753 #if SQLITE_ENABLE_LOCKING_STYLE 7754 UNIXVFS("unix-flock", flockIoFinder ), 7755 #endif 7756 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) 7757 UNIXVFS("unix-afp", afpIoFinder ), 7758 UNIXVFS("unix-nfs", nfsIoFinder ), 7759 UNIXVFS("unix-proxy", proxyIoFinder ), 7760 #endif 7761 }; 7762 unsigned int i; /* Loop counter */ 7763 7764 /* Double-check that the aSyscall[] array has been constructed 7765 ** correctly. See ticket [bb3a86e890c8e96ab] */ 7766 assert( ArraySize(aSyscall)==29 ); 7767 7768 /* Register all VFSes defined in the aVfs[] array */ 7769 for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ 7770 sqlite3_vfs_register(&aVfs[i], i==0); 7771 } 7772 unixBigLock = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1); 7773 return SQLITE_OK; 7774 } 7775 7776 /* 7777 ** Shutdown the operating system interface. 7778 ** 7779 ** Some operating systems might need to do some cleanup in this routine, 7780 ** to release dynamically allocated objects. But not on unix. 7781 ** This routine is a no-op for unix. 7782 */ 7783 int sqlite3_os_end(void){ 7784 unixBigLock = 0; 7785 return SQLITE_OK; 7786 } 7787 7788 #endif /* SQLITE_OS_UNIX */