/ Check-in [ec2f46de]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Transient locks in WAL mode can now block in order to resolve priority inversions.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: ec2f46de531ec8ef91981b19b48ab64db7727264
User & Date: drh 2015-03-17 16:59:57
Context
2015-03-17
17:08
Also merge the WAL blocking lock tests that were somehow missed on the previous check-in. check-in: 7214dab7 user: drh tags: trunk
16:59
Transient locks in WAL mode can now block in order to resolve priority inversions. check-in: ec2f46de user: drh tags: trunk
2015-03-16
20:40
Make SQLite slightly more likely to use an auto-index within a sub-query. check-in: ab832336 user: dan tags: trunk
2015-03-10
20:22
Arrange for some of the transient locks in WAL mode to block, as a signal to the OS to fix priority inversions. check-in: c6e6d5f4 user: drh tags: wal-blocking-lock
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

   244    244   # define UNIXFILE_DIRSYNC    0x00
   245    245   #endif
   246    246   #define UNIXFILE_PSOW        0x10     /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
   247    247   #define UNIXFILE_DELETE      0x20     /* Delete on close */
   248    248   #define UNIXFILE_URI         0x40     /* Filename might have query parameters */
   249    249   #define UNIXFILE_NOLOCK      0x80     /* Do no file locking */
   250    250   #define UNIXFILE_WARNED    0x0100     /* verifyDbFile() warnings issued */
          251  +#define UNIXFILE_BLOCK     0x0200     /* Next SHM lock might block */
   251    252   
   252    253   /*
   253    254   ** Include code that is common to all os_*.c files
   254    255   */
   255    256   #include "os_common.h"
   256    257   
   257    258   /*
................................................................................
  4086   4087   /*
  4087   4088   ** Apply posix advisory locks for all bytes from ofst through ofst+n-1.
  4088   4089   **
  4089   4090   ** Locks block if the UNIXFILE_BLOCK flag is set on pFile and are non-blocking
  4090   4091   ** otherwise.  The flag is cleared once the fcntl() call has been made.
  4091   4092   */
  4092   4093   static int unixShmSystemLock(
  4093         -  unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */
         4094  +  unixFile *pFile,       /* Open connection to the WAL file */
  4094   4095     int lockType,          /* F_UNLCK, F_RDLCK, or F_WRLCK */
  4095   4096     int ofst,              /* First byte of the locking range */
  4096   4097     int n                  /* Number of bytes to lock */
  4097   4098   ){
  4098         -  struct flock f;       /* The posix advisory locking structure */
  4099         -  int rc = SQLITE_OK;   /* Result code form fcntl() */
         4099  +  unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */
         4100  +  struct flock f;        /* The posix advisory locking structure */
         4101  +  int rc = SQLITE_OK;    /* Result code from fcntl() */
  4100   4102   
  4101   4103     /* Access to the unixShmNode object is serialized by the caller */
         4104  +  pShmNode = pFile->pInode->pShmNode;
  4102   4105     assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 );
  4103   4106   
  4104   4107     /* Shared locks never span more than one byte */
  4105   4108     assert( n==1 || lockType!=F_RDLCK );
  4106   4109   
  4107   4110     /* Locks are within range */
  4108   4111     assert( n>=1 && n<SQLITE_SHM_NLOCK );
  4109   4112   
  4110   4113     if( pShmNode->h>=0 ){
         4114  +    int lkType;
  4111   4115       /* Initialize the locking parameters */
  4112   4116       memset(&f, 0, sizeof(f));
  4113   4117       f.l_type = lockType;
  4114   4118       f.l_whence = SEEK_SET;
  4115   4119       f.l_start = ofst;
  4116   4120       f.l_len = n;
  4117   4121   
  4118         -    rc = osFcntl(pShmNode->h, F_SETLK, &f);
         4122  +    lkType = (pFile->ctrlFlags & UNIXFILE_BLOCK)!=0 ? F_SETLKW : F_SETLK;
         4123  +    rc = osFcntl(pShmNode->h, lkType, &f);
  4119   4124       rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;
         4125  +    pFile->ctrlFlags &= ~UNIXFILE_BLOCK;
  4120   4126     }
  4121   4127   
  4122   4128     /* Update the global lock state and do debug tracing */
  4123   4129   #ifdef SQLITE_DEBUG
  4124   4130     { u16 mask;
  4125   4131     OSTRACE(("SHM-LOCK "));
  4126   4132     mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst);
................................................................................
  4322   4328         */
  4323   4329         osFchown(pShmNode->h, sStat.st_uid, sStat.st_gid);
  4324   4330     
  4325   4331         /* Check to see if another process is holding the dead-man switch.
  4326   4332         ** If not, truncate the file to zero length. 
  4327   4333         */
  4328   4334         rc = SQLITE_OK;
  4329         -      if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
         4335  +      if( unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
  4330   4336           if( robust_ftruncate(pShmNode->h, 0) ){
  4331   4337             rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename);
  4332   4338           }
  4333   4339         }
  4334   4340         if( rc==SQLITE_OK ){
  4335         -        rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1);
         4341  +        rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1);
  4336   4342         }
  4337   4343         if( rc ) goto shm_open_err;
  4338   4344       }
  4339   4345     }
  4340   4346   
  4341   4347     /* Make the new connection a child of the unixShmNode */
  4342   4348     p->pShmNode = pShmNode;
................................................................................
  4560   4566         if( pX==p ) continue;
  4561   4567         assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 );
  4562   4568         allMask |= pX->sharedMask;
  4563   4569       }
  4564   4570   
  4565   4571       /* Unlock the system-level locks */
  4566   4572       if( (mask & allMask)==0 ){
  4567         -      rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+UNIX_SHM_BASE, n);
         4573  +      rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n);
  4568   4574       }else{
  4569   4575         rc = SQLITE_OK;
  4570   4576       }
  4571   4577   
  4572   4578       /* Undo the local locks */
  4573   4579       if( rc==SQLITE_OK ){
  4574   4580         p->exclMask &= ~mask;
................................................................................
  4588   4594         }
  4589   4595         allShared |= pX->sharedMask;
  4590   4596       }
  4591   4597   
  4592   4598       /* Get shared locks at the system level, if necessary */
  4593   4599       if( rc==SQLITE_OK ){
  4594   4600         if( (allShared & mask)==0 ){
  4595         -        rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+UNIX_SHM_BASE, n);
         4601  +        rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n);
  4596   4602         }else{
  4597   4603           rc = SQLITE_OK;
  4598   4604         }
  4599   4605       }
  4600   4606   
  4601   4607       /* Get the local shared locks */
  4602   4608       if( rc==SQLITE_OK ){
................................................................................
  4613   4619         }
  4614   4620       }
  4615   4621     
  4616   4622       /* Get the exclusive locks at the system level.  Then if successful
  4617   4623       ** also mark the local connection as being locked.
  4618   4624       */
  4619   4625       if( rc==SQLITE_OK ){
  4620         -      rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+UNIX_SHM_BASE, n);
         4626  +      rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n);
  4621   4627         if( rc==SQLITE_OK ){
  4622   4628           assert( (p->sharedMask & mask)==0 );
  4623   4629           p->exclMask |= mask;
  4624   4630         }
  4625   4631       }
  4626   4632     }
  4627   4633     sqlite3_mutex_leave(pShmNode->mutex);
................................................................................
  7218   7224   
  7219   7225   /*
  7220   7226   ** This routine handles sqlite3_file_control() calls that are specific
  7221   7227   ** to proxy locking.
  7222   7228   */
  7223   7229   static int proxyFileControl(sqlite3_file *id, int op, void *pArg){
  7224   7230     switch( op ){
         7231  +    case SQLITE_FCNTL_WAL_BLOCK: {
         7232  +      id->ctrlFlags |= UNIXFILE_BLOCK;
         7233  +      return SQLITE_OK;
         7234  +    }
  7225   7235       case SQLITE_FCNTL_GET_LOCKPROXYFILE: {
  7226   7236         unixFile *pFile = (unixFile*)id;
  7227   7237         if( pFile->pMethod == &proxyIoMethods ){
  7228   7238           proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
  7229   7239           proxyTakeConch(pFile);
  7230   7240           if( pCtx->lockProxyPath ){
  7231   7241             *(const char **)pArg = pCtx->lockProxyPath;

Changes to src/sqlite.h.in.

   941    941   **
   942    942   ** <li>[[SQLITE_FCNTL_WIN32_SET_HANDLE]]
   943    943   ** The [SQLITE_FCNTL_WIN32_SET_HANDLE] opcode is used for debugging.  This
   944    944   ** opcode causes the xFileControl method to swap the file handle with the one
   945    945   ** pointed to by the pArg argument.  This capability is used during testing
   946    946   ** and only needs to be supported when SQLITE_TEST is defined.
   947    947   **
          948  +** <li>[[SQLITE_FCNTL_WAL_BLOCK]]
          949  +** The [SQLITE_FCNTL_WAL_BLOCK] opcode is a signal to the VFS layer that it might
          950  +** be advantageous to block on the next WAL lock if the lock is not immediately
          951  +** available.  The WAL subsystem issues this signal during rare
          952  +** circumstances in order to fix a problem with priority inversion.
          953  +** Applications should <em>not</em> use this file-control.
          954  +**
   948    955   ** </ul>
   949    956   */
   950    957   #define SQLITE_FCNTL_LOCKSTATE               1
   951    958   #define SQLITE_FCNTL_GET_LOCKPROXYFILE       2
   952    959   #define SQLITE_FCNTL_SET_LOCKPROXYFILE       3
   953    960   #define SQLITE_FCNTL_LAST_ERRNO              4
   954    961   #define SQLITE_FCNTL_SIZE_HINT               5
................................................................................
   965    972   #define SQLITE_FCNTL_TEMPFILENAME           16
   966    973   #define SQLITE_FCNTL_MMAP_SIZE              18
   967    974   #define SQLITE_FCNTL_TRACE                  19
   968    975   #define SQLITE_FCNTL_HAS_MOVED              20
   969    976   #define SQLITE_FCNTL_SYNC                   21
   970    977   #define SQLITE_FCNTL_COMMIT_PHASETWO        22
   971    978   #define SQLITE_FCNTL_WIN32_SET_HANDLE       23
          979  +#define SQLITE_FCNTL_WAL_BLOCK              24
   972    980   
   973    981   /* deprecated names */
   974    982   #define SQLITE_GET_LOCKPROXYFILE      SQLITE_FCNTL_GET_LOCKPROXYFILE
   975    983   #define SQLITE_SET_LOCKPROXYFILE      SQLITE_FCNTL_SET_LOCKPROXYFILE
   976    984   #define SQLITE_LAST_ERRNO             SQLITE_FCNTL_LAST_ERRNO
   977    985   
   978    986   

Changes to src/wal.c.

   784    784   }
   785    785   static void walUnlockShared(Wal *pWal, int lockIdx){
   786    786     if( pWal->exclusiveMode ) return;
   787    787     (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
   788    788                            SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED);
   789    789     WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx)));
   790    790   }
   791         -static int walLockExclusive(Wal *pWal, int lockIdx, int n){
          791  +static int walLockExclusive(Wal *pWal, int lockIdx, int n, int fBlock){
   792    792     int rc;
   793    793     if( pWal->exclusiveMode ) return SQLITE_OK;
          794  +  if( fBlock ) sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_WAL_BLOCK, 0);
   794    795     rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
   795    796                           SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE);
   796    797     WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal,
   797    798               walLockName(lockIdx), n, rc ? "failed" : "ok"));
   798    799     VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
   799    800     return rc;
   800    801   }
................................................................................
  1072   1073     */
  1073   1074     assert( pWal->ckptLock==1 || pWal->ckptLock==0 );
  1074   1075     assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 );
  1075   1076     assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE );
  1076   1077     assert( pWal->writeLock );
  1077   1078     iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock;
  1078   1079     nLock = SQLITE_SHM_NLOCK - iLock;
  1079         -  rc = walLockExclusive(pWal, iLock, nLock);
         1080  +  rc = walLockExclusive(pWal, iLock, nLock, 0);
  1080   1081     if( rc ){
  1081   1082       return rc;
  1082   1083     }
  1083   1084     WALTRACE(("WAL%p: recovery begin...\n", pWal));
  1084   1085   
  1085   1086     memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  1086   1087   
................................................................................
  1606   1607     int (*xBusy)(void*),            /* Function to call when busy */
  1607   1608     void *pBusyArg,                 /* Context argument for xBusyHandler */
  1608   1609     int lockIdx,                    /* Offset of first byte to lock */
  1609   1610     int n                           /* Number of bytes to lock */
  1610   1611   ){
  1611   1612     int rc;
  1612   1613     do {
  1613         -    rc = walLockExclusive(pWal, lockIdx, n);
         1614  +    rc = walLockExclusive(pWal, lockIdx, n, 0);
  1614   1615     }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) );
  1615   1616     return rc;
  1616   1617   }
  1617   1618   
  1618   1619   /*
  1619   1620   ** The cache of the wal-index header must be valid to call this function.
  1620   1621   ** Return the page-size in bytes used by the database.
................................................................................
  2039   2040     assert( badHdr==0 || pWal->writeLock==0 );
  2040   2041     if( badHdr ){
  2041   2042       if( pWal->readOnly & WAL_SHM_RDONLY ){
  2042   2043         if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
  2043   2044           walUnlockShared(pWal, WAL_WRITE_LOCK);
  2044   2045           rc = SQLITE_READONLY_RECOVERY;
  2045   2046         }
  2046         -    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
         2047  +    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 1)) ){
  2047   2048         pWal->writeLock = 1;
  2048   2049         if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
  2049   2050           badHdr = walIndexTryHdr(pWal, pChanged);
  2050   2051           if( badHdr ){
  2051   2052             /* If the wal-index header is still malformed even while holding
  2052   2053             ** a WRITE lock, it can only mean that the header is corrupted and
  2053   2054             ** needs to be reconstructed.  So run recovery to do exactly that.
................................................................................
  2245   2246     }
  2246   2247     /* There was once an "if" here. The extra "{" is to preserve indentation. */
  2247   2248     {
  2248   2249       if( (pWal->readOnly & WAL_SHM_RDONLY)==0
  2249   2250        && (mxReadMark<pWal->hdr.mxFrame || mxI==0)
  2250   2251       ){
  2251   2252         for(i=1; i<WAL_NREADER; i++){
  2252         -        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
         2253  +        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1, 0);
  2253   2254           if( rc==SQLITE_OK ){
  2254   2255             mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame;
  2255   2256             mxI = i;
  2256   2257             walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
  2257   2258             break;
  2258   2259           }else if( rc!=SQLITE_BUSY ){
  2259   2260             return rc;
................................................................................
  2501   2502     if( pWal->readOnly ){
  2502   2503       return SQLITE_READONLY;
  2503   2504     }
  2504   2505   
  2505   2506     /* Only one writer allowed at a time.  Get the write lock.  Return
  2506   2507     ** SQLITE_BUSY if unable.
  2507   2508     */
  2508         -  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
         2509  +  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 0);
  2509   2510     if( rc ){
  2510   2511       return rc;
  2511   2512     }
  2512   2513     pWal->writeLock = 1;
  2513   2514   
  2514   2515     /* If another connection has written to the database file since the
  2515   2516     ** time the read transaction on this connection was started, then
................................................................................
  2646   2647   
  2647   2648     if( pWal->readLock==0 ){
  2648   2649       volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
  2649   2650       assert( pInfo->nBackfill==pWal->hdr.mxFrame );
  2650   2651       if( pInfo->nBackfill>0 ){
  2651   2652         u32 salt1;
  2652   2653         sqlite3_randomness(4, &salt1);
  2653         -      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
         2654  +      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1, 0);
  2654   2655         if( rc==SQLITE_OK ){
  2655   2656           /* If all readers are using WAL_READ_LOCK(0) (in other words if no
  2656   2657           ** readers are currently using the WAL), then the transactions
  2657   2658           ** frames will overwrite the start of the existing log. Update the
  2658   2659           ** wal-index header to reflect this.
  2659   2660           **
  2660   2661           ** In theory it would be Ok to update the cache of the header only
................................................................................
  2971   2972     assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 );
  2972   2973   
  2973   2974     if( pWal->readOnly ) return SQLITE_READONLY;
  2974   2975     WALTRACE(("WAL%p: checkpoint begins\n", pWal));
  2975   2976   
  2976   2977     /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive 
  2977   2978     ** "checkpoint" lock on the database file. */
  2978         -  rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);
         2979  +  rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1, 0);
  2979   2980     if( rc ){
  2980   2981       /* EVIDENCE-OF: R-10421-19736 If any other process is running a
  2981   2982       ** checkpoint operation at the same time, the lock cannot be obtained and
  2982   2983       ** SQLITE_BUSY is returned.
  2983   2984       ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured,
  2984   2985       ** it will not be invoked in this case.
  2985   2986       */