/ Check-in [22f47cf4]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:In the unix VFS, give every unixInodeInfo object its own mutex, rather than using the global VFS mutex, to improve concurrency in cases where there are many threads operating on separate database files.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 22f47cf430827c50634794a5a33987511bb71492c0dd1f6466a0c5b779d0521b
User & Date: drh 2018-07-26 21:48:05
Context
2018-07-27
20:37
The WHERE-clause constant propagation optimization attempts to use transitive laws to replace column values with constants in the WHERE clause in order to help the query planner make more aggressive optimizations. check-in: f4229707 user: drh tags: trunk
2018-07-26
21:48
In the unix VFS, give every unixInodeInfo object its own mutex, rather than using the global VFS mutex, to improve concurrency in cases where there are many threads operating on separate database files. check-in: 22f47cf4 user: drh tags: trunk
2018-07-25
15:25
Fix a buffer overread in fts5. check-in: 0e3de8ab user: dan tags: trunk
2018-07-23
22:55
Return an SQLITE_NOMEM error if the locking mutex fails to allocate. Closed-Leaf check-in: 1c948348 user: drh tags: separate-lock-mutex
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

  1104   1104   ** An instance of the following structure is allocated for each open
  1105   1105   ** inode.  Or, on LinuxThreads, there is one of these structures for
  1106   1106   ** each inode opened by each thread.
  1107   1107   **
  1108   1108   ** A single inode can have multiple file descriptors, so each unixFile
  1109   1109   ** structure contains a pointer to an instance of this object and this
  1110   1110   ** object keeps a count of the number of unixFile pointing to it.
         1111  +**
         1112  +** Mutex rules:
         1113  +**
         1114  +**  (1) The pLockMutex mutex must be held in order to read or write
         1115  +**      any of the locking fields:
         1116  +**          nShared, nLock, eFileLock, or bProcessLock
         1117  +**
         1118  +**  (2) When nRef>0, then the following fields are unchanging and can
         1119  +**      be read (but not written) without holding any mutex:
         1120  +**          fileId, pLockMutex
         1121  +**
         1122  +**  (3) With the exceptions above, all the fields may only be read
         1123  +**      or written while holding the global unixBigLock mutex.
  1111   1124   */
  1112   1125   struct unixInodeInfo {
  1113   1126     struct unixFileId fileId;       /* The lookup key */
  1114         -  int nShared;                    /* Number of SHARED locks held */
  1115         -  unsigned char eFileLock;        /* One of SHARED_LOCK, RESERVED_LOCK etc. */
  1116         -  unsigned char bProcessLock;     /* An exclusive process lock is held */
         1127  +  sqlite3_mutex *pLockMutex;      /* Hold this mutex for... */
         1128  +  int nShared;                      /* Number of SHARED locks held */
         1129  +  int nLock;                        /* Number of outstanding file locks */
         1130  +  unsigned char eFileLock;          /* One of SHARED_LOCK, RESERVED_LOCK etc. */
         1131  +  unsigned char bProcessLock;       /* An exclusive process lock is held */
  1117   1132     int nRef;                       /* Number of pointers to this structure */
  1118   1133     unixShmNode *pShmNode;          /* Shared memory associated with this inode */
  1119         -  int nLock;                      /* Number of outstanding file locks */
  1120   1134     UnixUnusedFd *pUnused;          /* Unused file descriptors to close */
  1121   1135     unixInodeInfo *pNext;           /* List of all unixInodeInfo objects */
  1122   1136     unixInodeInfo *pPrev;           /*    .... doubly linked */
  1123   1137   #if SQLITE_ENABLE_LOCKING_STYLE
  1124   1138     unsigned long long sharedByte;  /* for AFP simulated shared lock */
  1125   1139   #endif
  1126   1140   #if OS_VXWORKS
................................................................................
  1269   1283           assert( inodeList==pInode );
  1270   1284           inodeList = pInode->pNext;
  1271   1285         }
  1272   1286         if( pInode->pNext ){
  1273   1287           assert( pInode->pNext->pPrev==pInode );
  1274   1288           pInode->pNext->pPrev = pInode->pPrev;
  1275   1289         }
         1290  +      sqlite3_mutex_free(pInode->pLockMutex);
  1276   1291         sqlite3_free(pInode);
  1277   1292       }
  1278   1293     }
  1279   1294     assert( inodeList!=0 || nUnusedFd==0 );
  1280   1295   }
  1281   1296   
  1282   1297   /*
................................................................................
  1354   1369     if( pInode==0 ){
  1355   1370       pInode = sqlite3_malloc64( sizeof(*pInode) );
  1356   1371       if( pInode==0 ){
  1357   1372         return SQLITE_NOMEM_BKPT;
  1358   1373       }
  1359   1374       memset(pInode, 0, sizeof(*pInode));
  1360   1375       memcpy(&pInode->fileId, &fileId, sizeof(fileId));
         1376  +    if( sqlite3GlobalConfig.bCoreMutex ){
         1377  +      pInode->pLockMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
         1378  +      if( pInode->pLockMutex==0 ){
         1379  +        sqlite3_free(pInode);
         1380  +        return SQLITE_NOMEM_BKPT;
         1381  +      }
         1382  +    }
  1361   1383       pInode->nRef = 1;
  1362   1384       pInode->pNext = inodeList;
  1363   1385       pInode->pPrev = 0;
  1364   1386       if( inodeList ) inodeList->pPrev = pInode;
  1365   1387       inodeList = pInode;
  1366   1388     }else{
  1367   1389       pInode->nRef++;
................................................................................
  1432   1454     int reserved = 0;
  1433   1455     unixFile *pFile = (unixFile*)id;
  1434   1456   
  1435   1457     SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
  1436   1458   
  1437   1459     assert( pFile );
  1438   1460     assert( pFile->eFileLock<=SHARED_LOCK );
  1439         -  unixEnterMutex(); /* Because pFile->pInode is shared across threads */
         1461  +  sqlite3_mutex_enter(pFile->pInode->pLockMutex);
  1440   1462   
  1441   1463     /* Check if a thread in this process holds such a lock */
  1442   1464     if( pFile->pInode->eFileLock>SHARED_LOCK ){
  1443   1465       reserved = 1;
  1444   1466     }
  1445   1467   
  1446   1468     /* Otherwise see if some other process holds it.
................................................................................
  1457   1479         storeLastErrno(pFile, errno);
  1458   1480       } else if( lock.l_type!=F_UNLCK ){
  1459   1481         reserved = 1;
  1460   1482       }
  1461   1483     }
  1462   1484   #endif
  1463   1485     
  1464         -  unixLeaveMutex();
         1486  +  sqlite3_mutex_leave(pFile->pInode->pLockMutex);
  1465   1487     OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved));
  1466   1488   
  1467   1489     *pResOut = reserved;
  1468   1490     return rc;
  1469   1491   }
  1470   1492   
  1471   1493   /*
................................................................................
  1523   1545   **
  1524   1546   ** Zero is returned if the call completes successfully, or -1 if a call
  1525   1547   ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()).
  1526   1548   */
  1527   1549   static int unixFileLock(unixFile *pFile, struct flock *pLock){
  1528   1550     int rc;
  1529   1551     unixInodeInfo *pInode = pFile->pInode;
  1530         -  assert( unixMutexHeld() );
  1531   1552     assert( pInode!=0 );
         1553  +  assert( sqlite3_mutex_held(pInode->pLockMutex) );
  1532   1554     if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){
  1533   1555       if( pInode->bProcessLock==0 ){
  1534   1556         struct flock lock;
  1535   1557         assert( pInode->nLock==0 );
  1536   1558         lock.l_whence = SEEK_SET;
  1537   1559         lock.l_start = SHARED_FIRST;
  1538   1560         lock.l_len = SHARED_SIZE;
................................................................................
  1643   1665     */
  1644   1666     assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
  1645   1667     assert( eFileLock!=PENDING_LOCK );
  1646   1668     assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );
  1647   1669   
  1648   1670     /* This mutex is needed because pFile->pInode is shared across threads
  1649   1671     */
  1650         -  unixEnterMutex();
  1651   1672     pInode = pFile->pInode;
         1673  +  sqlite3_mutex_enter(pInode->pLockMutex);
  1652   1674   
  1653   1675     /* If some thread using this PID has a lock via a different unixFile*
  1654   1676     ** handle that precludes the requested lock, return BUSY.
  1655   1677     */
  1656   1678     if( (pFile->eFileLock!=pInode->eFileLock && 
  1657   1679             (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
  1658   1680     ){
................................................................................
  1787   1809       pInode->eFileLock = eFileLock;
  1788   1810     }else if( eFileLock==EXCLUSIVE_LOCK ){
  1789   1811       pFile->eFileLock = PENDING_LOCK;
  1790   1812       pInode->eFileLock = PENDING_LOCK;
  1791   1813     }
  1792   1814   
  1793   1815   end_lock:
  1794         -  unixLeaveMutex();
         1816  +  sqlite3_mutex_leave(pInode->pLockMutex);
  1795   1817     OSTRACE(("LOCK    %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock), 
  1796   1818         rc==SQLITE_OK ? "ok" : "failed"));
  1797   1819     return rc;
  1798   1820   }
  1799   1821   
  1800   1822   /*
  1801   1823   ** Add the file descriptor used by file handle pFile to the corresponding
................................................................................
  1835   1857         pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
  1836   1858         osGetpid(0)));
  1837   1859   
  1838   1860     assert( eFileLock<=SHARED_LOCK );
  1839   1861     if( pFile->eFileLock<=eFileLock ){
  1840   1862       return SQLITE_OK;
  1841   1863     }
  1842         -  unixEnterMutex();
  1843   1864     pInode = pFile->pInode;
         1865  +  sqlite3_mutex_enter(pInode->pLockMutex);
  1844   1866     assert( pInode->nShared!=0 );
  1845   1867     if( pFile->eFileLock>SHARED_LOCK ){
  1846   1868       assert( pInode->eFileLock==pFile->eFileLock );
  1847   1869   
  1848   1870   #ifdef SQLITE_DEBUG
  1849   1871       /* When reducing a lock such that other processes can start
  1850   1872       ** reading the database file again, make sure that the
................................................................................
  1968   1990       assert( pInode->nLock>=0 );
  1969   1991       if( pInode->nLock==0 ){
  1970   1992         closePendingFds(pFile);
  1971   1993       }
  1972   1994     }
  1973   1995   
  1974   1996   end_unlock:
  1975         -  unixLeaveMutex();
         1997  +  sqlite3_mutex_leave(pInode->pLockMutex);
  1976   1998     if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
  1977   1999     return rc;
  1978   2000   }
  1979   2001   
  1980   2002   /*
  1981   2003   ** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
  1982   2004   ** must be either NO_LOCK or SHARED_LOCK.
................................................................................
  2767   2789     
  2768   2790     assert( pFile );
  2769   2791     context = (afpLockingContext *) pFile->lockingContext;
  2770   2792     if( context->reserved ){
  2771   2793       *pResOut = 1;
  2772   2794       return SQLITE_OK;
  2773   2795     }
  2774         -  unixEnterMutex(); /* Because pFile->pInode is shared across threads */
  2775         -  
         2796  +  sqlite3_mutex_enter(pFile->pInode->pLockMutex);
  2776   2797     /* Check if a thread in this process holds such a lock */
  2777   2798     if( pFile->pInode->eFileLock>SHARED_LOCK ){
  2778   2799       reserved = 1;
  2779   2800     }
  2780   2801     
  2781   2802     /* Otherwise see if some other process holds it.
  2782   2803      */
................................................................................
  2792   2813         reserved = 1;
  2793   2814       }
  2794   2815       if( IS_LOCK_ERROR(lrc) ){
  2795   2816         rc=lrc;
  2796   2817       }
  2797   2818     }
  2798   2819     
  2799         -  unixLeaveMutex();
         2820  +  sqlite3_mutex_leave(pFile->pInode->pLockMutex);
  2800   2821     OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved));
  2801   2822     
  2802   2823     *pResOut = reserved;
  2803   2824     return rc;
  2804   2825   }
  2805   2826   
  2806   2827   /*
................................................................................
  2855   2876     */
  2856   2877     assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
  2857   2878     assert( eFileLock!=PENDING_LOCK );
  2858   2879     assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );
  2859   2880     
  2860   2881     /* This mutex is needed because pFile->pInode is shared across threads
  2861   2882     */
  2862         -  unixEnterMutex();
  2863   2883     pInode = pFile->pInode;
         2884  +  sqlite3_mutex_enter(pInode->pLockMutex);
  2864   2885   
  2865   2886     /* If some thread using this PID has a lock via a different unixFile*
  2866   2887     ** handle that precludes the requested lock, return BUSY.
  2867   2888     */
  2868   2889     if( (pFile->eFileLock!=pInode->eFileLock && 
  2869   2890          (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
  2870   2891        ){
................................................................................
  2992   3013       pInode->eFileLock = eFileLock;
  2993   3014     }else if( eFileLock==EXCLUSIVE_LOCK ){
  2994   3015       pFile->eFileLock = PENDING_LOCK;
  2995   3016       pInode->eFileLock = PENDING_LOCK;
  2996   3017     }
  2997   3018     
  2998   3019   afp_end_lock:
  2999         -  unixLeaveMutex();
         3020  +  sqlite3_mutex_leave(pInode->pLockMutex);
  3000   3021     OSTRACE(("LOCK    %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), 
  3001   3022            rc==SQLITE_OK ? "ok" : "failed"));
  3002   3023     return rc;
  3003   3024   }
  3004   3025   
  3005   3026   /*
  3006   3027   ** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
................................................................................
  3024   3045              pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
  3025   3046              osGetpid(0)));
  3026   3047   
  3027   3048     assert( eFileLock<=SHARED_LOCK );
  3028   3049     if( pFile->eFileLock<=eFileLock ){
  3029   3050       return SQLITE_OK;
  3030   3051     }
  3031         -  unixEnterMutex();
  3032   3052     pInode = pFile->pInode;
         3053  +  sqlite3_mutex_enter(pInode->pLockMutex);
  3033   3054     assert( pInode->nShared!=0 );
  3034   3055     if( pFile->eFileLock>SHARED_LOCK ){
  3035   3056       assert( pInode->eFileLock==pFile->eFileLock );
  3036   3057       SimulateIOErrorBenign(1);
  3037   3058       SimulateIOError( h=(-1) )
  3038   3059       SimulateIOErrorBenign(0);
  3039   3060       
................................................................................
  3100   3121         assert( pInode->nLock>=0 );
  3101   3122         if( pInode->nLock==0 ){
  3102   3123           closePendingFds(pFile);
  3103   3124         }
  3104   3125       }
  3105   3126     }
  3106   3127     
  3107         -  unixLeaveMutex();
         3128  +  sqlite3_mutex_leave(pInode->pLockMutex);
  3108   3129     if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
  3109   3130     return rc;
  3110   3131   }
  3111   3132   
  3112   3133   /*
  3113   3134   ** Close a file & cleanup AFP specific locking context 
  3114   3135   */