Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | In the unix VFS, give every unixInodeInfo object its own mutex, rather than using the global VFS mutex, to improve concurrency in cases where there are many threads operating on separate database files. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: | 22f47cf430827c50634794a5a3398751 |
User & Date: | drh 2018-07-26 21:48:05 |
Context
2018-07-27
| ||
20:37 | The WHERE-clause constant propagation optimization attempts to use transitive laws to replace column values with constants in the WHERE clause in order to help to query planner make more aggressive optimizations. check-in: f4229707 user: drh tags: trunk | |
2018-07-26
| ||
21:48 | In the unix VFS, give every unixInodeInfo object its own mutex, rather than using the global VFS mutex, to improve concurrency in cases where there are many threads operating on separate database files. check-in: 22f47cf4 user: drh tags: trunk | |
2018-07-25
| ||
15:25 | Fix a buffer overread in fts5. check-in: 0e3de8ab user: dan tags: trunk | |
2018-07-23
| ||
22:55 | Return an SQLITE_NOMEM error if the locking mutex fails to allocate. Closed-Leaf check-in: 1c948348 user: drh tags: separate-lock-mutex | |
Changes
Changes to src/os_unix.c.
1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 .... 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 .... 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 .... 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 .... 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 .... 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 .... 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 .... 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 .... 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 .... 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 .... 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 .... 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 .... 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 .... 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 .... 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 .... 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 |
** An instance of the following structure is allocated for each open ** inode. Or, on LinuxThreads, there is one of these structures for ** each inode opened by each thread. ** ** A single inode can have multiple file descriptors, so each unixFile ** structure contains a pointer to an instance of this object and this ** object keeps a count of the number of unixFile pointing to it. */ struct unixInodeInfo { struct unixFileId fileId; /* The lookup key */ int nShared; /* Number of SHARED locks held */ unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ unsigned char bProcessLock; /* An exclusive process lock is held */ int nRef; /* Number of pointers to this structure */ unixShmNode *pShmNode; /* Shared memory associated with this inode */ int nLock; /* Number of outstanding file locks */ UnixUnusedFd *pUnused; /* Unused file descriptors to close */ unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ unixInodeInfo *pPrev; /* .... doubly linked */ #if SQLITE_ENABLE_LOCKING_STYLE unsigned long long sharedByte; /* for AFP simulated shared lock */ #endif #if OS_VXWORKS ................................................................................ assert( inodeList==pInode ); inodeList = pInode->pNext; } if( pInode->pNext ){ assert( pInode->pNext->pPrev==pInode ); pInode->pNext->pPrev = pInode->pPrev; } sqlite3_free(pInode); } } assert( inodeList!=0 || nUnusedFd==0 ); } /* ................................................................................ if( pInode==0 ){ pInode = sqlite3_malloc64( sizeof(*pInode) ); if( pInode==0 ){ return SQLITE_NOMEM_BKPT; } memset(pInode, 0, sizeof(*pInode)); memcpy(&pInode->fileId, &fileId, sizeof(fileId)); pInode->nRef = 1; pInode->pNext = inodeList; pInode->pPrev = 0; if( inodeList ) inodeList->pPrev = pInode; inodeList = pInode; }else{ pInode->nRef++; ................................................................................ int reserved = 0; unixFile *pFile = (unixFile*)id; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); assert( pFile->eFileLock<=SHARED_LOCK ); unixEnterMutex(); /* Because pFile->pInode is shared across threads */ /* Check if a thread in this process holds such a lock */ if( pFile->pInode->eFileLock>SHARED_LOCK ){ reserved = 1; } /* Otherwise see if some other process holds it. ................................................................................ storeLastErrno(pFile, errno); } else if( lock.l_type!=F_UNLCK ){ reserved = 1; } } #endif unixLeaveMutex(); OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); *pResOut = reserved; return rc; } /* ................................................................................ ** ** Zero is returned if the call completes successfully, or -1 if a call ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). */ static int unixFileLock(unixFile *pFile, struct flock *pLock){ int rc; unixInodeInfo *pInode = pFile->pInode; assert( unixMutexHeld() ); assert( pInode!=0 ); if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){ if( pInode->bProcessLock==0 ){ struct flock lock; assert( pInode->nLock==0 ); lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; ................................................................................ */ assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); assert( eFileLock!=PENDING_LOCK ); assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); /* This mutex is needed because pFile->pInode is shared across threads */ unixEnterMutex(); pInode = pFile->pInode; /* If some thread using this PID has a lock via a different unixFile* ** handle that precludes the requested lock, return BUSY. */ if( (pFile->eFileLock!=pInode->eFileLock && (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) ){ ................................................................................ pInode->eFileLock = eFileLock; }else if( eFileLock==EXCLUSIVE_LOCK ){ pFile->eFileLock = PENDING_LOCK; pInode->eFileLock = PENDING_LOCK; } end_lock: unixLeaveMutex(); OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock), rc==SQLITE_OK ? "ok" : "failed")); return rc; } /* ** Add the file descriptor used by file handle pFile to the corresponding ................................................................................ pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, osGetpid(0))); assert( eFileLock<=SHARED_LOCK ); if( pFile->eFileLock<=eFileLock ){ return SQLITE_OK; } unixEnterMutex(); pInode = pFile->pInode; assert( pInode->nShared!=0 ); if( pFile->eFileLock>SHARED_LOCK ){ assert( pInode->eFileLock==pFile->eFileLock ); #ifdef SQLITE_DEBUG /* When reducing a lock such that other processes can start ** reading the database file again, make sure that the ................................................................................ assert( pInode->nLock>=0 ); if( pInode->nLock==0 ){ closePendingFds(pFile); } } end_unlock: unixLeaveMutex(); if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; return rc; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ** must be either NO_LOCK or SHARED_LOCK. ................................................................................ assert( pFile ); context = (afpLockingContext *) pFile->lockingContext; if( context->reserved ){ *pResOut = 1; return SQLITE_OK; } unixEnterMutex(); /* Because pFile->pInode is shared across threads */ /* Check if a thread in this process holds such a lock */ if( pFile->pInode->eFileLock>SHARED_LOCK ){ reserved = 1; } /* Otherwise see if some other process holds it. */ ................................................................................ reserved = 1; } if( IS_LOCK_ERROR(lrc) ){ rc=lrc; } } unixLeaveMutex(); OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); *pResOut = reserved; return rc; } /* ................................................................................ */ assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); assert( eFileLock!=PENDING_LOCK ); assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); /* This mutex is needed because pFile->pInode is shared across threads */ unixEnterMutex(); pInode = pFile->pInode; /* If some thread using this PID has a lock via a different unixFile* ** handle that precludes the requested lock, return BUSY. */ if( (pFile->eFileLock!=pInode->eFileLock && (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) ){ ................................................................................ pInode->eFileLock = eFileLock; }else if( eFileLock==EXCLUSIVE_LOCK ){ pFile->eFileLock = PENDING_LOCK; pInode->eFileLock = PENDING_LOCK; } afp_end_lock: unixLeaveMutex(); OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), rc==SQLITE_OK ? "ok" : "failed")); return rc; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ................................................................................ pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, osGetpid(0))); assert( eFileLock<=SHARED_LOCK ); if( pFile->eFileLock<=eFileLock ){ return SQLITE_OK; } unixEnterMutex(); pInode = pFile->pInode; assert( pInode->nShared!=0 ); if( pFile->eFileLock>SHARED_LOCK ){ assert( pInode->eFileLock==pFile->eFileLock ); SimulateIOErrorBenign(1); SimulateIOError( h=(-1) ) SimulateIOErrorBenign(0); ................................................................................ assert( pInode->nLock>=0 ); if( pInode->nLock==0 ){ closePendingFds(pFile); } } } unixLeaveMutex(); if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; return rc; } /* ** Close a file & cleanup AFP specific locking context */ |
> > > > > > > > > > > > > > | > | | < > > > > > > > > | | < > < > | < > | | < | < > | < > | |
1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 .... 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 .... 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 .... 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 .... 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 .... 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 .... 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 .... 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 .... 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 .... 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 .... 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 .... 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 .... 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 .... 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 .... 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 .... 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 |
** An instance of the following structure is allocated for each open ** inode. Or, on LinuxThreads, there is one of these structures for ** each inode opened by each thread. ** ** A single inode can have multiple file descriptors, so each unixFile ** structure contains a pointer to an instance of this object and this ** object keeps a count of the number of unixFile pointing to it. ** ** Mutex rules: ** ** (1) The pLockMutex mutex must be held in order to read or write ** any of the locking fields: ** nShared, nLock, eFileLock, or bProcessLock ** ** (2) When nRef>0, then the following fields are unchanging and can ** be read (but not written) without holding any mutex: ** fileId, pLockMutex ** ** (3) With the exceptions above, all the fields may only be read ** or written while holding the global unixBigLock mutex. */ struct unixInodeInfo { struct unixFileId fileId; /* The lookup key */ sqlite3_mutex *pLockMutex; /* Hold this mutex for... */ int nShared; /* Number of SHARED locks held */ int nLock; /* Number of outstanding file locks */ unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ unsigned char bProcessLock; /* An exclusive process lock is held */ int nRef; /* Number of pointers to this structure */ unixShmNode *pShmNode; /* Shared memory associated with this inode */ UnixUnusedFd *pUnused; /* Unused file descriptors to close */ unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ unixInodeInfo *pPrev; /* .... doubly linked */ #if SQLITE_ENABLE_LOCKING_STYLE unsigned long long sharedByte; /* for AFP simulated shared lock */ #endif #if OS_VXWORKS ................................................................................ assert( inodeList==pInode ); inodeList = pInode->pNext; } if( pInode->pNext ){ assert( pInode->pNext->pPrev==pInode ); pInode->pNext->pPrev = pInode->pPrev; } sqlite3_mutex_free(pInode->pLockMutex); sqlite3_free(pInode); } } assert( inodeList!=0 || nUnusedFd==0 ); } /* ................................................................................ if( pInode==0 ){ pInode = sqlite3_malloc64( sizeof(*pInode) ); if( pInode==0 ){ return SQLITE_NOMEM_BKPT; } memset(pInode, 0, sizeof(*pInode)); memcpy(&pInode->fileId, &fileId, sizeof(fileId)); if( sqlite3GlobalConfig.bCoreMutex ){ pInode->pLockMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); if( pInode->pLockMutex==0 ){ sqlite3_free(pInode); return SQLITE_NOMEM_BKPT; } } pInode->nRef = 1; pInode->pNext = inodeList; pInode->pPrev = 0; if( inodeList ) inodeList->pPrev = pInode; inodeList = pInode; }else{ pInode->nRef++; ................................................................................ int reserved = 0; unixFile *pFile = (unixFile*)id; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); assert( pFile->eFileLock<=SHARED_LOCK ); sqlite3_mutex_enter(pFile->pInode->pLockMutex); /* Check if a thread in this process holds such a lock */ if( pFile->pInode->eFileLock>SHARED_LOCK ){ reserved = 1; } /* Otherwise see if some other process holds it. ................................................................................ storeLastErrno(pFile, errno); } else if( lock.l_type!=F_UNLCK ){ reserved = 1; } } #endif sqlite3_mutex_leave(pFile->pInode->pLockMutex); OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); *pResOut = reserved; return rc; } /* ................................................................................ ** ** Zero is returned if the call completes successfully, or -1 if a call ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). */ static int unixFileLock(unixFile *pFile, struct flock *pLock){ int rc; unixInodeInfo *pInode = pFile->pInode; assert( pInode!=0 ); assert( sqlite3_mutex_held(pInode->pLockMutex) ); if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){ if( pInode->bProcessLock==0 ){ struct flock lock; assert( pInode->nLock==0 ); lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; ................................................................................ */ assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); assert( eFileLock!=PENDING_LOCK ); assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); /* This mutex is needed because pFile->pInode is shared across threads */ pInode = pFile->pInode; sqlite3_mutex_enter(pInode->pLockMutex); /* If some thread using this PID has a lock via a different unixFile* ** handle that precludes the requested lock, return BUSY. */ if( (pFile->eFileLock!=pInode->eFileLock && (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) ){ ................................................................................ pInode->eFileLock = eFileLock; }else if( eFileLock==EXCLUSIVE_LOCK ){ pFile->eFileLock = PENDING_LOCK; pInode->eFileLock = PENDING_LOCK; } end_lock: sqlite3_mutex_leave(pInode->pLockMutex); OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock), rc==SQLITE_OK ? "ok" : "failed")); return rc; } /* ** Add the file descriptor used by file handle pFile to the corresponding ................................................................................ pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, osGetpid(0))); assert( eFileLock<=SHARED_LOCK ); if( pFile->eFileLock<=eFileLock ){ return SQLITE_OK; } pInode = pFile->pInode; sqlite3_mutex_enter(pInode->pLockMutex); assert( pInode->nShared!=0 ); if( pFile->eFileLock>SHARED_LOCK ){ assert( pInode->eFileLock==pFile->eFileLock ); #ifdef SQLITE_DEBUG /* When reducing a lock such that other processes can start ** reading the database file again, make sure that the ................................................................................ assert( pInode->nLock>=0 ); if( pInode->nLock==0 ){ closePendingFds(pFile); } } end_unlock: sqlite3_mutex_leave(pInode->pLockMutex); if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; return rc; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ** must be either NO_LOCK or SHARED_LOCK. ................................................................................ assert( pFile ); context = (afpLockingContext *) pFile->lockingContext; if( context->reserved ){ *pResOut = 1; return SQLITE_OK; } sqlite3_mutex_enter(pFile->pInode->pLockMutex); /* Check if a thread in this process holds such a lock */ if( pFile->pInode->eFileLock>SHARED_LOCK ){ reserved = 1; } /* Otherwise see if some other process holds it. */ ................................................................................ reserved = 1; } if( IS_LOCK_ERROR(lrc) ){ rc=lrc; } } sqlite3_mutex_leave(pFile->pInode->pLockMutex); OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); *pResOut = reserved; return rc; } /* ................................................................................ */ assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); assert( eFileLock!=PENDING_LOCK ); assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); /* This mutex is needed because pFile->pInode is shared across threads */ pInode = pFile->pInode; sqlite3_mutex_enter(pInode->pLockMutex); /* If some thread using this PID has a lock via a different unixFile* ** handle that precludes the requested lock, return BUSY. */ if( (pFile->eFileLock!=pInode->eFileLock && (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) ){ ................................................................................ pInode->eFileLock = eFileLock; }else if( eFileLock==EXCLUSIVE_LOCK ){ pFile->eFileLock = PENDING_LOCK; pInode->eFileLock = PENDING_LOCK; } afp_end_lock: sqlite3_mutex_leave(pInode->pLockMutex); OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), rc==SQLITE_OK ? "ok" : "failed")); return rc; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ................................................................................ pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, osGetpid(0))); assert( eFileLock<=SHARED_LOCK ); if( pFile->eFileLock<=eFileLock ){ return SQLITE_OK; } pInode = pFile->pInode; sqlite3_mutex_enter(pInode->pLockMutex); assert( pInode->nShared!=0 ); if( pFile->eFileLock>SHARED_LOCK ){ assert( pInode->eFileLock==pFile->eFileLock ); SimulateIOErrorBenign(1); SimulateIOError( h=(-1) ) SimulateIOErrorBenign(0); ................................................................................ assert( pInode->nLock>=0 ); if( pInode->nLock==0 ){ closePendingFds(pFile); } } } sqlite3_mutex_leave(pInode->pLockMutex); if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; return rc; } /* ** Close a file & cleanup AFP specific locking context */ |