/ Check-in [a05a6d40]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: In the "unix-excl" VFS, use the heap for shared memory, since only a single process is able to read or write the database.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | unix-excl
Files: files | file ages | folders
SHA1: a05a6d40875df674f9c2b46e33128c6878d4edaa
User & Date: drh 2011-03-12 18:10:44
Original Comment: In the "unix-excl" VFS, use the heap for shared memory, since only a single process is able to read or write the database.
Context
2011-03-14
13:54
Merge the unix-excl VFS into the trunk. This merge also adds the -vfs option to the command-line shell. check-in: 3934b004 user: drh tags: trunk
2011-03-12
18:10
In the "unix-excl" VFS, use the heap for shared memory, since only a single process is able to read or write the database. Closed-Leaf check-in: a05a6d40 user: drh tags: unix-excl
17:02
Add the new optional "unix-excl" VFS. This VFS grabs an exclusive lock on the database preventing other processes from accessing it, but continues to allow other database connections from the same process. check-in: 00051c32 user: drh tags: unix-excl
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

  1198   1198   ** operations become no-ops.  Locking operations still happen internally,
  1199   1199   ** in order to coordinate access between separate database connections
  1200   1200   ** within this process, but all of that is handled in memory and the
  1201   1201   ** operating system does not participate.
  1202   1202   */
  1203   1203   static int unixFileLock(unixFile *pFile, struct flock *pLock){
  1204   1204     int rc;
         1205  +  unixInodeInfo *pInode = pFile->pInode;
  1205   1206     assert( unixMutexHeld() );
  1206         -  if( (pFile->ctrlFlags & UNIXFILE_EXCL)!=0 || pFile->pInode->bProcessLock ){
  1207         -    if( pFile->pInode->bProcessLock==0 ){
         1207  +  assert( pInode!=0 );
         1208  +  if( (pFile->ctrlFlags & UNIXFILE_EXCL)!=0 || pInode->bProcessLock ){
         1209  +    if( pInode->bProcessLock==0 ){
  1208   1210         struct flock lock;
         1211  +      assert( pInode->nLock==0 );
  1209   1212         lock.l_whence = SEEK_SET;
  1210   1213         lock.l_start = SHARED_FIRST;
  1211   1214         lock.l_len = SHARED_SIZE;
  1212   1215         lock.l_type = F_WRLCK;
  1213   1216         rc = osFcntl(pFile->h, F_SETLK, &lock);
  1214   1217         if( rc<0 ) return rc;
  1215         -      pFile->pInode->bProcessLock = 1;
         1218  +      pInode->bProcessLock = 1;
         1219  +      pInode->nLock++;
  1216   1220       }else{
  1217   1221         rc = 0;
  1218   1222       }
  1219   1223     }else{
  1220   1224       rc = osFcntl(pFile->h, F_SETLK, pLock);
  1221   1225     }
  1222   1226     return rc;
................................................................................
  1727   1731   */
  1728   1732   static int unixClose(sqlite3_file *id){
  1729   1733     int rc = SQLITE_OK;
  1730   1734     if( id ){
  1731   1735       unixFile *pFile = (unixFile *)id;
  1732   1736       unixUnlock(id, NO_LOCK);
  1733   1737       unixEnterMutex();
         1738  +    assert( pFile->pInode==0 || pFile->pInode->nLock>0
         1739  +            || pFile->pInode->bProcessLock==0 );
  1734   1740       if( pFile->pInode && pFile->pInode->nLock ){
  1735   1741         /* If there are outstanding locks, do not actually close the file just
  1736   1742         ** yet because that would clear those locks.  Instead, add the file
  1737   1743         ** descriptor to pInode->pUnused list.  It will be automatically closed 
  1738   1744         ** when the last lock is cleared.
  1739   1745         */
  1740   1746         setPendingFd(pFile);
................................................................................
  3555   3561   
  3556   3562     /* Shared locks never span more than one byte */
  3557   3563     assert( n==1 || lockType!=F_RDLCK );
  3558   3564   
  3559   3565     /* Locks are within range */
  3560   3566     assert( n>=1 && n<SQLITE_SHM_NLOCK );
  3561   3567   
  3562         -  /* Initialize the locking parameters */
  3563         -  memset(&f, 0, sizeof(f));
  3564         -  f.l_type = lockType;
  3565         -  f.l_whence = SEEK_SET;
  3566         -  f.l_start = ofst;
  3567         -  f.l_len = n;
         3568  +  if( pShmNode->h>=0 ){
         3569  +    /* Initialize the locking parameters */
         3570  +    memset(&f, 0, sizeof(f));
         3571  +    f.l_type = lockType;
         3572  +    f.l_whence = SEEK_SET;
         3573  +    f.l_start = ofst;
         3574  +    f.l_len = n;
  3568   3575   
  3569         -  rc = osFcntl(pShmNode->h, F_SETLK, &f);
  3570         -  rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;
         3576  +    rc = osFcntl(pShmNode->h, F_SETLK, &f);
         3577  +    rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;
         3578  +  }
  3571   3579   
  3572   3580     /* Update the global lock state and do debug tracing */
  3573   3581   #ifdef SQLITE_DEBUG
  3574   3582     { u16 mask;
  3575   3583     OSTRACE(("SHM-LOCK "));
  3576   3584     mask = (1<<(ofst+n)) - (1<<ofst);
  3577   3585     if( rc==SQLITE_OK ){
................................................................................
  3618   3626     unixShmNode *p = pFd->pInode->pShmNode;
  3619   3627     assert( unixMutexHeld() );
  3620   3628     if( p && p->nRef==0 ){
  3621   3629       int i;
  3622   3630       assert( p->pInode==pFd->pInode );
  3623   3631       if( p->mutex ) sqlite3_mutex_free(p->mutex);
  3624   3632       for(i=0; i<p->nRegion; i++){
  3625         -      munmap(p->apRegion[i], p->szRegion);
         3633  +      if( p->h>=0 ){
         3634  +        munmap(p->apRegion[i], p->szRegion);
         3635  +      }else{
         3636  +        sqlite3_free(p->apRegion[i]);
         3637  +      }
  3626   3638       }
  3627   3639       sqlite3_free(p->apRegion);
  3628   3640       if( p->h>=0 ){
  3629   3641         robust_close(pFd, p->h, __LINE__);
  3630   3642         p->h = -1;
  3631   3643       }
  3632   3644       p->pInode->pShmNode = 0;
................................................................................
  3658   3670   ** same database file at the same time, database corruption will likely
  3659   3671   ** result. The SQLITE_SHM_DIRECTORY compile-time option is considered
  3660   3672   ** "unsupported" and may go away in a future SQLite release.
  3661   3673   **
  3662   3674   ** When opening a new shared-memory file, if no other instances of that
  3663   3675   ** file are currently open, in this process or in other processes, then
  3664   3676   ** the file must be truncated to zero length or have its header cleared.
         3677  +**
         3678  +** If the original database file (pDbFd) is using the "unix-excl" VFS
         3679  +** that means that an exclusive lock is held on the database file and
         3680  +** that no other processes are able to read or write the database.  In
         3681  +** that case, we do not really need shared memory.  No shared memory
         3682  +** file is created.  The shared memory will be simulated with heap memory.
  3665   3683   */
  3666   3684   static int unixOpenSharedMemory(unixFile *pDbFd){
  3667   3685     struct unixShm *p = 0;          /* The connection to be opened */
  3668   3686     struct unixShmNode *pShmNode;   /* The underlying mmapped file */
  3669   3687     int rc;                         /* Result code */
  3670   3688     unixInodeInfo *pInode;          /* The inode of fd */
  3671   3689     char *zShmFilename;             /* Name of the file used for SHM */
................................................................................
  3687   3705       struct stat sStat;                 /* fstat() info for database file */
  3688   3706   
  3689   3707       /* Call fstat() to figure out the permissions on the database file. If
  3690   3708       ** a new *-shm file is created, an attempt will be made to create it
  3691   3709       ** with the same permissions. The actual permissions the file is created
  3692   3710       ** with are subject to the current umask setting.
  3693   3711       */
  3694         -    if( osFstat(pDbFd->h, &sStat) ){
         3712  +    if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){
  3695   3713         rc = SQLITE_IOERR_FSTAT;
  3696   3714         goto shm_open_err;
  3697   3715       }
  3698   3716   
  3699   3717   #ifdef SQLITE_SHM_DIRECTORY
  3700   3718       nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 30;
  3701   3719   #else
................................................................................
  3720   3738       pShmNode->pInode = pDbFd->pInode;
  3721   3739       pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
  3722   3740       if( pShmNode->mutex==0 ){
  3723   3741         rc = SQLITE_NOMEM;
  3724   3742         goto shm_open_err;
  3725   3743       }
  3726   3744   
  3727         -    pShmNode->h = robust_open(zShmFilename, O_RDWR|O_CREAT,
  3728         -                             (sStat.st_mode & 0777));
  3729         -    if( pShmNode->h<0 ){
  3730         -      rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename);
  3731         -      goto shm_open_err;
  3732         -    }
  3733         -
  3734         -    /* Check to see if another process is holding the dead-man switch.
  3735         -    ** If not, truncate the file to zero length. 
  3736         -    */
  3737         -    rc = SQLITE_OK;
  3738         -    if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
  3739         -      if( robust_ftruncate(pShmNode->h, 0) ){
  3740         -        rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename);
  3741         -      }
  3742         -    }
  3743         -    if( rc==SQLITE_OK ){
  3744         -      rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1);
  3745         -    }
  3746         -    if( rc ) goto shm_open_err;
         3745  +    if( pInode->bProcessLock==0 ){
         3746  +      pShmNode->h = robust_open(zShmFilename, O_RDWR|O_CREAT,
         3747  +                               (sStat.st_mode & 0777));
         3748  +      if( pShmNode->h<0 ){
         3749  +        rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename);
         3750  +        goto shm_open_err;
         3751  +      }
         3752  +  
         3753  +      /* Check to see if another process is holding the dead-man switch.
         3754  +      ** If not, truncate the file to zero length. 
         3755  +      */
         3756  +      rc = SQLITE_OK;
         3757  +      if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
         3758  +        if( robust_ftruncate(pShmNode->h, 0) ){
         3759  +          rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename);
         3760  +        }
         3761  +      }
         3762  +      if( rc==SQLITE_OK ){
         3763  +        rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1);
         3764  +      }
         3765  +      if( rc ) goto shm_open_err;
         3766  +    }
  3747   3767     }
  3748   3768   
  3749   3769     /* Make the new connection a child of the unixShmNode */
  3750   3770     p->pShmNode = pShmNode;
  3751   3771   #ifdef SQLITE_DEBUG
  3752   3772     p->id = pShmNode->nextShmId++;
  3753   3773   #endif
................................................................................
  3813   3833       if( rc!=SQLITE_OK ) return rc;
  3814   3834     }
  3815   3835   
  3816   3836     p = pDbFd->pShm;
  3817   3837     pShmNode = p->pShmNode;
  3818   3838     sqlite3_mutex_enter(pShmNode->mutex);
  3819   3839     assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
         3840  +  assert( pShmNode->pInode==pDbFd->pInode );
         3841  +  assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
         3842  +  assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
  3820   3843   
  3821   3844     if( pShmNode->nRegion<=iRegion ){
  3822   3845       char **apNew;                      /* New apRegion[] array */
  3823   3846       int nByte = (iRegion+1)*szRegion;  /* Minimum required file size */
  3824   3847       struct stat sStat;                 /* Used by fstat() */
  3825   3848   
  3826   3849       pShmNode->szRegion = szRegion;
  3827   3850   
  3828         -    /* The requested region is not mapped into this processes address space.
  3829         -    ** Check to see if it has been allocated (i.e. if the wal-index file is
  3830         -    ** large enough to contain the requested region).
  3831         -    */
  3832         -    if( osFstat(pShmNode->h, &sStat) ){
  3833         -      rc = SQLITE_IOERR_SHMSIZE;
  3834         -      goto shmpage_out;
  3835         -    }
  3836         -
  3837         -    if( sStat.st_size<nByte ){
  3838         -      /* The requested memory region does not exist. If bExtend is set to
  3839         -      ** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
  3840         -      **
  3841         -      ** Alternatively, if bExtend is true, use ftruncate() to allocate
  3842         -      ** the requested memory region.
         3851  +    if( pShmNode->h>=0 ){
         3852  +      /* The requested region is not mapped into this processes address space.
         3853  +      ** Check to see if it has been allocated (i.e. if the wal-index file is
         3854  +      ** large enough to contain the requested region).
  3843   3855         */
  3844         -      if( !bExtend ) goto shmpage_out;
  3845         -      if( robust_ftruncate(pShmNode->h, nByte) ){
  3846         -        rc = unixLogError(SQLITE_IOERR_SHMSIZE,"ftruncate",pShmNode->zFilename);
         3856  +      if( osFstat(pShmNode->h, &sStat) ){
         3857  +        rc = SQLITE_IOERR_SHMSIZE;
  3847   3858           goto shmpage_out;
  3848   3859         }
         3860  +  
         3861  +      if( sStat.st_size<nByte ){
         3862  +        /* The requested memory region does not exist. If bExtend is set to
         3863  +        ** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
         3864  +        **
         3865  +        ** Alternatively, if bExtend is true, use ftruncate() to allocate
         3866  +        ** the requested memory region.
         3867  +        */
         3868  +        if( !bExtend ) goto shmpage_out;
         3869  +        if( robust_ftruncate(pShmNode->h, nByte) ){
         3870  +          rc = unixLogError(SQLITE_IOERR_SHMSIZE, "ftruncate",
         3871  +                            pShmNode->zFilename);
         3872  +          goto shmpage_out;
         3873  +        }
         3874  +      }
  3849   3875       }
  3850   3876   
  3851   3877       /* Map the requested memory region into this processes address space. */
  3852   3878       apNew = (char **)sqlite3_realloc(
  3853   3879           pShmNode->apRegion, (iRegion+1)*sizeof(char *)
  3854   3880       );
  3855   3881       if( !apNew ){
  3856   3882         rc = SQLITE_IOERR_NOMEM;
  3857   3883         goto shmpage_out;
  3858   3884       }
  3859   3885       pShmNode->apRegion = apNew;
  3860   3886       while(pShmNode->nRegion<=iRegion){
  3861         -      void *pMem = mmap(0, szRegion, PROT_READ|PROT_WRITE, 
  3862         -          MAP_SHARED, pShmNode->h, pShmNode->nRegion*szRegion
  3863         -      );
  3864         -      if( pMem==MAP_FAILED ){
  3865         -        rc = SQLITE_IOERR;
  3866         -        goto shmpage_out;
         3887  +      void *pMem;
         3888  +      if( pShmNode->h>=0 ){
         3889  +        pMem = mmap(0, szRegion, PROT_READ|PROT_WRITE, 
         3890  +            MAP_SHARED, pShmNode->h, pShmNode->nRegion*szRegion
         3891  +        );
         3892  +        if( pMem==MAP_FAILED ){
         3893  +          rc = SQLITE_IOERR;
         3894  +          goto shmpage_out;
         3895  +        }
         3896  +      }else{
         3897  +        pMem = sqlite3_malloc(szRegion);
         3898  +        if( pMem==0 ){
         3899  +          rc = SQLITE_NOMEM;
         3900  +          goto shmpage_out;
         3901  +        }
         3902  +        memset(pMem, 0, szRegion);
  3867   3903         }
  3868   3904         pShmNode->apRegion[pShmNode->nRegion] = pMem;
  3869   3905         pShmNode->nRegion++;
  3870   3906       }
  3871   3907     }
  3872   3908   
  3873   3909   shmpage_out:
................................................................................
  3906   3942     assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
  3907   3943     assert( n>=1 );
  3908   3944     assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
  3909   3945          || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
  3910   3946          || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
  3911   3947          || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
  3912   3948     assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
         3949  +  assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
         3950  +  assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
  3913   3951   
  3914   3952     mask = (1<<(ofst+n)) - (1<<ofst);
  3915   3953     assert( n>1 || mask==(1<<ofst) );
  3916   3954     sqlite3_mutex_enter(pShmNode->mutex);
  3917   3955     if( flags & SQLITE_SHM_UNLOCK ){
  3918   3956       u16 allMask = 0; /* Mask of locks held by siblings */
  3919   3957   
................................................................................
  4043   4081   
  4044   4082     /* If pShmNode->nRef has reached 0, then close the underlying
  4045   4083     ** shared-memory file, too */
  4046   4084     unixEnterMutex();
  4047   4085     assert( pShmNode->nRef>0 );
  4048   4086     pShmNode->nRef--;
  4049   4087     if( pShmNode->nRef==0 ){
  4050         -    if( deleteFlag ) unlink(pShmNode->zFilename);
         4088  +    if( deleteFlag && pShmNode->h>=0 ) unlink(pShmNode->zFilename);
  4051   4089       unixShmPurge(pDbFd);
  4052   4090     }
  4053   4091     unixLeaveMutex();
  4054   4092   
  4055   4093     return SQLITE_OK;
  4056   4094   }
  4057   4095