SQLite

Check-in [1bde41cf08]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Progress towards a VFS that will support WAL. Locking code is in place but is untested. Still no support for the DMS.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | wal
Files: files | file ages | folders
SHA1: 1bde41cf081570ad257f927b641e752dff4ed014
User & Date: drh 2010-04-29 15:17:48.000
Context
2010-04-29
16:40
Untested implementation of the shared-memory dead-man-switch. (check-in: 706611283e user: drh tags: wal)
15:17
Progress towards a VFS that will support WAL. Locking code is in place but is untested. Still no support for the DMS. (check-in: 1bde41cf08 user: drh tags: wal)
14:58
Close all open database connections at the end of wal.test. (check-in: 3cc55a7568 user: dan tags: wal)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/os_common.h.
36
37
38
39
40
41
42

43
44
45
46
47
48
49
50

51
52
53
54
55
56
57
#define OSTRACE3(X,Y,Z)     if( sqlite3OSTrace ) sqlite3DebugPrintf(X,Y,Z)
#define OSTRACE4(X,Y,Z,A)   if( sqlite3OSTrace ) sqlite3DebugPrintf(X,Y,Z,A)
#define OSTRACE5(X,Y,Z,A,B) if( sqlite3OSTrace ) sqlite3DebugPrintf(X,Y,Z,A,B)
#define OSTRACE6(X,Y,Z,A,B,C) \
    if(sqlite3OSTrace) sqlite3DebugPrintf(X,Y,Z,A,B,C)
#define OSTRACE7(X,Y,Z,A,B,C,D) \
    if(sqlite3OSTrace) sqlite3DebugPrintf(X,Y,Z,A,B,C,D)

#else
#define OSTRACE1(X)
#define OSTRACE2(X,Y)
#define OSTRACE3(X,Y,Z)
#define OSTRACE4(X,Y,Z,A)
#define OSTRACE5(X,Y,Z,A,B)
#define OSTRACE6(X,Y,Z,A,B,C)
#define OSTRACE7(X,Y,Z,A,B,C,D)

#endif

/*
** Macros for performance tracing.  Normally turned off.  Only works
** on i486 hardware.
*/
#ifdef SQLITE_PERFORMANCE_TRACE







>








>







36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#define OSTRACE3(X,Y,Z)     if( sqlite3OSTrace ) sqlite3DebugPrintf(X,Y,Z)
#define OSTRACE4(X,Y,Z,A)   if( sqlite3OSTrace ) sqlite3DebugPrintf(X,Y,Z,A)
#define OSTRACE5(X,Y,Z,A,B) if( sqlite3OSTrace ) sqlite3DebugPrintf(X,Y,Z,A,B)
#define OSTRACE6(X,Y,Z,A,B,C) \
    if(sqlite3OSTrace) sqlite3DebugPrintf(X,Y,Z,A,B,C)
#define OSTRACE7(X,Y,Z,A,B,C,D) \
    if(sqlite3OSTrace) sqlite3DebugPrintf(X,Y,Z,A,B,C,D)
#define OSTRACE(X)          if( sqlite3OSTrace ) sqlite3DebugPrintf X
#else
#define OSTRACE1(X)
#define OSTRACE2(X,Y)
#define OSTRACE3(X,Y,Z)
#define OSTRACE4(X,Y,Z,A)
#define OSTRACE5(X,Y,Z,A,B)
#define OSTRACE6(X,Y,Z,A,B,C)
#define OSTRACE7(X,Y,Z,A,B,C,D)
#define OSTRACE(X)
#endif

/*
** Macros for performance tracing.  Normally turned off.  Only works
** on i486 hardware.
*/
#ifdef SQLITE_PERFORMANCE_TRACE
Changes to src/os_unix.c.
4558
4559
4560
4561
4562
4563
4564




4565
4566

4567


















4568
4569


4570

4571
4572
4573











4574
4575
4576





4577









4578
4579
4580

4581



4582
4583
4584
4585
4586
4587

4588
4589
4590
4591


4592
4593
4594
4595
4596
4597
4598
4599



































































































































































































































































4600




4601
4602
4603
4604
4605
4606
4607

4608
4609
4610
4611
4612
4613
4614
4615
4616

4617
4618
4619
4620





4621





4622
4623







4624


4625
4626
4627
4628
4629
4630




4631
4632
4633

4634
4635
4636
4637



































4638
4639
4640

4641

4642

4643
4644




































4645
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664

4665
4666


4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677

4678
4679
4680
4681
4682
4683

4684
4685
4686
4687
4688
4689
4690
4691



4692
4693
4694


4695
4696
4697
4698
4699
4700





4701










4702






4703

4704




























































































4705
4706
4707
4708
4709
4710
4711
/*
** Last-error query for this VFS.  This implementation ignores all three
** arguments and always returns 0.
*/
static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
  UNUSED_PARAMETER(NotUsed);
  UNUSED_PARAMETER(NotUsed2);
  UNUSED_PARAMETER(NotUsed3);
  return 0;
}





/*
** Structure used internally by this VFS to record the state of an
** open shared memory segment.
**
** unixShmOpen() allocates and zeroes one of these, opens the backing
** file into "fd" and maps it at "pBuf".  unixShmClose() unmaps the
** buffer, closes the file and frees the object.
*/
struct unixShm {


  sqlite3_vfs *pVfs;   /* VFS that opened this shared-memory segment */

  int size;            /* Size of the shared memory area, in bytes */
  char *pBuf;          /* Start of the mmap()ed region, or 0 if unmapped */
  unixFile fd;         /* The open file that backs the mapping */











};

/*
** Close a shared-memory segment.
**
** If pSharedMem is NULL, or its pVfs field was never set, nothing is
** done.  Otherwise the mapping (if any) is removed, the backing file
** (if it was opened) is closed, and the object is cleared and freed.
** SQLITE_OK is returned in every case.
*/
static int unixShmClose(sqlite3_shm *pSharedMem){
  struct unixShm *pSeg = (struct unixShm*)pSharedMem;

  /* Nothing to tear down for a missing or never-initialized object */
  if( pSeg==0 || pSeg->pVfs==0 ) return SQLITE_OK;

  if( pSeg->pBuf!=0 ) munmap(pSeg->pBuf, pSeg->size);
  if( pSeg->fd.pMethod!=0 ){
    pSeg->fd.pMethod->xClose((sqlite3_file*)&pSeg->fd);
  }

  /* Scrub the object before releasing it */
  memset(pSeg, 0, sizeof(*pSeg));
  sqlite3_free(pSeg);
  return SQLITE_OK;
}

/*
** Size increment by which shared memory grows
*/
#define SQLITE_UNIX_SHM_INCR  4096

/*
** Open a shared-memory area.  This implementation uses mmapped files.
**
** pVfs   The VFS used to open the backing file (via pVfs->xOpen).
** zName  Name of the file to mmap.
** pShm   OUT: the new unixShm object on success, 0 on failure.
**
** The backing file is grown to SQLITE_UNIX_SHM_INCR bytes if it is
** smaller than that, and then the whole file is mapped read/write and
** shared.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the object cannot be
** allocated, the xOpen error code if the file cannot be opened, or
** SQLITE_IOERR if fstat(), ftruncate() or mmap() fails.
*/
static int unixShmOpen(
  sqlite3_vfs *pVfs,    /* The VFS */
  const char *zName,    /* Name of file to mmap */
  sqlite3_shm **pShm    /* Write the unixShm object created here */
){
  struct unixShm *p = 0;
  int rc;
  int outFlags;
  struct stat sStat;

  p = sqlite3_malloc( sizeof(*p) );
  *pShm = (sqlite3_shm*)p;
  if( p==0 ) return SQLITE_NOMEM;
  memset(p, 0, sizeof(*p));
  p->pVfs = pVfs;

  rc = pVfs->xOpen(pVfs, zName, (sqlite3_file*)&p->fd,
                   SQLITE_OPEN_READWRITE | SQLITE_OPEN_MAIN_JOURNAL,
                   &outFlags);
  if( rc!=SQLITE_OK ) goto shm_open_err;

  /* fstat() and ftruncate() return -1 on failure, which is not an SQLite
  ** result code.  Translate it before returning it to the caller. */
  if( fstat(p->fd.h, &sStat)!=0 ){
    rc = SQLITE_IOERR;
    goto shm_open_err;
  }
  if( sStat.st_size<SQLITE_UNIX_SHM_INCR ){
    if( ftruncate(p->fd.h, SQLITE_UNIX_SHM_INCR)!=0 ){
      rc = SQLITE_IOERR;
      goto shm_open_err;
    }
    p->size = SQLITE_UNIX_SHM_INCR;
  }else{
    p->size = sStat.st_size;
  }

  /* Map the file. */
  p->pBuf = mmap(0, p->size, PROT_READ|PROT_WRITE, MAP_SHARED, p->fd.h, 0);
  if( p->pBuf==MAP_FAILED ){
    /* Clear the pointer so that unixShmClose() does not hand MAP_FAILED
    ** to munmap(). */
    p->pBuf = 0;
    rc = SQLITE_IOERR;
    goto shm_open_err;
  }
  return SQLITE_OK;

shm_open_err:
  unixShmClose((sqlite3_shm*)p);
  *pShm = 0;
  return rc;
}





































/*
** Query and/or change the size of a shared-memory segment.
**
** pSharedMem  Pointer returned by unixShmOpen().
** reqSize     Requested size in bytes, rounded up to a multiple of
**             SQLITE_UNIX_SHM_INCR.  A negative value is a query only.
** pNewSize    OUT: size of the segment after any resizing (0 if the
**             segment could not be resized or remapped).
** ppBuf       OUT: start of the mapping (0 if there is none).
**
** Returns SQLITE_OK on success, or SQLITE_IOERR if ftruncate() or
** mmap() fails.
**
** NOTE(review): the interface description calls for a writer lock while
** resizing and a reader lock while *ppBuf is in use; this version takes
** no locks at all.
*/
static int unixShmSize(
  sqlite3_shm *pSharedMem,  /* Pointer returned by unixShmOpen() */
  int reqSize,              /* Requested size.  -1 for query only */
  int *pNewSize,            /* Write new size here */
  char **ppBuf              /* Write new buffer origin here */
){
  struct unixShm *p = (struct unixShm*)pSharedMem;
  int rc = SQLITE_OK;

  if( reqSize>=0 ){
    reqSize = (reqSize + SQLITE_UNIX_SHM_INCR - 1)/SQLITE_UNIX_SHM_INCR;
    reqSize *= SQLITE_UNIX_SHM_INCR;
    if( reqSize!=p->size ){
      /* Drop the old mapping first.  pBuf is 0 after an earlier failure. */
      if( p->pBuf ) munmap(p->pBuf, p->size);
      p->pBuf = 0;
      p->size = 0;
      if( ftruncate(p->fd.h, reqSize)!=0 ){
        rc = SQLITE_IOERR;
      }else if( reqSize>0 ){
        /* mmap() reports failure with MAP_FAILED, not with NULL */
        void *pMap = mmap(0, reqSize, PROT_READ|PROT_WRITE, MAP_SHARED,
                          p->fd.h, 0);
        if( pMap==MAP_FAILED ){
          rc = SQLITE_IOERR;
        }else{
          p->pBuf = (char*)pMap;
          p->size = reqSize;
        }
      }
    }
  }
  *pNewSize = p->size;
  *ppBuf = p->pBuf;
  return rc;
}

/*
** Release the lock held on the shared memory segment so that other
** threads are free to resize it if necessary.
**
** In this version the routine is a placeholder: it does not use its
** argument and always returns SQLITE_OK.
*/
static int unixShmRelease(sqlite3_shm *pSharedMem){



  return SQLITE_OK;
}



/*
** Create or release a lock on shared memory.
**
** This version is a stub.  None of the arguments are examined,
** *pGotLock is left unwritten, and SQLITE_OK is always returned.
*/
static int unixShmLock(
  sqlite3_shm *pSharedMem,   /* Pointer from unixShmOpen() */
  int desiredLock,           /* The locking state desired */
  int *pGotLock,             /* The locking state actually obtained */
  int shouldBlock            /* Block for the lock if true and possible */
){
  return SQLITE_OK;
}

/*
** Delete a shared-memory segment from the system.
*/
static int unixShmDelete(sqlite3_vfs *pVfs, const char *zName){
  return pVfs->xDelete(pVfs, zName, 0);







>
>
>
>

<
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
>
>
|
>
|
|
|
>
>
>
>
>
>
>
>
>
>
>



>
>
>
>
>
|
>
>
>
>
>
>
>
>
>

<
|
>
|
>
>
>
|
<
<
<
|
<
>
|
<
<
|
>
>
|







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>







>

|



<


|
>
|
<
<
|
>
>
>
>
>
|
>
>
>
>
>
|
|
>
>
>
>
>
>
>
|
>
>
|
|
<
<
<
|
>
>
>
>
|
<
<
>
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



>
|
>

>


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



















|
>


>
>



|
|
|

|
|

|
>
|



|
|
>








>
>
>



>
>

|



|
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569

4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628

4629
4630
4631
4632
4633
4634
4635



4636

4637
4638


4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926

4927
4928
4929
4930
4931


4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954
4955
4956
4957



4958
4959
4960
4961
4962
4963


4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
/*
** Last-error query for this VFS.  No error information is kept: all
** three arguments are ignored and the result is always 0.
*/
static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
  UNUSED_PARAMETER(NotUsed3);
  UNUSED_PARAMETER(NotUsed2);
  UNUSED_PARAMETER(NotUsed);
  return 0;
}

/* Forward reference */
typedef struct unixShm unixShm;
typedef struct unixShmFile unixShmFile;

/*
** Object used to represent a single file opened and mmapped to provide
** shared memory.  When multiple threads all reference the same
** log-summary, each thread has its own connection object, but they all
** point to a single instance of this object.  In other words, each
** log-summary is opened only once per process.
**
** NOTE(review): this comment previously said each thread has its own
** "unixFile" object.  The nRef/pFirst fields below count and link
** unixShm objects, so unixShm is presumably what was meant -- confirm.
**
** unixMutexHeld() must be true when creating or destroying
** this object or while reading or writing the following fields:
**
**      nRef
**      pNext
**
** The following fields are read-only after the object is created:
**
**      fid
**      zFilename
**
** Either unixShmFile.mutex must be held or unixShmFile.nRef==0 and
** unixMutexHeld() is true when reading or writing any other field
** in this structure.
*/
struct unixShmFile {
  struct unixFileId fid;     /* Unique file identifier (device + inode) */
  sqlite3_mutex *mutex;      /* Mutex to access this object */
  sqlite3_mutex *mutexBuf;   /* Held from unixShmSize() to unixShmRelease() */
  sqlite3_mutex *mutexRecov; /* The RECOVER mutex */
  char *zFilename;           /* Name of the file; stored just past this object */
  int size;                  /* Size of the file */
  int h;                     /* Open file descriptor, or -1 if not open */
  char *pMMapBuf;            /* Where currently mmapped() */
  int nReadPrefix;           /* Number of SQLITE_SHM_READ_PREFIX locks */
  int nReadFull;             /* Number of SQLITE_SHM_READ_FULL locks */
  int nRef;                  /* Number of unixShm objects pointing to this */
  unixShm *pFirst;           /* All unixShm objects pointing to this */
  unixShmFile *pNext;        /* Next in list of all unixShmFile objects */
#ifdef SQLITE_DEBUG
  u8 exclMask;               /* Mask of exclusive locks held */
  u8 sharedMask;             /* Mask of shared locks held */
  u8 nextShmId;              /* Next available unixShm.id value */
#endif
};

/*
** A global linked list (chained through pNext) of all unixShmFile objects.
**
** The unixMutexHeld() must be true while reading or writing this list.
*/
static unixShmFile *unixShmFileList = 0;

/*
** Structure used internally by this VFS to record the state of an
** open shared memory connection.
**
** unixShm.pFile->mutex must be held while reading or writing the
** unixShm.pNext, unixShm.sharedMask and unixShm.exclMask elements.
** NOTE(review): this comment previously named a "locks[]" element that
** does not exist in the structure; the two lock masks are presumably
** what was meant -- confirm.
**
** The unixShm.pFile element is initialized when the object is created
** and is read-only thereafter.
*/

struct unixShm {
  unixShmFile *pFile;        /* The underlying unixShmFile object */
  unixShm *pNext;            /* Next unixShm with the same unixShmFile */
  u8 lockState;              /* Current lock state (an SQLITE_SHM_xxx value) */
  u8 readLock;               /* Which of the two read-lock states to use */
  u8 hasMutex;               /* True if holding the unixShmFile mutex */
  u8 hasMutexBuf;            /* True if holding pFile->mutexBuf */



  u8 hasMutexRecov;          /* True if holding pFile->mutexRecov */

  u8 sharedMask;             /* Mask of shared locks held */
  u8 exclMask;               /* Mask of exclusive locks held */


#ifdef SQLITE_DEBUG
  u8 id;                     /* Id of this connection with its unixShmFile */
#endif
};

/*
** Size increment by which shared memory grows
*/
#define SQLITE_UNIX_SHM_INCR  4096

/*
** Constants used for locking
**
** Each UNIX_SHM_xxx mask is one bit of a u8 lock mask.  When
** unixShmSystemLocks() converts a mask into a posix advisory lock range,
** bit number i of the mask stands for the byte at file offset
** UNIX_SHM_BASE+i.
*/
#define UNIX_SHM_BASE      32        /* Byte offset of the first lock byte */
#define UNIX_SHM_MUTEX     0x01      /* Mask for MUTEX lock */
#define UNIX_SHM_DMS       0x04      /* Mask for Dead-Man-Switch lock */
#define UNIX_SHM_A         0x10      /* Mask for region locks... */
#define UNIX_SHM_B         0x20
#define UNIX_SHM_C         0x40
#define UNIX_SHM_D         0x80

#ifdef SQLITE_DEBUG
/*
** Render a locking mask as a nul-terminated six-character string of the
** form "MSABCD": "M" for MUTEX, "S" for DMS, and "A" through "D" for the
** region locks.  A letter is shown when its lock bit is set in the mask
** and replaced by "." when it is not.
**
** The result lives in static memory.  Successive calls hand out
** successive 8-byte slots of a 48-byte buffer, so the three calls made
** by the single trace statement in unixShmSystemLocks() do not overwrite
** one another.
**
** This routine is for debugging purposes only and does not appear
** in a production build.
*/
static const char *unixShmLockString(u8 mask){
  static char zBuf[48];                    /* Rotating pool of result slots */
  static int iBuf = 0;                     /* Offset of the next free slot */
  static const char zLetter[] = "MSABCD";  /* Letter for each lock, in order */
  static const u8 aBit[6] = {
    UNIX_SHM_MUTEX, UNIX_SHM_DMS,
    UNIX_SHM_A, UNIX_SHM_B, UNIX_SHM_C, UNIX_SHM_D
  };
  char *zOut = &zBuf[iBuf];
  int k;

  /* Advance to the next slot, wrapping at the end of the pool */
  iBuf += 8;
  if( iBuf>=sizeof(zBuf) ) iBuf = 0;

  for(k=0; k<6; k++){
    zOut[k] = (mask & aBit[k])!=0 ? zLetter[k] : '.';
  }
  zOut[6] = 0;
  return zOut;
}
#endif /* SQLITE_DEBUG */

/*
** Apply posix advisory locks for all bytes identified in lockMask.
**
** lockMask might contain multiple bits but all bits are guaranteed
** to be contiguous.  Bit number i of lockMask stands for the byte at
** file offset UNIX_SHM_BASE+i.
**
** Lock requests (F_RDLCK, F_WRLCK) block if the UNIX_SHM_MUTEX bit is
** set and are non-blocking otherwise.  Unlock requests never block.
**
** Returns SQLITE_OK if fcntl() succeeds and SQLITE_BUSY if it fails.
*/
static int unixShmSystemLocks(
  unixShmFile *pFile,   /* Apply locks to this open shared-memory segment */
  int lockType,         /* F_UNLCK, F_RDLCK, or F_WRLCK */
  u8 lockMask           /* Which bytes to lock or unlock */
){
  struct flock f;       /* The posix advisory locking structure */
  int lockOp;           /* The opcode for fcntl() */
  int i;                /* Offset into the locking byte range */
  int rc;               /* Result code from fcntl() */
  u8 mask;              /* Mask of bits in lockMask */

  /* Initialize the locking parameters */
  memset(&f, 0, sizeof(f));
  f.l_type = lockType;
  f.l_whence = SEEK_SET;
  if( (lockMask & UNIX_SHM_MUTEX)!=0 && lockType!=F_UNLCK ){
    lockOp = F_SETLKW;
  }else{
    lockOp = F_SETLK;
  }

  /* Find the first bit in lockMask that is set */
  for(i=0, mask=0x01; mask!=0 && (lockMask&mask)==0; mask <<= 1, i++){}
  assert( mask!=0 );
  f.l_start = i+UNIX_SHM_BASE;
  f.l_len = 1;

  /* Extend the locking range for each additional bit that is set.
  ** The mask must advance on every iteration; otherwise the loop never
  ** terminates when two adjacent bits are set. */
  mask <<= 1;
  while( mask!=0 && (lockMask & mask)!=0 ){
    f.l_len++;
    mask <<= 1;
  }

  /* Verify that all bits set in lockMask are contiguous */
  assert( mask==0 || (lockMask & ~(mask | (mask-1)))==0 );

  /* Acquire the system-level lock */
  rc = (fcntl(pFile->h, lockOp, &f)==0) ? SQLITE_OK : SQLITE_BUSY;

  /* Update the global lock state and do debug tracing */
#ifdef SQLITE_DEBUG
  OSTRACE(("SHM-LOCK "));
  if( rc==SQLITE_OK ){
    if( lockType==F_UNLCK ){
      OSTRACE(("unlock ok"));
      pFile->exclMask &= ~lockMask;
      pFile->sharedMask &= ~lockMask;
    }else if( lockType==F_RDLCK ){
      OSTRACE(("read-lock ok"));
      pFile->exclMask &= ~lockMask;
      pFile->sharedMask |= lockMask;
    }else{
      assert( lockType==F_WRLCK );
      OSTRACE(("write-lock ok"));
      pFile->exclMask |= lockMask;
      pFile->sharedMask &= ~lockMask;
    }
  }else{
    if( lockType==F_UNLCK ){
      OSTRACE(("unlock failed"));
    }else if( lockType==F_RDLCK ){
      OSTRACE(("read-lock failed"));
    }else{
      assert( lockType==F_WRLCK );
      OSTRACE(("write-lock failed"));
    }
  }
  OSTRACE((" - change requested %s - afterwards %s,%s\n",
           unixShmLockString(lockMask),
           unixShmLockString(pFile->sharedMask),
           unixShmLockString(pFile->exclMask)));
#endif

  return rc;
}

/*
** For connection p, unlock all of the locks identified by the unlockMask
** parameter.
**
** A system-level unlock is issued only for those locks that no sibling
** connection on the same unixShmFile still holds as shared locks.  The
** masks of connection p are cleared only if that step succeeds (or is
** not needed).
**
** Returns SQLITE_OK, or the error from unixShmSystemLocks().
*/
static int unixShmUnlock(
  unixShmFile *pFile,   /* The underlying shared-memory file */
  unixShm *p,           /* The connection to be unlocked */
  u8 unlockMask         /* Mask of locks to be unlocked */
){
  int rc;           /* Result code */
  unixShm *pX;      /* For looping over all sibling connections */
  u8 allMask = 0;   /* Union of locks held by connections other than "p".
                    ** Must start at zero: it is built up with |= below. */

  /* We never try to unlock locks that we do not hold */
  assert( ((p->exclMask|p->sharedMask) & unlockMask)==unlockMask );

  /* Compute locks held by sibling connections */
  for(pX=pFile->pFirst; pX; pX=pX->pNext){
    if( pX==p ) continue;
    assert( (pX->exclMask & unlockMask)==0 );
    allMask |= pX->sharedMask;
  }

  /* Unlock the system-level locks */
  if( (unlockMask & allMask)!=unlockMask ){
    rc = unixShmSystemLocks(pFile, F_UNLCK, unlockMask & ~allMask);
  }else{
    rc = SQLITE_OK;
  }

  /* Undo the local locks */
  if( rc==SQLITE_OK ){
    p->exclMask &= ~unlockMask;
    p->sharedMask &= ~unlockMask;
  }
  return rc;
}

/*
** Get reader locks for connection p on all locks in the readMask parameter.
**
** Returns SQLITE_BUSY immediately if a sibling connection on the same
** unixShmFile holds an exclusive lock on any of the requested locks.
** A system-level read lock is requested only when at least one of the
** requested locks is not already held as a shared lock by some sibling.
** On success the requested bits are added to p->sharedMask.
*/
static int unixShmSharedLock(
  unixShmFile *pFile,   /* The underlying shared-memory file */
  unixShm *p,           /* The connection to get the shared locks */
  u8 readMask           /* Mask of shared locks to be acquired */
){
  int rc;             /* Result code */
  unixShm *pX;        /* For looping over all sibling connections */
  u8 allShared = 0;   /* Union of locks held by connections other than "p".
                      ** Must start at zero: it is built up with |= below. */

  /* Find out which shared locks are already held by sibling connections.
  ** If any sibling already holds an exclusive lock, go ahead and return
  ** SQLITE_BUSY.
  */
  for(pX=pFile->pFirst; pX; pX=pX->pNext){
    if( pX==p ) continue;
    if( (pX->exclMask & readMask)!=0 ) return SQLITE_BUSY;
    allShared |= pX->sharedMask;
  }

  /* Get shared locks at the system level, if necessary */
  if( (~allShared) & readMask ){
    rc = unixShmSystemLocks(pFile, F_RDLCK, readMask);
  }else{
    rc = SQLITE_OK;
  }

  /* Get the local shared locks */
  if( rc==SQLITE_OK ){
    p->sharedMask |= readMask;
  }
  return rc;
}

/*
** Acquire, on behalf of connection p, an exclusive lock on every lock
** named in writeMask.
**
** SQLITE_BUSY is returned without touching the file if any sibling
** connection already holds one of those locks, shared or exclusive.
** Otherwise a system-level write lock is requested; if that succeeds
** the bits move from p->sharedMask into p->exclMask.
*/
static int unixShmExclusiveLock(
  unixShmFile *pFile,    /* The underlying shared-memory file */
  unixShm *p,            /* The connection to get the exclusive locks */
  u8 writeMask           /* Mask of exclusive locks to be acquired */
){
  unixShm *pSib;   /* Iterator over connections sharing pFile */
  int rc;          /* Result of the system-level lock request */

  /* Any sibling lock of either kind on a requested byte is a conflict */
  for(pSib=pFile->pFirst; pSib!=0; pSib=pSib->pNext){
    if( pSib!=p && ((pSib->exclMask | pSib->sharedMask) & writeMask)!=0 ){
      return SQLITE_BUSY;
    }
  }

  rc = unixShmSystemLocks(pFile, F_WRLCK, writeMask);
  if( rc!=SQLITE_OK ) return rc;

  /* System lock obtained: record it on the local connection */
  p->sharedMask &= ~writeMask;
  p->exclMask |= writeMask;
  return rc;
}

/*
** Remove from unixShmFileList every entry whose nRef is zero.  For each
** such entry the three mutexes are freed (when allocated), the file
** descriptor is closed (when open), and the entry itself is freed.
**
** The caller must hold the unix mutex.
**
** This is not a VFS shared-memory method; it is a utility function called
** by VFS shared-memory methods.
**
** NOTE(review): pMMapBuf is not unmapped here -- confirm whether the
** mapping is meant to be released elsewhere.
*/
static void unixShmPurge(void){
  unixShmFile **ppLink;    /* Link that points at the entry being examined */
  unixShmFile *pEntry;     /* The entry being examined */

  assert( unixMutexHeld() );
  ppLink = &unixShmFileList;
  for(pEntry=*ppLink; pEntry!=0; pEntry=*ppLink){
    if( pEntry->nRef!=0 ){
      /* Still referenced: keep it and step to the next link */
      ppLink = &pEntry->pNext;
      continue;
    }
    if( pEntry->mutex ) sqlite3_mutex_free(pEntry->mutex);
    if( pEntry->mutexBuf ) sqlite3_mutex_free(pEntry->mutexBuf);
    if( pEntry->mutexRecov ) sqlite3_mutex_free(pEntry->mutexRecov);
    if( pEntry->h>=0 ) close(pEntry->h);
    /* Unlink before freeing; ppLink now refers to the successor */
    *ppLink = pEntry->pNext;
    sqlite3_free(pEntry);
  }
}

/*
** Open a shared-memory area.  This implementation uses mmapped files.
**
** pVfs   The VFS (not used by this implementation).
** zName  Name of the file to mmap.
** pShm   OUT: the new unixShm connection on success, 0 on failure.
**
** If zName already exists and a unixShmFile with the same device and
** inode is on unixShmFileList, the new connection shares it.  Otherwise
** a new unixShmFile is created, linked into the list, and its file is
** opened (and created if necessary).  A new file whose size rounds down
** to zero is extended to SQLITE_UNIX_SHM_INCR bytes.
**
** When opening a new shared-memory file, if no other instances of that
** file are currently open, in this process or in other processes, then
** the file must be truncated to zero length or have its header cleared.
** This routine does not yet do that.
**
** Returns SQLITE_OK, SQLITE_NOMEM, SQLITE_CANTOPEN or SQLITE_FULL.
*/
static int unixShmOpen(
  sqlite3_vfs *pVfs,    /* The VFS */
  const char *zName,    /* Name of file to mmap */
  sqlite3_shm **pShm    /* Write the unixShm object created here */
){
  struct unixShm *p = 0;
  struct unixShmFile *pFile = 0;
  int rc;
  struct unixFileId fid;
  struct stat sStat;

  /* Leave a well-defined value in *pShm on every failure path */
  *pShm = 0;

  p = sqlite3_malloc( sizeof(*p) );
  if( p==0 ) return SQLITE_NOMEM;
  memset(p, 0, sizeof(*p));

  unixEnterMutex();
  rc = stat(zName, &sStat);
  if( rc==0 ){
    /* The file exists.  Look for an object that already has it open. */
    memset(&fid, 0, sizeof(fid));
    fid.dev = sStat.st_dev;
    fid.ino = sStat.st_ino;
    for(pFile = unixShmFileList; pFile; pFile=pFile->pNext){
      if( memcmp(&pFile->fid, &fid, sizeof(fid))==0 ) break;
    }
  }
  if( pFile==0 ){
    int nName = strlen(zName);
    pFile = sqlite3_malloc( sizeof(*pFile) + nName + 1 );
    if( pFile==0 ){
      rc = SQLITE_NOMEM;
      goto shm_open_err;
    }
    /* Zero the whole object, not just sizeof(pointer) bytes.  nRef,
    ** pFirst, pMMapBuf and the unused parts of fid all rely on this. */
    memset(pFile, 0, sizeof(*pFile));
    pFile->zFilename = (char*)&pFile[1];
    memcpy(pFile->zFilename, zName, nName+1);
    pFile->h = -1;
    pFile->pNext = unixShmFileList;
    unixShmFileList = pFile;

    pFile->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
    if( pFile->mutex==0 ){
      rc = SQLITE_NOMEM;
      goto shm_open_err;
    }
    pFile->mutexBuf = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
    if( pFile->mutexBuf==0 ){
      rc = SQLITE_NOMEM;
      goto shm_open_err;
    }
    pFile->mutexRecov = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
    if( pFile->mutexRecov==0 ){
      rc = SQLITE_NOMEM;
      goto shm_open_err;
    }

    /* The descriptor is later used with ftruncate(), write locks and a
    ** PROT_READ|PROT_WRITE shared mapping, so it must be opened
    ** read/write. */
    pFile->h = open(zName, O_RDWR|O_CREAT, 0664);
    if( pFile->h<0 ){
      rc = SQLITE_CANTOPEN;
      goto shm_open_err;
    }

    rc = fstat(pFile->h, &sStat);
    if( rc ){
      rc = SQLITE_CANTOPEN;
      goto shm_open_err;
    }
    pFile->fid.dev = sStat.st_dev;
    pFile->fid.ino = sStat.st_ino;
    pFile->size = (int)sStat.st_size;
    pFile->size = (pFile->size/SQLITE_UNIX_SHM_INCR)*SQLITE_UNIX_SHM_INCR;
    if( pFile->size==0 ){
      pFile->size = SQLITE_UNIX_SHM_INCR;
      rc = ftruncate(pFile->h, pFile->size);
      if( rc ){
        rc = SQLITE_FULL;
        goto shm_open_err;
      }
    }
  }

  p->pFile = pFile;
  p->pNext = pFile->pFirst;
#ifdef SQLITE_DEBUG
  p->id = pFile->nextShmId++;
#endif
  pFile->pFirst = p;
  pFile->nRef++;
  *pShm = (sqlite3_shm*)p;
  unixLeaveMutex();
  return SQLITE_OK;

shm_open_err:
  /* Every error above occurs either before pFile is allocated or after
  ** it has been linked into unixShmFileList with nRef==0.  In the second
  ** case unixShmPurge() unlinks it, releases its mutexes and descriptor,
  ** and frees it, so it must not be freed a second time here. */
  unixShmPurge();
  sqlite3_free(p);
  *pShm = 0;
  unixLeaveMutex();
  return rc;
}

/*
** Close a connection to shared-memory.
**
** The connection must not hold any locks.  It is removed from the list
** of connections on its unixShmFile and freed.  If it was the last
** connection, the unixShmFile itself is released by unixShmPurge().
** Always returns SQLITE_OK.
*/
static int unixShmClose(sqlite3_shm *pSharedMem){
  unixShm *p;            /* The connection to be closed */
  unixShmFile *pFile;    /* The underlying shared-memory file */
  unixShm **pp;          /* For looping over sibling connections */

  p = (struct unixShm*)pSharedMem;
  pFile = p->pFile;

  /* Verify that the connection being closed holds no locks */
  assert( p->exclMask==0 );
  assert( p->sharedMask==0 );

  /* Remove connection p from the set of connections associated with pFile */
  sqlite3_mutex_enter(pFile->mutex);
  for(pp=&pFile->pFirst; (*pp)!=p; pp = &(*pp)->pNext){}
  *pp = p->pNext;

  /* Free the connection p */
  sqlite3_free(p);
  sqlite3_mutex_leave(pFile->mutex);

  /* nRef may only be changed, and unixShmPurge() may only be called,
  ** while the unix mutex is held.  If pFile->nRef has reached 0, then
  ** close the underlying shared-memory file, too. */
  unixEnterMutex();
  assert( pFile->nRef>0 );
  pFile->nRef--;
  if( pFile->nRef==0 ){
    unixShmPurge();
  }
  unixLeaveMutex();
  return SQLITE_OK;
}

/*
** Query and/or change the size of a shared-memory segment.
**
** pSharedMem  Pointer returned by unixShmOpen().
** reqSize     Requested size in bytes, rounded up to a multiple of
**             SQLITE_UNIX_SHM_INCR.  A negative value is a query only.
** pNewSize    OUT: size of the segment after any resizing (0 if the
**             segment could not be resized or remapped).
** ppBuf       OUT: start of the mapping (0 if there is none).
**
** pFile->mutexBuf is entered here and is still held on return, on both
** success and failure; xShmRelease() must be called to release it.
** pFile->mutex is held only for the duration of this call.
**
** NOTE(review): the interface description says the buffer lock is taken
** only "if ppBuffer is not NULL", but this code writes through pNewSize
** and ppBuf unconditionally, so both must be non-NULL -- confirm the
** intended contract.
**
** Returns SQLITE_OK on success, or SQLITE_IOERR if ftruncate() or
** mmap() fails.
*/
static int unixShmSize(
  sqlite3_shm *pSharedMem,  /* Pointer returned by unixShmOpen() */
  int reqSize,              /* Requested size.  -1 for query only */
  int *pNewSize,            /* Write new size here */
  char **ppBuf              /* Write new buffer origin here */
){
  unixShm *p = (unixShm*)pSharedMem;
  unixShmFile *pFile = p->pFile;
  int rc = SQLITE_OK;

  sqlite3_mutex_enter(pFile->mutexBuf);
  sqlite3_mutex_enter(pFile->mutex);
  if( reqSize>=0 ){
    reqSize = (reqSize + SQLITE_UNIX_SHM_INCR - 1)/SQLITE_UNIX_SHM_INCR;
    reqSize *= SQLITE_UNIX_SHM_INCR;

    /* Remap when the size changes, and also when the file is already the
    ** requested size but has never been mapped (unixShmOpen() sets the
    ** size without creating a mapping). */
    if( reqSize!=pFile->size || (pFile->pMMapBuf==0 && reqSize>0) ){
      if( pFile->pMMapBuf ) munmap(pFile->pMMapBuf, pFile->size);
      pFile->pMMapBuf = 0;
      pFile->size = 0;
      if( ftruncate(pFile->h, reqSize)!=0 ){
        rc = SQLITE_IOERR;
      }else if( reqSize>0 ){
        /* mmap() reports failure with MAP_FAILED, not with NULL */
        void *pMap = mmap(0, reqSize, PROT_READ|PROT_WRITE, MAP_SHARED,
                          pFile->h, 0);
        if( pMap==MAP_FAILED ){
          rc = SQLITE_IOERR;
        }else{
          pFile->pMMapBuf = (char*)pMap;
          pFile->size = reqSize;
        }
      }
    }
  }
  *pNewSize = pFile->size;
  *ppBuf = pFile->pMMapBuf;
  sqlite3_mutex_leave(pFile->mutex);
  return rc;
}

/*
** Release the buffer mutex (pFile->mutexBuf) of the shared-memory file
** behind this connection, so that other threads are free to resize the
** segment if necessary.  Always returns SQLITE_OK.
*/
static int unixShmRelease(sqlite3_shm *pSharedMem){
  sqlite3_mutex_leave(((unixShm*)pSharedMem)->pFile->mutexBuf);
  return SQLITE_OK;
}



/*
** Change the lock state for a shared-memory segment.
**
** desiredLock is the locking state the caller wants to move to, or
** SQLITE_SHM_QUERY to simply read back the current state.  On return
** *pGotLock holds the state the connection is actually in, which may
** differ from desiredLock (for example SQLITE_SHM_READ_FULL when
** SQLITE_SHM_READ was requested, or SQLITE_SHM_PENDING when
** SQLITE_SHM_CHECKPOINT could only be partly obtained).
**
** The return value is SQLITE_OK on success, or the error code returned
** by the underlying unixShmSharedLock()/unixShmExclusiveLock() call
** (SQLITE_BUSY if a read lock could not be obtained after 5 attempts).
**
** pFile->mutex is held while the state is being changed.  While a
** connection is in the SQLITE_SHM_RECOVER state it also holds
** pFile->mutexRecov; that mutex is released when the connection leaves
** the RECOVER state, or immediately if the RECOVER lock cannot be
** obtained.
*/
static int unixShmLock(
  sqlite3_shm *pSharedMem,   /* Pointer from unixShmOpen() */
  int desiredLock,           /* One of SQLITE_SHM_xxxxx locking states */
  int *pGotLock              /* The lock you actually got */
){
  unixShm *p = (unixShm*)pSharedMem;
  unixShmFile *pFile = p->pFile;
  int rc = SQLITE_PROTOCOL;

  /* Note that SQLITE_SHM_READ_FULL and SQLITE_SHM_PENDING are never
  ** directly requested; they are side effects from requesting
  ** SQLITE_SHM_READ and SQLITE_SHM_CHECKPOINT, respectively.
  */
  assert( desiredLock==SQLITE_SHM_QUERY
       || desiredLock==SQLITE_SHM_UNLOCK
       || desiredLock==SQLITE_SHM_READ
       || desiredLock==SQLITE_SHM_WRITE
       || desiredLock==SQLITE_SHM_CHECKPOINT
       || desiredLock==SQLITE_SHM_RECOVER );

  /* Return directly if this is just a lock state query, or if
  ** the connection is already in the desired locking state.
  */
  if( desiredLock==SQLITE_SHM_QUERY
   || desiredLock==p->lockState
   || (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL)
  ){
    *pGotLock = p->lockState;
    return SQLITE_OK;
  }

  sqlite3_mutex_enter(pFile->mutex);
  switch( desiredLock ){
    case SQLITE_SHM_UNLOCK: {
      /* Drop every lock range this connection might hold */
      assert( p->lockState!=SQLITE_SHM_RECOVER );
      unixShmUnlock(pFile, p, UNIX_SHM_A|UNIX_SHM_B|UNIX_SHM_C|UNIX_SHM_D);
      rc = SQLITE_OK;
      p->lockState = SQLITE_SHM_UNLOCK;
      break;
    }
    case SQLITE_SHM_READ: {
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        /* Acquire a fresh read lock.  Try for shared A+B first; if that
        ** is busy fall back to shared D, which yields READ_FULL.  Retry
        ** up to 5 times while both attempts report SQLITE_BUSY. */
        int nAttempt;
        rc = SQLITE_BUSY;
        assert( p->lockState==SQLITE_SHM_UNLOCK );
        for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){
          rc = unixShmSharedLock(pFile, p, UNIX_SHM_A|UNIX_SHM_B);
          if( rc==SQLITE_BUSY ){
            rc = unixShmSharedLock(pFile, p, UNIX_SHM_D);
            if( rc==SQLITE_OK ){
              p->lockState = p->readLock = SQLITE_SHM_READ_FULL;
            }
          }else if( rc==SQLITE_OK ){
            /* Only record the READ state when the lock was really
            ** obtained.  Any other error leaves the state unchanged
            ** and is returned to the caller. */
            unixShmUnlock(pFile, p, UNIX_SHM_B);
            p->lockState = p->readLock = SQLITE_SHM_READ;
          }
        }
      }else if( p->lockState==SQLITE_SHM_WRITE ){
        /* Downgrade from WRITE back to the read lock held before */
        unixShmUnlock(pFile, p, UNIX_SHM_C|UNIX_SHM_D);
        p->lockState = p->readLock;
        rc = SQLITE_OK;
      }else{
        /* Leaving RECOVER: give up the recovery lock and mutex */
        assert( p->lockState==SQLITE_SHM_RECOVER );
        unixShmUnlock(pFile, p, UNIX_SHM_MUTEX);
        sqlite3_mutex_leave(pFile->mutexRecov);
        p->lockState = p->readLock;
        rc = SQLITE_OK;
      }
      break;
    }
    case SQLITE_SHM_WRITE: {
      assert( p->lockState==SQLITE_SHM_READ 
              || p->lockState==SQLITE_SHM_READ_FULL );
      rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_C|UNIX_SHM_D);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_WRITE;
      }
      break;
    }
    case SQLITE_SHM_CHECKPOINT: {
      assert( p->lockState==SQLITE_SHM_UNLOCK
           || p->lockState==SQLITE_SHM_PENDING
           || p->lockState==SQLITE_SHM_RECOVER );
      if( p->lockState==SQLITE_SHM_RECOVER ){
        /* Leaving RECOVER: give up the recovery lock and mutex */
        unixShmUnlock(pFile, p, UNIX_SHM_MUTEX);
        sqlite3_mutex_leave(pFile->mutexRecov);
        p->lockState = SQLITE_SHM_CHECKPOINT;
        rc = SQLITE_OK;
      }
      /* The checkpoint lock is taken in two steps: exclusive B+C moves
      ** the connection to PENDING, then exclusive A completes the move
      ** to CHECKPOINT.  If the second step fails the connection stays
      ** in PENDING and a later call resumes from there. */
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_B|UNIX_SHM_C);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_PENDING;
        }
      }
      if( p->lockState==SQLITE_SHM_PENDING ){
        rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_A);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_CHECKPOINT;
        }
      }
      break;
    }
    default: {
      assert( desiredLock==SQLITE_SHM_RECOVER );
      assert( p->lockState==SQLITE_SHM_READ
           || p->lockState==SQLITE_SHM_READ_FULL
           || p->lockState==SQLITE_SHM_CHECKPOINT );
      /* pFile->mutex is released before entering mutexRecov and then
      ** re-acquired, so that mutexRecov is always taken first.
      ** NOTE(review): presumably this avoids blocking on mutexRecov
      ** while holding pFile->mutex - confirm the intended lock order. */
      sqlite3_mutex_leave(pFile->mutex);
      sqlite3_mutex_enter(pFile->mutexRecov);
      sqlite3_mutex_enter(pFile->mutex);
      rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_MUTEX);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_RECOVER;
      }else{
        /* The connection did not enter the RECOVER state, so no later
        ** transition will release mutexRecov.  Release it now so that
        ** it is not left held forever. */
        sqlite3_mutex_leave(pFile->mutexRecov);
      }
      break;
    }
  }
  sqlite3_mutex_leave(pFile->mutex);
  *pGotLock = p->lockState;
  return rc;
}

/*
** Delete a shared-memory segment from the system.
*/
static int unixShmDelete(sqlite3_vfs *pVfs, const char *zName){
  return pVfs->xDelete(pVfs, zName, 0);
Changes to src/sqlite.h.in.
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
  ** definition.  Those that follow are added in version 2 or later
  */
  int (*xShmOpen)(sqlite3_vfs*, const char *zName, sqlite3_shm**);
  int (*xShmSize)(sqlite3_shm*, int reqSize, int *pNewSize, char**);
  int (*xShmRelease)(sqlite3_shm*);
  int (*xShmPush)(sqlite3_shm*);
  int (*xShmPull)(sqlite3_shm*);
  int (*xShmLock)(sqlite3_shm*, int desiredLock, int *gotLock, int shouldBlock);
  int (*xShmClose)(sqlite3_shm*);
  int (*xShmDelete)(sqlite3_vfs*, const char *zName);
  int (*xRename)(sqlite3_vfs*, const char *zOld, const char *zNew, int dirSync);
  int (*xCurrentTimeInt64)(sqlite3_vfs*, sqlite3_int64*);
  /*
  ** The methods above are in versions 1 and 2 of the sqlite3_vfs object.
  ** New fields may be appended in future versions.  The iVersion







|







844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
  ** definition.  Those that follow are added in version 2 or later
  */
  int (*xShmOpen)(sqlite3_vfs*, const char *zName, sqlite3_shm**);
  int (*xShmSize)(sqlite3_shm*, int reqSize, int *pNewSize, char**);
  int (*xShmRelease)(sqlite3_shm*);
  int (*xShmPush)(sqlite3_shm*);
  int (*xShmPull)(sqlite3_shm*);
  int (*xShmLock)(sqlite3_shm*, int desiredLock, int *gotLock);
  int (*xShmClose)(sqlite3_shm*);
  int (*xShmDelete)(sqlite3_vfs*, const char *zName);
  int (*xRename)(sqlite3_vfs*, const char *zOld, const char *zNew, int dirSync);
  int (*xCurrentTimeInt64)(sqlite3_vfs*, sqlite3_int64*);
  /*
  ** The methods above are in versions 1 and 2 of the sqlite3_vfs object.
  ** New fields may be appended in future versions.  The iVersion
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
** These integer constants define the various locking states that
** an sqlite3_shm object can be in.  The SQLITE_SHM_QUERY integer
** is not a valid state - it is a constant passed to the
** sqlite3_vfs.xShmLock() method for querying the current lock
** state.
*/
#define SQLITE_SHM_UNLOCK       0
#define SQLITE_SHM_READ_PREFIX  1
#define SQLITE_SHM_READ_FULL    2
#define SQLITE_SHM_WRITE        3
#define SQLITE_SHM_PENDING      4
#define SQLITE_SHM_CHECKPOINT   5
#define SQLITE_SHM_RECOVER      6
#define SQLITE_SHM_QUERY        (-1)








|







883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
** These integer constants define the various locking states that
** an sqlite3_shm object can be in.  The SQLITE_SHM_QUERY integer
** is not a valid state - it is a constant passed to the
** sqlite3_vfs.xShmLock() method for querying the current lock
** state.
*/
#define SQLITE_SHM_UNLOCK       0
#define SQLITE_SHM_READ         1
#define SQLITE_SHM_READ_FULL    2    /* unix VFS: side effect of asking for READ */
#define SQLITE_SHM_WRITE        3
#define SQLITE_SHM_PENDING      4    /* unix VFS: side effect of CHECKPOINT */
#define SQLITE_SHM_CHECKPOINT   5
#define SQLITE_SHM_RECOVER      6
#define SQLITE_SHM_QUERY        (-1) /* Not a state: query the current lock */