/ Check-in [4cbe49f1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Separate the concepts of underlying storage size and mapped size in the VFS shared-memory implementation.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | wal
Files: files | file ages | folders
SHA1: 4cbe49f13fed288f94ff305bcfd99df907bf7baf
User & Date: drh 2010-04-30 14:39:51
Context
2010-04-30
15:24
If a reader attempts to upgrade to a writer, but is not reading the most recent database snapshot, return SQLITE_BUSY. check-in: 837d82a9 user: dan tags: wal
14:39
Separate the concepts of underlying storage size and mapped size in the VFS shared-memory implementation. check-in: 4cbe49f1 user: drh tags: wal
11:43
Add a missing walIndexUnmap() call to the checkpoint code. Change a couple of SQLITE_CANTOPEN constants to SQLITE_CANTOPEN_BKPT. check-in: 1f9e8c5c user: dan tags: wal
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

4590
4591
4592
4593
4594
4595
4596
4597
4598

4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
....
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
....
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083

5084
5085
5086
5087
5088
5089
5090


5091
5092
5093
5094
5095
5096
5097
5098
5099
5100

5101
5102
5103
5104
5105
5106
5107
5108
5109































5110













5111
5112
5113


5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
....
6486
6487
6488
6489
6490
6491
6492

6493
6494
6495
6496
6497
6498
6499
*/
struct unixShmFile {
  struct unixFileId fid;     /* Unique file identifier */
  sqlite3_mutex *mutex;      /* Mutex to access this object */
  sqlite3_mutex *mutexBuf;   /* Mutex to access zBuf[] */
  sqlite3_mutex *mutexRecov; /* The RECOVER mutex */
  char *zFilename;           /* Name of the file */
  int size;                  /* Size of the file */
  int h;                     /* Open file descriptor */

  char *pMMapBuf;            /* Where currently mmapped() */
  int nReadPrefix;           /* Number of SQLITE_SHM_READ_PREFIX locks */
  int nReadFull;             /* Number of SQLITE_SHM_READ_FULL locks */
  int nRef;                  /* Number of unixShm objects pointing to this */
  unixShm *pFirst;           /* All unixShm objects pointing to this */
  unixShmFile *pNext;        /* Next in list of all unixShmFile objects */
#ifdef SQLITE_DEBUG
  u8 exclMask;               /* Mask of exclusive locks held */
  u8 sharedMask;             /* Mask of shared locks held */
  u8 nextShmId;              /* Next available unixShm.id value */
................................................................................
    rc = fstat(pFile->h, &sStat);
    if( rc ){
      rc = SQLITE_CANTOPEN_BKPT;
      goto shm_open_err;
    }
    pFile->fid.dev = sStat.st_dev;
    pFile->fid.ino = sStat.st_ino;
    pFile->size = (int)sStat.st_size;
    pFile->size = (pFile->size/SQLITE_UNIX_SHM_INCR)*SQLITE_UNIX_SHM_INCR;

    /* Check to see if another process is holding the dead-man switch.
    ** If not, truncate the file to zero length. 
    */
    if( unixShmSystemLock(pFile, F_WRLCK, UNIX_SHM_MUTEX) ){
      rc = SQLITE_IOERR_LOCK;
      goto shm_open_err;
    }
    if( unixShmSystemLock(pFile, F_WRLCK, UNIX_SHM_DMS)==SQLITE_OK ){
      if( ftruncate(pFile->h, 0) ){
        rc = SQLITE_IOERR;
        goto shm_open_err;
      }
      pFile->size = 0;
    }
    rc = unixShmSystemLock(pFile, F_RDLCK, UNIX_SHM_DMS);
    if( rc ) goto shm_open_err;
    unixShmSystemLock(pFile, F_UNLCK, UNIX_SHM_MUTEX);
  }

  /* Make the new connection a child of the unixShmFile */
................................................................................
  }
  unixLeaveMutex();

  return SQLITE_OK;
}

/*
** Query and/or change the size of a shared-memory segment.
**
** reqSize is the requested new size of the segment in bytes, or a
** negative value to do just a query.  A non-negative reqSize is rounded
** up to a multiple of SQLITE_UNIX_SHM_INCR.  If the rounded size differs
** from the currently recorded size, any existing mapping is discarded,
** the underlying file is resized with ftruncate() and the file is mapped
** into memory again at the new size.
**
** The size of the segment after any resizing is written into *pNewSize
** and the start of the mapping (NULL if nothing is mapped) is written
** into *ppBuf.  Both pointers are written unconditionally and so must
** not be NULL.
**
** pFile->mutexBuf is entered by this routine and is still held when the
** routine returns.  xShmRelease() must be called to release it.
** NOTE(review): mutexBuf also remains held when an error is returned -
** confirm that callers invoke xShmRelease() on that path.
**
** Returns SQLITE_OK on success or SQLITE_IOERR if the file cannot be
** resized or mapped.  After a failure the segment is left unmapped with
** a recorded size of zero.
*/
static int unixShmSize(
  sqlite3_shm *pSharedMem,  /* Pointer returned by unixShmOpen() */
  int reqSize,              /* Requested size.  -1 for query only */
  int *pNewSize,            /* Write new size here */
  void **ppBuf              /* Write new buffer origin here */
){
  unixShm *p = (unixShm*)pSharedMem;
  unixShmFile *pFile = p->pFile;
  int rc = SQLITE_OK;

  sqlite3_mutex_enter(pFile->mutexBuf);
  sqlite3_mutex_enter(pFile->mutex);
  if( reqSize>=0 ){
    reqSize = (reqSize + SQLITE_UNIX_SHM_INCR - 1)/SQLITE_UNIX_SHM_INCR;
    reqSize *= SQLITE_UNIX_SHM_INCR;
    if( reqSize!=pFile->size ){
      /* Drop the old mapping before the file changes size.  The segment
      ** is recorded as unmapped until a new mapping is established. */
      if( pFile->pMMapBuf ){
        munmap(pFile->pMMapBuf, pFile->size);
      }
      pFile->pMMapBuf = 0;
      pFile->size = 0;

      if( ftruncate(pFile->h, reqSize) ){
        /* Report an SQLite error code, not the raw -1 from ftruncate() */
        rc = SQLITE_IOERR;
      }else if( reqSize>0 ){
        /* mmap() signals failure with MAP_FAILED, never with NULL.  A
        ** zero-length mapping is always an error, so skip it. */
        void *pMap = mmap(0, reqSize, PROT_READ|PROT_WRITE, MAP_SHARED,
                          pFile->h, 0);
        if( pMap==MAP_FAILED ){
          rc = SQLITE_IOERR;
        }else{
          pFile->pMMapBuf = (char*)pMap;
          pFile->size = reqSize;
        }
      }
    }
  }
  *pNewSize = pFile->size;
  *ppBuf = pFile->pMMapBuf;
  sqlite3_mutex_leave(pFile->mutex);
  return rc;
}

/*
** Release the lock held on the shared memory segment to that other
................................................................................
    unixDlClose,          /* xDlClose */                    \
    unixRandomness,       /* xRandomness */                 \
    unixSleep,            /* xSleep */                      \
    unixCurrentTime,      /* xCurrentTime */                \
    unixGetLastError,     /* xGetLastError */               \
    unixShmOpen,          /* xShmOpen */                    \
    unixShmSize,          /* xShmSize */                    \

    unixShmRelease,       /* xShmRelease */                 \
    0,                    /* xShmPush */                    \
    0,                    /* xShmPull */                    \
    unixShmLock,          /* xShmLock */                    \
    unixShmClose,         /* xShmClose */                   \
    unixShmDelete,        /* xShmDelete */                  \
    0,                    /* xRename */                     \







<

>
|
<
<







 







<
<













<







 







|
|
|
>
|
<

|
|
|
|
>
>




|
<




>

<
<



<
<
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<
<
>
>
|
|
|
|
<
<
|







 







>







4590
4591
4592
4593
4594
4595
4596

4597
4598
4599


4600
4601
4602
4603
4604
4605
4606
....
4989
4990
4991
4992
4993
4994
4995


4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008

5009
5010
5011
5012
5013
5014
5015
....
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080

5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092

5093
5094
5095
5096
5097
5098


5099
5100
5101


5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148


5149
5150
5151
5152
5153
5154


5155
5156
5157
5158
5159
5160
5161
5162
....
6521
6522
6523
6524
6525
6526
6527
6528
6529
6530
6531
6532
6533
6534
6535
*/
struct unixShmFile {
  struct unixFileId fid;     /* Unique file identifier */
  sqlite3_mutex *mutex;      /* Mutex to access this object */
  sqlite3_mutex *mutexBuf;   /* Mutex to access zBuf[] */
  sqlite3_mutex *mutexRecov; /* The RECOVER mutex */
  char *zFilename;           /* Name of the file */

  int h;                     /* Open file descriptor */
  int szMap;                 /* Size of the mapping of file into memory */
  char *pMMapBuf;            /* Where currently mmapped().  NULL if unmapped */


  int nRef;                  /* Number of unixShm objects pointing to this */
  unixShm *pFirst;           /* All unixShm objects pointing to this */
  unixShmFile *pNext;        /* Next in list of all unixShmFile objects */
#ifdef SQLITE_DEBUG
  u8 exclMask;               /* Mask of exclusive locks held */
  u8 sharedMask;             /* Mask of shared locks held */
  u8 nextShmId;              /* Next available unixShm.id value */
................................................................................
    rc = fstat(pFile->h, &sStat);
    if( rc ){
      rc = SQLITE_CANTOPEN_BKPT;
      goto shm_open_err;
    }
    pFile->fid.dev = sStat.st_dev;
    pFile->fid.ino = sStat.st_ino;



    /* Check to see if another process is holding the dead-man switch.
    ** If not, truncate the file to zero length. 
    */
    if( unixShmSystemLock(pFile, F_WRLCK, UNIX_SHM_MUTEX) ){
      rc = SQLITE_IOERR_LOCK;
      goto shm_open_err;
    }
    if( unixShmSystemLock(pFile, F_WRLCK, UNIX_SHM_DMS)==SQLITE_OK ){
      if( ftruncate(pFile->h, 0) ){
        rc = SQLITE_IOERR;
        goto shm_open_err;
      }

    }
    rc = unixShmSystemLock(pFile, F_RDLCK, UNIX_SHM_DMS);
    if( rc ) goto shm_open_err;
    unixShmSystemLock(pFile, F_UNLCK, UNIX_SHM_MUTEX);
  }

  /* Make the new connection a child of the unixShmFile */
................................................................................
  }
  unixLeaveMutex();

  return SQLITE_OK;
}

/*
** Query and/or change the size of the underlying storage for
** a shared-memory segment.  The reqSize parameter is the new size
** of the underlying storage, or -1 to do just a query.  The size
** of the underlying storage (after resizing if resizing occurs) is
** written into *pNewSize.
**
** This routine does not (necessarily) change the size of the mapping 
** of the underlying storage into memory.  Use xShmGet() to change
** the mapping size.
**
** The reqSize parameter is the minimum size requested.  The implementation
** is free to expand the storage to some larger amount if it chooses.
** Here a non-negative reqSize is rounded up to a multiple of
** SQLITE_UNIX_SHM_INCR and passed to ftruncate().
** NOTE(review): ftruncate() will also shrink the file when the rounded
** reqSize is smaller than the current file size, even though reqSize is
** described as a minimum - confirm that this is intended.
**
** Returns SQLITE_OK on success.  Returns SQLITE_IOERR if the file cannot
** be resized or if its size cannot be read back with fstat(); in the
** fstat() failure case *pNewSize is set to zero.
*/
static int unixShmSize(
  sqlite3_shm *pSharedMem,  /* Pointer returned by unixShmOpen() */
  int reqSize,              /* Requested size.  -1 for query only */
  int *pNewSize             /* Write new size here */
){
  unixShm *p = (unixShm*)pSharedMem;
  unixShmFile *pFile = p->pFile;
  int rc = SQLITE_OK;
  struct stat sStat;

  if( reqSize>=0 ){
    reqSize = (reqSize + SQLITE_UNIX_SHM_INCR - 1)/SQLITE_UNIX_SHM_INCR;
    reqSize *= SQLITE_UNIX_SHM_INCR;
    if( ftruncate(pFile->h, reqSize) ){
      /* Report an SQLite error code, not the raw -1 from ftruncate() */
      rc = SQLITE_IOERR;
    }
  }

  /* Always report the size the file really has, even after a failed
  ** resize attempt. */
  if( fstat(pFile->h, &sStat)==0 ){
    *pNewSize = (int)sStat.st_size;
  }else{
    *pNewSize = 0;
    rc = SQLITE_IOERR;
  }
  return rc;
}


/*
** Map the shared storage into memory.  The minimum size of the
** mapping should be reqMapSize if reqMapSize is positive.  If
** reqMapSize is zero or negative, the implementation can choose
** whatever mapping size is convenient.
**
** An existing mapping is reused unless it is empty or smaller than
** reqMapSize.  When a new mapping is made, it covers the larger of
** reqMapSize and the current size of the underlying storage as
** reported by unixShmSize().
**
** *ppBuf is made to point to the memory which is a mapping of the
** underlying storage.  This segment is locked: pFile->mutexBuf is
** entered here and is still held on return.  unixShmRelease()
** must be called to release the lock.
** NOTE(review): mutexBuf also remains held when an error is returned -
** confirm that callers invoke xShmRelease() on that path.
**
** *pNewMapSize is set to the size of the mapping.
**
** *ppBuf and *pNewMapSize are set to NULL and zero, with SQLITE_OK
** returned, if no space has yet been allocated to the underlying
** storage and no positive size was requested.
**
** Returns SQLITE_OK on success or SQLITE_IOERR if mmap() fails, in
** which case *ppBuf is NULL and *pNewMapSize is zero.
*/
static int unixShmGet(
  sqlite3_shm *pSharedMem, /* Pointer returned by unixShmOpen() */
  int reqMapSize,          /* Requested size of mapping. -1 means don't care */
  int *pNewMapSize,        /* Write new size of mapping here */
  void **ppBuf             /* Write mapping buffer origin here */
){
  unixShm *p = (unixShm*)pSharedMem;
  unixShmFile *pFile = p->pFile;
  int rc = SQLITE_OK;

  sqlite3_mutex_enter(pFile->mutexBuf);
  sqlite3_mutex_enter(pFile->mutex);
  if( pFile->szMap==0 || reqMapSize>pFile->szMap ){
    int actualSize;
    if( unixShmSize(pSharedMem, -1, &actualSize)==SQLITE_OK
     && reqMapSize<actualSize
    ){
      reqMapSize = actualSize;
    }

    /* Discard the old mapping.  The segment is recorded as unmapped
    ** until a new mapping has been established successfully. */
    if( pFile->pMMapBuf ){
      munmap(pFile->pMMapBuf, pFile->szMap);
      pFile->pMMapBuf = 0;
    }
    pFile->szMap = 0;

    /* A mapping of zero (or negative) length is always rejected by
    ** mmap(), so leave the segment unmapped in that case. */
    if( reqMapSize>0 ){
      /* mmap() signals failure with MAP_FAILED, never with NULL */
      void *pMap = mmap(0, reqMapSize, PROT_READ|PROT_WRITE, MAP_SHARED,
                        pFile->h, 0);
      if( pMap==MAP_FAILED ){
        rc = SQLITE_IOERR;
      }else{
        pFile->pMMapBuf = (char*)pMap;
        pFile->szMap = reqMapSize;
      }
    }
  }
  *pNewMapSize = pFile->szMap;
  *ppBuf = pFile->pMMapBuf;
  sqlite3_mutex_leave(pFile->mutex);
  return rc;
}

/*
** Release the lock held on the shared memory segment to that other
................................................................................
    unixDlClose,          /* xDlClose */                    \
    unixRandomness,       /* xRandomness */                 \
    unixSleep,            /* xSleep */                      \
    unixCurrentTime,      /* xCurrentTime */                \
    unixGetLastError,     /* xGetLastError */               \
    unixShmOpen,          /* xShmOpen */                    \
    unixShmSize,          /* xShmSize */                    \
    unixShmGet,           /* xShmGet */                     \
    unixShmRelease,       /* xShmRelease */                 \
    0,                    /* xShmPush */                    \
    0,                    /* xShmPull */                    \
    unixShmLock,          /* xShmLock */                    \
    unixShmClose,         /* xShmClose */                   \
    unixShmDelete,        /* xShmDelete */                  \
    0,                    /* xRename */                     \

Changes to src/sqlite.h.in.

840
841
842
843
844
845
846
847

848
849
850
851
852
853
854
  int (*xCurrentTime)(sqlite3_vfs*, double*);
  int (*xGetLastError)(sqlite3_vfs*, int, char *);
  /*
  ** The methods above are in version 1 of the sqlite_vfs object
  ** definition.  Those that follow are added in version 2 or later
  */
  int (*xShmOpen)(sqlite3_vfs*, const char *zName, sqlite3_shm**);
  int (*xShmSize)(sqlite3_shm*, int reqSize, int *pNewSize, void**);

  int (*xShmRelease)(sqlite3_shm*);
  int (*xShmPush)(sqlite3_shm*);
  int (*xShmPull)(sqlite3_shm*);
  int (*xShmLock)(sqlite3_shm*, int desiredLock, int *gotLock);
  int (*xShmClose)(sqlite3_shm*);
  int (*xShmDelete)(sqlite3_vfs*, const char *zName);
  int (*xRename)(sqlite3_vfs*, const char *zOld, const char *zNew, int dirSync);







|
>







840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
  int (*xCurrentTime)(sqlite3_vfs*, double*);
  int (*xGetLastError)(sqlite3_vfs*, int, char *);
  /*
  ** The methods above are in version 1 of the sqlite_vfs object
  ** definition.  Those that follow are added in version 2 or later
  */
  int (*xShmOpen)(sqlite3_vfs*, const char *zName, sqlite3_shm**);
  int (*xShmSize)(sqlite3_shm*, int reqSize, int *pNewSize);
  int (*xShmGet)(sqlite3_shm*, int reqMapSize, int *pMapSize, void**);
  int (*xShmRelease)(sqlite3_shm*);
  int (*xShmPush)(sqlite3_shm*);
  int (*xShmPull)(sqlite3_shm*);
  int (*xShmLock)(sqlite3_shm*, int desiredLock, int *gotLock);
  int (*xShmClose)(sqlite3_shm*);
  int (*xShmDelete)(sqlite3_vfs*, const char *zName);
  int (*xRename)(sqlite3_vfs*, const char *zOld, const char *zNew, int dirSync);

Changes to src/wal.c.

122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
...
364
365
366
367
368
369
370
371

372
373
374
375
376
377
378
379
380
381
382



383
384
385
386

387
388
389


390
391



















392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
...
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
...
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
...
785
786
787
788
789
790
791

792
793
794
795
796
797
798
799
...
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
...
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
...
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
** following object.
*/
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pFd; also supplies
                             ** the xShmXxx methods used on pWIndex */
  sqlite3_file *pFd;         /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0).  Its
                             ** address also serves as a non-NULL stand-in
                             ** for pWiData when the wal-index is empty */
  sqlite3_shm *pWIndex;      /* The open wal-index file */
  int szWIndex;              /* Size of the wal-index, as last reported by
                             ** xShmSize() */
  u32 *pWiData;              /* Pointer to wal-index content in memory.
                             ** NULL when no reference to the mapping is
                             ** held (see walIndexUnmap()) */
  u8 lockState;              /* SQLITE_SHM_xxxx constant showing lock state */
  u8 readerType;             /* SQLITE_SHM_READ or SQLITE_SHM_READ_FULL */
  WalIndexHdr hdr;           /* Wal-index for current snapshot */
};


................................................................................
      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32)
    + (((iFrame-1)>>8)<<6)        /* Indexes that occur before iFrame */
    + iFrame-1                    /* Db page numbers that occur before iFrame */
  );
}

/*
** Give up this connection's reference to the wal-index memory map.
**
** A non-NULL pWal->pWiData means a reference is currently held.  In
** that case xShmRelease() is invoked on the wal-index handle and
** pWiData is reset to NULL.  Otherwise this routine is a no-op.
*/
static void walIndexUnmap(Wal *pWal){
  if( pWal->pWiData==0 ) return;
  pWal->pVfs->xShmRelease(pWal->pWIndex);
  pWal->pWiData = 0;
}

/*
** Resize and remap the wal-index file.  If newSize is negative, the
** size is left unchanged and the file is simply mapped.
**
** Any reference to the existing mapping is released first.  The VFS
** xShmSize() method then stores the resulting size in pWal->szWIndex
** and the mapping address in pWal->pWiData.  If xShmSize() succeeds but
** supplies no buffer, pWiData is pointed at pWal->iCallback so that it
** is non-NULL and a later walIndexUnmap() still calls xShmRelease().
**
** Returns the result code of xShmSize().
*/
static int walIndexRemap(Wal *pWal, int newSize){
  sqlite3_vfs *pVfs = pWal->pVfs;
  void **ppData = (void**)(char*)&pWal->pWiData;
  int rc;

  walIndexUnmap(pWal);
  rc = pVfs->xShmSize(pWal->pWIndex, newSize, &pWal->szWIndex, ppData);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  if( pWal->pWiData==0 ){
    assert( pWal->szWIndex==0 );
    pWal->pWiData = &pWal->iCallback;
  }
  return rc;
}

/*
** Map the wal-index file into memory without changing its size.  This
** is walIndexRemap() with a negative (query-only) size argument, and
** the result code of that call is returned.
*/
static int walIndexMap(Wal *pWal){
  return walIndexRemap(pWal, -1);
}

/*
** Increment by which to increase the wal-index file size.
*/
#define WALINDEX_MMAP_INCREMENT (64*1024)

/*
** Set an entry in the wal-index map to map log frame iFrame to db 
................................................................................
** value of iFrame is always exactly one more than the value passed to
** the previous call), but that restriction is not enforced or asserted
** here.
*/
static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
  u32 iSlot = walIndexEntry(iFrame);
  
  walIndexMap(pWal);
  while( (iSlot+128)>=pWal->szWIndex ){
    int rc;
    int nByte = pWal->szWIndex*4 + WALINDEX_MMAP_INCREMENT;

    /* Unmap and remap the wal-index file. */
    rc = walIndexRemap(pWal, nByte);
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }

  /* Set the wal-index entry itself */
................................................................................
  u32 iLast;                      /* Last frame in log */
  int nByte;                      /* Number of bytes to allocate */
  int i;                          /* Iterator variable */
  int nFinal;                     /* Number of unindexed entries */
  struct WalSegment *pFinal;      /* Final (unindexed) segment */
  u8 *aTmp;                       /* Temp space used by merge-sort */

  walIndexMap(pWal);
  aData = pWal->pWiData;
  iLast = pWal->hdr.iLastPg;
  nSegment = (iLast >> 8) + 1;
  nFinal = (iLast & 0x000000FF);

  nByte = sizeof(WalIterator) + (nSegment-1)*sizeof(struct WalSegment) + 512;
  p = (WalIterator *)sqlite3_malloc(nByte);
................................................................................
** If the checksum cannot be verified return SQLITE_ERROR.
*/
int walIndexTryHdr(Wal *pWal, int *pChanged){
  u32 aCksum[2] = {1, 1};
  u32 aHdr[WALINDEX_HDR_NFIELD+2];

  if( pWal->szWIndex==0 ){

    int rc = walIndexRemap(pWal, WALINDEX_MMAP_INCREMENT);
    if( rc ) return rc;
  }

  /* Read the header. The caller may or may not have locked the wal-index
  ** file, meaning it is possible that an inconsistent snapshot is read
  ** from the file. If this happens, return SQLITE_ERROR. The caller will
  ** retry. Or, if the caller has already locked the file and the header
................................................................................
** If the wal-index header is successfully read, return SQLITE_OK. 
** Otherwise an SQLite error code.
*/
static int walIndexReadHdr(Wal *pWal, int *pChanged){
  int rc;

  assert( pWal->lockState>=SQLITE_SHM_READ );
  walIndexMap(pWal);

  /* First try to read the header without a lock. Verify the checksum
  ** before returning. This will almost always work.  
  */
  if( SQLITE_OK==walIndexTryHdr(pWal, pChanged) ){
    return SQLITE_OK;
  }
................................................................................

    rc = walIndexReadHdr(pWal, pChanged);
    if( rc!=SQLITE_OK ){
      /* An error occurred while attempting log recovery. */
      sqlite3WalCloseSnapshot(pWal);
    }else{
      /* Check if the mapping needs to grow. */
     if( pWal->hdr.iLastPg 
      && walIndexEntry(pWal->hdr.iLastPg)>=pWal->szWIndex
     ){
        rc = walIndexRemap(pWal, 0);
        assert( rc || walIndexEntry(pWal->hdr.iLastPg)<pWal->szWIndex );
      }
    }
  }

  walIndexUnmap(pWal);
  return rc;
}
................................................................................
*/
int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, u8 *pOut){
  u32 iRead = 0;
  u32 *aData; 
  int iFrame = (pWal->hdr.iLastPg & 0xFFFFFF00);

  assert( pWal->lockState==SQLITE_SHM_READ||pWal->lockState==SQLITE_SHM_WRITE );
  walIndexMap(pWal);

  /* Do a linear search of the unindexed block of page-numbers (if any) 
  ** at the end of the wal-index. An alternative to this would be to
  ** build an index in private memory each time a read transaction is
  ** opened on a new snapshot.
  */
  aData = pWal->pWiData;







|







 







|
>









|
|
>
>
>

|
|
<
>
|
|
|
>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




<
<
<
<
<
<
<
<







 







|




|







 







|







 







>
|







 







|







 







|
|
|
|
<







 







|







122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
...
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389

390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420








421
422
423
424
425
426
427
...
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
...
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
...
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
...
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
...
895
896
897
898
899
900
901
902
903
904
905

906
907
908
909
910
911
912
...
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
** following object.
*/
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pFd; also supplies
                             ** the xShmXxx methods used on pWIndex */
  sqlite3_file *pFd;         /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0).  Its
                             ** address also serves as a non-NULL stand-in
                             ** for pWiData when nothing is mapped */
  sqlite3_shm *pWIndex;      /* The open wal-index file */
  int szWIndex;              /* Size of the wal-index that is mapped in mem,
                             ** as last reported by xShmGet() */
  u32 *pWiData;              /* Pointer to wal-index content in memory.
                             ** NULL when no reference to the mapping is
                             ** held (see walIndexUnmap()) */
  u8 lockState;              /* SQLITE_SHM_xxxx constant showing lock state */
  u8 readerType;             /* SQLITE_SHM_READ or SQLITE_SHM_READ_FULL */
  WalIndexHdr hdr;           /* Wal-index for current snapshot */
};


................................................................................
      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32)
    + (((iFrame-1)>>8)<<6)        /* Indexes that occur before iFrame */
    + iFrame-1                    /* Db page numbers that occur before iFrame */
  );
}

/*
** Give up this connection's reference to the wal-index memory map, if
** one is held.
**
** A non-NULL pWal->pWiData means a reference is currently held.  In
** that case xShmRelease() is invoked on the wal-index handle and
** pWiData is reset to NULL.  Otherwise this routine is a no-op.
*/
static void walIndexUnmap(Wal *pWal){
  if( pWal->pWiData==0 ) return;
  pWal->pVfs->xShmRelease(pWal->pWIndex);
  pWal->pWiData = 0;
}

/*
** Obtain a reference to the wal-index memory map unless one is already
** held.
**
** reqSize is the minimum required size of the mapping, or -1 for
** "don't care".  It is ignored when the mapping is already held
** (pWal->pWiData is non-NULL), in which case SQLITE_OK is returned
** without calling the VFS.
**
** Otherwise xShmGet() stores the mapping size in pWal->szWIndex and the
** mapping address in pWal->pWiData, and its result code is returned.
*/
static int walIndexMap(Wal *pWal, int reqSize){
  int rc;

  if( pWal->pWiData ){
    return SQLITE_OK;
  }
  rc = pWal->pVfs->xShmGet(pWal->pWIndex, reqSize, &pWal->szWIndex,
                           (void**)(char*)&pWal->pWiData);
  if( rc==SQLITE_OK && pWal->pWiData==0 ){
    /* xShmGet() succeeded but supplied no buffer.  Point pWiData at
    ** something non-NULL so that walIndexUnmap() still releases the
    ** lock on the mapping. */
    assert( pWal->szWIndex==0 );
    pWal->pWiData = &pWal->iCallback;
  }
  return rc;
}

/*
** Remap the wal-index so that the mapping covers the full size of the
** underlying storage.
**
** enlargeTo is passed to xShmSize(): a non-negative value asks for the
** underlying storage to be made at least that big before remapping,
** while a negative value only queries the current size.
**
** The mapping is released and re-acquired only when the storage size
** reported by xShmSize() exceeds pWal->szWIndex.  Returns the result
** code of xShmSize(), or of walIndexMap() if a remap was attempted.
*/
static int walIndexRemap(Wal *pWal, int enlargeTo){
  int sz;
  int rc = pWal->pVfs->xShmSize(pWal->pWIndex, enlargeTo, &sz);

  if( rc!=SQLITE_OK || sz<=pWal->szWIndex ){
    return rc;
  }
  walIndexUnmap(pWal);
  return walIndexMap(pWal, sz);
}









/*
** Increment by which to increase the wal-index file size.
*/
#define WALINDEX_MMAP_INCREMENT (64*1024)

/*
** Set an entry in the wal-index map to map log frame iFrame to db 
................................................................................
** value of iFrame is always exactly one more than the value passed to
** the previous call), but that restriction is not enforced or asserted
** here.
*/
static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
  u32 iSlot = walIndexEntry(iFrame);
  
  walIndexMap(pWal, -1);
  while( (iSlot+128)>=pWal->szWIndex ){
    int rc;
    int nByte = pWal->szWIndex*4 + WALINDEX_MMAP_INCREMENT;

    /* Enlarge the storage, then remap it. */
    rc = walIndexRemap(pWal, nByte);
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }

  /* Set the wal-index entry itself */
................................................................................
  u32 iLast;                      /* Last frame in log */
  int nByte;                      /* Number of bytes to allocate */
  int i;                          /* Iterator variable */
  int nFinal;                     /* Number of unindexed entries */
  struct WalSegment *pFinal;      /* Final (unindexed) segment */
  u8 *aTmp;                       /* Temp space used by merge-sort */

  walIndexMap(pWal, -1);
  aData = pWal->pWiData;
  iLast = pWal->hdr.iLastPg;
  nSegment = (iLast >> 8) + 1;
  nFinal = (iLast & 0x000000FF);

  nByte = sizeof(WalIterator) + (nSegment-1)*sizeof(struct WalSegment) + 512;
  p = (WalIterator *)sqlite3_malloc(nByte);
................................................................................
** If the checksum cannot be verified return SQLITE_ERROR.
*/
int walIndexTryHdr(Wal *pWal, int *pChanged){
  u32 aCksum[2] = {1, 1};
  u32 aHdr[WALINDEX_HDR_NFIELD+2];

  if( pWal->szWIndex==0 ){
    int rc;
    rc = walIndexRemap(pWal, WALINDEX_MMAP_INCREMENT);
    if( rc ) return rc;
  }

  /* Read the header. The caller may or may not have locked the wal-index
  ** file, meaning it is possible that an inconsistent snapshot is read
  ** from the file. If this happens, return SQLITE_ERROR. The caller will
  ** retry. Or, if the caller has already locked the file and the header
................................................................................
** If the wal-index header is successfully read, return SQLITE_OK. 
** Otherwise an SQLite error code.
*/
static int walIndexReadHdr(Wal *pWal, int *pChanged){
  int rc;

  assert( pWal->lockState>=SQLITE_SHM_READ );
  walIndexMap(pWal, -1);

  /* First try to read the header without a lock. Verify the checksum
  ** before returning. This will almost always work.  
  */
  if( SQLITE_OK==walIndexTryHdr(pWal, pChanged) ){
    return SQLITE_OK;
  }
................................................................................

    rc = walIndexReadHdr(pWal, pChanged);
    if( rc!=SQLITE_OK ){
      /* An error occurred while attempting log recovery. */
      sqlite3WalCloseSnapshot(pWal);
    }else{
      /* Check if the mapping needs to grow. */
      if( pWal->hdr.iLastPg 
       && walIndexEntry(pWal->hdr.iLastPg)>=pWal->szWIndex
      ){
         walIndexRemap(pWal, -1);

      }
    }
  }

  walIndexUnmap(pWal);
  return rc;
}
................................................................................
*/
int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, u8 *pOut){
  u32 iRead = 0;
  u32 *aData; 
  int iFrame = (pWal->hdr.iLastPg & 0xFFFFFF00);

  assert( pWal->lockState==SQLITE_SHM_READ||pWal->lockState==SQLITE_SHM_WRITE );
  walIndexMap(pWal, -1);

  /* Do a linear search of the unindexed block of page-numbers (if any) 
  ** at the end of the wal-index. An alternative to this would be to
  ** build an index in private memory each time a read transaction is
  ** opened on a new snapshot.
  */
  aData = pWal->pWiData;