SQLite

Check-in [8549c28649]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhancements to wal-mode locking scheme.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | wal
Files: files | file ages | folders
SHA1: 8549c286497f3d2cd118be1334fce00d6f8a26c4
User & Date: dan 2010-04-17 12:31:37.000
Context
2010-04-17
15:42
In synchronous=normal mode, do not sync the log after every transaction. In synchronous=full mode, sync the log and add any extra frames required to avoid blast-radius related problems after each transaction. (check-in: 9bc9b68473 user: dan tags: wal)
12:31
Enhancements to wal-mode locking scheme. (check-in: 8549c28649 user: dan tags: wal)
2010-04-16
13:59
Change the log file format to include a small (12 byte) header at the start of the file. (check-in: 9865d14d60 user: dan tags: wal)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/log.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

/*
** This file contains the implementation of a log file used in 
** "journal_mode=wal" mode.
*/

/*
** LOG FILE FORMAT
**
** A log file consists of a header followed by zero or more log frames.
** The log header is 12 bytes in size and consists of the following three
** big-endian 32-bit unsigned integer values:
**
**    0: Database page size,
**    4: Randomly selected salt value 1,
**    8: Randomly selected salt value 2.
**
** Immediately following the log header are zero or more log frames. Each
** frame itself consists of a 16-byte header followed by a <page-size> bytes
** of page data. The header is broken into 4 big-endian 32-bit unsigned 
** integer values, as follows:
**
**    0:  Page number.
**    4:  For commit records, the size of the database image in pages 
**        after the commit. For all other records, zero.
**    8:  Checksum value 1.
**    12: Checksum value 2.
*/

/* 
** LOG SUMMARY FORMAT
**
** TODO.













|
|
|






|
|

|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

/*
** This file contains the implementation of a log file used in 
** "journal_mode=wal" mode.
*/

/*
** LOG FILE FORMAT
**
** A log file consists of a header followed by zero or more log frames.
** The log header is 12 bytes in size and consists of the following three
** big-endian 32-bit unsigned integer values:
**
**     0: Database page size,
**     4: Randomly selected salt value 1,
**     8: Randomly selected salt value 2.
**
** Immediately following the log header are zero or more log frames. Each
** frame itself consists of a 16-byte header followed by <page-size> bytes
** of page data. The header is broken into 4 big-endian 32-bit unsigned 
** integer values, as follows:
**
**     0: Page number.
**     4: For commit records, the size of the database image in pages 
**        after the commit. For all other records, zero.
**     8: Checksum value 1.
**    12: Checksum value 2.
*/

/* 
** LOG SUMMARY FORMAT
**
** TODO.
102
103
104
105
106
107
108
109


110
111
112
113
114




115
116
117
118
119
120
121
  int nData;                      /* Size of aData allocation/mapping */
  u32 *aData;                     /* File body */
};


/*
** The four lockable regions associated with each log-summary. A connection
** may take either a SHARED or EXCLUSIVE lock on each.


*/
#define LOG_REGION_A 0x01
#define LOG_REGION_B 0x02
#define LOG_REGION_C 0x04
#define LOG_REGION_D 0x08





/*
** A single instance of this structure is allocated as part of each 
** connection to a database log. All structures associated with the 
** same log file are linked together into a list using LogLock.pNext
** starting at LogSummary.pLock.
**







|
>
>





>
>
>
>







102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
  int nData;                      /* Size of aData allocation/mapping */
  u32 *aData;                     /* File body */
};


/*
** The four lockable regions associated with each log-summary. A connection
** may take either a SHARED or EXCLUSIVE lock on each. An ORed combination
** of the following bitmasks is passed as the second argument to the
** logLockRegion() function.
*/
#define LOG_REGION_A 0x01
#define LOG_REGION_B 0x02
#define LOG_REGION_C 0x04
#define LOG_REGION_D 0x08

#define LOG_LOCK_MUTEX  12
#define LOG_LOCK_DMH    13
#define LOG_LOCK_REGION 14

/*
** A single instance of this structure is allocated as part of each 
** connection to a database log. All structures associated with the 
** same log file are linked together into a list using LogLock.pNext
** starting at LogSummary.pLock.
**
312
313
314
315
316
317
318
319
320
321
322
323
324
325


326



327
328
329
330
331
332
333
** Unmap the log-summary mapping and close the file-descriptor. If
** the isTruncate argument is non-zero, truncate the log-summary file
** region to zero bytes.
**
** Regardless of the value of isTruncate, close the file-descriptor
** opened on the log-summary file.
*/
static int logSummaryUnmap(LogSummary *pSummary, int isTruncate){
  int rc = SQLITE_OK;
  if( pSummary->aData ){
    assert( pSummary->fd>0 );
    munmap(pSummary->aData, pSummary->nData);
    pSummary->aData = 0;
    if( isTruncate ){


      rc = (ftruncate(pSummary->fd, 0) ? SQLITE_IOERR : SQLITE_OK);



    }
  }
  if( pSummary->fd>0 ){
    close(pSummary->fd);
    pSummary->fd = -1;
  }
  return rc;







|





|
>
>
|
>
>
>







318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
** Unmap the log-summary mapping and close the file-descriptor. If
** the isUnlink argument is non-zero, also unlink the log-summary file
** from the file-system.
**
** Regardless of the value of isTruncate, close the file-descriptor
** opened on the log-summary file.
*/
static int logSummaryUnmap(LogSummary *pSummary, int isUnlink){
  int rc = SQLITE_OK;
  if( pSummary->aData ){
    assert( pSummary->fd>0 );
    munmap(pSummary->aData, pSummary->nData);
    pSummary->aData = 0;
    if( isUnlink ){
      char *zFile = sqlite3_mprintf("%s-summary", pSummary->zPath);
      if( !zFile ){
        rc = SQLITE_NOMEM;
      }
      unlink(zFile);
      sqlite3_free(zFile);
    }
  }
  if( pSummary->fd>0 ){
    close(pSummary->fd);
    pSummary->fd = -1;
  }
  return rc;
585
586
587
588
589
590
591







592















































































































































































593
594
595
596
597



598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617

618
619



620

621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639

640
641
642
643
644
645
646
647
648
649
650
651
652
653
654






655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673

674
675
676
677
678
679
680
  }

finished:
  logSummaryWriteHdr(pSummary, &hdr);
  return rc;
}
























































































































































































/*
** Connect the LogSummary object pSummary to its on-disk log-summary file.
**
** The summary file is named "<zPath>-summary". It is opened (and created
** if necessary), locked via logSummaryLock(), and mapped. A SHARED lock
** is then taken on the log file pFd, followed by an attempt to upgrade
** it to EXCLUSIVE:
**
**   * If the upgrade succeeds, this is the first and only connection, so
**     the log-summary header is zeroed (its contents cannot be trusted)
**     and the lock is dropped back to SHARED.
**
**   * If the upgrade fails with SQLITE_BUSY, other connections exist.
**     This is not an error.
**
** The SHARED lock on the log file is not released by this function; it
** is held until the connection to the database is closed.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the file name cannot be
** allocated, SQLITE_IOERR if the summary file cannot be opened, or the
** error code of whichever locking/mapping step failed.
*/
static int logSummaryInit(LogSummary *pSummary, sqlite3_file *pFd){
  char *zSummaryPath;             /* Name of the log-summary file */
  int rc;                         /* Return Code */

  assert( pSummary->fd<0 );
  assert( pSummary->aData==0 );
  assert( pSummary->nRef>0 );
  assert( pSummary->zPath );

  /* Build the summary file name and open a file descriptor on it. */
  zSummaryPath = sqlite3_mprintf("%s-summary", pSummary->zPath);
  if( zSummaryPath==0 ) return SQLITE_NOMEM;
  pSummary->fd = open(zSummaryPath, O_RDWR|O_CREAT, S_IWUSR|S_IRUSR);
  sqlite3_free(zSummaryPath);
  if( pSummary->fd<0 ) return SQLITE_IOERR;

  /* Grab an exclusive lock on the summary file. If that fails there is
  ** nothing to release, so return immediately.
  */
  rc = logSummaryLock(pSummary);
  if( rc!=SQLITE_OK ) return rc;

  /* Map the summary file. TODO: This code needs to be enhanced to support
  ** a growable mapping. For now, just make the mapping very large to
  ** start with.
  */
  rc = logSummaryMap(pSummary, 512*1024);

  /* Take the SHARED lock on the log file that is held until close. */
  if( rc==SQLITE_OK ){
    rc = sqlite3OsLock(pFd, SQLITE_LOCK_SHARED);
  }

  /* Probe for other connections by attempting an EXCLUSIVE lock. */
  if( rc==SQLITE_OK ){
    rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE);
    if( rc==SQLITE_BUSY ){
      /* Somebody else is connected. The summary contents stand. */
      rc = SQLITE_OK;
    }else if( rc==SQLITE_OK ){
      /* This is the first and only connection. */
      memset(pSummary->aData, 0, (LOGSUMMARY_HDR_NFIELD+2)*sizeof(u32) );
      rc = sqlite3OsUnlock(pFd, SQLITE_LOCK_SHARED);
    }
  }

  /* The summary lock is released on every path that acquired it. */
  logSummaryUnlock(pSummary);
  return rc;
}

/* 
** Open a connection to the log file associated with database zDb. The
** database file does not actually have to exist. zDb is used only to
** figure out the name of the log file to open. If the log file does not 
** exist it is created by this call.






*/
int sqlite3LogOpen(
  sqlite3_vfs *pVfs,              /* vfs module to open log file with */
  const char *zDb,                /* Name of database file */
  Log **ppLog                     /* OUT: Allocated Log handle */
){
  int rc = SQLITE_OK;             /* Return Code */
  Log *pRet;                      /* Object to allocate and return */
  LogSummary *pSummary = 0;       /* Summary object */
  sqlite3_mutex *mutex = 0;       /* LOG_SUMMARY_MUTEX mutex */
  int flags;                      /* Flags passed to OsOpen() */
  char *zWal = 0;                 /* Path to WAL file */
  int nWal;                       /* Length of zWal in bytes */

  /* Zero output variables */
  assert( zDb );
  *ppLog = 0;

  /* Allocate an instance of struct Log to return. */

  pRet = (Log *)sqlite3MallocZero(sizeof(Log) + pVfs->szOsFile);
  if( !pRet ) goto out;
  pRet->pVfs = pVfs;
  pRet->pFd = (sqlite3_file *)&pRet[1];
  pRet->sync_flags = SQLITE_SYNC_NORMAL;

  /* Normalize the path name. */







>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




|
>
>
>



















|
>
|
|
>
>
>

>
|




|
<
|
|
<
<
<

|
<
<

<

>
|
|
|



|








>
>
>
>
>
>














<

<


>







596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827

828
829



830
831


832

833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869

870

871
872
873
874
875
876
877
878
879
880
  }

finished:
  logSummaryWriteHdr(pSummary, &hdr);
  return rc;
}

/*
** Values for the third parameter to logLockRegion().
*/
#define LOG_UNLOCK  0
#define LOG_RDLOCK  1
#define LOG_WRLOCK  2
#define LOG_WRLOCKW 3

/*
** Apply an fcntl() byte-range lock operation to the log-summary file
** descriptor pSummary->fd. The range starts at byte offset iStart
** (measured from the start of the file) and is nByte bytes long.
**
** Parameter op is one of LOG_UNLOCK, LOG_RDLOCK, LOG_WRLOCK or
** LOG_WRLOCKW. The first three use the non-blocking F_SETLK command;
** LOG_WRLOCKW requests a write-lock using F_SETLKW.
**
** Returns SQLITE_OK if fcntl() returns zero, or SQLITE_BUSY for any
** other fcntl() result.
*/
static int logLockFd(LogSummary *pSummary, int iStart, int nByte, int op){
  /* Indexed by op: the flock.l_type value and the fcntl() command. */
  static const struct LockOp {
    int eType;                  /* Value for flock.l_type */
    int eCmd;                   /* Second argument to fcntl() */
  } aLockOp[] = {
    { F_UNLCK, F_SETLK  },      /* LOG_UNLOCK */
    { F_RDLCK, F_SETLK  },      /* LOG_RDLOCK */
    { F_WRLCK, F_SETLK  },      /* LOG_WRLOCK */
    { F_WRLCK, F_SETLKW }       /* LOG_WRLOCKW */
  };
  struct flock lk;              /* Locking operation */

  assert( op>=0 && op<ArraySize(aLockOp) );

  memset(&lk, 0, sizeof(lk));
  lk.l_whence = SEEK_SET;
  lk.l_type = aLockOp[op].eType;
  lk.l_start = iStart;
  lk.l_len = nByte;

  if( fcntl(pSummary->fd, aLockOp[op].eCmd, &lk)!=0 ){
    return SQLITE_BUSY;
  }
  return SQLITE_OK;
}

/*
** Lock or unlock one or more of the lockable log-summary regions on
** behalf of connection pLog.
**
** Parameter mRegion is an ORed combination of the LOG_REGION_A..D
** bitmasks. Only the specific (op, mRegion) pairs enumerated by the
** first assert() below are expected. Parameter op is LOG_UNLOCK,
** LOG_RDLOCK or LOG_WRLOCK; the blocking variant LOG_WRLOCKW is not
** handled here (see the assert() in the switch).
**
** Locks are tracked at two levels:
**
**   * pLog->lock.mLock records what this connection holds. The least
**     significant byte is the SHARED mask and the next byte is the
**     EXCLUSIVE mask.
**
**   * The byte-range locks on the log-summary file descriptor, which is
**     shared by every connection in the pSummary->pLock list, are
**     adjusted through logLockFd() as required.
**
** pSummary->mutex is held for the duration of the call. Returns
** SQLITE_OK if the operation succeeds, SQLITE_BUSY if a lock held by
** another connection in this process conflicts with the request, or the
** (non-zero) value returned by logLockFd() if the file-lock operation
** fails. On any failure pLog->lock.mLock is left unmodified.
*/
static int logLockRegion(Log *pLog, u32 mRegion, int op){
  LogSummary *pSummary = pLog->pSummary;
  LogLock *p;                     /* Used to iterate through in-process locks */
  u32 mOther;                     /* Locks held by other connections */
  u32 mNew;                       /* New mask for pLog */

  assert( 
       /* Writer lock operations */
          (op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))

       /* Normal reader lock operations */
       || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B))

       /* Region D reader lock operations */
       || (op==LOG_RDLOCK && mRegion==(LOG_REGION_D))
       || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_D))

       /* Checkpointer lock operations */
       || (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
       || (op==LOG_WRLOCK && mRegion==(LOG_REGION_A))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C))
  );

  /* Assert that a connection never tries to go from an EXCLUSIVE to a 
  ** SHARED lock on a region. Moving from SHARED to EXCLUSIVE sometimes
  ** happens though (when a region D reader upgrades to a writer).
  */
  assert( op!=LOG_RDLOCK || 0==(pLog->lock.mLock & (mRegion<<8)) );

  sqlite3_mutex_enter(pSummary->mutex);

  /* Calculate a mask of locks held by all connections in this process apart
  ** from this one. The least significant byte of the mask contains a mask
  ** of the SHARED locks held. The next least significant byte of the mask
  ** indicates the EXCLUSIVE locks held. For example, to test if some other
  ** connection is holding a SHARED lock on region A, or an EXCLUSIVE lock
  ** on region C, do:
  **
  **   hasSharedOnA    = (mOther & (LOG_REGION_A<<0));
  **   hasExclusiveOnC = (mOther & (LOG_REGION_C<<8));
  **
  ** In all masks, if the bit in the EXCLUSIVE byte mask is set, so is the 
  ** corresponding bit in the SHARED mask.
  */
  mOther = 0;
  for(p=pSummary->pLock; p; p=p->pNext){
    /* Every EXCLUSIVE bit must have its matching SHARED bit set. */
    assert( (p->mLock & (p->mLock<<8))==(p->mLock&0x0000FF00) );
    if( p!=&pLog->lock ){
      mOther |= p->mLock;
    }
  }

  /* If this call is to lock a region (not to unlock one), test if locks held
  ** by any other connection in this process prevent the new locks from
  ** being granted. If so, exit the summary mutex and return SQLITE_BUSY.
  **
  ** A SHARED request conflicts only with EXCLUSIVE locks (hence the shift
  ** by 8). An EXCLUSIVE request conflicts with any lock at all; testing
  ** the SHARED byte is sufficient because EXCLUSIVE implies SHARED.
  */
  if( op && (mOther & (mRegion << (op==LOG_RDLOCK ? 8 : 0))) ){
    sqlite3_mutex_leave(pSummary->mutex);
    return SQLITE_BUSY;
  }

  /* Figure out the new lock mask for this connection. */
  switch( op ){
    case LOG_UNLOCK: 
      mNew = (pLog->lock.mLock & ~(mRegion|(mRegion<<8)));
      break;
    case LOG_RDLOCK:
      mNew = (pLog->lock.mLock | mRegion);
      break;
    default:
      assert( op==LOG_WRLOCK );
      mNew = (pLog->lock.mLock | (mRegion<<8) | mRegion);
      break;
  }

  /* Now modify the locks held on the log-summary file descriptor. This
  ** file descriptor is shared by all log connections in this process. 
  ** Therefore:
  **
  **   + If one or more log connections in this process hold a SHARED lock
  **     on a region, the file-descriptor should hold a SHARED lock on
  **     the file region.
  **
  **   + If a log connection in this process holds an EXCLUSIVE lock on a
  **     region, the file-descriptor should also hold an EXCLUSIVE lock on
  **     the region in question.
  **
  ** If this is an LOG_UNLOCK operation, only regions for which no other
  ** connection holds a lock should actually be unlocked. And if this
  ** is a LOG_RDLOCK operation and other connections already hold all
  ** the required SHARED locks, then no system call is required.
  */
  if( op==LOG_UNLOCK ){
    mRegion = (mRegion & ~mOther);
  }
  if( (op==LOG_WRLOCK)
   || (op==LOG_UNLOCK && mRegion) 
   || (op==LOG_RDLOCK && (mOther&mRegion)!=mRegion)
  ){
    /* Map from a region bitmask to the byte range to lock. The table
    ** places region D at offset 1+LOG_LOCK_REGION and region A at offset
    ** 4+LOG_LOCK_REGION, so only masks that describe a contiguous run
    ** of regions have an entry. All other masks map to {0, 0} and are
    ** rejected by the assert() below.
    */
    struct LockMap {
      int iStart;                 /* Byte offset to start locking operation */
      int iLen;                   /* Length field for locking operation */
    } aMap[] = {
      /* 0000 */ {0, 0},                    /* 0001 */ {4+LOG_LOCK_REGION, 1}, 
      /* 0010 */ {3+LOG_LOCK_REGION, 1},    /* 0011 */ {3+LOG_LOCK_REGION, 2},
      /* 0100 */ {2+LOG_LOCK_REGION, 1},    /* 0101 */ {0, 0}, 
      /* 0110 */ {2+LOG_LOCK_REGION, 2},    /* 0111 */ {2+LOG_LOCK_REGION, 3},
      /* 1000 */ {1+LOG_LOCK_REGION, 1},    /* 1001 */ {0, 0}, 
      /* 1010 */ {0, 0},                    /* 1011 */ {0, 0},
      /* 1100 */ {1+LOG_LOCK_REGION, 2},    /* 1101 */ {0, 0}, 
      /* 1110 */ {0, 0},                    /* 1111 */ {0, 0}
    };
    int rc;                       /* Return code of logLockFd() */

    assert( mRegion<ArraySize(aMap) && aMap[mRegion].iStart!=0 );

    rc = logLockFd(pSummary, aMap[mRegion].iStart, aMap[mRegion].iLen, op);
    if( rc!=0 ){
      sqlite3_mutex_leave(pSummary->mutex);
      return rc;
    }
  }

  /* Only record the new mask once the file-lock operation has succeeded. */
  pLog->lock.mLock = mNew;
  sqlite3_mutex_leave(pSummary->mutex);
  return SQLITE_OK;
}

/*
** Take a lock on the single-byte dead-mans-hand region (byte offset
** LOG_LOCK_DMH) of the log-summary file. Parameter eLock must be either
** LOG_RDLOCK or LOG_WRLOCK; both are non-blocking requests.
**
** Returns the result of logLockFd().
*/
static int logLockDMH(LogSummary *pSummary, int eLock){
  int rc;                         /* Result of the locking operation */
  assert( eLock==LOG_WRLOCK || eLock==LOG_RDLOCK );
  rc = logLockFd(pSummary, LOG_LOCK_DMH, 1, eLock);
  return rc;
}

/*
** Lock or unlock the single-byte "mutex" region (byte offset
** LOG_LOCK_MUTEX) of the log-summary file. Parameter eLock must be
** either LOG_WRLOCKW (blocking write-lock) or LOG_UNLOCK.
**
** This function always returns SQLITE_OK; the result of the underlying
** logLockFd() call is discarded.
** NOTE(review): a failure of the blocking lock request would therefore
** go unreported to the caller - confirm that this is intentional.
*/
static int logLockMutex(LogSummary *pSummary, int eLock){
  assert( eLock==LOG_UNLOCK || eLock==LOG_WRLOCKW );
  (void)logLockFd(pSummary, LOG_LOCK_MUTEX, 1, eLock);
  return SQLITE_OK;
}



/*
** This function initializes the connection to the log-summary identified
** by struct pSummary.
**
** The summary file is named "<zPath>-summary". It is opened (and created
** if necessary) and mapped while holding the LOG_LOCK_MUTEX file lock.
** An attempt is then made to take an EXCLUSIVE lock on the dead-mans-hand
** region. If that succeeds, the existing contents of the log-summary
** cannot be trusted, so the header is zeroed. In either case the function
** finishes holding a SHARED lock on the dead-mans-hand region.
**
** The caller must hold pSummary->mutex. Parameter pFd is not referenced
** by this implementation.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the file name cannot be
** allocated, SQLITE_IOERR if the summary file cannot be opened or the
** SHARED dead-mans-hand lock cannot be obtained, or the error code
** returned by logSummaryMap(). The LOG_LOCK_MUTEX file lock is released
** on every path that acquired it.
*/
static int logSummaryInit(
  LogSummary *pSummary,           /* Log summary object to initialize */
  sqlite3_file *pFd               /* File descriptor open on log file */
){
  int rc;                         /* Return Code */
  char *zFile;                    /* File name for summary file */

  assert( pSummary->fd<0 );
  assert( pSummary->aData==0 );
  assert( pSummary->nRef>0 );
  assert( pSummary->zPath );

  /* Open a file descriptor on the summary file. */
  zFile = sqlite3_mprintf("%s-summary", pSummary->zPath);
  if( !zFile ){
    return SQLITE_NOMEM;
  }
  pSummary->fd = open(zFile, O_RDWR|O_CREAT, S_IWUSR|S_IRUSR);
  sqlite3_free(zFile);
  if( pSummary->fd<0 ){
    return SQLITE_IOERR;
  }

  /* Grab an exclusive lock on the summary file. Then mmap() it. 
  **
  ** TODO: This code needs to be enhanced to support a growable mapping. 
  ** For now, just make the mapping very large to start with. The 
  ** pages should not be allocated until they are first accessed anyhow,
  ** so using a large mapping consumes no more resources than a smaller
  ** one would.
  */
  assert( sqlite3_mutex_held(pSummary->mutex) );
  rc = logLockMutex(pSummary, LOG_WRLOCKW);
  if( rc!=SQLITE_OK ) return rc;  /* Lock not obtained; nothing to undo */
  rc = logSummaryMap(pSummary, 512*1024);
  if( rc!=SQLITE_OK ) goto out;

  /* Try to obtain an EXCLUSIVE lock on the dead-mans-hand region. If this
  ** is possible, the contents of the log-summary file (if any) may not
  ** be trusted. Zero the log-summary header before continuing.
  */
  rc = logLockDMH(pSummary, LOG_WRLOCK);
  if( rc==SQLITE_OK ){
    memset(pSummary->aData, 0, (LOGSUMMARY_HDR_NFIELD+2)*sizeof(u32) );
  }

  /* Whether or not the EXCLUSIVE lock was obtained, finish with a SHARED
  ** lock on the dead-mans-hand region. On failure, set the return code
  ** and fall through to the "out" label rather than returning directly,
  ** so that the LOG_LOCK_MUTEX lock taken above is always released.
  */
  rc = logLockDMH(pSummary, LOG_RDLOCK);
  if( rc!=SQLITE_OK ){
    rc = SQLITE_IOERR;
  }

 out:
  logLockMutex(pSummary, LOG_UNLOCK);
  return rc;
}

/* 
** Open a connection to the log file associated with database zDb. The
** database file does not actually have to exist. zDb is used only to
** figure out the name of the log file to open. If the log file does not 
** exist it is created by this call.
**
** A SHARED lock should be held on the database file when this function
** is called. The purpose of this SHARED lock is to prevent any other
** client from unlinking the log or log-summary file. If another process
** were to do this just after this client opened one of these files, the
** system would be badly broken.
*/
int sqlite3LogOpen(
  sqlite3_vfs *pVfs,              /* vfs module to open log file with */
  const char *zDb,                /* Name of database file */
  Log **ppLog                     /* OUT: Allocated Log handle */
){
  int rc = SQLITE_OK;             /* Return Code */
  Log *pRet;                      /* Object to allocate and return */
  LogSummary *pSummary = 0;       /* Summary object */
  sqlite3_mutex *mutex = 0;       /* LOG_SUMMARY_MUTEX mutex */
  int flags;                      /* Flags passed to OsOpen() */
  char *zWal = 0;                 /* Path to WAL file */
  int nWal;                       /* Length of zWal in bytes */


  assert( zDb );


  /* Allocate an instance of struct Log to return. */
  *ppLog = 0;
  pRet = (Log *)sqlite3MallocZero(sizeof(Log) + pVfs->szOsFile);
  if( !pRet ) goto out;
  pRet->pVfs = pVfs;
  pRet->pFd = (sqlite3_file *)&pRet[1];
  pRet->sync_flags = SQLITE_SYNC_NORMAL;

  /* Normalize the path name. */
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744

  /* Open file handle on the log file. */
  rc = sqlite3OsOpen(pVfs, pSummary->zPath, pRet->pFd, flags, &flags);
  if( rc!=SQLITE_OK ) goto out;

  /* Object pSummary is shared between all connections to the database made
  ** by this process. So at this point it may or may not be connected to
  ** the log-summary. If it is not, connect it. Otherwise, just take the
  ** SHARED lock on the log file.
  */
  sqlite3_mutex_enter(pSummary->mutex);
  mutex = pSummary->mutex;
  if( pSummary->fd<0 ){
    rc = logSummaryInit(pSummary, pRet->pFd);
  }else{
    rc = sqlite3OsLock(pRet->pFd, SQLITE_LOCK_SHARED);
  }

  pRet->lock.pNext = pSummary->pLock;
  pSummary->pLock = &pRet->lock;

 out:
  sqlite3_mutex_leave(mutex);







|
<





<
<







922
923
924
925
926
927
928
929

930
931
932
933
934


935
936
937
938
939
940
941

  /* Open file handle on the log file. */
  rc = sqlite3OsOpen(pVfs, pSummary->zPath, pRet->pFd, flags, &flags);
  if( rc!=SQLITE_OK ) goto out;

  /* Object pSummary is shared between all connections to the database made
  ** by this process. So at this point it may or may not be connected to
  ** the log-summary. If it is not, connect it.

  */
  sqlite3_mutex_enter(pSummary->mutex);
  mutex = pSummary->mutex;
  if( pSummary->fd<0 ){
    rc = logSummaryInit(pSummary, pRet->pFd);


  }

  pRet->lock.pNext = pSummary->pLock;
  pSummary->pLock = &pRet->lock;

 out:
  sqlite3_mutex_leave(mutex);
936
937
938
939
940
941
942

943


944

945

946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961


962
963
964
965
966
967
968
969

970
971
972
973
974
975
976
977


978
979
980
981
982
983
984
985
986
987
988
    ** reference to the log summary object in this process, the object will
    ** be freed. If this is also the last connection to the database, then
    ** checkpoint the database and truncate the log and log-summary files
    ** to zero bytes in size.
    **/
    pSummary->nRef--;
    if( pSummary->nRef==0 ){

      LogSummary **pp;




      rc = logSummaryLock(pSummary);

      if( rc==SQLITE_OK ){
        int isTruncate = 0;
        int rc2 = sqlite3OsLock(pLog->pFd, SQLITE_LOCK_EXCLUSIVE);
        if( rc2==SQLITE_OK ){
          /* This is the last connection to the database (including other
          ** processes). Do three things:
          **
          **   1. Checkpoint the db.
          **   2. Truncate the log file to zero bytes.
          **   3. Truncate the log-summary file to zero bytes.
          */
          rc2 = logCheckpoint(pLog, pFd, zBuf);
          if( rc2==SQLITE_OK ){
            rc2 = sqlite3OsTruncate(pLog->pFd, 0);
          }
          isTruncate = 1;


        }else if( rc2==SQLITE_BUSY ){
          rc2 = SQLITE_OK;
        }
        logSummaryUnmap(pSummary, isTruncate);
        sqlite3OsUnlock(pLog->pFd, SQLITE_LOCK_NONE);
        rc = logSummaryUnlock(pSummary);
        if( rc2!=SQLITE_OK ) rc = rc2;
      }


      /* Remove the LogSummary object from the global list. Then free the 
      ** mutex and the object itself.
      */
      for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext);
      *pp = (*pp)->pNext;
      sqlite3_mutex_free(pSummary->mutex);
      sqlite3_free(pSummary);


    }

    sqlite3_mutex_leave(mutex);

    /* Close the connection to the log file and free the Log handle. */
    sqlite3OsClose(pLog->pFd);
    sqlite3_free(pLog);
  }
  return rc;
}








>

>
>

>
|
>

|
<
<
|
|
|
|
|
|
|
|
|
|
|
|
>
>
|
|

|
<
<
<

>

<
<
<
<
<


>
>


<
<







1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149


1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167



1168
1169
1170





1171
1172
1173
1174
1175
1176


1177
1178
1179
1180
1181
1182
1183
    ** reference to the log summary object in this process, the object will
    ** be freed. If this is also the last connection to the database, then
    ** checkpoint the database, delete the log file and unlink the
    ** log-summary file.
    **/
    pSummary->nRef--;
    if( pSummary->nRef==0 ){
      int rc;
      LogSummary **pp;
      for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext);
      *pp = (*pp)->pNext;

      sqlite3_mutex_leave(mutex);

      rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE);
      if( rc==SQLITE_OK ){



        /* This is the last connection to the database (including other
        ** processes). Do three things:
        **
        **   1. Checkpoint the db.
        **   2. Delete the log file.
        **   3. Unlink the log-summary file.
        */
        rc = logCheckpoint(pLog, pFd, zBuf);
        if( rc==SQLITE_OK ){
          rc = sqlite3OsDelete(pLog->pVfs, pSummary->zPath, 0);
        }

        logSummaryUnmap(pSummary, 1);
      }else{
        if( rc==SQLITE_BUSY ){
          rc = SQLITE_OK;
        }
        logSummaryUnmap(pSummary, 0);



      }
      sqlite3OsUnlock(pFd, SQLITE_LOCK_NONE);






      sqlite3_mutex_free(pSummary->mutex);
      sqlite3_free(pSummary);
    }else{
      sqlite3_mutex_leave(mutex);
    }



    /* Close the connection to the log file and free the Log handle. */
    sqlite3OsClose(pLog->pFd);
    sqlite3_free(pLog);
  }
  return rc;
}

1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
** hosted in this address space.
*/
static int logEnterMutex(Log *pLog){
  LogSummary *pShared = pLog->pSummary;   /* Summary shared by this process */
  int rc;                                 /* Result of logSummaryLock() */

  /* Take the in-process mutex first, then the lock on the summary file. */
  sqlite3_mutex_enter(pShared->mutex);
  rc = logSummaryLock(pShared);
  if( rc==SQLITE_OK ){
    return SQLITE_OK;
  }

  /* The summary lock could not be obtained. Do not leave the in-process
  ** mutex held when reporting the error to the caller.
  */
  sqlite3_mutex_leave(pShared->mutex);
  return rc;
}
static void logLeaveMutex(Log *pLog){
  /* Release in the reverse order to that used by logEnterMutex(): first
  ** the lock on the summary file, then the in-process mutex.
  */
  logSummaryUnlock(pLog->pSummary);
  sqlite3_mutex_leave(pLog->pSummary->mutex);
}

/*
** Values for the third parameter to logLockRegion().
*/
#define LOG_UNLOCK 0
#define LOG_RDLOCK 1
#define LOG_WRLOCK 2

/*
** Lock, upgrade or unlock one or more regions of the log-summary file on
** behalf of connection pLog.
**
**   pLog     The log connection whose lock mask (pLog->lock.mLock) is
**            to be updated.
**   mRegion  Bitmask of LOG_REGION_* values identifying the region(s).
**            Only the combinations listed in the first assert() below
**            are permitted.
**   op       One of LOG_UNLOCK, LOG_RDLOCK or LOG_WRLOCK.
**
** Returns SQLITE_OK if the operation succeeded, in which case
** pLog->lock.mLock has been updated. Returns SQLITE_BUSY, leaving
** pLog->lock.mLock unmodified, if a lock held by another connection in
** this process conflicts with the request or if the fcntl() call fails.
**
** The whole operation runs with pSummary->mutex held; the mutex is
** released before returning on every path.
*/
static int logLockRegion(Log *pLog, u32 mRegion, int op){
  LogSummary *pSummary = pLog->pSummary;
  LogLock *p;                     /* Used to iterate through in-process locks */
  u32 mOther;                     /* Locks held by other connections */
  u32 mNew;                       /* New mask for pLog */

  assert( 
       /* Writer lock operations */
          (op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))

       /* Normal reader lock operations */
       || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B))

       /* Region D reader lock operations */
       || (op==LOG_RDLOCK && mRegion==(LOG_REGION_D))
       || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_D))

       /* Checkpointer lock operations */
       || (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
       || (op==LOG_WRLOCK && mRegion==(LOG_REGION_A))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C))
  );

  /* Assert that a connection never tries to go from an EXCLUSIVE to a 
  ** SHARED lock on a region. Moving from SHARED to EXCLUSIVE sometimes
  ** happens though (when a region D reader upgrades to a writer).
  */
  assert( op!=LOG_RDLOCK || 0==(pLog->lock.mLock & (mRegion<<8)) );

  sqlite3_mutex_enter(pSummary->mutex);

  /* Calculate a mask of locks held by all connections in this process apart
  ** from this one. The least significant byte of the mask contains a mask
  ** of the SHARED locks held. The next least significant byte of the mask
  ** indicates the EXCLUSIVE locks held. For example, to test if some other
  ** connection is holding a SHARED lock on region A, or an EXCLUSIVE lock
  ** on region C, do:
  **
  **   hasSharedOnA    = (mOther & (LOG_REGION_A<<0));
  **   hasExclusiveOnC = (mOther & (LOG_REGION_C<<8));
  **
  ** In all masks, if the bit in the EXCLUSIVE byte mask is set, so is the 
  ** corresponding bit in the SHARED mask.
  */
  mOther = 0;
  for(p=pSummary->pLock; p; p=p->pNext){
    /* Every EXCLUSIVE bit must have its matching SHARED bit set. */
    assert( (p->mLock & (p->mLock<<8))==(p->mLock&0x0000FF00) );
    if( p!=&pLog->lock ){
      mOther |= p->mLock;
    }
  }

  /* If this call is to lock a region (not to unlock one), test if locks held
  ** by any other connection in this process prevent the new locks from
  ** being granted. If so, exit the summary mutex and return SQLITE_BUSY.
  **
  ** A LOG_RDLOCK request conflicts only with EXCLUSIVE locks (the mask is
  ** shifted into the EXCLUSIVE byte); a LOG_WRLOCK request conflicts with
  ** any lock at all, since every EXCLUSIVE holder also has the SHARED bit.
  */
  if( op && (mOther & (mRegion << (op==LOG_RDLOCK ? 8 : 0))) ){
    sqlite3_mutex_leave(pSummary->mutex);
    return SQLITE_BUSY;
  }

  /* Figure out the new lock mask for this connection. */
  switch( op ){
    case LOG_UNLOCK: 
      mNew = (pLog->lock.mLock & ~(mRegion|(mRegion<<8)));
      break;
    case LOG_RDLOCK:
      mNew = (pLog->lock.mLock | mRegion);
      break;
    default:
      assert( op==LOG_WRLOCK );
      mNew = (pLog->lock.mLock | (mRegion<<8) | mRegion);
      break;
  }

  /* Now modify the locks held on the log-summary file descriptor. This
  ** file descriptor is shared by all log connections in this process. 
  ** Therefore:
  **
  **   + If one or more log connections in this process hold a SHARED lock
  **     on a region, the file-descriptor should hold a SHARED lock on
  **     the file region.
  **
  **   + If a log connection in this process holds an EXCLUSIVE lock on a
  **     region, the file-descriptor should also hold an EXCLUSIVE lock on
  **     the region in question.
  **
  ** If this is an LOG_UNLOCK operation, only regions for which no other
  ** connection holds a lock should actually be unlocked. And if this
  ** is a LOG_RDLOCK operation and other connections already hold all
  ** the required SHARED locks, then no system call is required.
  */
  if( op==LOG_UNLOCK ){
    mRegion = (mRegion & ~mOther);
  }
  if( (op==LOG_WRLOCK)
   || (op==LOG_UNLOCK && mRegion) 
   || (op==LOG_RDLOCK && (mOther&mRegion)!=mRegion)
  ){
    /* Table indexed by region mask (the binary value in the comment that
    ** precedes each entry). iStart is added to 32 to form the byte offset
    ** of the range to lock and iLen is its length in bytes. Entries set
    ** to {0, 0} are masks that are never expected here; the assert()
    ** below rejects them.
    */
    struct LockMap {
      int iStart;                 /* Byte offset to start locking operation */
      int iLen;                   /* Length field for locking operation */
    } aMap[] = {
      /* 0000 */ {0, 0},    /* 0001 */ {4, 1}, 
      /* 0010 */ {3, 1},    /* 0011 */ {3, 2},
      /* 0100 */ {2, 1},    /* 0101 */ {0, 0}, 
      /* 0110 */ {2, 2},    /* 0111 */ {2, 3},
      /* 1000 */ {1, 1},    /* 1001 */ {0, 0}, 
      /* 1010 */ {0, 0},    /* 1011 */ {0, 0},
      /* 1100 */ {1, 2},    /* 1101 */ {0, 0}, 
      /* 1110 */ {0, 0},    /* 1111 */ {0, 0}
    };
    int rc;                       /* Return code of fcntl() */
    struct flock f;               /* Locking operation */

    assert( mRegion<ArraySize(aMap) && aMap[mRegion].iStart!=0 );

    memset(&f, 0, sizeof(f));
    f.l_type = (op==LOG_WRLOCK?F_WRLCK:(op==LOG_RDLOCK?F_RDLCK:F_UNLCK));
    f.l_whence = SEEK_SET;
    f.l_start = 32 + aMap[mRegion].iStart;
    f.l_len = aMap[mRegion].iLen;

    /* F_SETLK is the non-blocking form: a conflicting lock held by another
    ** process makes the call fail immediately rather than wait.
    ** NOTE(review): every fcntl() failure, whatever errno says, is reported
    ** to the caller as SQLITE_BUSY - confirm that this is intended.
    */
    rc = fcntl(pSummary->fd, F_SETLK, &f);
    if( rc!=0 ){
      sqlite3_mutex_leave(pSummary->mutex);
      return SQLITE_BUSY;
    }
  }

  /* Success. Record the new mask only now, so that a failed request leaves
  ** the connection's recorded lock state untouched.
  */
  pLog->lock.mLock = mNew;
  sqlite3_mutex_leave(pSummary->mutex);
  return SQLITE_OK;
}

/*
** Try to read the log-summary header. Attempt to verify the header
** checksum. If the checksum can be verified, copy the log-summary
** header into structure pLog->hdr. If the contents of pLog->hdr are
** modified by this and pChanged is not NULL, set *pChanged to 1. 







|







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<







1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217















































































1218





























































1219







1220
1221
1222
1223
1224
1225
1226
** hosted in this address space.
*/
static int logEnterMutex(Log *pLog){
  LogSummary *p = pLog->pSummary;   /* Summary object shared by this log */
  int rc;                           /* Result of logLockMutex() */

  /* Acquire the in-process mutex, then request the LOG_WRLOCKW lock via
  ** logLockMutex(). If that request fails the mutex is released again, so
  ** that on any non-SQLITE_OK return the caller holds nothing.
  */
  sqlite3_mutex_enter(p->mutex);
  rc = logLockMutex(p, LOG_WRLOCKW);
  if( rc==SQLITE_OK ){
    return rc;
  }
  sqlite3_mutex_leave(p->mutex);
  return rc;
}
static void logLeaveMutex(Log *pLog){
  LogSummary *p = pLog->pSummary;   /* Summary object shared by this log */

  /* Undo logEnterMutex() in reverse order: drop the lock taken through
  ** logLockMutex() first, then leave the in-process mutex. The result of
  ** the unlock request is not examined.
  */
  logLockMutex(p, LOG_UNLOCK);
  sqlite3_mutex_leave(p->mutex);
}

/*
** Try to read the log-summary header. Attempt to verify the header
** checksum. If the checksum can be verified, copy the log-summary
** header into structure pLog->hdr. If the contents of pLog->hdr are
** modified by this and pChanged is not NULL, set *pChanged to 1. 
Changes to src/pager.c.
486
487
488
489
490
491
492

493
494
495
496
497
498
499

  /* The changeCountDone flag is always set for temp-files */
  assert( pPager->tempFile==0 || pPager->changeCountDone );

  return 1;
}
#endif


/*
** Return true if it is necessary to write page *pPg into the sub-journal.
** A page needs to be written into the sub-journal if there exists one
** or more open savepoints for which:
**
**   * The page-number is less than or equal to PagerSavepoint.nOrig, and







>







486
487
488
489
490
491
492
493
494
495
496
497
498
499
500

  /* The changeCountDone flag is always set for temp-files */
  assert( pPager->tempFile==0 || pPager->changeCountDone );

  return 1;
}
#endif


/*
** Return true if it is necessary to write page *pPg into the sub-journal.
** A page needs to be written into the sub-journal if there exists one
** or more open savepoints for which:
**
**   * The page-number is less than or equal to PagerSavepoint.nOrig, and
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
      testcase( rc==SQLITE_NOMEM );
      assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
    }
  }
  return rc;
}

/*
** Make sure pager pPager has an open connection to its write-ahead log
** file. If pPager->pLog is already set this is a no-op that returns
** SQLITE_OK; otherwise the value returned by sqlite3LogOpen() is passed
** straight back to the caller.
*/
static int pagerOpenLog(Pager *pPager){
  if( pPager->pLog ){
    return SQLITE_OK;
  }
  return sqlite3LogOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog);
}

/*
** Return 1 if pager pPager has an open log connection (pPager->pLog is
** not NULL), meaning it uses a write-ahead log instead of the usual
** rollback journal. Return 0 otherwise.
*/
static int pagerUseLog(Pager *pPager){
  if( pPager->pLog ){
    return 1;
  }
  return 0;
}







<
<
<
<
<
<
<
<
<
<
<
<
<







1184
1185
1186
1187
1188
1189
1190













1191
1192
1193
1194
1195
1196
1197
      testcase( rc==SQLITE_NOMEM );
      assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
    }
  }
  return rc;
}














/*
** Return 1 if pager pPager has an open log connection (pPager->pLog is
** not NULL), meaning it uses a write-ahead log instead of the usual
** rollback journal. Return 0 otherwise.
*/
static int pagerUseLog(Pager *pPager){
  if( pPager->pLog ){
    return 1;
  }
  return 0;
}
1237
1238
1239
1240
1241
1242
1243
1244
1245

1246
1247
1248
1249
1250
1251
1252
    ** until the change-counter check fails in PagerSharedLock().
    ** Clearing the page size cache here is being conservative.
    */
    pPager->dbSizeValid = 0;

    if( pagerUseLog(pPager) ){
      sqlite3LogCloseSnapshot(pPager->pLog);
    }
    rc = osUnlock(pPager->fd, NO_LOCK);

    if( rc ){
      pPager->errCode = rc;
    }
    IOTRACE(("UNLOCK %p\n", pPager))

    /* If Pager.errCode is set, the contents of the pager cache cannot be
    ** trusted. Now that the pager file is unlocked, the contents of the







|
|
>







1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
    ** until the change-counter check fails in PagerSharedLock().
    ** Clearing the page size cache here is being conservative.
    */
    pPager->dbSizeValid = 0;

    if( pagerUseLog(pPager) ){
      sqlite3LogCloseSnapshot(pPager->pLog);
    }else{
      rc = osUnlock(pPager->fd, NO_LOCK);
    }
    if( rc ){
      pPager->errCode = rc;
    }
    IOTRACE(("UNLOCK %p\n", pPager))

    /* If Pager.errCode is set, the contents of the pager cache cannot be
    ** trusted. Now that the pager file is unlocked, the contents of the
3728
3729
3730
3731
3732
3733
3734
















































3735
3736
3737
3738
3739
3740
3741
        }
      }
    }
  }

  return rc;
}

















































/*
** This function is called to obtain a shared lock on the database file.
** It is illegal to call sqlite3PagerAcquire() until after this function
** has been successfully called. If a shared-lock is already held when
** this function is called, it is a no-op.
**







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
        }
      }
    }
  }

  return rc;
}

/*
** Open a connection to the write-ahead log file for pager pPager. If
** the log connection is already open, this function is a no-op (apart
** from a sanity check of the database file lock in SQLITE_DEBUG builds).
**
** Returns SQLITE_OK if the log connection is open when the function
** returns. If the SHARED lock on the database file cannot be obtained,
** the value of pager_error() for the failure is returned. If
** sqlite3LogOpen() fails, the database file is unlocked again, the pager
** is put back into the PAGER_UNLOCK state and the error code from
** sqlite3LogOpen() is returned.
*/
static int pagerOpenLog(Pager *pPager){
  if( !pPager->pLog ){
    int rc;                       /* Return code */

    /* Before opening the log file, obtain a SHARED lock on the database
    ** file. This lock will not be released until after the log file
    ** connection has been closed. The purpose of this lock is to stop
    ** any other process from unlinking the log or log-summary files while
    ** this connection still has them open. An EXCLUSIVE lock on the
    ** database file is required to unlink either of those two files.
    */
    assert( pPager->state==PAGER_UNLOCK );
    rc = pager_wait_on_lock(pPager, SHARED_LOCK);
    if( rc!=SQLITE_OK ){
      assert( pPager->state==PAGER_UNLOCK );
      return pager_error(pPager, rc);
    }
    assert( pPager->state>=SHARED_LOCK );

    /* Open the connection to the log file. If this operation fails, 
    ** (e.g. due to malloc() failure), unlock the database file and 
    ** return an error code.
    */
    rc = sqlite3LogOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog);
    if( rc!=SQLITE_OK ){
      osUnlock(pPager->fd, SQLITE_LOCK_NONE);
      pPager->state = PAGER_UNLOCK;
      return rc;
    }
  }else{
    /* If the log file was already open, check that the pager is still holding
    ** the required SHARED lock on the database file. 
    **
    ** The lock state is only examined if the file-control call succeeds.
    ** A VFS that does not service SQLITE_FCNTL_LOCKSTATE may leave the
    ** output variable unwritten, so it is initialized here and the
    ** assert() is skipped in that case rather than reading garbage.
    */
#ifdef SQLITE_DEBUG
    int locktype = 0;
    int rcCtl = sqlite3OsFileControl(
        pPager->fd, SQLITE_FCNTL_LOCKSTATE, &locktype
    );
    assert( rcCtl!=SQLITE_OK || locktype==SQLITE_LOCK_SHARED );
#endif
  }

  return SQLITE_OK;
}


/*
** This function is called to obtain a shared lock on the database file.
** It is illegal to call sqlite3PagerAcquire() until after this function
** has been successfully called. If a shared-lock is already held when
** this function is called, it is a no-op.
**
3782
3783
3784
3785
3786
3787
3788
3789

3790
3791







3792
3793

3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
    if( isOpen(pPager->jfd) || pPager->zJournal ){
      isErrorReset = 1;
    }
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }

  if( pagerUseLog(pPager) ){

    int changed = 0;








    rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed);
    if( rc==SQLITE_OK ){

      if( changed ){
        pager_reset(pPager);
        assert( pPager->errCode || pPager->dbSizeValid==0 );
      }
      pPager->state = PAGER_SHARED;         /* TODO: Is this right? */
      rc = sqlite3PagerPagecount(pPager, &changed);
    }
  }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs * const pVfs = pPager->pVfs;
    int isHotJournal = 0;
    assert( !MEMDB );
    assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
    if( pPager->noReadlock ){







|
>
|

>
>
>
>
>
>
>


>




<
|







3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843

3844
3845
3846
3847
3848
3849
3850
3851
    if( isOpen(pPager->jfd) || pPager->zJournal ){
      isErrorReset = 1;
    }
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }


  if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
    int changed = 0;              /* True if the cache must be flushed */

    /* Open the log file, if it is not already open. */
    rc = pagerOpenLog(pPager);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* Open a log snapshot to read from. */
    rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed);
    if( rc==SQLITE_OK ){
      int dummy;
      if( changed ){
        pager_reset(pPager);
        assert( pPager->errCode || pPager->dbSizeValid==0 );
      }

      rc = sqlite3PagerPagecount(pPager, &dummy);
    }
  }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs * const pVfs = pPager->pVfs;
    int isHotJournal = 0;
    assert( !MEMDB );
    assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
    if( pPager->noReadlock ){
5607
5608
5609
5610
5611
5612
5613



5614
5615
5616
5617
5618
5619
5620
5621
5622
5623
5624
5625
5626
5627
5628
5629
    assert( (PAGER_JOURNALMODE_MEMORY & 1)==0 );
    assert( (PAGER_JOURNALMODE_OFF & 1)==0 );
    if( (pPager->journalMode & 1)==1 && (eMode & 1)==0
         && !pPager->exclusiveMode ){
      sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
    }




    if( eMode==PAGER_JOURNALMODE_WAL ){
      int rc = pagerOpenLog(pPager);
      if( rc!=SQLITE_OK ){
        /* TODO: The error code should not just get dropped here. Change 
        ** this to set a flag to force the log to be opened the first time
        ** it is actually required.  */
        return (int)pPager->journalMode;
      }
    }
    pPager->journalMode = (u8)eMode;
  }
  return (int)pPager->journalMode;
}

/*
** Get/set the size-limit used for persistent journal files.







>
>
>
|
<
<
<
<
<
|
|
|







5652
5653
5654
5655
5656
5657
5658
5659
5660
5661
5662





5663
5664
5665
5666
5667
5668
5669
5670
5671
5672
    assert( (PAGER_JOURNALMODE_MEMORY & 1)==0 );
    assert( (PAGER_JOURNALMODE_OFF & 1)==0 );
    if( (pPager->journalMode & 1)==1 && (eMode & 1)==0
         && !pPager->exclusiveMode ){
      sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
    }

    /* Switching into WAL mode can only take place when no 
    ** locks are held on the database file. 
    */
    if( eMode==PAGER_JOURNALMODE_WAL && pPager->state!=PAGER_UNLOCK ){





      return (int)pPager->journalMode;
    }

    pPager->journalMode = (u8)eMode;
  }
  return (int)pPager->journalMode;
}

/*
** Get/set the size-limit used for persistent journal files.
Changes to test/wal.test.
28
29
30
31
32
33
34




35
36
37
38
39
40
41
  [lindex $args 0] eval { PRAGMA journal_mode = wal }
}

# Return the expected size in bytes of a log file that holds $nFrame
# frames of $pgsz-byte pages: a 12 byte file header plus, for each frame,
# a 16 byte frame header followed by the page data.
proc log_file_size {nFrame pgsz} {
  set hdrBytes 12
  set frameBytes [expr {$pgsz + 16}]
  return [expr {$hdrBytes + $frameBytes * $nFrame}]
}





#
# These are 'warm-body' tests used while developing the WAL code. They
# serve to prove that a few really simple cases work:
#
# wal-1.*: Read and write the database.
# wal-2.*: Test MVCC with one reader, one writer.
# wal-3.*: Test transaction rollback.







>
>
>
>







28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
  [lindex $args 0] eval { PRAGMA journal_mode = wal }
}

# Return the expected size in bytes of a log file that holds $nFrame
# frames of $pgsz-byte pages: a 12 byte file header plus, for each frame,
# a 16 byte frame header followed by the page data.
proc log_file_size {nFrame pgsz} {
  set hdrBytes 12
  set frameBytes [expr {$pgsz + 16}]
  return [expr {$hdrBytes + $frameBytes * $nFrame}]
}

# Return 1 if file $logfile does not exist, or 0 if it does.
proc log_deleted {logfile} {
  if {[file exists $logfile]} {
    return 0
  }
  return 1
}

#
# These are 'warm-body' tests used while developing the WAL code. They
# serve to prove that a few really simple cases work:
#
# wal-1.*: Read and write the database.
# wal-2.*: Test MVCC with one reader, one writer.
# wal-3.*: Test transaction rollback.
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
  execsql {
    COMMIT;
    SELECT * FROM t2;
  }
} {1 2 3 4}
db close


foreach sector {512 4096} {
  sqlite3_simulate_device -sectorsize $sector
  foreach pgsz {512 1024 2048 4096} {
    file delete -force test.db test.db-wal
    do_test wal-6.$sector.$pgsz.1 {
      sqlite3_wal db test.db -vfs devsym
      execsql "
        PRAGMA page_size = $pgsz ;
      "
      execsql "
        CREATE TABLE t1(a, b);
        INSERT INTO t1 VALUES(1, 2);
      "
      db close
      file size test.db
    } [expr $pgsz*2]
  
    do_test wal-6.$sector.$pgsz.2 {
      file size test.db-wal
    } {0}
  }
}

do_test wal-7.1 {
  file delete -force test.db test.db-wal
  sqlite3_wal db test.db
  execsql {







<


















|
|







179
180
181
182
183
184
185

186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
  execsql {
    COMMIT;
    SELECT * FROM t2;
  }
} {1 2 3 4}
db close


foreach sector {512 4096} {
  sqlite3_simulate_device -sectorsize $sector
  foreach pgsz {512 1024 2048 4096} {
    file delete -force test.db test.db-wal
    do_test wal-6.$sector.$pgsz.1 {
      sqlite3_wal db test.db -vfs devsym
      execsql "
        PRAGMA page_size = $pgsz ;
      "
      execsql "
        CREATE TABLE t1(a, b);
        INSERT INTO t1 VALUES(1, 2);
      "
      db close
      file size test.db
    } [expr $pgsz*2]
  
    do_test wal-6.$sector.$pgsz.2 {
      log_deleted test.db-wal
    } {1}
  }
}

do_test wal-7.1 {
  file delete -force test.db test.db-wal
  sqlite3_wal db test.db
  execsql {
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
do_test wal-11.8 {
  execsql { PRAGMA checkpoint }
  list [expr [file size test.db]/1024] [file size test.db-wal]
} [list 37 [log_file_size 42 1024]]
do_test wal-11.9 {
  db close
  sqlite3_wal db test.db
  list [expr [file size test.db]/1024] [file size test.db-wal]
} {37 0}

do_test wal-11.10 {
  execsql {
    PRAGMA cache_size = 10;
    BEGIN;
      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 32
      SELECT count(*) FROM t1;







|
|







590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
do_test wal-11.8 {
  execsql { PRAGMA checkpoint }
  list [expr [file size test.db]/1024] [file size test.db-wal]
} [list 37 [log_file_size 42 1024]]
do_test wal-11.9 {
  db close
  sqlite3_wal db test.db
  list [expr [file size test.db]/1024] [log_deleted test.db-wal]
} {37 1}

do_test wal-11.10 {
  execsql {
    PRAGMA cache_size = 10;
    BEGIN;
      INSERT INTO t1 SELECT randomblob(900) FROM t1;   -- 32
      SELECT count(*) FROM t1;
Changes to test/walthread.test.
17
18
19
20
21
22
23

24

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

source $testdir/tester.tcl
if {[run_thread_tests]==0} { finish_test ; return }

do_test walthread-1.1 {
  execsql {
    PRAGMA journal_mode = WAL;

    CREATE TABLE t1(x PRIMARY KEY);

    INSERT INTO t1 VALUES(randomblob(100));
    INSERT INTO t1 VALUES(randomblob(100));
    INSERT INTO t1 SELECT md5sum(x) FROM t1;
  }
} {wal}
do_test walthread-1.2 {
  execsql {
    SELECT (SELECT count(*) FROM t1), (
      SELECT md5sum(x) FROM t1 WHERE oid != (SELECT max(oid) FROM t1)
    ) == (
      SELECT x FROM t1 WHERE oid = (SELECT max(oid) FROM t1)
    )
  }
} {3 1}
do_test walthread-1.3 {
  execsql { PRAGMA integrity_check } 
} {ok}
do_test walthread-1.4 {
  execsql { PRAGMA lock_status } 
} {main unlocked temp unknown}

#--------------------------------------------------------------------------
# Start N threads. Each thread performs both read and write transactions.
# Each read transaction consists of:
#
#   1) Reading the md5sum of all but the last table row,
#   2) Running integrity check.







>

>




|














|







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

source $testdir/tester.tcl
if {[run_thread_tests]==0} { finish_test ; return }

do_test walthread-1.1 {
  execsql {
    PRAGMA journal_mode = WAL;
    PRAGMA lock_status;
    CREATE TABLE t1(x PRIMARY KEY);
    PRAGMA lock_status;
    INSERT INTO t1 VALUES(randomblob(100));
    INSERT INTO t1 VALUES(randomblob(100));
    INSERT INTO t1 SELECT md5sum(x) FROM t1;
  }
} {wal main unlocked temp closed main shared temp closed}
do_test walthread-1.2 {
  execsql {
    SELECT (SELECT count(*) FROM t1), (
      SELECT md5sum(x) FROM t1 WHERE oid != (SELECT max(oid) FROM t1)
    ) == (
      SELECT x FROM t1 WHERE oid = (SELECT max(oid) FROM t1)
    )
  }
} {3 1}
do_test walthread-1.3 {
  execsql { PRAGMA integrity_check } 
} {ok}
do_test walthread-1.4 {
  execsql { PRAGMA lock_status } 
} {main shared temp unknown}

#--------------------------------------------------------------------------
# Start N threads. Each thread performs both read and write transactions.
# Each read transaction consists of:
#
#   1) Reading the md5sum of all but the last table row,
#   2) Running integrity check.