/ Check-in [d57e5252]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Untested updates to support atomic multi-file transactions (CVS 1526)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:d57e5252c8baaf615c2cd218a33356ea5d95a5e2
User & Date: danielk1977 2004-06-03 16:08:41
Context
2004-06-04
06:22
Move the 'busy-callback' logic to the pager layer. (CVS 1527) check-in: ff70b6d2 user: danielk1977 tags: trunk
2004-06-03
16:08
Untested updates to support atomic multi-file transactions (CVS 1526) check-in: d57e5252 user: danielk1977 tags: trunk
2004-06-02
06:30
Fix a segfault in sqlite3OsLock() (CVS 1525) check-in: 51348b82 user: danielk1977 tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/btree.c.

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
....
1196
1197
1198
1199
1200
1201
1202





1203
1204
1205
1206
1207
1208
1209
1210
1211
....
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
....
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
....
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
....
4216
4217
4218
4219
4220
4221
4222























** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.155 2004/06/02 01:22:02 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
................................................................................
**      sqlite3BtreeCreateTable()
**      sqlite3BtreeCreateIndex()
**      sqlite3BtreeClearTable()
**      sqlite3BtreeDropTable()
**      sqlite3BtreeInsert()
**      sqlite3BtreeDelete()
**      sqlite3BtreeUpdateMeta()





*/
int sqlite3BtreeBeginTrans(Btree *pBt, int wrflag){
  int rc = SQLITE_OK;

  /* If the btree is already in a write-transaction, or it
  ** is already in a read-transaction and a read-transaction
  ** is requested, this is a no-op.
  */
  if( pBt->inTrans==TRANS_WRITE || 
................................................................................
  }

  if( pBt->pPage1==0 ){
    rc = lockBtree(pBt);
  }

  if( rc==SQLITE_OK && wrflag ){
    rc = sqlite3pager_begin(pBt->pPage1->aData);
    if( rc==SQLITE_OK ){
      rc = newDatabase(pBt);
    }
  }

  if( rc==SQLITE_OK ){
    pBt->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
................................................................................
    Pgno chldPg;
    MemPage *pPage = pCur->pPage;
    int c = -1;  /* pRes return if table is empty must be -1 */
    lwr = 0;
    upr = pPage->nCell-1;
    pageIntegrity(pPage);
    while( lwr<=upr ){
      const void *pCellKey;
      i64 nCellKey;
      pCur->idx = (lwr+upr)/2;
      pCur->info.nSize = 0;
      sqlite3BtreeKeySize(pCur, &nCellKey);
      if( pPage->intKey ){
        if( nCellKey<nKey ){
          c = -1;
................................................................................
        }else if( nCellKey>nKey ){
          c = +1;
        }else{
          c = 0;
        }
      }else{
        int available;
        pCellKey = fetchPayload(pCur, &available, 0);
        if( available>=nCellKey ){
          c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
        }else{
          pCellKey = sqliteMallocRaw( nCellKey );
          if( pCellKey==0 ) return SQLITE_NOMEM;
          rc = sqlite3BtreeKey(pCur, 0, nCellKey, pCellKey);
          c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
          sqliteFree(pCellKey);
          if( rc ) return rc;
        }
      }
      if( c==0 ){
        if( pPage->leafData && !pPage->leaf ){
................................................................................

/*
** Return non-zero if a statement transaction is active.
*/
int sqlite3BtreeIsInStmt(Btree *pBt){
  return (pBt && pBt->inStmt);
}






























|







 







>
>
>
>
>

|







 







|







 







|







 







|





|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
....
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
....
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
....
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
....
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
....
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.156 2004/06/03 16:08:41 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
................................................................................
**      sqlite3BtreeCreateTable()
**      sqlite3BtreeCreateIndex()
**      sqlite3BtreeClearTable()
**      sqlite3BtreeDropTable()
**      sqlite3BtreeInsert()
**      sqlite3BtreeDelete()
**      sqlite3BtreeUpdateMeta()
**
** If wrflag is true, then nMaster specifies the maximum length of
** a master journal file name supplied later via sqlite3BtreeSync().
** This is so that appropriate space can be allocated in the journal file
** when it is created..
*/
int sqlite3BtreeBeginTrans(Btree *pBt, int wrflag, int nMaster){
  int rc = SQLITE_OK;

  /* If the btree is already in a write-transaction, or it
  ** is already in a read-transaction and a read-transaction
  ** is requested, this is a no-op.
  */
  if( pBt->inTrans==TRANS_WRITE || 
................................................................................
  }

  if( pBt->pPage1==0 ){
    rc = lockBtree(pBt);
  }

  if( rc==SQLITE_OK && wrflag ){
    rc = sqlite3pager_begin(pBt->pPage1->aData, 0);
    if( rc==SQLITE_OK ){
      rc = newDatabase(pBt);
    }
  }

  if( rc==SQLITE_OK ){
    pBt->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
................................................................................
    Pgno chldPg;
    MemPage *pPage = pCur->pPage;
    int c = -1;  /* pRes return if table is empty must be -1 */
    lwr = 0;
    upr = pPage->nCell-1;
    pageIntegrity(pPage);
    while( lwr<=upr ){
      void *pCellKey;
      i64 nCellKey;
      pCur->idx = (lwr+upr)/2;
      pCur->info.nSize = 0;
      sqlite3BtreeKeySize(pCur, &nCellKey);
      if( pPage->intKey ){
        if( nCellKey<nKey ){
          c = -1;
................................................................................
        }else if( nCellKey>nKey ){
          c = +1;
        }else{
          c = 0;
        }
      }else{
        int available;
        pCellKey = (void *)fetchPayload(pCur, &available, 0);
        if( available>=nCellKey ){
          c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
        }else{
          pCellKey = sqliteMallocRaw( nCellKey );
          if( pCellKey==0 ) return SQLITE_NOMEM;
          rc = sqlite3BtreeKey(pCur, 0, nCellKey, (void *)pCellKey);
          c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
          sqliteFree(pCellKey);
          if( rc ) return rc;
        }
      }
      if( c==0 ){
        if( pPage->leafData && !pPage->leaf ){
................................................................................

/*
** Return non-zero if a statement transaction is active.
*/
int sqlite3BtreeIsInStmt(Btree *pBt){
  return (pBt && pBt->inStmt);
}

/*
** This call is a no-op if no write-transaction is currently active on pBt.
**
** Otherwise, sync the database file for the btree pBt. zMaster points to
** the name of a master journal file that should be written into the
** individual journal file, or is NULL, indicating no master journal file 
** (single database transaction).
**
** When this is called, the master journal should already have been
** created, populated with this journal pointer and synced to disk.
**
** Once this is routine has returned, the only thing required to commit
** the write-transaction for this database file is to delete the journal.
*/
int sqlite3BtreeSync(Btree *pBt, const char *zMaster){
  if( pBt->inTrans==TRANS_WRITE ){
    return sqlite3pager_sync(pBt->pPager, zMaster);
  }
  return SQLITE_OK;
}


Changes to src/btree.h.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

53
54
55
56
57
58
59
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite B-Tree file
** subsystem.  See comments in the source code for a detailed description
** of what each interface routine does.
**
** @(#) $Id: btree.h,v 1.51 2004/05/31 10:01:35 danielk1977 Exp $
*/
#ifndef _BTREE_H_
#define _BTREE_H_

/* TODO: This definition is just included so other modules compile. It
** needs to be revisited.
*/
................................................................................
*/
#define BTREE_OMIT_JOURNAL  1  /* Do not use journal.  No argument */
#define BTREE_MEMORY        2  /* In-memory DB.  No argument */

int sqlite3BtreeClose(Btree*);
int sqlite3BtreeSetCacheSize(Btree*,int);
int sqlite3BtreeSetSafetyLevel(Btree*,int);
int sqlite3BtreeBeginTrans(Btree*,int);
int sqlite3BtreeCommit(Btree*);
int sqlite3BtreeRollback(Btree*);
int sqlite3BtreeBeginStmt(Btree*);
int sqlite3BtreeCommitStmt(Btree*);
int sqlite3BtreeRollbackStmt(Btree*);
int sqlite3BtreeCreateTable(Btree*, int*, int flags);
int sqlite3BtreeIsInTrans(Btree*);
int sqlite3BtreeIsInStmt(Btree*);


const char *sqlite3BtreeGetFilename(Btree *);
int sqlite3BtreeCopyFile(Btree *, Btree *);

/* The flags parameter to sqlite3BtreeCreateTable can be the bitwise OR
** of the following flags:
*/







|







 







|








>







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite B-Tree file
** subsystem.  See comments in the source code for a detailed description
** of what each interface routine does.
**
** @(#) $Id: btree.h,v 1.52 2004/06/03 16:08:41 danielk1977 Exp $
*/
#ifndef _BTREE_H_
#define _BTREE_H_

/* TODO: This definition is just included so other modules compile. It
** needs to be revisited.
*/
................................................................................
*/
#define BTREE_OMIT_JOURNAL  1  /* Do not use journal.  No argument */
#define BTREE_MEMORY        2  /* In-memory DB.  No argument */

int sqlite3BtreeClose(Btree*);
int sqlite3BtreeSetCacheSize(Btree*,int);
int sqlite3BtreeSetSafetyLevel(Btree*,int);
int sqlite3BtreeBeginTrans(Btree*,int,int);
int sqlite3BtreeCommit(Btree*);
int sqlite3BtreeRollback(Btree*);
int sqlite3BtreeBeginStmt(Btree*);
int sqlite3BtreeCommitStmt(Btree*);
int sqlite3BtreeRollbackStmt(Btree*);
int sqlite3BtreeCreateTable(Btree*, int*, int flags);
int sqlite3BtreeIsInTrans(Btree*);
int sqlite3BtreeIsInStmt(Btree*);
int sqlite3BtreeSync(Btree*, const char *zMaster);

const char *sqlite3BtreeGetFilename(Btree *);
int sqlite3BtreeCopyFile(Btree *, Btree *);

/* The flags parameter to sqlite3BtreeCreateTable can be the bitwise OR
** of the following flags:
*/

Changes to src/build.c.

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
...
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
**     DROP INDEX
**     creating ID lists
**     BEGIN TRANSACTION
**     COMMIT
**     ROLLBACK
**     PRAGMA
**
** $Id: build.c,v 1.205 2004/05/31 18:51:58 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** This routine is called when a new SQL statement is beginning to
** be parsed.  Check to see if the schema for the database needs
................................................................................
    if( rc!=SQLITE_OK ){
      sqlite3ErrorMsg(pParse, "unable to open a temporary database "
        "file for storing temporary tables");
      pParse->nErr++;
      return;
    }
    if( db->flags & !db->autoCommit ){
      rc = sqlite3BtreeBeginTrans(db->aDb[1].pBt, 1);
      if( rc!=SQLITE_OK ){
        sqlite3ErrorMsg(pParse, "unable to get a write lock on "
          "the temporary database file");
        return;
      }
    }
  }







|







 







|







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
...
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
**     DROP INDEX
**     creating ID lists
**     BEGIN TRANSACTION
**     COMMIT
**     ROLLBACK
**     PRAGMA
**
** $Id: build.c,v 1.206 2004/06/03 16:08:41 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** This routine is called when a new SQL statement is beginning to
** be parsed.  Check to see if the schema for the database needs
................................................................................
    if( rc!=SQLITE_OK ){
      sqlite3ErrorMsg(pParse, "unable to open a temporary database "
        "file for storing temporary tables");
      pParse->nErr++;
      return;
    }
    if( db->flags & !db->autoCommit ){
      rc = sqlite3BtreeBeginTrans(db->aDb[1].pBt, 1, 0);
      if( rc!=SQLITE_OK ){
        sqlite3ErrorMsg(pParse, "unable to get a write lock on "
          "the temporary database file");
        return;
      }
    }
  }

Changes to src/delete.c.

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
...
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains C code routines that are called by the parser
** to handle DELETE FROM statements.
**
** $Id: delete.c,v 1.71 2004/05/28 16:00:22 drh Exp $
*/
#include "sqliteInt.h"

/*
** Look up every table that is named in pSrc.  If any table is not found,
** add an error message to pParse->zErrMsg and return NULL.  If all tables
** are found, return a pointer to the last table.
................................................................................
  int iCur,          /* Cursor number for the table */
  char *aIdxUsed     /* Only delete if aIdxUsed!=0 && aIdxUsed[i]!=0 */
){
  int i;
  Index *pIdx;

  for(i=1, pIdx=pTab->pIndex; pIdx; i++, pIdx=pIdx->pNext){
    int j;
    if( aIdxUsed!=0 && aIdxUsed[i-1]==0 ) continue;
    sqlite3GenerateIndexKey(v, pIdx, iCur);
    sqlite3VdbeAddOp(v, OP_IdxDelete, iCur+i, 0);
  }
}

/*







|







 







<







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
...
373
374
375
376
377
378
379

380
381
382
383
384
385
386
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains C code routines that are called by the parser
** to handle DELETE FROM statements.
**
** $Id: delete.c,v 1.72 2004/06/03 16:08:41 danielk1977 Exp $
*/
#include "sqliteInt.h"

/*
** Look up every table that is named in pSrc.  If any table is not found,
** add an error message to pParse->zErrMsg and return NULL.  If all tables
** are found, return a pointer to the last table.
................................................................................
  int iCur,          /* Cursor number for the table */
  char *aIdxUsed     /* Only delete if aIdxUsed!=0 && aIdxUsed[i]!=0 */
){
  int i;
  Index *pIdx;

  for(i=1, pIdx=pTab->pIndex; pIdx; i++, pIdx=pIdx->pNext){

    if( aIdxUsed!=0 && aIdxUsed[i-1]==0 ) continue;
    sqlite3GenerateIndexKey(v, pIdx, iCur);
    sqlite3VdbeAddOp(v, OP_IdxDelete, iCur+i, 0);
  }
}

/*

Changes to src/os.h.

110
111
112
113
114
115
116

117
118
int sqlite3OsRandomSeed(char*);
int sqlite3OsSleep(int ms);
int sqlite3OsCurrentTime(double*);
void sqlite3OsEnterMutex(void);
void sqlite3OsLeaveMutex(void);
char *sqlite3OsFullPathname(const char*);
int sqlite3OsLock(OsFile*, int);


#endif /* _SQLITE_OS_H_ */







>


110
111
112
113
114
115
116
117
118
119
int sqlite3OsRandomSeed(char*);
int sqlite3OsSleep(int ms);
int sqlite3OsCurrentTime(double*);
void sqlite3OsEnterMutex(void);
void sqlite3OsLeaveMutex(void);
char *sqlite3OsFullPathname(const char*);
int sqlite3OsLock(OsFile*, int);
int sqlite3OsCheckWriteLock(OsFile *id);

#endif /* _SQLITE_OS_H_ */

Changes to src/os_unix.c.

57
58
59
60
61
62
63




64
65
66
67
68
69
70
...
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
...
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
...
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
...
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
...
656
657
658
659
660
661
662



































663
664
665
666
667
668
669
...
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
...
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
...
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
...
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
...
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
...
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850


/*
** Include code that is common to all os_*.c files
*/
#include "os_common.h"





/*
** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 specify that when a process
** sets or clears a lock, that operation overrides any prior locks set
** by the same process.  It does not explicitly say so, but this implies
** that it overrides locks set by the same process using a different
** file descriptor.  Consider this test case:
................................................................................
**
** A single inode can have multiple file descriptors, so each OsFile
** structure contains a pointer to an instance of this object and this
** object keeps a count of the number of OsFiles pointing to it.
*/
struct lockInfo {
  struct lockKey key;  /* The lookup key */
  int cnt;             /* 0: unlocked.  -1: write lock.  1...: read lock. */
  int locktype;        /* One of SHARED_LOCK, RESERVED_LOCK etc. */
  int nRef;            /* Number of pointers to this structure */
};

/*
** An instance of the following structure serves as the key used
** to locate a particular openCnt structure given its inode.  This
................................................................................
  sqlite3OsEnterMutex();
  rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
  sqlite3OsLeaveMutex();
  if( rc ){
    close(id->fd);
    return SQLITE_NOMEM;
  }
  id->locked = 0;
  TRACE3("OPEN    %-3d %s\n", id->fd, zFilename);
  OpenCounter(+1);
  return SQLITE_OK;
}


/*
................................................................................
  rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
  sqlite3OsLeaveMutex();
  if( rc ){
    close(id->fd);
    unlink(zFilename);
    return SQLITE_NOMEM;
  }
  id->locked = 0;
  if( delFlag ){
    unlink(zFilename);
  }
  TRACE3("OPEN-EX %-3d %s\n", id->fd, zFilename);
  OpenCounter(+1);
  return SQLITE_OK;
}
................................................................................
  sqlite3OsEnterMutex();
  rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
  sqlite3OsLeaveMutex();
  if( rc ){
    close(id->fd);
    return SQLITE_NOMEM;
  }
  id->locked = 0;
  TRACE3("OPEN-RO %-3d %s\n", id->fd, zFilename);
  OpenCounter(+1);
  return SQLITE_OK;
}

/*
** Attempt to open a file descriptor for the directory that contains a
................................................................................
** SQLITE_OK on success and SQLITE_BUSY on a failure.  If this
** library was compiled with large file support (LFS) but LFS is not
** available on the host, then an SQLITE_NOLFS is returned.
*/
int sqlite3OsWriteLock(OsFile *id){
  return sqlite3OsLock(id, EXCLUSIVE_LOCK);
}




































/*
** Lock the file with the lock specified by parameter locktype - one
** of the following:
**
** SHARED_LOCK
** RESERVED_LOCK
................................................................................
int sqlite3OsLock(OsFile *id, int locktype){
  int rc = SQLITE_OK;
  struct lockInfo *pLock = id->pLock;
  struct flock lock;
  int s;

  /* It is an error to request any kind of lock before a shared lock */
  if( locktype>SHARED_LOCK && id->locked==0 ){
    rc = sqlite3OsLock(id, SHARED_LOCK);
    if( rc!=SQLITE_OK ) return rc;
  }
  assert( locktype==SHARED_LOCK || id->locked!=0 );

  /* If there is already a lock of this type or more restrictive on the
  ** OsFile, do nothing. Don't use the end_lock: exit path, as
  ** sqlite3OsEnterMutex() hasn't been called yet.
  */
  if( id->locked>=locktype ){
    return SQLITE_OK;
  }

  sqlite3OsEnterMutex();

  /* If some thread using this PID has a lock via a different OsFile*
  ** handle that precludes the requested lock, return BUSY.
  */
  if( (id->locked!=pLock->locktype && 
      (pLock->locktype>RESERVED_LOCK || locktype!=SHARED_LOCK)) ||
      (locktype>RESERVED_LOCK && pLock->cnt>1)
  ){
    rc = SQLITE_BUSY;
    goto end_lock;
  }

................................................................................
  /* If a SHARED lock is requested, and some thread using this PID already
  ** has a SHARED or RESERVED lock, then increment reference counts and
  ** return SQLITE_OK.
  */
  if( locktype==SHARED_LOCK && 
      (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
    assert( locktype==SHARED_LOCK );
    assert( id->locked==0 );
    assert( pLock->cnt>0 );
    id->locked = SHARED_LOCK;
    pLock->cnt++;
    id->pOpen->nLock++;
    goto end_lock;
  }

  lock.l_len = 0L;
  lock.l_whence = SEEK_SET;

  /* If control gets to this point, then actually go ahead and make
  ** operating system calls for the specified lock.
  */
  if( locktype==SHARED_LOCK ){
    assert( pLock->cnt==0 );
................................................................................
    s = fcntl(id->fd, F_SETLK, &lock);
    lock.l_start = 2;
    lock.l_type = F_UNLCK;
    fcntl(id->fd, F_SETLK, &lock);
    if( s ){
      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
    }else{
      id->locked = SHARED_LOCK;
      id->pOpen->nLock++;
      pLock->cnt = 1;
    }
  }else{
    /* The request was for a RESERVED, PENDING or EXCLUSIVE lock.  It is
    ** assumed that there is a SHARED or greater lock on the file
    ** already.
    */
    assert( 0!=id->locked );
    lock.l_type = F_WRLCK;
    switch( locktype ){
      case RESERVED_LOCK:
        lock.l_start = 1;
        break;
      case PENDING_LOCK:
        lock.l_start = 2;
................................................................................
    s = fcntl(id->fd, F_SETLK, &lock);
    if( s ){
      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
    }
  }
  
  if( rc==SQLITE_OK ){
    id->locked = locktype;
    pLock->locktype = locktype;
    assert( pLock->locktype==RESERVED_LOCK || pLock->cnt==1 );
  }

end_lock:
  sqlite3OsLeaveMutex();
  return rc;
................................................................................
** Unlock the given file descriptor.  If the file descriptor was
** not previously locked, then this routine is a no-op.  If this
** library was compiled with large file support (LFS) but LFS is not
** available on the host, then an SQLITE_NOLFS is returned.
*/
int sqlite3OsUnlock(OsFile *id){
  int rc;
  if( !id->locked ) return SQLITE_OK;
  id->locked = 0;
  sqlite3OsEnterMutex();
  assert( id->pLock->cnt!=0 );
  if( id->pLock->cnt>1 ){
    id->pLock->cnt--;
    rc = SQLITE_OK;
  }else{
    struct flock lock;
................................................................................
      }
      sqliteFree(pOpen->aPending);
      pOpen->nPending = 0;
      pOpen->aPending = 0;
    }
  }
  sqlite3OsLeaveMutex();
  id->locked = 0;
  return rc;
}

/*
** Get information to seed the random number generator.  The seed
** is written into the buffer zBuf[256].  The calling function must
** supply a sufficiently large buffer.







>
>
>
>







 







|







 







|







 







|







 







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|



|





|








|







 







|

|





|







 







|








|







 







|







 







|
|







 







|







57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
...
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
...
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
...
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
...
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
...
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
...
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
...
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
...
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
...
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
...
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
...
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889


/*
** Include code that is common to all os_*.c files
*/
#include "os_common.h"

#if defined(THREADSAFE) && defined(__linux__)
#define getpid pthread_self
#endif

/*
** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 specify that when a process
** sets or clears a lock, that operation overrides any prior locks set
** by the same process.  It does not explicitly say so, but this implies
** that it overrides locks set by the same process using a different
** file descriptor.  Consider this test case:
................................................................................
**
** A single inode can have multiple file descriptors, so each OsFile
** structure contains a pointer to an instance of this object and this
** object keeps a count of the number of OsFiles pointing to it.
*/
struct lockInfo {
  struct lockKey key;  /* The lookup key */
  int cnt;             /* Number of locks held */
  int locktype;        /* One of SHARED_LOCK, RESERVED_LOCK etc. */
  int nRef;            /* Number of pointers to this structure */
};

/*
** An instance of the following structure serves as the key used
** to locate a particular openCnt structure given its inode.  This
................................................................................
  sqlite3OsEnterMutex();
  rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
  sqlite3OsLeaveMutex();
  if( rc ){
    close(id->fd);
    return SQLITE_NOMEM;
  }
  id->locktype = 0;
  TRACE3("OPEN    %-3d %s\n", id->fd, zFilename);
  OpenCounter(+1);
  return SQLITE_OK;
}


/*
................................................................................
  rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
  sqlite3OsLeaveMutex();
  if( rc ){
    close(id->fd);
    unlink(zFilename);
    return SQLITE_NOMEM;
  }
  id->locktype = 0;
  if( delFlag ){
    unlink(zFilename);
  }
  TRACE3("OPEN-EX %-3d %s\n", id->fd, zFilename);
  OpenCounter(+1);
  return SQLITE_OK;
}
................................................................................
  sqlite3OsEnterMutex();
  rc = findLockInfo(id->fd, &id->pLock, &id->pOpen);
  sqlite3OsLeaveMutex();
  if( rc ){
    close(id->fd);
    return SQLITE_NOMEM;
  }
  id->locktype = 0;
  TRACE3("OPEN-RO %-3d %s\n", id->fd, zFilename);
  OpenCounter(+1);
  return SQLITE_OK;
}

/*
** Attempt to open a file descriptor for the directory that contains a
................................................................................
** SQLITE_OK on success and SQLITE_BUSY on a failure.  If this
** library was compiled with large file support (LFS) but LFS is not
** available on the host, then an SQLITE_NOLFS is returned.
*/
int sqlite3OsWriteLock(OsFile *id){
  return sqlite3OsLock(id, EXCLUSIVE_LOCK);
}

/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, return
** non-zero, otherwise zero.
*/
int sqlite3OsCheckWriteLock(OsFile *id){
  int r = 0;

  sqlite3OsEnterMutex();

  /* Check if a thread in this process holds such a lock */
  if( id->pLock->locktype>SHARED_LOCK ){
    r = 1;
  }

  /* Otherwise see if some other process holds it. Just check the whole
  ** file for write-locks, rather than any specific bytes.
  */
  if( !r ){
    struct flock lock;
    lock.l_whence = SEEK_SET;
    lock.l_start = 0;
    lock.l_len = 0;
    lock.l_type = F_RDLCK;
    fcntl(id->fd, F_GETLK, &lock);
    if( lock.l_type!=F_UNLCK ){
      r = 1;
    }
  }
  
  sqlite3OsLeaveMutex();

  return r;
}

/*
** Lock the file with the lock specified by parameter locktype - one
** of the following:
**
** SHARED_LOCK
** RESERVED_LOCK
................................................................................
int sqlite3OsLock(OsFile *id, int locktype){
  int rc = SQLITE_OK;
  struct lockInfo *pLock = id->pLock;
  struct flock lock;
  int s;

  /* It is an error to request any kind of lock before a shared lock */
  if( locktype>SHARED_LOCK && id->locktype==0 ){
    rc = sqlite3OsLock(id, SHARED_LOCK);
    if( rc!=SQLITE_OK ) return rc;
  }
  assert( locktype==SHARED_LOCK || id->locktype!=0 );

  /* If there is already a lock of this type or more restrictive on the
  ** OsFile, do nothing. Don't use the end_lock: exit path, as
  ** sqlite3OsEnterMutex() hasn't been called yet.
  */
  if( id->locktype>=locktype ){
    return SQLITE_OK;
  }

  sqlite3OsEnterMutex();

  /* If some thread using this PID has a lock via a different OsFile*
  ** handle that precludes the requested lock, return BUSY.
  */
  if( (id->locktype!=pLock->locktype && 
      (pLock->locktype>RESERVED_LOCK || locktype!=SHARED_LOCK)) ||
      (locktype>RESERVED_LOCK && pLock->cnt>1)
  ){
    rc = SQLITE_BUSY;
    goto end_lock;
  }

................................................................................
  /* If a SHARED lock is requested, and some thread using this PID already
  ** has a SHARED or RESERVED lock, then increment reference counts and
  ** return SQLITE_OK.
  */
  if( locktype==SHARED_LOCK && 
      (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
    assert( locktype==SHARED_LOCK );
    assert( id->locktype==0 );
    assert( pLock->cnt>0 );
    id->locktype = SHARED_LOCK;
    pLock->cnt++;
    id->pOpen->nLock++;
    goto end_lock;
  }

  lock.l_len = 1L;
  lock.l_whence = SEEK_SET;

  /* If control gets to this point, then actually go ahead and make
  ** operating system calls for the specified lock.
  */
  if( locktype==SHARED_LOCK ){
    assert( pLock->cnt==0 );
................................................................................
    s = fcntl(id->fd, F_SETLK, &lock);
    lock.l_start = 2;
    lock.l_type = F_UNLCK;
    fcntl(id->fd, F_SETLK, &lock);
    if( s ){
      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
    }else{
      id->locktype = SHARED_LOCK;
      id->pOpen->nLock++;
      pLock->cnt = 1;
    }
  }else{
    /* The request was for a RESERVED, PENDING or EXCLUSIVE lock.  It is
    ** assumed that there is a SHARED or greater lock on the file
    ** already.
    */
    assert( 0!=id->locktype );
    lock.l_type = F_WRLCK;
    switch( locktype ){
      case RESERVED_LOCK:
        lock.l_start = 1;
        break;
      case PENDING_LOCK:
        lock.l_start = 2;
................................................................................
    s = fcntl(id->fd, F_SETLK, &lock);
    if( s ){
      rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
    }
  }
  
  if( rc==SQLITE_OK ){
    id->locktype = locktype;
    pLock->locktype = locktype;
    assert( pLock->locktype==RESERVED_LOCK || pLock->cnt==1 );
  }

end_lock:
  sqlite3OsLeaveMutex();
  return rc;
................................................................................
** Unlock the given file descriptor.  If the file descriptor was
** not previously locked, then this routine is a no-op.  If this
** library was compiled with large file support (LFS) but LFS is not
** available on the host, then an SQLITE_NOLFS is returned.
*/
int sqlite3OsUnlock(OsFile *id){
  int rc;
  if( !id->locktype ) return SQLITE_OK;
  id->locktype = 0;
  sqlite3OsEnterMutex();
  assert( id->pLock->cnt!=0 );
  if( id->pLock->cnt>1 ){
    id->pLock->cnt--;
    rc = SQLITE_OK;
  }else{
    struct flock lock;
................................................................................
      }
      sqliteFree(pOpen->aPending);
      pOpen->nPending = 0;
      pOpen->aPending = 0;
    }
  }
  sqlite3OsLeaveMutex();
  id->locktype = 0;
  return rc;
}

/*
** Get information to seed the random number generator.  The seed
** is written into the buffer zBuf[256].  The calling function must
** supply a sufficiently large buffer.

Changes to src/os_unix.h.

52
53
54
55
56
57
58



59
60
61
62
63
64
65
66
67
68
69
70
71
72
#include <unistd.h>

/*
** The OsFile structure is a operating-system independing representation
** of an open file handle.  It is defined differently for each architecture.
**
** This is the definition for Unix.



*/
typedef struct OsFile OsFile;
struct OsFile {
  struct openCnt *pOpen;    /* Info about all open fd's on this inode */
  struct lockInfo *pLock;   /* Info about locks on this inode */
  int fd;                   /* The file descriptor */
  int locked;               /* True if this instance holds the lock */
  int dirfd;                /* File descriptor for the directory */
};

/*
** Maximum number of characters in a temporary file name
*/
#define SQLITE_TEMPNAME_SIZE 200







>
>
>






|







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include <unistd.h>

/*
** The OsFile structure is a operating-system independing representation
** of an open file handle.  It is defined differently for each architecture.
**
** This is the definition for Unix.
**
** OsFile.locktype takes one of the values SHARED_LOCK, RESERVED_LOCK,
** PENDING_LOCK or EXCLUSIVE_LOCK.
*/
typedef struct OsFile OsFile;
struct OsFile {
  struct openCnt *pOpen;    /* Info about all open fd's on this inode */
  struct lockInfo *pLock;   /* Info about locks on this inode */
  int fd;                   /* The file descriptor */
  int locktype;             /* The type of lock held on this fd */
  int dirfd;                /* File descriptor for the directory */
};

/*
** Maximum number of characters in a temporary file name
*/
#define SQLITE_TEMPNAME_SIZE 200

Changes to src/pager.c.

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
...
208
209
210
211
212
213
214

215
216
217
218
219
220
221
...
295
296
297
298
299
300
301

302
303





304
305

306
307
308
309
310
311
312
...
596
597
598
599
600
601
602


































































































603
604
605
606
607
608
609
...
657
658
659
660
661
662
663

664
665
666
667
668
669
670
...
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719

















720
721
722
723
724
725
726
727
728
729
...
768
769
770
771
772
773
774










775




776
777
778
779
780
781
782
....
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
....
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
....
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
....
1334
1335
1336
1337
1338
1339
1340










1341
1342
1343
1344
1345
1346
1347
1348
....
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464

1465

1466


1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
....
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
....
1760
1761
1762
1763
1764
1765
1766








1767
1768
1769
1770
1771
1772
1773
1774
















1775
1776
1777
1778
1779
1780
1781
....
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809



1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820

1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831

1832
1833
1834
1835
1836
1837
1838
....
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
....
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
....
2151
2152
2153
2154
2155
2156
2157

2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177




2178
2179
2180
2181
2182
2183
2184
....
2306
2307
2308
2309
2310
2311
2312
2313

2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
....
2409
2410
2411
2412
2413
2414
2415











































2416
2417
2418
2419
2420
2421
2422
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.109 2004/05/31 08:26:49 danielk1977 Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

................................................................................
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInStmt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pStmt;               /* List of pages in the statement subjournal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */

};

/*
** These are bits that can be set in Pager.errMask.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
................................................................................
#endif

/*
** The size of the header and of each page in the journal varies according
** to which journal format is being used.  The following macros figure out
** the sizes based on format numbers.
*/

#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))





#define JOURNAL_PG_SZ(X) \
   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))


/*
** Enable reference count tracking here:
*/
#ifdef SQLITE_TEST
  int pager3_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
................................................................................
    }
    pPg->dirty = 0;
    pPg->needSync = 0;
    CODEC(pPager, pData, pPg->pgno, 3);
  }
  return rc;
}



































































































/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.  
**
** The journal file format is as follows: 
**
................................................................................
  off_t szJ;               /* Size of the journal file in bytes */
  int nRec;                /* Number of Records in the journal */
  int i;                   /* Loop counter */
  Pgno mxPg = 0;           /* Size of the original file in pages */
  int format;              /* Format of the journal file. */
  unsigned char aMagic[sizeof(aJournalMagic1)];
  int rc;


  /* Figure out how many records are in the journal.  Abort early if
  ** the journal is empty.
  */
  assert( pPager->journalOpen );
  sqlite3OsSeek(&pPager->jfd, 0);
  rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
................................................................................
  }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_1;
  }else{
    rc = SQLITE_PROTOCOL;
    goto end_playback;
  }
  if( format>=JOURNAL_FORMAT_3 ){
    if( szJ < sizeof(aMagic) + 3*sizeof(u32) ){
      /* Ignore the journal if it is too small to contain a complete
      ** header.  We already did this test once above, but at the prior
      ** test, we did not know the journal format and so we had to assume
      ** the smallest possible header.  Now we know the header is bigger
      ** than the minimum so we test again.
      */
      goto end_playback;
    }
    rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
    if( rc ) goto end_playback;
    rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
    if( rc ) goto end_playback;
    if( nRec==0xffffffff || useJournalSize ){
      nRec = (szJ - JOURNAL_HDR_SZ(3))/JOURNAL_PG_SZ(3);
    }

















  }else{
    nRec = (szJ - JOURNAL_HDR_SZ(2))/JOURNAL_PG_SZ(2);
    assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
  }
  rc = read32bits(format, &pPager->jfd, &mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
  rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
................................................................................
      }
      pPg->needSync = 0;
      pPg->dirty = 0;
    }
  }

end_playback:










  if( rc!=SQLITE_OK ){




    pager_unwritelock(pPager);
    pPager->errMask |= PAGER_ERR_CORRUPT;
    rc = SQLITE_CORRUPT;
  }else{
    rc = pager_unwritelock(pPager);
  }
  return rc;
................................................................................
  }
  return n;
}

/*
** Forward declaration
*/
static int syncJournal(Pager*);


/*
** Unlink a page from the free list (the list of all pages where nRef==0)
** and from its hash collision chain.
*/
static void unlinkPage(PgHdr *pPg){
................................................................................
    return SQLITE_OK;
  }
  if( pPager->memDb ){
    pPager->dbSize = nPage;
    memoryTruncate(pPager);
    return SQLITE_OK;
  }
  syncJournal(pPager);
  rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
  if( rc==SQLITE_OK ){
    pPager->dbSize = nPage;
  }
  return rc;
}

................................................................................
**
** For temporary databases, we do not care if we are able to rollback
** after a power failure, so sync occurs.
**
** This routine clears the needSync field of every page current held in
** memory.
*/
static int syncJournal(Pager *pPager){
  PgHdr *pPg;
  int rc = SQLITE_OK;

  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync ){
    if( !pPager->tempFile ){
      assert( pPager->journalOpen );
      /* assert( !pPager->noSync ); // noSync might be set if synchronous
      ** was turned off after the transaction was started.  Ticket #615 */
#ifndef NDEBUG
      {
        /* Make sure the pPager->nRec counter we are keeping agrees
        ** with the nRec computed from the size of the journal file.
        */
        off_t hdrSz, pgSz, jSz;
        hdrSz = JOURNAL_HDR_SZ(journal_format);
        pgSz = JOURNAL_PG_SZ(journal_format);
        rc = sqlite3OsFileSize(&pPager->jfd, &jSz);
        if( rc!=0 ) return rc;
        assert( pPager->nRec*pgSz+hdrSz==jSz );
      }
#endif
      if( journal_format>=3 ){
................................................................................
          TRACE1("SYNC\n");
          rc = sqlite3OsSync(&pPager->jfd);
          if( rc!=0 ) return rc;
        }
        sqlite3OsSeek(&pPager->jfd, sizeof(aJournalMagic1));
        rc = write32bits(&pPager->jfd, pPager->nRec);
        if( rc ) return rc;










        szJ = JOURNAL_HDR_SZ(journal_format) +
                 pPager->nRec*JOURNAL_PG_SZ(journal_format);
        sqlite3OsSeek(&pPager->jfd, szJ);
      }
      TRACE1("SYNC\n");
      rc = sqlite3OsSync(&pPager->jfd);
      if( rc!=0 ) return rc;
      pPager->journalStarted = 1;
................................................................................
  assert( pPager!=0 );
  assert( pgno!=0 );
  *ppPage = 0;
  if( pPager->errMask & ~(PAGER_ERR_FULL) ){
    return pager_errcode(pPager);
  }

  /* If this is the first page accessed, then get a read lock
  ** on the database file.
  */
  if( pPager->nRef==0 && !pPager->memDb ){
    rc = sqlite3OsReadLock(&pPager->fd);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    pPager->state = SQLITE_READLOCK;

    /* If a journal file exists, try to play it back.

    */

    if( pPager->useJournal && sqlite3OsFileExists(pPager->zJournal) ){


       int rc;

       /* Get a write lock on the database
       */
       rc = sqlite3OsWriteLock(&pPager->fd);
       if( rc!=SQLITE_OK ){
         if( sqlite3OsUnlock(&pPager->fd)!=SQLITE_OK ){
           /* This should never happen! */
           rc = SQLITE_INTERNAL;
         }
         return rc;
       }
................................................................................

      /* If we could not find a page that does not require an fsync()
      ** on the journal file then fsync the journal file.  This is a
      ** very slow operation, so we work hard to avoid it.  But sometimes
      ** it can't be helped.
      */
      if( pPg==0 ){
        int rc = syncJournal(pPager);
        if( rc!=0 ){
          sqlite3pager_rollback(pPager);
          return SQLITE_IOERR;
        }
        pPg = pPager->pFirst;
      }
      assert( pPg->nRef==0 );
................................................................................
  pPager->nRec = 0;
  if( pPager->errMask!=0 ){
    rc = pager_errcode(pPager);
    return rc;
  }
  pPager->origDbSize = pPager->dbSize;
  if( journal_format==JOURNAL_FORMAT_3 ){








    rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));
    if( rc==SQLITE_OK ){
      rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
    }
    if( rc==SQLITE_OK ){
      sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
      rc = write32bits(&pPager->jfd, pPager->cksumInit);
    }
















  }else if( journal_format==JOURNAL_FORMAT_2 ){
    rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
  }else{
    assert( journal_format==JOURNAL_FORMAT_1 );
    rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
  }
  if( rc==SQLITE_OK ){
................................................................................
** the any of the following happen:
**
**   *  sqlite3pager_commit() is called.
**   *  sqlite3pager_rollback() is called.
**   *  sqlite3pager_close() is called.
**   *  sqlite3pager_unref() is called to on every outstanding page.
**
** The parameter to this routine is a pointer to any open page of the
** database file.  Nothing changes about the page - it is used merely
** to acquire a pointer to the Pager structure and as proof that there
** is already a read-lock on the database.
**



** A journal file is opened if this is not a temporary file.  For
** temporary files, the opening of the journal file is deferred until
** there is an actual need to write to the journal.
**
** If the database is already write-locked, this routine is a no-op.
*/
int sqlite3pager_begin(void *pData){
  PgHdr *pPg = DATA_TO_PGHDR(pData);
  Pager *pPager = pPg->pPager;
  int rc = SQLITE_OK;
  assert( pPg->nRef>0 );

  assert( pPager->state!=SQLITE_UNLOCK );
  if( pPager->state==SQLITE_READLOCK ){
    assert( pPager->aInJournal==0 );
    if( pPager->memDb ){
      pPager->state = SQLITE_WRITELOCK;
      pPager->origDbSize = pPager->dbSize;
    }else{
      rc = sqlite3OsWriteLock(&pPager->fd);
      if( rc!=SQLITE_OK ){
        return rc;
      }

      pPager->state = SQLITE_WRITELOCK;
      pPager->dirtyFile = 0;
      TRACE1("TRANSACTION\n");
      if( pPager->useJournal && !pPager->tempFile ){
        rc = pager_open_journal(pPager);
      }
    }
................................................................................
  ** written to the transaction journal or the ckeckpoint journal
  ** or both.
  **
  ** First check to see that the transaction journal exists and
  ** create it if it does not.
  */
  assert( pPager->state!=SQLITE_UNLOCK );
  rc = sqlite3pager_begin(pData);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  assert( pPager->state==SQLITE_WRITELOCK );
  if( !pPager->journalOpen && pPager->useJournal ){
    rc = pager_open_journal(pPager);
    if( rc!=SQLITE_OK ) return rc;
................................................................................
        TRACE2("JOURNAL %d\n", pPg->pgno);
        assert( pHist->pOrig==0 );
        pHist->pOrig = sqliteMallocRaw( pPager->pageSize );
        if( pHist->pOrig ){
          memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
        }
        pPg->inJournal = 1;
      }else {
        if( journal_format>=JOURNAL_FORMAT_3 ){
          u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
          saved = *(u32*)PGHDR_TO_EXTRA(pPg);
          store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
          szPg = SQLITE_PAGE_SIZE+8;
        }else{
          szPg = SQLITE_PAGE_SIZE+4;
................................................................................
      pPg->pPrevStmt = pPg->pNextStmt = 0;
      pPg = pPg->pDirty;
    }
    pPager->pStmt = 0;
    pPager->state = SQLITE_READLOCK;
    return SQLITE_OK;
  }

  if( pPager->dirtyFile==0 ){
    /* Exit early (without doing the time-consuming sqlite3OsSync() calls)
    ** if there have been no changes to the database file. */
    assert( pPager->needSync==0 );
    rc = pager_unwritelock(pPager);
    pPager->dbSize = -1;
    return rc;
  }
  assert( pPager->journalOpen );
  rc = syncJournal(pPager);
  if( rc!=SQLITE_OK ){
    goto commit_abort;
  }
  pPg = pager_get_all_dirty_pages(pPager);
  if( pPg ){
    rc = pager_write_pagelist(pPg);
    if( rc || (!pPager->noSync && sqlite3OsSync(&pPager->fd)!=SQLITE_OK) ){
      goto commit_abort;
    }
  }




  rc = pager_unwritelock(pPager);
  pPager->dbSize = -1;
  return rc;

  /* Jump here if anything goes wrong during the commit process.
  */
commit_abort:
................................................................................
    sqlite3OsReadLock(&pPager->fd);
    return SQLITE_NOMEM;
  }
#ifndef NDEBUG
  rc = sqlite3OsFileSize(&pPager->jfd, &pPager->stmtJSize);
  if( rc ) goto stmt_begin_failed;
  assert( pPager->stmtJSize == 
    pPager->nRec*JOURNAL_PG_SZ(journal_format)+JOURNAL_HDR_SZ(journal_format) );

#endif
  pPager->stmtJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
                         + JOURNAL_HDR_SZ(journal_format);
  pPager->stmtSize = pPager->dbSize;
  if( !pPager->stmtOpen ){
    rc = sqlite3pager_opentemp(zTemp, &pPager->stfd);
    if( rc ) goto stmt_begin_failed;
    pPager->stmtOpen = 1;
    pPager->stmtNRec = 0;
  }
................................................................................
  Pager *pPager,
  void (*xCodec)(void*,void*,Pgno,int),
  void *pCodecArg
){
  pPager->xCodec = xCodec;
  pPager->pCodecArg = pCodecArg;
}












































#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
*/
void sqlite3pager_refdump(Pager *pPager){
  PgHdr *pPg;







|







 







>







 







>


>
>
>
>
>


>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>







 







|













|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|
|







 







>
>
>
>
>
>
>
>
>
>

>
>
>
>







 







|







 







|







 







|






|










|







 







>
>
>
>
>
>
>
>
>
>
|







 







|









|
>

>
|
>
>


|
<
|







 







|







 







>
>
>
>
>
>
>
>








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|
|
|
|

>
>
>
|
|
|



|




>











>







 







|







 







|







 







>









|










>
>
>
>







 







|
>


|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
...
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
...
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
...
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
...
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
...
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
...
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
....
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
....
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
....
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
....
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
....
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621

1622
1623
1624
1625
1626
1627
1628
1629
....
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
....
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
....
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
....
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
....
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
....
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
....
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
....
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.110 2004/06/03 16:08:42 danielk1977 Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

................................................................................
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInStmt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pStmt;               /* List of pages in the statement subjournal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
  int nMaster;                /* Number of bytes to reserve for master j.p */
};

/*
** These are bits that can be set in Pager.errMask.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
................................................................................
#endif

/*
** The size of the header and of each page in the journal varies according
** to which journal format is being used.  The following macros figure out
** the sizes based on format numbers.
*/
/*
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))
*/
#define JOURNAL_HDR_SZ(pPager, X) (\
   sizeof(aJournalMagic1) + \
   sizeof(Pgno) + \
   ((X)>=3?3*sizeof(u32)+(pPager)->nMaster:0) )
#define JOURNAL_PG_SZ(X) \
   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))


/*
** Enable reference count tracking here:
*/
#ifdef SQLITE_TEST
  int pager3_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
................................................................................
    }
    pPg->dirty = 0;
    pPg->needSync = 0;
    CODEC(pPager, pData, pPg->pgno, 3);
  }
  return rc;
}

/*
** Parameter zMaster is the name of a master journal file. A single journal
** file that referred to the master journal file has just been rolled back.
** This routine checks if it is possible to delete the master journal file,
** and does so if it is.
*/
static int pager_delmaster(const char *zMaster){
  int rc;
  int master_open = 0;
  OsFile master;
  char *zMasterJournal = 0; /* Contents of master journal file */
  off_t nMasterJournal;     /* Size of master journal file */

  /* Open the master journal file exclusively in case some other process
  ** is running this routine also. Not that it makes too much difference.
  */
  rc = sqlite3OsOpenExclusive(zMaster, &master, 0);
  if( rc!=SQLITE_OK ) goto delmaster_out;
  master_open = 1;

  rc = sqlite3OsFileSize(&master, &nMasterJournal);
  if( rc!=SQLITE_OK ) goto delmaster_out;

  if( nMasterJournal>0 ){
    char *zDb;
    zMasterJournal = (char *)sqliteMalloc(nMasterJournal);
    if( !zMasterJournal ){
      rc = SQLITE_NOMEM;
      goto delmaster_out;
    }
    rc = sqlite3OsRead(&master, zMasterJournal, nMasterJournal);
    if( rc!=SQLITE_OK ) goto delmaster_out;

    zDb = zMasterJournal;
    while( (zDb-zMasterJournal)<nMasterJournal ){
      char *zJournal = 0;
      sqlite3SetString(&zJournal, zDb, "-journal", 0);
      if( !zJournal ){
        rc = SQLITE_NOMEM;
        goto delmaster_out;
      }
      if( sqlite3OsFileExists(zJournal) ){
        /* One of the journals pointed to by the master journal exists.
        ** Open it and check if it points at the master journal. If
        ** so, return without deleting the master journal file.
        */
        OsFile journal;
        int nMaster;

        rc = sqlite3OsOpenReadOnly(zJournal, &journal);
        if( rc!=SQLITE_OK ){
          sqlite3OsClose(&journal);
          sqliteFree(zJournal);
          goto delmaster_out;
        }
        sqlite3OsClose(&journal);

        /* Seek to the point in the journal where the master journal name
        ** is stored. Read the master journal name into memory obtained
        ** from malloc.
        */
        rc = sqlite3OsSeek(&journal, sizeof(aJournalMagic3)+2*sizeof(u32));
        if( rc!=SQLITE_OK ) goto delmaster_out;
        rc = read32bits(3, &journal, (u32 *)&nMaster);
        if( rc!=SQLITE_OK ) goto delmaster_out;
        if( nMaster>0 && nMaster==strlen(zMaster)+1 ){
          char *zMasterPtr = (char *)sqliteMalloc(nMaster);
          if( !zMasterPtr ){
            rc = SQLITE_NOMEM;
          }
          rc = sqlite3OsRead(&journal, zMasterPtr, nMaster);
          if( rc!=SQLITE_OK ){
            sqliteFree(zMasterPtr);
            goto delmaster_out;
          }
          if( 0==strncmp(zMasterPtr, zMaster, nMaster) ){
            /* We have a match. Do not delete the master journal file. */
            sqliteFree(zMasterPtr);
            goto delmaster_out;
          }
        }
      }
      zDb += (strlen(zDb)+1);
    }
  }
  
  sqlite3OsDelete(zMaster);

delmaster_out:
  if( zMasterJournal ){
    sqliteFree(zMasterJournal);
  }  
  if( master_open ){
    sqlite3OsClose(&master);
  }
  return rc;
}

/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.  
**
** The journal file format is as follows: 
**
................................................................................
  off_t szJ;               /* Size of the journal file in bytes */
  int nRec;                /* Number of Records in the journal */
  int i;                   /* Loop counter */
  Pgno mxPg = 0;           /* Size of the original file in pages */
  int format;              /* Format of the journal file. */
  unsigned char aMagic[sizeof(aJournalMagic1)];
  int rc;
  char *zMaster = 0;       /* Name of master journal file if any */

  /* Figure out how many records are in the journal.  Abort early if
  ** the journal is empty.
  */
  assert( pPager->journalOpen );
  sqlite3OsSeek(&pPager->jfd, 0);
  rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
................................................................................
  }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_1;
  }else{
    rc = SQLITE_PROTOCOL;
    goto end_playback;
  }
  if( format>=JOURNAL_FORMAT_3 ){
    if( szJ < sizeof(aMagic) + 4*sizeof(u32) ){
      /* Ignore the journal if it is too small to contain a complete
      ** header.  We already did this test once above, but at the prior
      ** test, we did not know the journal format and so we had to assume
      ** the smallest possible header.  Now we know the header is bigger
      ** than the minimum so we test again.
      */
      goto end_playback;
    }
    rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
    if( rc ) goto end_playback;
    rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
    if( rc ) goto end_playback;
    if( nRec==0xffffffff || useJournalSize ){
      nRec = (szJ - JOURNAL_HDR_SZ(pPager, 3))/JOURNAL_PG_SZ(3);
    }

    /* Check if a master journal file is specified. If one is specified, 
    ** only proceed with the playback if it still exists.
    */
    rc = read32bits(format, &pPager->jfd, &pPager->nMaster);
    if( rc ) goto end_playback;
    if( pPager->nMaster>0 ){
      zMaster = sqliteMalloc(pPager->nMaster);
      if( !zMaster ){
        rc = SQLITE_NOMEM;
        goto end_playback;
      }
      rc = sqlite3OsRead(&pPager->jfd, zMaster, pPager->nMaster);
      if( rc!=SQLITE_OK || (strlen(zMaster) && !sqlite3OsFileExists(zMaster)) ){
        goto end_playback;
      }
    }
  }else{
    nRec = (szJ - JOURNAL_HDR_SZ(pPager, 2))/JOURNAL_PG_SZ(2);
    assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(pPager, 2)==szJ );
  }
  rc = read32bits(format, &pPager->jfd, &mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
  rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
................................................................................
      }
      pPg->needSync = 0;
      pPg->dirty = 0;
    }
  }

end_playback:
  if( zMaster ){
    /* If there was a master journal and this routine will return true,
    ** see if it is possible to delete the master journal. If errors 
    ** occur during this process, ignore them.
    */
    if( rc==SQLITE_OK ){
      pager_delmaster(zMaster);
    }
    sqliteFree(zMaster);
  }
  if( rc!=SQLITE_OK ){
    /* FIX ME: We shouldn't delete the journal if an error occured during
    ** rollback. It may have been a transient error and the rollback may
    ** succeed next time it is attempted.
    */
    pager_unwritelock(pPager);
    pPager->errMask |= PAGER_ERR_CORRUPT;
    rc = SQLITE_CORRUPT;
  }else{
    rc = pager_unwritelock(pPager);
  }
  return rc;
................................................................................
  }
  return n;
}

/*
** Forward declaration
*/
static int syncJournal(Pager*, const char*);


/*
** Unlink a page from the free list (the list of all pages where nRef==0)
** and from its hash collision chain.
*/
static void unlinkPage(PgHdr *pPg){
................................................................................
    return SQLITE_OK;
  }
  if( pPager->memDb ){
    pPager->dbSize = nPage;
    memoryTruncate(pPager);
    return SQLITE_OK;
  }
  syncJournal(pPager, 0);
  rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
  if( rc==SQLITE_OK ){
    pPager->dbSize = nPage;
  }
  return rc;
}

................................................................................
**
** For temporary databases, we do not care if we are able to rollback
** after a power failure, so sync occurs.
**
** This routine clears the needSync field of every page current held in
** memory.
*/
static int syncJournal(Pager *pPager, const char *zMaster){
  PgHdr *pPg;
  int rc = SQLITE_OK;

  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync || zMaster ){
    if( !pPager->tempFile ){
      assert( pPager->journalOpen );
      /* assert( !pPager->noSync ); // noSync might be set if synchronous
      ** was turned off after the transaction was started.  Ticket #615 */
#ifndef NDEBUG
      {
        /* Make sure the pPager->nRec counter we are keeping agrees
        ** with the nRec computed from the size of the journal file.
        */
        off_t hdrSz, pgSz, jSz;
        hdrSz = JOURNAL_HDR_SZ(pPager, journal_format);
        pgSz = JOURNAL_PG_SZ(journal_format);
        rc = sqlite3OsFileSize(&pPager->jfd, &jSz);
        if( rc!=0 ) return rc;
        assert( pPager->nRec*pgSz+hdrSz==jSz );
      }
#endif
      if( journal_format>=3 ){
................................................................................
          TRACE1("SYNC\n");
          rc = sqlite3OsSync(&pPager->jfd);
          if( rc!=0 ) return rc;
        }
        sqlite3OsSeek(&pPager->jfd, sizeof(aJournalMagic1));
        rc = write32bits(&pPager->jfd, pPager->nRec);
        if( rc ) return rc;

        /* Write the name of the master journal file if one is specified */
        if( zMaster ){
          assert( strlen(zMaster)<pPager->nMaster );
          rc = sqlite3OsSeek(&pPager->jfd, sizeof(aJournalMagic3) + 3*4);
          if( rc ) return rc;
          rc = sqlite3OsWrite(&pPager->jfd, zMaster, strlen(zMaster)+1);
          if( rc ) return rc;
        }

        szJ = JOURNAL_HDR_SZ(pPager, journal_format) +
                 pPager->nRec*JOURNAL_PG_SZ(journal_format);
        sqlite3OsSeek(&pPager->jfd, szJ);
      }
      TRACE1("SYNC\n");
      rc = sqlite3OsSync(&pPager->jfd);
      if( rc!=0 ) return rc;
      pPager->journalStarted = 1;
................................................................................
  assert( pPager!=0 );
  assert( pgno!=0 );
  *ppPage = 0;
  if( pPager->errMask & ~(PAGER_ERR_FULL) ){
    return pager_errcode(pPager);
  }

  /* If this is the first page accessed, then get a SHARED lock
  ** on the database file.
  */
  if( pPager->nRef==0 && !pPager->memDb ){
    rc = sqlite3OsReadLock(&pPager->fd);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    pPager->state = SQLITE_READLOCK;

    /* If a journal file exists, and there is no RESERVED lock on the
    ** database file, then it either needs to be played back or deleted.
    */
    if( pPager->useJournal && 
        sqlite3OsFileExists(pPager->zJournal) &&
        !sqlite3OsCheckWriteLock(&pPager->fd) 
    ){
       int rc;

       /* Get an EXCLUSIVE lock on the database file. */

       rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK);
       if( rc!=SQLITE_OK ){
         if( sqlite3OsUnlock(&pPager->fd)!=SQLITE_OK ){
           /* This should never happen! */
           rc = SQLITE_INTERNAL;
         }
         return rc;
       }
................................................................................

      /* If we could not find a page that does not require an fsync()
      ** on the journal file then fsync the journal file.  This is a
      ** very slow operation, so we work hard to avoid it.  But sometimes
      ** it can't be helped.
      */
      if( pPg==0 ){
        int rc = syncJournal(pPager, 0);
        if( rc!=0 ){
          sqlite3pager_rollback(pPager);
          return SQLITE_IOERR;
        }
        pPg = pPager->pFirst;
      }
      assert( pPg->nRef==0 );
................................................................................
  pPager->nRec = 0;
  if( pPager->errMask!=0 ){
    rc = pager_errcode(pPager);
    return rc;
  }
  pPager->origDbSize = pPager->dbSize;
  if( journal_format==JOURNAL_FORMAT_3 ){
    /* Create the header for a format 3 journal:
    ** - 8 bytes: Magic identifying journal format 3.
    ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
    ** - 4 bytes: Magic used for page checksums.
    ** - 4 bytes: Number of bytes reserved for master journal ptr (nMaster)
    ** - nMaster bytes: Space for a master journal pointer.
    ** - 4 bytes: Initial database page count.
    */
    rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));
    if( rc==SQLITE_OK ){
      rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
    }
    if( rc==SQLITE_OK ){
      sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
      rc = write32bits(&pPager->jfd, pPager->cksumInit);
    }
    if( rc==SQLITE_OK ){
      rc = write32bits(&pPager->jfd, pPager->nMaster);
    }
   
    /* Unless the size reserved for the master-journal pointer is 0, set
    ** the first byte of the master journal pointer to 0x00.  Either way,
    ** this is interpreted as 'no master journal' in the event of a
    ** rollback after a crash.
    */
    if( rc==SQLITE_OK && pPager->nMaster>0 ){
      rc = sqlite3OsWrite(&pPager->jfd, "", 1);
    }
    if( rc==SQLITE_OK ){
      rc = sqlite3OsSeek(&pPager->jfd, 
          sizeof(aJournalMagic3) + 3*4 + pPager->nMaster);
    }
  }else if( journal_format==JOURNAL_FORMAT_2 ){
    rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
  }else{
    assert( journal_format==JOURNAL_FORMAT_1 );
    rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
  }
  if( rc==SQLITE_OK ){
................................................................................
** the any of the following happen:
**
**   *  sqlite3pager_commit() is called.
**   *  sqlite3pager_rollback() is called.
**   *  sqlite3pager_close() is called.
**   *  sqlite3pager_unref() is called to on every outstanding page.
**
** The first parameter to this routine is a pointer to any open page of the
** database file.  Nothing changes about the page - it is used merely to
** acquire a pointer to the Pager structure and as proof that there is
** already a read-lock on the database.
**
** The second parameter indicates how much space in bytes to reserve for a
** master journal file-name at the start of the journal when it is created.
**
** A journal file is opened if this is not a temporary file.  For temporary
** files, the opening of the journal file is deferred until there is an
** actual need to write to the journal.
**
** If the database is already write-locked, this routine is a no-op.
*/
int sqlite3pager_begin(void *pData, int nMaster){
  PgHdr *pPg = DATA_TO_PGHDR(pData);
  Pager *pPager = pPg->pPager;
  int rc = SQLITE_OK;
  assert( pPg->nRef>0 );
  assert( nMaster>=0 );
  assert( pPager->state!=SQLITE_UNLOCK );
  if( pPager->state==SQLITE_READLOCK ){
    assert( pPager->aInJournal==0 );
    if( pPager->memDb ){
      pPager->state = SQLITE_WRITELOCK;
      pPager->origDbSize = pPager->dbSize;
    }else{
      rc = sqlite3OsWriteLock(&pPager->fd);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      pPager->nMaster = nMaster;
      pPager->state = SQLITE_WRITELOCK;
      pPager->dirtyFile = 0;
      TRACE1("TRANSACTION\n");
      if( pPager->useJournal && !pPager->tempFile ){
        rc = pager_open_journal(pPager);
      }
    }
................................................................................
  ** written to the transaction journal or the ckeckpoint journal
  ** or both.
  **
  ** First check to see that the transaction journal exists and
  ** create it if it does not.
  */
  assert( pPager->state!=SQLITE_UNLOCK );
  rc = sqlite3pager_begin(pData, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  assert( pPager->state==SQLITE_WRITELOCK );
  if( !pPager->journalOpen && pPager->useJournal ){
    rc = pager_open_journal(pPager);
    if( rc!=SQLITE_OK ) return rc;
................................................................................
        TRACE2("JOURNAL %d\n", pPg->pgno);
        assert( pHist->pOrig==0 );
        pHist->pOrig = sqliteMallocRaw( pPager->pageSize );
        if( pHist->pOrig ){
          memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
        }
        pPg->inJournal = 1;
      }else{
        if( journal_format>=JOURNAL_FORMAT_3 ){
          u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
          saved = *(u32*)PGHDR_TO_EXTRA(pPg);
          store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
          szPg = SQLITE_PAGE_SIZE+8;
        }else{
          szPg = SQLITE_PAGE_SIZE+4;
................................................................................
      pPg->pPrevStmt = pPg->pNextStmt = 0;
      pPg = pPg->pDirty;
    }
    pPager->pStmt = 0;
    pPager->state = SQLITE_READLOCK;
    return SQLITE_OK;
  }
#if 0
  if( pPager->dirtyFile==0 ){
    /* Exit early (without doing the time-consuming sqlite3OsSync() calls)
    ** if there have been no changes to the database file. */
    assert( pPager->needSync==0 );
    rc = pager_unwritelock(pPager);
    pPager->dbSize = -1;
    return rc;
  }
  assert( pPager->journalOpen );
  rc = syncJournal(pPager, 0);
  if( rc!=SQLITE_OK ){
    goto commit_abort;
  }
  pPg = pager_get_all_dirty_pages(pPager);
  if( pPg ){
    rc = pager_write_pagelist(pPg);
    if( rc || (!pPager->noSync && sqlite3OsSync(&pPager->fd)!=SQLITE_OK) ){
      goto commit_abort;
    }
  }
#endif
  rc = sqlite3pager_sync(pPager, 0);
  if( rc!=SQLITE_OK ) goto commit_abort;

  rc = pager_unwritelock(pPager);
  pPager->dbSize = -1;
  return rc;

  /* Jump here if anything goes wrong during the commit process.
  */
commit_abort:
................................................................................
    sqlite3OsReadLock(&pPager->fd);
    return SQLITE_NOMEM;
  }
#ifndef NDEBUG
  rc = sqlite3OsFileSize(&pPager->jfd, &pPager->stmtJSize);
  if( rc ) goto stmt_begin_failed;
  assert( pPager->stmtJSize == 
    pPager->nRec*JOURNAL_PG_SZ(journal_format) + 
    JOURNAL_HDR_SZ(pPager, journal_format) );
#endif
  pPager->stmtJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
                         + JOURNAL_HDR_SZ(pPager, journal_format);
  pPager->stmtSize = pPager->dbSize;
  if( !pPager->stmtOpen ){
    rc = sqlite3pager_opentemp(zTemp, &pPager->stfd);
    if( rc ) goto stmt_begin_failed;
    pPager->stmtOpen = 1;
    pPager->stmtNRec = 0;
  }
................................................................................
  Pager *pPager,
  void (*xCodec)(void*,void*,Pgno,int),
  void *pCodecArg
){
  pPager->xCodec = xCodec;
  pPager->pCodecArg = pCodecArg;
}

/*
** Sync the database file for the pager pPager. zMaster points to the name
** of a master journal file that should be written into the individual
** journal file. zMaster may be NULL, which is interpreted as no master
** journal (a single database transaction).
**
** This routine ensures that the journal is synced, all dirty pages written
** to the database file and the database file synced. The only thing that
** remains to commit the transaction is to delete the journal file (or
** master journal file if specified).
**
** Note that if zMaster==NULL, this does not overwrite a previous value
** passed to an sqlite3pager_sync() call.
*/
int sqlite3pager_sync(Pager *pPager, const char *zMaster){
  int rc = SQLITE_OK;

  /* If this is an in-memory db, or no pages have been written to, this
  ** function is a no-op.
  */
  if( !pPager->memDb && pPager->dirtyFile ){
    PgHdr *pPg;
    assert( pPager->journalOpen );

    /* Sync the journal file */
    rc = syncJournal(pPager, zMaster);
    if( rc!=SQLITE_OK ) goto sync_exit;

    /* Write all dirty pages to the database file */
    pPg = pager_get_all_dirty_pages(pPager);
    rc = pager_write_pagelist(pPg);
    if( rc!=SQLITE_OK ) goto sync_exit;

    /* If any pages were actually written, sync the database file */
    if( pPg && !pPager->noSync ){
      rc = sqlite3OsSync(&pPager->fd);
    }
  }

sync_exit:
  return rc;
}

#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
*/
void sqlite3pager_refdump(Pager *pPager){
  PgHdr *pPg;

Changes to src/pager.h.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
80
81
82
83
84
85
86
87
88

89
90
91
92
93
94
95
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.29 2004/05/14 01:58:13 drh Exp $
*/

/*
** The size of a page.
**
** You can change this value to another (reasonable) value you want.
** It need not be a power of two, though the interface to the disk
................................................................................
int sqlite3pager_unref(void*);
Pgno sqlite3pager_pagenumber(void*);
int sqlite3pager_write(void*);
int sqlite3pager_iswriteable(void*);
int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void*);
int sqlite3pager_pagecount(Pager*);
int sqlite3pager_truncate(Pager*,Pgno);
int sqlite3pager_begin(void*);
int sqlite3pager_commit(Pager*);

int sqlite3pager_rollback(Pager*);
int sqlite3pager_isreadonly(Pager*);
int sqlite3pager_stmt_begin(Pager*);
int sqlite3pager_stmt_commit(Pager*);
int sqlite3pager_stmt_rollback(Pager*);
void sqlite3pager_dont_rollback(void*);
void sqlite3pager_dont_write(Pager*, Pgno);







|







 







|

>







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.30 2004/06/03 16:08:42 danielk1977 Exp $
*/

/*
** The size of a page.
**
** You can change this value to another (reasonable) value you want.
** It need not be a power of two, though the interface to the disk
................................................................................
int sqlite3pager_unref(void*);
Pgno sqlite3pager_pagenumber(void*);
int sqlite3pager_write(void*);
int sqlite3pager_iswriteable(void*);
int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void*);
int sqlite3pager_pagecount(Pager*);
int sqlite3pager_truncate(Pager*,Pgno);
int sqlite3pager_begin(void*,int);
int sqlite3pager_commit(Pager*);
int sqlite3pager_sync(Pager*,const char *zMaster);
int sqlite3pager_rollback(Pager*);
int sqlite3pager_isreadonly(Pager*);
int sqlite3pager_stmt_begin(Pager*);
int sqlite3pager_stmt_commit(Pager*);
int sqlite3pager_stmt_rollback(Pager*);
void sqlite3pager_dont_rollback(void*);
void sqlite3pager_dont_write(Pager*, Pgno);

Changes to src/pragma.c.

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains code used to implement the PRAGMA command.
**
** $Id: pragma.c,v 1.35 2004/05/31 08:26:49 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** Interpret the given string as a boolean value.
*/
................................................................................
                         (char*)&pIdx->keyInfo, P3_KEYINFO);
        }
        sqlite3VdbeAddOp(v, OP_Integer, 0, 0);
        sqlite3VdbeAddOp(v, OP_MemStore, 1, 1);
        loopTop = sqlite3VdbeAddOp(v, OP_Rewind, 1, 0);
        sqlite3VdbeAddOp(v, OP_MemIncr, 1, 0);
        for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){
          int k, jmp2;
          static VdbeOpList idxErr[] = {
            { OP_MemIncr,     0,  0,  0},
            { OP_String8,      0,  0,  "rowid "},
            { OP_Recno,       1,  0,  0},
            { OP_String8,      0,  0,  " missing from index "},
            { OP_String8,      0,  0,  0},    /* 4 */
            { OP_Concat,      4,  0,  0},







|







 







|







7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains code used to implement the PRAGMA command.
**
** $Id: pragma.c,v 1.36 2004/06/03 16:08:42 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** Interpret the given string as a boolean value.
*/
................................................................................
                         (char*)&pIdx->keyInfo, P3_KEYINFO);
        }
        sqlite3VdbeAddOp(v, OP_Integer, 0, 0);
        sqlite3VdbeAddOp(v, OP_MemStore, 1, 1);
        loopTop = sqlite3VdbeAddOp(v, OP_Rewind, 1, 0);
        sqlite3VdbeAddOp(v, OP_MemIncr, 1, 0);
        for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){
          int jmp2;
          static VdbeOpList idxErr[] = {
            { OP_MemIncr,     0,  0,  0},
            { OP_String8,      0,  0,  "rowid "},
            { OP_Recno,       1,  0,  0},
            { OP_String8,      0,  0,  " missing from index "},
            { OP_String8,      0,  0,  0},    /* 4 */
            { OP_Concat,      4,  0,  0},

Changes to src/test3.c.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the btree.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test3.c,v 1.41 2004/05/31 10:01:35 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "pager.h"
#include "btree.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
................................................................................
  int rc;
  if( argc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
       " ID\"", 0);
    return TCL_ERROR;
  }
  if( Tcl_GetInt(interp, argv[1], (int*)&pBt) ) return TCL_ERROR;
  rc = sqlite3BtreeBeginTrans(pBt, 1);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  return TCL_OK;
}








|







 







|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the btree.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test3.c,v 1.42 2004/06/03 16:08:42 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "pager.h"
#include "btree.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
................................................................................
  int rc;
  if( argc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
       " ID\"", 0);
    return TCL_ERROR;
  }
  if( Tcl_GetInt(interp, argv[1], (int*)&pBt) ) return TCL_ERROR;
  rc = sqlite3BtreeBeginTrans(pBt, 1, 0);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  return TCL_OK;
}

Changes to src/vacuum.c.

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
**
*************************************************************************
** This file contains code used to implement the VACUUM command.
**
** Most of the code in this file may be omitted by defining the
** SQLITE_OMIT_VACUUM macro.
**
** $Id: vacuum.c,v 1.20 2004/05/31 10:01:35 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"

#if !defined(SQLITE_OMIT_VACUUM) || SQLITE_OMIT_VACUUM
/*
** Generate a random name of 20 character in length.
................................................................................
  */
  pTemp = db->aDb[db->nDb-1].pBt;
  if( sqlite3BtreeIsInTrans(pTemp) ){
    Btree *pMain = db->aDb[0].pBt;
    u32 meta;

    assert( 0==sqlite3BtreeIsInTrans(pMain) );
    rc = sqlite3BtreeBeginTrans(db->aDb[0].pBt, 1);
    if( rc!=SQLITE_OK ) goto end_of_vacuum;

    /* Copy Btree meta values 3 and 4. These correspond to SQL layer meta 
    ** values 2 and 3, the default values of a couple of pragmas.
    */
    rc = sqlite3BtreeGetMeta(pMain, 3, &meta);
    if( rc!=SQLITE_OK ) goto end_of_vacuum;







|







 







|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
**
*************************************************************************
** This file contains code used to implement the VACUUM command.
**
** Most of the code in this file may be omitted by defining the
** SQLITE_OMIT_VACUUM macro.
**
** $Id: vacuum.c,v 1.21 2004/06/03 16:08:42 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"

#if !defined(SQLITE_OMIT_VACUUM) || SQLITE_OMIT_VACUUM
/*
** Generate a random name of 20 character in length.
................................................................................
  */
  pTemp = db->aDb[db->nDb-1].pBt;
  if( sqlite3BtreeIsInTrans(pTemp) ){
    Btree *pMain = db->aDb[0].pBt;
    u32 meta;

    assert( 0==sqlite3BtreeIsInTrans(pMain) );
    rc = sqlite3BtreeBeginTrans(db->aDb[0].pBt, 1, 0);
    if( rc!=SQLITE_OK ) goto end_of_vacuum;

    /* Copy Btree meta values 3 and 4. These correspond to SQL layer meta 
    ** values 2 and 3, the default values of a couple of pragmas.
    */
    rc = sqlite3BtreeGetMeta(pMain, 3, &meta);
    if( rc!=SQLITE_OK ) goto end_of_vacuum;

Changes to src/vdbe.c.

39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
....
2285
2286
2287
2288
2289
2290
2291

2292
2293
2294
2295
2296
2297
2298
2299
....
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
**
** Various scripts scan this source file in order to generate HTML
** documentation, headers files, or other derived files.  The formatting
** of the code in this file is, therefore, important.  See other comments
** in this file for details.  If in doubt, do not deviate from existing
** commenting and indentation practices when changing or adding code.
**
** $Id: vdbe.c,v 1.355 2004/06/02 01:22:02 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include "vdbeInt.h"

/*
................................................................................
  int i = pOp->p1;
  Btree *pBt;

  assert( i>=0 && i<db->nDb );
  pBt = db->aDb[i].pBt;

  while( pBt && busy ){

    rc = sqlite3BtreeBeginTrans(db->aDb[i].pBt, pOp->p2);
    switch( rc ){
      case SQLITE_BUSY: {
        if( db->xBusyCallback==0 ){
          p->pc = pc;
          p->rc = SQLITE_BUSY;
          p->pTos = pTos;
          return SQLITE_BUSY;
................................................................................
  pCx = p->apCsr[i];
  sqlite3VdbeCleanupCursor(pCx);
  memset(pCx, 0, sizeof(*pCx));
  pCx->nullRow = 1;
  rc = sqlite3BtreeFactory(db, 0, 1, TEMP_PAGES, &pCx->pBt);

  if( rc==SQLITE_OK ){
    rc = sqlite3BtreeBeginTrans(pCx->pBt, 1);
  }
  if( rc==SQLITE_OK ){
    /* If a transient index is required, create it by calling
    ** sqlite3BtreeCreateTable() with the BTREE_ZERODATA flag before
    ** opening it. If a transient table is required, just use the
    ** automatically created table with root-page 1 (an INTKEY table).
    */







|







 







>
|







 







|







39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
....
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
....
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
**
** Various scripts scan this source file in order to generate HTML
** documentation, headers files, or other derived files.  The formatting
** of the code in this file is, therefore, important.  See other comments
** in this file for details.  If in doubt, do not deviate from existing
** commenting and indentation practices when changing or adding code.
**
** $Id: vdbe.c,v 1.356 2004/06/03 16:08:42 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>
#include "vdbeInt.h"

/*
................................................................................
  int i = pOp->p1;
  Btree *pBt;

  assert( i>=0 && i<db->nDb );
  pBt = db->aDb[i].pBt;

  while( pBt && busy ){
    int nMaster = strlen(sqlite3BtreeGetFilename(db->aDb[0].pBt))+11;
    rc = sqlite3BtreeBeginTrans(db->aDb[i].pBt, pOp->p2, nMaster);
    switch( rc ){
      case SQLITE_BUSY: {
        if( db->xBusyCallback==0 ){
          p->pc = pc;
          p->rc = SQLITE_BUSY;
          p->pTos = pTos;
          return SQLITE_BUSY;
................................................................................
  pCx = p->apCsr[i];
  sqlite3VdbeCleanupCursor(pCx);
  memset(pCx, 0, sizeof(*pCx));
  pCx->nullRow = 1;
  rc = sqlite3BtreeFactory(db, 0, 1, TEMP_PAGES, &pCx->pBt);

  if( rc==SQLITE_OK ){
    rc = sqlite3BtreeBeginTrans(pCx->pBt, 1, 0);
  }
  if( rc==SQLITE_OK ){
    /* If a transient index is required, create it by calling
    ** sqlite3BtreeCreateTable() with the BTREE_ZERODATA flag before
    ** opening it. If a transient table is required, just use the
    ** automatically created table with root-page 1 (an INTKEY table).
    */

Changes to src/vdbeInt.h.

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
** intToKey() and keyToInt() used to transform the rowid.  But with
** the latest versions of the design they are no-ops.
*/
#define keyToInt(X)   (X)
#define intToKey(X)   (X)

/*
** The makefile scans this source file and creates the following
** array of string constants which are the names of all VDBE opcodes.
** This array is defined in a separate source code file named opcode.c
** which is automatically generated by the makefile.
*/
extern char *sqlite3OpcodeNames[];

/*
** SQL is translated into a sequence of instructions to be
** executed by a virtual machine.  Each instruction is an instance
** of the following structure.







|
|
|
|







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
** intToKey() and keyToInt() used to transform the rowid.  But with
** the latest versions of the design they are no-ops.
*/
#define keyToInt(X)   (X)
#define intToKey(X)   (X)

/*
** The makefile scans the vdbe.c source file and creates the following
** array of string constants which are the names of all VDBE opcodes.  This
** array is defined in a separate source code file named opcode.c which is
** automatically generated by the makefile.
*/
extern char *sqlite3OpcodeNames[];

/*
** SQL is translated into a sequence of instructions to be
** executed by a virtual machine.  Each instruction is an instance
** of the following structure.

Changes to src/vdbeaux.c.

890
891
892
893
894
895
896
















































































































































897
898
899
900
901
902
903
...
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
...
952
953
954
955
956
957
958
959
960


961
962
963
964








965
966

967
968
969
970
971
972
973
...
974
975
976
977
978
979
980





981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
    rc = sqlite3VdbeMemSetStr(pColName, zName, N, TEXT_Utf8, N>0);
  }
  if( rc==SQLITE_OK && N==P3_DYNAMIC ){
    pColName->flags = (pColName->flags&(~MEM_Static))|MEM_Dyn;
  }
  return rc;
}

















































































































































/* 
** This routine checks that the sqlite3.activeVdbeCnt count variable
** matches the number of vdbe's in the list sqlite3.pVdbe that are
** currently active. An assertion fails if the two counts do not match.
**
** This is a no-op if NDEBUG is defined.
................................................................................
** After this routine is run, the VDBE should be ready to be executed
** again.
*/
int sqlite3VdbeReset(Vdbe *p, char **pzErrMsg){
  sqlite *db = p->db;
  int i;
  int (*xFunc)(Btree *pBt) = 0;  /* Function to call on each btree backend */
  int needXcommit = 0;

  if( p->magic!=VDBE_MAGIC_RUN && p->magic!=VDBE_MAGIC_HALT ){
    sqlite3SetString(pzErrMsg, sqlite3ErrStr(SQLITE_MISUSE), (char*)0);
    sqlite3Error(p->db, SQLITE_MISUSE, 0 ,0);
    db->activeVdbeCnt--;
    return SQLITE_MISUSE;
  }
................................................................................
    sqlite3SetString(pzErrMsg, sqlite3ErrStr(p->rc), (char*)0);
    sqlite3Error(p->db, p->rc, 0);
  }else{
    sqlite3Error(p->db, SQLITE_OK, 0);
  }
  Cleanup(p);

  /* Figure out which function to call on the btree backends that
  ** have active transactions.


  */
  checkActiveVdbeCnt(db);
  if( db->autoCommit && db->activeVdbeCnt==1 ){
    if( p->rc==SQLITE_OK || p->errorAction==OE_Fail ){








      xFunc = sqlite3BtreeCommit;
      needXcommit = 1;

    }else{
      xFunc = sqlite3BtreeRollback;
    }
  }else{
    if( p->rc==SQLITE_OK || p->errorAction==OE_Fail ){
      xFunc = sqlite3BtreeCommitStmt;
    }else if( p->errorAction==OE_Abort ){
................................................................................
      xFunc = sqlite3BtreeRollbackStmt;
    }else{
      xFunc = sqlite3BtreeRollback;
      db->autoCommit = 1;
    }
  }






  for(i=0; xFunc && i<db->nDb; i++){
    int rc;
    Btree *pBt = db->aDb[i].pBt;
    if( sqlite3BtreeIsInTrans(pBt) ){
      if( db->xCommitCallback && needXcommit ){
        if( db->xCommitCallback(db->pCommitArg)!=0 ){
          p->rc = SQLITE_CONSTRAINT;
          sqlite3Error(db, SQLITE_CONSTRAINT, 0);
          xFunc = sqlite3BtreeRollback;
        }
        needXcommit = 0;
      }
    }
    if( pBt ){
      rc = xFunc(pBt);
      if( p->rc==SQLITE_OK ) p->rc = rc;
    }
  }









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<







 







|
|
>
>




>
>
>
>
>
>
>
>
|
<
>







 







>
>
>
>
>
|


<
<
<
<
<
<
<
<
<
<







890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
....
1072
1073
1074
1075
1076
1077
1078

1079
1080
1081
1082
1083
1084
1085
....
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118

1119
1120
1121
1122
1123
1124
1125
1126
....
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141










1142
1143
1144
1145
1146
1147
1148
    rc = sqlite3VdbeMemSetStr(pColName, zName, N, TEXT_Utf8, N>0);
  }
  if( rc==SQLITE_OK && N==P3_DYNAMIC ){
    pColName->flags = (pColName->flags&(~MEM_Static))|MEM_Dyn;
  }
  return rc;
}

/*
** A read or write transaction may or may not be active on database handle
** db. If a transaction is active, commit it. If there is a
** write-transaction spanning more than one database file, this routine
** takes care of the master journal trickery.
*/
static int vdbeCommit(sqlite *db){
  int i;
  int nTrans = 0;  /* Number of databases with an active write-transaction */
  int rc = SQLITE_OK;
  int needXcommit = 0;

  for(i=0; i<db->nDb; i++){ 
    Btree *pBt = db->aDb[i].pBt;
    if( pBt && sqlite3BtreeIsInTrans(pBt) ){
      needXcommit = 1;
      if( i!=1 ) nTrans++;
    }
  }

  /* If there are any write-transactions at all, invoke the commit hook */
  if( needXcommit && db->xCommitCallback ){
    if( db->xCommitCallback(db->pCommitArg) ){
      return SQLITE_CONSTRAINT;
    }
  }

  /* The simple case - if less than two databases have write-transactions
  ** active, there is no need for the master-journal.
  */
  if( nTrans<2 ){
    for(i=0; i<db->nDb; i++){ 
      Btree *pBt = db->aDb[i].pBt;
      if( pBt ){
        int rc2 = sqlite3BtreeCommit(db->aDb[i].pBt);
        if( rc==SQLITE_OK ) rc = rc2;
      }
    }
  }

  /* The complex case - There is a multi-file write-transaction active.
  ** This requires a master journal file to ensure the transaction is
  ** committed atomicly.
  */
  else{
    char *zMaster = 0;   /* File-name for the master journal */
    char const *zMainFile = sqlite3BtreeGetFilename(db->aDb[0].pBt);
    OsFile master;

    /* Select a master journal file name */
    do {
      int random;
      if( zMaster ){
        sqliteFree(zMaster);
      }    
      sqlite3Randomness(sizeof(random), &random);
      zMaster = sqlite3_mprintf("%s%d", zMainFile, random);
      if( !zMaster ){
        return SQLITE_NOMEM;
      }
    }while( sqlite3OsFileExists(zMaster) );

    /* Open the master journal. */
    rc = sqlite3OsOpenExclusive(zMaster, &master, 0);
    if( rc!=SQLITE_OK ){
      sqliteFree(zMaster);
      return rc;
    }
 
    /* Write the name of each database file in the transaction into the new
    ** master journal file. If an error occurs at this point close
    ** and delete the master journal file. All the individual journal files
    ** still have 'null' as the master journal pointer, so they will roll
    ** back independantly if a failure occurs.
    */
    for(i=0; i<db->nDb; i++){ 
      Btree *pBt = db->aDb[i].pBt;
      if( pBt && sqlite3BtreeIsInTrans(pBt) ){
        char const *zFile = sqlite3BtreeGetFilename(pBt);
        rc = sqlite3OsWrite(&master, zFile, strlen(zFile));
        if( rc!=SQLITE_OK ){
          sqlite3OsClose(&master);
          sqlite3OsDelete(zMaster);
          sqliteFree(zMaster);
          return rc;
        }
        rc = sqlite3OsWrite(&master, "\0", 1);
        if( rc!=SQLITE_OK ){
          sqlite3OsClose(&master);
          sqlite3OsDelete(zMaster);
          sqliteFree(zMaster);
          return rc;
        }
      }
    }

    /* Sync the master journal file */
    rc = sqlite3OsSync(&master);
    sqlite3OsClose(&master);

    /* Sync all the db files involved in the transaction. The same call
    ** sets the master journal pointer in each individual journal. If
    ** an error occurs here, do not delete the master journal file.
    **
    ** If the error occurs during the first call to sqlite3BtreeSync(),
    ** then there is a chance that the master journal file will be
    ** orphaned. But we cannot delete it, in case the master journal
    ** file name was written into the journal file before the failure
    ** occured.
    */
    for(i=0; i<db->nDb; i++){ 
      Btree *pBt = db->aDb[i].pBt;
      if( pBt && sqlite3BtreeIsInTrans(pBt) ){
        rc = sqlite3BtreeSync(pBt, zMaster);
        if( rc!=SQLITE_OK ){
          sqliteFree(zMaster);
          return rc;
        }
      }
    }
    sqliteFree(zMaster);
    zMaster = 0;

    /* Delete the master journal file. This commits the transaction. */
    rc = sqlite3OsDelete(zMaster);
    assert( rc==SQLITE_OK );

    /* All files and directories have already been synced, so the following
    ** calls to sqlite3BtreeCommit() are only closing files and deleting
    ** journals. If something goes wrong while this is happening we don't
    ** really care. The integrity of the transaction is already guarenteed,
    ** but some stray 'cold' journals may be lying around. Returning an
    ** error code won't help matters.
    */
    for(i=0; i<db->nDb; i++){ 
      Btree *pBt = db->aDb[i].pBt;
      if( pBt ){
        sqlite3BtreeCommit(pBt);
      }
    }
  }
  return SQLITE_OK;
}

/* 
** This routine checks that the sqlite3.activeVdbeCnt count variable
** matches the number of vdbe's in the list sqlite3.pVdbe that are
** currently active. An assertion fails if the two counts do not match.
**
** This is a no-op if NDEBUG is defined.
................................................................................
** After this routine is run, the VDBE should be ready to be executed
** again.
*/
int sqlite3VdbeReset(Vdbe *p, char **pzErrMsg){
  sqlite *db = p->db;
  int i;
  int (*xFunc)(Btree *pBt) = 0;  /* Function to call on each btree backend */


  if( p->magic!=VDBE_MAGIC_RUN && p->magic!=VDBE_MAGIC_HALT ){
    sqlite3SetString(pzErrMsg, sqlite3ErrStr(SQLITE_MISUSE), (char*)0);
    sqlite3Error(p->db, SQLITE_MISUSE, 0 ,0);
    db->activeVdbeCnt--;
    return SQLITE_MISUSE;
  }
................................................................................
    sqlite3SetString(pzErrMsg, sqlite3ErrStr(p->rc), (char*)0);
    sqlite3Error(p->db, p->rc, 0);
  }else{
    sqlite3Error(p->db, SQLITE_OK, 0);
  }
  Cleanup(p);

  /* What is done now depends on the exit status of the vdbe, the value of
  ** the sqlite.autoCommit flag and whether or not there are any other
  ** queries in progress. A transaction or statement transaction may need
  ** to be committed or rolled back on each open database file.
  */
  checkActiveVdbeCnt(db);
  if( db->autoCommit && db->activeVdbeCnt==1 ){
    if( p->rc==SQLITE_OK || p->errorAction==OE_Fail ){
      /* The auto-commit flag is true, there are no other active queries
      ** using this handle and the vdbe program was successful or hit an
      ** 'OR FAIL' constraint. This means a commit is required, which is
      ** handled a little differently from the other options.
      */
      p->rc = vdbeCommit(db);
      if( p->rc!=SQLITE_OK ){
        sqlite3Error(p->db, p->rc, 0);
        xFunc = sqlite3BtreeRollback;

      }
    }else{
      xFunc = sqlite3BtreeRollback;
    }
  }else{
    if( p->rc==SQLITE_OK || p->errorAction==OE_Fail ){
      xFunc = sqlite3BtreeCommitStmt;
    }else if( p->errorAction==OE_Abort ){
................................................................................
      xFunc = sqlite3BtreeRollbackStmt;
    }else{
      xFunc = sqlite3BtreeRollback;
      db->autoCommit = 1;
    }
  }

  /* If xFunc is not NULL, then it is one of sqlite3BtreeRollback,
  ** sqlite3BtreeRollbackStmt or sqlite3BtreeCommitStmt. Call it once on
  ** each backend. If an error occurs and the return code is still
  ** SQLITE_OK, set the return code to the new error value.
  */
  for(i=0; xFunc && i<db->nDb; i++){ 
    int rc;
    Btree *pBt = db->aDb[i].pBt;










    if( pBt ){
      rc = xFunc(pBt);
      if( p->rc==SQLITE_OK ) p->rc = rc;
    }
  }