/ Check-in [00f08fc0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:In the pager, cache a pointer to the first page on the freelist that does not need to be synced. This makes a fetch of a page that is not in cache go a lot faster when the cache is full. This check-in also adds some performance instrumentation to the OS layer. (CVS 842)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:00f08fc0b5b6b9c5efbf15a62f9a1cc1cfa71283
User & Date: drh 2003-01-21 02:39:37
Context
2003-01-21
23:06
fix a typo on the quickstart.html page. (CVS 843) check-in: 61869bb5 user: drh tags: trunk
02:39
In the pager, cache a pointer to the first page on the freelist that does not need to be synced. This makes a fetch of a page that is not in cache go a lot faster when the cache is full. This check-in also adds some performance instrumentation to the OS layer. (CVS 842) check-in: 00f08fc0 user: drh tags: trunk
2003-01-19
03:59
Update comments. No changes to code. (CVS 841) check-in: f6a87068 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/os.c.

48
49
50
51
52
53
54











55
56
57
58


59


60
61
62
63


64
65
66
67
68
69
70
...
660
661
662
663
664
665
666
667
668



669
670
671
672
673
674
675
...
708
709
710
711
712
713
714
715
716
717
718
719



720
721
722
723
724
725
726
#endif

/*
** Macros for performance tracing.  Normally turned off
*/
#if 0
static int last_page = 0;











#define SEEK(X)       last_page=(X)
#define TRACE1(X)     fprintf(stderr,X)
#define TRACE2(X,Y)   fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z) fprintf(stderr,X,Y,Z)


#else


#define SEEK(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)


#endif


#if OS_UNIX
/*
** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 specify that when a process
................................................................................
** bytes were read successfully and SQLITE_IOERR if anything goes
** wrong.
*/
int sqliteOsRead(OsFile *id, void *pBuf, int amt){
#if OS_UNIX
  int got;
  SimulateIOError(SQLITE_IOERR);
  TRACE3("READ    %-3d %d\n", id->fd, last_page);
  got = read(id->fd, pBuf, amt);



  /* if( got<0 ) got = 0; */
  if( got==amt ){
    return SQLITE_OK;
  }else{
    return SQLITE_IOERR;
  }
#endif
................................................................................
** Write data from a buffer into a file.  Return SQLITE_OK on success
** or some other error code on failure.
*/
int sqliteOsWrite(OsFile *id, const void *pBuf, int amt){
#if OS_UNIX
  int wrote = 0;
  SimulateIOError(SQLITE_IOERR);
  TRACE3("WRITE   %-3d %d\n", id->fd, last_page);
  while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){
    amt -= wrote;
    pBuf = &((char*)pBuf)[wrote];
  }



  if( amt>0 ){
    return SQLITE_FULL;
  }
  return SQLITE_OK;
#endif
#if OS_WIN
  int rc;







>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
>
>

>
>




>
>







 







|

>
>
>







 







|




>
>
>







48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
...
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
...
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
#endif

/*
** Macros for performance tracing.  Normally turned off
*/
#if 0
static int last_page = 0;
__inline__ unsigned long long int hwtime(void){
  unsigned long long int x;
  __asm__("rdtsc\n\t"
          "mov %%edx, %%ecx\n\t"
          :"=A" (x));
  return x;
}
static unsigned long long int g_start;
static unsigned int elapse;
#define TIMER_START       g_start=hwtime()
#define TIMER_END         elapse=hwtime()-g_start
#define SEEK(X)           last_page=(X)
#define TRACE1(X)         fprintf(stderr,X)
#define TRACE2(X,Y)       fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z)     fprintf(stderr,X,Y,Z)
#define TRACE4(X,Y,Z,A)   fprintf(stderr,X,Y,Z,A)
#define TRACE5(X,Y,Z,A,B) fprintf(stderr,X,Y,Z,A,B)
#else
#define TIMER_START
#define TIMER_END
#define SEEK(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#define TRACE4(X,Y,Z,A)
#define TRACE5(X,Y,Z,A,B)
#endif


#if OS_UNIX
/*
** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 specify that when a process
................................................................................
** bytes were read successfully and SQLITE_IOERR if anything goes
** wrong.
*/
int sqliteOsRead(OsFile *id, void *pBuf, int amt){
#if OS_UNIX
  int got;
  SimulateIOError(SQLITE_IOERR);
  TIMER_START;
  got = read(id->fd, pBuf, amt);
  TIMER_END;
  TRACE4("READ    %-3d %7d %d\n", id->fd, last_page, elapse);
  SEEK(0);
  /* if( got<0 ) got = 0; */
  if( got==amt ){
    return SQLITE_OK;
  }else{
    return SQLITE_IOERR;
  }
#endif
................................................................................
** Write data from a buffer into a file.  Return SQLITE_OK on success
** or some other error code on failure.
*/
int sqliteOsWrite(OsFile *id, const void *pBuf, int amt){
#if OS_UNIX
  int wrote = 0;
  SimulateIOError(SQLITE_IOERR);
  TIMER_START;
  while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){
    amt -= wrote;
    pBuf = &((char*)pBuf)[wrote];
  }
  TIMER_END;
  TRACE4("WRITE   %-3d %7d %d\n", id->fd, last_page, elapse);
  SEEK(0);
  if( amt>0 ){
    return SQLITE_FULL;
  }
  return SQLITE_OK;
#endif
#if OS_WIN
  int rc;

Changes to src/pager.c.

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
...
158
159
160
161
162
163
164

165
166
167
168
169
170
171
...
342
343
344
345
346
347
348

349
350
351
352
353
354
355
...
733
734
735
736
737
738
739

740
741
742
743
744
745
746
...
833
834
835
836
837
838
839





840
841
842
843
844
845
846
...
897
898
899
900
901
902
903
904
905
906
907
908
909
910

















911
912
913
914
915
916
917
....
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
....
1079
1080
1081
1082
1083
1084
1085





1086
1087
1088
1089
1090
1091
1092
....
1221
1222
1223
1224
1225
1226
1227



1228
1229
1230
1231
1232
1233
1234
....
1573
1574
1575
1576
1577
1578
1579

1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.68 2003/01/16 13:42:43 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

................................................................................
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyFile;               /* True if database file has changed in any way */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 journalFormat;           /* Version number of the journal file */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInCkpt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */

  PgHdr *pAll;                /* List of all pages */
  PgHdr *pCkpt;               /* List of pages in the checkpoint journal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
};

/*
** These are bits that can be set in Pager.errMask.
................................................................................
static void pager_reset(Pager *pPager){
  PgHdr *pPg, *pNext;
  for(pPg=pPager->pAll; pPg; pPg=pNext){
    pNext = pPg->pNextAll;
    sqliteFree(pPg);
  }
  pPager->pFirst = 0;

  pPager->pLast = 0;
  pPager->pAll = 0;
  memset(pPager->aHash, 0, sizeof(pPager->aHash));
  pPager->nPage = 0;
  if( pPager->state>=SQLITE_WRITELOCK ){
    sqlitepager_rollback(pPager);
  }
................................................................................
  pPager->state = SQLITE_UNLOCK;
  pPager->errMask = 0;
  pPager->tempFile = tempFile;
  pPager->readOnly = readOnly;
  pPager->needSync = 0;
  pPager->noSync = pPager->tempFile || !useJournal;
  pPager->pFirst = 0;

  pPager->pLast = 0;
  pPager->nExtra = nExtra;
  memset(pPager->aHash, 0, sizeof(pPager->aHash));
  *ppPager = pPager;
  return SQLITE_OK;
}

................................................................................
** currently on the freelist (the reference count is zero) then
** remove it from the freelist.
*/
#define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
static void _page_ref(PgHdr *pPg){
  if( pPg->nRef==0 ){
    /* The page is currently on the freelist.  Remove it. */





    if( pPg->pPrevFree ){
      pPg->pPrevFree->pNextFree = pPg->pNextFree;
    }else{
      pPg->pPager->pFirst = pPg->pNextFree;
    }
    if( pPg->pNextFree ){
      pPg->pNextFree->pPrevFree = pPg->pPrevFree;
................................................................................
#ifndef NDEBUG
      rc = sqliteOsFileSize(&pPager->jfd, &pPager->syncJSize);
      if( rc!=0 ) return rc;
#endif
      pPager->journalStarted = 1;
    }
    pPager->needSync = 0;
  }

  /* Erase the needSync flag from every page.
  */
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    pPg->needSync = 0;
  }


















  return rc;
}

/*
** Acquire a page.
**
................................................................................
      pPg->pPrevAll = 0;
      pPager->pAll = pPg;
      pPager->nPage++;
    }else{
      /* Find a page to recycle.  Try to locate a page that does not
      ** require us to do an fsync() on the journal.
      */
      pPg = pPager->pFirst;
      while( pPg && pPg->needSync ){
        pPg = pPg->pNextFree;
      }

      /* If we could not find a page that does not require an fsync()
      ** on the journal file then fsync the journal file.  This is a
      ** very slow operation, so we work hard to avoid it.  But sometimes
      ** it can't be helped.
      */
      if( pPg==0 ){
................................................................................
      */
      if( pPg->alwaysRollback ){
        pPager->alwaysRollback = 1;
      }

      /* Unlink the old page from the free list and the hash table
      */





      if( pPg->pPrevFree ){
        pPg->pPrevFree->pNextFree = pPg->pNextFree;
      }else{
        assert( pPager->pFirst==pPg );
        pPager->pFirst = pPg->pNextFree;
      }
      if( pPg->pNextFree ){
................................................................................
    pPg->pNextFree = 0;
    pPg->pPrevFree = pPager->pLast;
    pPager->pLast = pPg;
    if( pPg->pPrevFree ){
      pPg->pPrevFree->pNextFree = pPg;
    }else{
      pPager->pFirst = pPg;



    }
    if( pPager->xDestructor ){
      pPager->xDestructor(pData);
    }
  
    /* When all pages reach the freelist, drop the read lock from
    ** the database file.
................................................................................
  if( pPager->state!=SQLITE_WRITELOCK ){
    return SQLITE_ERROR;
  }
  TRACE1("COMMIT\n");
  if( pPager->dirtyFile==0 ){
    /* Exit early (without doing the time-consuming sqliteOsSync() calls)
    ** if there have been no changes to the database file. */

    rc = pager_unwritelock(pPager);
    pPager->dbSize = -1;
    return rc;
  }
  assert( pPager->journalOpen );
  if( !pPager->journalStarted && !pPager->noSync ) pPager->needSync = 1;
  assert( pPager->dirtyFile || !pPager->needSync );
  if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
    goto commit_abort;
  }
  dbChanged = 0;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->dirty==0 ) continue;
    TRACE2("COMMIT-PAGE %d\n", pPg->pgno);







|







 







>







 







>







 







>







 







>
>
>
>
>







 







|
<
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|
<
<
<







 







>
>
>
>
>







 







>
>
>







 







>





<
<







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
...
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
...
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
...
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
...
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
...
905
906
907
908
909
910
911
912

913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
....
1052
1053
1054
1055
1056
1057
1058
1059



1060
1061
1062
1063
1064
1065
1066
....
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
....
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
....
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614


1615
1616
1617
1618
1619
1620
1621
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.69 2003/01/21 02:39:37 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

................................................................................
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyFile;               /* True if database file has changed in any way */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 journalFormat;           /* Version number of the journal file */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInCkpt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pCkpt;               /* List of pages in the checkpoint journal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
};

/*
** These are bits that can be set in Pager.errMask.
................................................................................
static void pager_reset(Pager *pPager){
  PgHdr *pPg, *pNext;
  for(pPg=pPager->pAll; pPg; pPg=pNext){
    pNext = pPg->pNextAll;
    sqliteFree(pPg);
  }
  pPager->pFirst = 0;
  pPager->pFirstSynced = 0;
  pPager->pLast = 0;
  pPager->pAll = 0;
  memset(pPager->aHash, 0, sizeof(pPager->aHash));
  pPager->nPage = 0;
  if( pPager->state>=SQLITE_WRITELOCK ){
    sqlitepager_rollback(pPager);
  }
................................................................................
  pPager->state = SQLITE_UNLOCK;
  pPager->errMask = 0;
  pPager->tempFile = tempFile;
  pPager->readOnly = readOnly;
  pPager->needSync = 0;
  pPager->noSync = pPager->tempFile || !useJournal;
  pPager->pFirst = 0;
  pPager->pFirstSynced = 0;
  pPager->pLast = 0;
  pPager->nExtra = nExtra;
  memset(pPager->aHash, 0, sizeof(pPager->aHash));
  *ppPager = pPager;
  return SQLITE_OK;
}

................................................................................
** currently on the freelist (the reference count is zero) then
** remove it from the freelist.
*/
#define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
static void _page_ref(PgHdr *pPg){
  if( pPg->nRef==0 ){
    /* The page is currently on the freelist.  Remove it. */
    if( pPg==pPg->pPager->pFirstSynced ){
      PgHdr *p = pPg->pNextFree;
      while( p && p->needSync ){ p = p->pNextFree; }
      pPg->pPager->pFirstSynced = p;
    }
    if( pPg->pPrevFree ){
      pPg->pPrevFree->pNextFree = pPg->pNextFree;
    }else{
      pPg->pPager->pFirst = pPg->pNextFree;
    }
    if( pPg->pNextFree ){
      pPg->pNextFree->pPrevFree = pPg->pPrevFree;
................................................................................
#ifndef NDEBUG
      rc = sqliteOsFileSize(&pPager->jfd, &pPager->syncJSize);
      if( rc!=0 ) return rc;
#endif
      pPager->journalStarted = 1;
    }
    pPager->needSync = 0;


    /* Erase the needSync flag from every page.
    */
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
      pPg->needSync = 0;
    }
    pPager->pFirstSynced = pPager->pFirst;
  }

#ifndef NDEBUG
  /* If the Pager.needSync flag is clear then the PgHdr.needSync
  ** flag must also be clear for all pages.  Verify that this
  ** invariant is true.
  */
  else{
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
      assert( pPg->needSync==0 );
    }
    assert( pPager->pFirstSynced==pPager->pFirst );
  }
#endif
  


  return rc;
}

/*
** Acquire a page.
**
................................................................................
      pPg->pPrevAll = 0;
      pPager->pAll = pPg;
      pPager->nPage++;
    }else{
      /* Find a page to recycle.  Try to locate a page that does not
      ** require us to do an fsync() on the journal.
      */
      pPg = pPager->pFirstSynced;




      /* If we could not find a page that does not require an fsync()
      ** on the journal file then fsync the journal file.  This is a
      ** very slow operation, so we work hard to avoid it.  But sometimes
      ** it can't be helped.
      */
      if( pPg==0 ){
................................................................................
      */
      if( pPg->alwaysRollback ){
        pPager->alwaysRollback = 1;
      }

      /* Unlink the old page from the free list and the hash table
      */
      if( pPg==pPager->pFirstSynced ){
        PgHdr *p = pPg->pNextFree;
        while( p && p->needSync ){ p = p->pNextFree; }
        pPager->pFirstSynced = p;
      }
      if( pPg->pPrevFree ){
        pPg->pPrevFree->pNextFree = pPg->pNextFree;
      }else{
        assert( pPager->pFirst==pPg );
        pPager->pFirst = pPg->pNextFree;
      }
      if( pPg->pNextFree ){
................................................................................
    pPg->pNextFree = 0;
    pPg->pPrevFree = pPager->pLast;
    pPager->pLast = pPg;
    if( pPg->pPrevFree ){
      pPg->pPrevFree->pNextFree = pPg;
    }else{
      pPager->pFirst = pPg;
    }
    if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
      pPager->pFirstSynced = pPg;
    }
    if( pPager->xDestructor ){
      pPager->xDestructor(pData);
    }
  
    /* When all pages reach the freelist, drop the read lock from
    ** the database file.
................................................................................
  if( pPager->state!=SQLITE_WRITELOCK ){
    return SQLITE_ERROR;
  }
  TRACE1("COMMIT\n");
  if( pPager->dirtyFile==0 ){
    /* Exit early (without doing the time-consuming sqliteOsSync() calls)
    ** if there have been no changes to the database file. */
    assert( pPager->needSync==0 );
    rc = pager_unwritelock(pPager);
    pPager->dbSize = -1;
    return rc;
  }
  assert( pPager->journalOpen );


  if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
    goto commit_abort;
  }
  dbChanged = 0;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->dirty==0 ) continue;
    TRACE2("COMMIT-PAGE %d\n", pPg->pgno);