SQLite

Check-in [00f08fc0b5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:In the pager, cache a pointer to the first page on the freelist that does not need to be synced. This makes a fetch of a page that is not in cache go a lot faster when the cache is full. This check-in also adds some performance instrumentation to the OS layer. (CVS 842)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 00f08fc0b5b6b9c5efbf15a62f9a1cc1cfa71283
User & Date: drh 2003-01-21 02:39:37.000
Context
2003-01-21
23:06
fix a typo on the quickstart.html page. (CVS 843) (check-in: 61869bb51b user: drh tags: trunk)
02:39
In the pager, cache a pointer to the first page on the freelist that does not need to be synced. This makes a fetch of a page that is not in cache go a lot faster when the cache is full. This check-in also adds some performance instrumentation to the OS layer. (CVS 842) (check-in: 00f08fc0b5 user: drh tags: trunk)
2003-01-19
03:59
Update comments. No changes to code. (CVS 841) (check-in: f6a8706872 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/os.c.
48
49
50
51
52
53
54











55
56
57
58


59


60
61
62
63


64
65
66
67
68
69
70
#endif

/*
** Macros for performance tracing.  Normally turned off
*/
#if 0
static int last_page = 0;











#define SEEK(X)       last_page=(X)
#define TRACE1(X)     fprintf(stderr,X)
#define TRACE2(X,Y)   fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z) fprintf(stderr,X,Y,Z)


#else


#define SEEK(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)


#endif


#if OS_UNIX
/*
** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 specify that when a process







>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
>
>

>
>




>
>







48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#endif

/*
** Macros for performance tracing.  Normally turned off
*/
#if 0
static int last_page = 0;
__inline__ unsigned long long int hwtime(void){
  unsigned long long int x;
  __asm__("rdtsc\n\t"
          "mov %%edx, %%ecx\n\t"
          :"=A" (x));
  return x;
}
static unsigned long long int g_start;
static unsigned int elapse;
#define TIMER_START       g_start=hwtime()
#define TIMER_END         elapse=hwtime()-g_start
#define SEEK(X)           last_page=(X)
#define TRACE1(X)         fprintf(stderr,X)
#define TRACE2(X,Y)       fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z)     fprintf(stderr,X,Y,Z)
#define TRACE4(X,Y,Z,A)   fprintf(stderr,X,Y,Z,A)
#define TRACE5(X,Y,Z,A,B) fprintf(stderr,X,Y,Z,A,B)
#else
#define TIMER_START
#define TIMER_END
#define SEEK(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#define TRACE4(X,Y,Z,A)
#define TRACE5(X,Y,Z,A,B)
#endif


#if OS_UNIX
/*
** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 specify that when a process
660
661
662
663
664
665
666
667
668



669
670
671
672
673
674
675
** bytes were read successfully and SQLITE_IOERR if anything goes
** wrong.
*/
int sqliteOsRead(OsFile *id, void *pBuf, int amt){
#if OS_UNIX
  int got;
  SimulateIOError(SQLITE_IOERR);
  TRACE3("READ    %-3d %d\n", id->fd, last_page);
  got = read(id->fd, pBuf, amt);



  /* if( got<0 ) got = 0; */
  if( got==amt ){
    return SQLITE_OK;
  }else{
    return SQLITE_IOERR;
  }
#endif







|

>
>
>







677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
** bytes were read successfully and SQLITE_IOERR if anything goes
** wrong.
*/
int sqliteOsRead(OsFile *id, void *pBuf, int amt){
#if OS_UNIX
  int got;
  SimulateIOError(SQLITE_IOERR);
  TIMER_START;
  got = read(id->fd, pBuf, amt);
  TIMER_END;
  TRACE4("READ    %-3d %7d %d\n", id->fd, last_page, elapse);
  SEEK(0);
  /* if( got<0 ) got = 0; */
  if( got==amt ){
    return SQLITE_OK;
  }else{
    return SQLITE_IOERR;
  }
#endif
708
709
710
711
712
713
714
715
716
717
718
719



720
721
722
723
724
725
726
** Write data from a buffer into a file.  Return SQLITE_OK on success
** or some other error code on failure.
*/
int sqliteOsWrite(OsFile *id, const void *pBuf, int amt){
#if OS_UNIX
  int wrote = 0;
  SimulateIOError(SQLITE_IOERR);
  TRACE3("WRITE   %-3d %d\n", id->fd, last_page);
  while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){
    amt -= wrote;
    pBuf = &((char*)pBuf)[wrote];
  }



  if( amt>0 ){
    return SQLITE_FULL;
  }
  return SQLITE_OK;
#endif
#if OS_WIN
  int rc;







|




>
>
>







728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
** Write data from a buffer into a file.  Return SQLITE_OK on success
** or some other error code on failure.
*/
int sqliteOsWrite(OsFile *id, const void *pBuf, int amt){
#if OS_UNIX
  int wrote = 0;
  SimulateIOError(SQLITE_IOERR);
  TIMER_START;
  while( amt>0 && (wrote = write(id->fd, pBuf, amt))>0 ){
    amt -= wrote;
    pBuf = &((char*)pBuf)[wrote];
  }
  TIMER_END;
  TRACE4("WRITE   %-3d %7d %d\n", id->fd, last_page, elapse);
  SEEK(0);
  if( amt>0 ){
    return SQLITE_FULL;
  }
  return SQLITE_OK;
#endif
#if OS_WIN
  int rc;
Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.68 2003/01/16 13:42:43 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>








|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.69 2003/01/21 02:39:37 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

158
159
160
161
162
163
164

165
166
167
168
169
170
171
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyFile;               /* True if database file has changed in any way */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 journalFormat;           /* Version number of the journal file */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInCkpt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */

  PgHdr *pAll;                /* List of all pages */
  PgHdr *pCkpt;               /* List of pages in the checkpoint journal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
};

/*
** These are bits that can be set in Pager.errMask.







>







158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyFile;               /* True if database file has changed in any way */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 journalFormat;           /* Version number of the journal file */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInCkpt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pCkpt;               /* List of pages in the checkpoint journal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
};

/*
** These are bits that can be set in Pager.errMask.
342
343
344
345
346
347
348

349
350
351
352
353
354
355
static void pager_reset(Pager *pPager){
  PgHdr *pPg, *pNext;
  for(pPg=pPager->pAll; pPg; pPg=pNext){
    pNext = pPg->pNextAll;
    sqliteFree(pPg);
  }
  pPager->pFirst = 0;

  pPager->pLast = 0;
  pPager->pAll = 0;
  memset(pPager->aHash, 0, sizeof(pPager->aHash));
  pPager->nPage = 0;
  if( pPager->state>=SQLITE_WRITELOCK ){
    sqlitepager_rollback(pPager);
  }







>







343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
static void pager_reset(Pager *pPager){
  PgHdr *pPg, *pNext;
  for(pPg=pPager->pAll; pPg; pPg=pNext){
    pNext = pPg->pNextAll;
    sqliteFree(pPg);
  }
  pPager->pFirst = 0;
  pPager->pFirstSynced = 0;
  pPager->pLast = 0;
  pPager->pAll = 0;
  memset(pPager->aHash, 0, sizeof(pPager->aHash));
  pPager->nPage = 0;
  if( pPager->state>=SQLITE_WRITELOCK ){
    sqlitepager_rollback(pPager);
  }
733
734
735
736
737
738
739

740
741
742
743
744
745
746
  pPager->state = SQLITE_UNLOCK;
  pPager->errMask = 0;
  pPager->tempFile = tempFile;
  pPager->readOnly = readOnly;
  pPager->needSync = 0;
  pPager->noSync = pPager->tempFile || !useJournal;
  pPager->pFirst = 0;

  pPager->pLast = 0;
  pPager->nExtra = nExtra;
  memset(pPager->aHash, 0, sizeof(pPager->aHash));
  *ppPager = pPager;
  return SQLITE_OK;
}








>







735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
  pPager->state = SQLITE_UNLOCK;
  pPager->errMask = 0;
  pPager->tempFile = tempFile;
  pPager->readOnly = readOnly;
  pPager->needSync = 0;
  pPager->noSync = pPager->tempFile || !useJournal;
  pPager->pFirst = 0;
  pPager->pFirstSynced = 0;
  pPager->pLast = 0;
  pPager->nExtra = nExtra;
  memset(pPager->aHash, 0, sizeof(pPager->aHash));
  *ppPager = pPager;
  return SQLITE_OK;
}

833
834
835
836
837
838
839





840
841
842
843
844
845
846
** currently on the freelist (the reference count is zero) then
** remove it from the freelist.
*/
#define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
static void _page_ref(PgHdr *pPg){
  if( pPg->nRef==0 ){
    /* The page is currently on the freelist.  Remove it. */





    if( pPg->pPrevFree ){
      pPg->pPrevFree->pNextFree = pPg->pNextFree;
    }else{
      pPg->pPager->pFirst = pPg->pNextFree;
    }
    if( pPg->pNextFree ){
      pPg->pNextFree->pPrevFree = pPg->pPrevFree;







>
>
>
>
>







836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
** currently on the freelist (the reference count is zero) then
** remove it from the freelist.
*/
#define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
static void _page_ref(PgHdr *pPg){
  if( pPg->nRef==0 ){
    /* The page is currently on the freelist.  Remove it. */
    if( pPg==pPg->pPager->pFirstSynced ){
      PgHdr *p = pPg->pNextFree;
      while( p && p->needSync ){ p = p->pNextFree; }
      pPg->pPager->pFirstSynced = p;
    }
    if( pPg->pPrevFree ){
      pPg->pPrevFree->pNextFree = pPg->pNextFree;
    }else{
      pPg->pPager->pFirst = pPg->pNextFree;
    }
    if( pPg->pNextFree ){
      pPg->pNextFree->pPrevFree = pPg->pPrevFree;
897
898
899
900
901
902
903
904
905
906
907
908
909
910

















911
912
913
914
915
916
917
#ifndef NDEBUG
      rc = sqliteOsFileSize(&pPager->jfd, &pPager->syncJSize);
      if( rc!=0 ) return rc;
#endif
      pPager->journalStarted = 1;
    }
    pPager->needSync = 0;
  }

  /* Erase the needSync flag from every page.
  */
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    pPg->needSync = 0;
  }


















  return rc;
}

/*
** Acquire a page.
**







|
<
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







905
906
907
908
909
910
911
912

913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
#ifndef NDEBUG
      rc = sqliteOsFileSize(&pPager->jfd, &pPager->syncJSize);
      if( rc!=0 ) return rc;
#endif
      pPager->journalStarted = 1;
    }
    pPager->needSync = 0;


    /* Erase the needSync flag from every page.
    */
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
      pPg->needSync = 0;
    }
    pPager->pFirstSynced = pPager->pFirst;
  }

#ifndef NDEBUG
  /* If the Pager.needSync flag is clear then the PgHdr.needSync
  ** flag must also be clear for all pages.  Verify that this
  ** invariant is true.
  */
  else{
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
      assert( pPg->needSync==0 );
    }
    assert( pPager->pFirstSynced==pPager->pFirst );
  }
#endif
  


  return rc;
}

/*
** Acquire a page.
**
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
      pPg->pPrevAll = 0;
      pPager->pAll = pPg;
      pPager->nPage++;
    }else{
      /* Find a page to recycle.  Try to locate a page that does not
      ** require us to do an fsync() on the journal.
      */
      pPg = pPager->pFirst;
      while( pPg && pPg->needSync ){
        pPg = pPg->pNextFree;
      }

      /* If we could not find a page that does not require an fsync()
      ** on the journal file then fsync the journal file.  This is a
      ** very slow operation, so we work hard to avoid it.  But sometimes
      ** it can't be helped.
      */
      if( pPg==0 ){







|
<
<
<







1052
1053
1054
1055
1056
1057
1058
1059



1060
1061
1062
1063
1064
1065
1066
      pPg->pPrevAll = 0;
      pPager->pAll = pPg;
      pPager->nPage++;
    }else{
      /* Find a page to recycle.  Try to locate a page that does not
      ** require us to do an fsync() on the journal.
      */
      pPg = pPager->pFirstSynced;




      /* If we could not find a page that does not require an fsync()
      ** on the journal file then fsync the journal file.  This is a
      ** very slow operation, so we work hard to avoid it.  But sometimes
      ** it can't be helped.
      */
      if( pPg==0 ){
1079
1080
1081
1082
1083
1084
1085





1086
1087
1088
1089
1090
1091
1092
      */
      if( pPg->alwaysRollback ){
        pPager->alwaysRollback = 1;
      }

      /* Unlink the old page from the free list and the hash table
      */





      if( pPg->pPrevFree ){
        pPg->pPrevFree->pNextFree = pPg->pNextFree;
      }else{
        assert( pPager->pFirst==pPg );
        pPager->pFirst = pPg->pNextFree;
      }
      if( pPg->pNextFree ){







>
>
>
>
>







1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
      */
      if( pPg->alwaysRollback ){
        pPager->alwaysRollback = 1;
      }

      /* Unlink the old page from the free list and the hash table
      */
      if( pPg==pPager->pFirstSynced ){
        PgHdr *p = pPg->pNextFree;
        while( p && p->needSync ){ p = p->pNextFree; }
        pPager->pFirstSynced = p;
      }
      if( pPg->pPrevFree ){
        pPg->pPrevFree->pNextFree = pPg->pNextFree;
      }else{
        assert( pPager->pFirst==pPg );
        pPager->pFirst = pPg->pNextFree;
      }
      if( pPg->pNextFree ){
1221
1222
1223
1224
1225
1226
1227



1228
1229
1230
1231
1232
1233
1234
    pPg->pNextFree = 0;
    pPg->pPrevFree = pPager->pLast;
    pPager->pLast = pPg;
    if( pPg->pPrevFree ){
      pPg->pPrevFree->pNextFree = pPg;
    }else{
      pPager->pFirst = pPg;



    }
    if( pPager->xDestructor ){
      pPager->xDestructor(pData);
    }
  
    /* When all pages reach the freelist, drop the read lock from
    ** the database file.







>
>
>







1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
    pPg->pNextFree = 0;
    pPg->pPrevFree = pPager->pLast;
    pPager->pLast = pPg;
    if( pPg->pPrevFree ){
      pPg->pPrevFree->pNextFree = pPg;
    }else{
      pPager->pFirst = pPg;
    }
    if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
      pPager->pFirstSynced = pPg;
    }
    if( pPager->xDestructor ){
      pPager->xDestructor(pData);
    }
  
    /* When all pages reach the freelist, drop the read lock from
    ** the database file.
1573
1574
1575
1576
1577
1578
1579

1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
  if( pPager->state!=SQLITE_WRITELOCK ){
    return SQLITE_ERROR;
  }
  TRACE1("COMMIT\n");
  if( pPager->dirtyFile==0 ){
    /* Exit early (without doing the time-consuming sqliteOsSync() calls)
    ** if there have been no changes to the database file. */

    rc = pager_unwritelock(pPager);
    pPager->dbSize = -1;
    return rc;
  }
  assert( pPager->journalOpen );
  if( !pPager->journalStarted && !pPager->noSync ) pPager->needSync = 1;
  assert( pPager->dirtyFile || !pPager->needSync );
  if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
    goto commit_abort;
  }
  dbChanged = 0;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->dirty==0 ) continue;
    TRACE2("COMMIT-PAGE %d\n", pPg->pgno);







>





<
<







1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614


1615
1616
1617
1618
1619
1620
1621
  if( pPager->state!=SQLITE_WRITELOCK ){
    return SQLITE_ERROR;
  }
  TRACE1("COMMIT\n");
  if( pPager->dirtyFile==0 ){
    /* Exit early (without doing the time-consuming sqliteOsSync() calls)
    ** if there have been no changes to the database file. */
    assert( pPager->needSync==0 );
    rc = pager_unwritelock(pPager);
    pPager->dbSize = -1;
    return rc;
  }
  assert( pPager->journalOpen );


  if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
    goto commit_abort;
  }
  dbChanged = 0;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->dirty==0 ) continue;
    TRACE2("COMMIT-PAGE %d\n", pPg->pgno);