SQLite

Check-in [745d66395d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Back out the changes in the pager that sorted pages prior to writing them to the database. Additional measurements showed no performance gains. (CVS 785)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 745d66395daf5cb8463305bbc9e4219534d2b7cf
User & Date: drh 2002-11-11 01:04:48.000
Context
2002-11-11
13:56
Remove extra from HTML output. Ticket #189. (CVS 786) (check-in: dc5d9c129c user: drh tags: trunk)
01:04
Back out the changes in the pager that sorted pages prior to writing them to the database. Additional measurements showed no performance gains. (CVS 785) (check-in: 745d66395d user: drh tags: trunk)
00:05
Replace the atoi() library routine with a faster home-grown version in the VDBE. This gives a dramatic speed improvement for some kinds of queries. (CVS 784) (check-in: 263a8ca40f user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.57 2002/11/10 23:32:57 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>








|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.58 2002/11/11 01:04:48 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
  Pager *pPager;                 /* The pager to which this page belongs */
  Pgno pgno;                     /* The page number for this page */
  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
  int nRef;                      /* Number of users of this page */
  PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
  PgHdr *pNextAll, *pPrevAll;    /* A list of all pages */
  PgHdr *pNextCkpt, *pPrevCkpt;  /* List of pages in the checkpoint journal */
  PgHdr *pSort;                  /* Next in list of pages to be written */
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  /* SQLITE_PAGE_SIZE bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};







<







71
72
73
74
75
76
77

78
79
80
81
82
83
84
  Pager *pPager;                 /* The pager to which this page belongs */
  Pgno pgno;                     /* The page number for this page */
  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
  int nRef;                      /* Number of users of this page */
  PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
  PgHdr *pNextAll, *pPrevAll;    /* A list of all pages */
  PgHdr *pNextCkpt, *pPrevCkpt;  /* List of pages in the checkpoint journal */

  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  /* SQLITE_PAGE_SIZE bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874



875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923

924
925
926
927
928
929
930
** a reference to the page data.
*/
int sqlitepager_ref(void *pData){
  /* Map the caller's data pointer back to its page header and pass
  ** that header to page_ref().  This routine always reports SQLITE_OK.
  */
  PgHdr *pHdr = DATA_TO_PGHDR(pData);
  page_ref(pHdr);
  return SQLITE_OK;
}

/*
** Merge two lists of page headers into one.  Each input list is linked
** through PgHdr.pSort and is already ordered by ascending page number;
** the combined list is ordered the same way and its first entry is
** returned (0 when both inputs are empty).
**
** When the entries at the front of both lists carry the same page
** number, the entry from pLeft is placed ahead of the one from pRight.
**
** This is the merge step of the merge-sort that arranges dirty pages
** in ascending order before they are written back to the disk.
*/
static PgHdr *page_merge(PgHdr *pLeft, PgHdr *pRight){
  PgHdr *pHead = 0;          /* First entry of the merged list */
  PgHdr **ppLink = &pHead;   /* Link field that receives the next entry */
  while( pLeft!=0 && pRight!=0 ){
    if( pRight->pgno<pLeft->pgno ){
      *ppLink = pRight;
      ppLink = &pRight->pSort;
      pRight = pRight->pSort;
    }else{
      /* pLeft wins on smaller-or-equal page number */
      *ppLink = pLeft;
      ppLink = &pLeft->pSort;
      pLeft = pLeft->pSort;
    }
  }
  /* At most one input still has entries; attach whatever remains. */
  *ppLink = pLeft ? pLeft : pRight;
  return pHead;
}


/*
** Sync the journal and then write all free dirty pages to the database
** file.
**
** Writing all free dirty pages to the database after the sync is a
** non-obvious optimization.  fsync() is an expensive operation so we
** want to minimize the number of times it is called. After an fsync() call,
** we are free to write dirty pages back to the database.  It is best
** to go ahead and write as many dirty pages as possible to minimize 
** the risk of having to do another fsync() later on.  Writing dirty
** free pages in this way was observed to make database operations go
** up to 10 times faster.
**
** If we are writing to temporary database, there is no need to preserve
** the integrity of the journal file, so we can save time and skip the
** fsync().
**
** This routine goes to the extra trouble of sorting all the dirty
** pages by their page number prior to writing them.  Tests show that
** writing pages in order by page number gives a modest speed improvement
** under Linux.  
**
** Returns SQLITE_OK on success.  If the journal sync or a page write
** fails, the error code from that call is returned; pages not yet
** written at that point remain marked dirty.
*/
static int syncAllPages(Pager *pPager){
  PgHdr *pPg;
  PgHdr *pToWrite;              /* List of dirty pages, linked by pSort */
# define NSORT 28
  Pgno lastPgno;                /* Page most recently written; 0 if none */
  int i;
  PgHdr *apSorter[NSORT];       /* Partial sorted runs for the merge-sort */
  int rc = SQLITE_OK;

  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync ){
    if( !pPager->tempFile ){
      rc = sqliteOsSync(&pPager->jfd);
      if( rc!=0 ) return rc;
    }
    pPager->needSync = 0;
  }

  /* Create a list of all dirty pages on the free list, linked
  ** together through PgHdr.pSort.
  */
  pToWrite = 0;
  for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
    if( pPg->dirty ){
      pPg->pSort = pToWrite;
      pToWrite = pPg;
    }
  }

  /* Sort the list of dirty pages into ascending order by
  ** page number.
  **
  ** Pages are fed in one at a time.  Slot i of apSorter[] (for
  ** i<NSORT-1) is either empty or holds a sorted run of 2^i pages:
  ** an incoming run is merged with each occupied slot it meets and
  ** carried upward until an empty slot is found.  The final slot
  ** absorbs anything carried past the end.
  */
  for(i=0; i<NSORT; i++){
    apSorter[i] = 0;
  }
  while( pToWrite ){
    pPg = pToWrite;
    pToWrite = pPg->pSort;
    pPg->pSort = 0;
    for(i=0; i<NSORT-1; i++){
      if( apSorter[i]==0 ){
        apSorter[i] = pPg;
        break;
      }else{
        pPg = page_merge(apSorter[i], pPg);
        apSorter[i] = 0;
      }
    }
    if( i>=NSORT-1 ){
      apSorter[NSORT-1] = page_merge(apSorter[NSORT-1],pPg);
    }
  }
  /* Collapse the remaining runs into one fully sorted list */
  pToWrite = 0;
  for(i=0; i<NSORT; i++){
    pToWrite = page_merge(apSorter[i], pToWrite);
  }

  /* Write all dirty pages back to the database and mark
  ** them all clean.
  */
  lastPgno = 0;
  for(pPg=pToWrite; pPg; pPg=pPg->pSort){
    /* The list is in ascending order, so the seek can be skipped only
    ** when this page immediately follows the page just written; the
    ** preceding write is expected to have left the file position there.
    */
    if( lastPgno==0 || pPg->pgno!=lastPgno+1 ){
      sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
    }
    rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
    if( rc!=SQLITE_OK ) break;
    pPg->dirty = 0;
    lastPgno = pPg->pgno;
  }
  return rc;
}

/*
** Acquire a page.
**







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

















<
<
<
<
<



<
<

<
<













|
>
>
>

<


<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
|
|
|
|
>







788
789
790
791
792
793
794



































795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811





812
813
814


815


816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833

834
835






































836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
** a reference to the page data.
*/
int sqlitepager_ref(void *pData){
  /* Map the caller's data pointer back to its page header and pass
  ** that header to page_ref().  This routine always reports SQLITE_OK.
  */
  PgHdr *pHdr = DATA_TO_PGHDR(pData);
  page_ref(pHdr);
  return SQLITE_OK;
}




































/*
** Sync the journal and then write all free dirty pages to the database
** file.
**
** Writing all free dirty pages to the database after the sync is a
** non-obvious optimization.  fsync() is an expensive operation so we
** want to minimize the number of times it is called. After an fsync() call,
** we are free to write dirty pages back to the database.  It is best
** to go ahead and write as many dirty pages as possible to minimize 
** the risk of having to do another fsync() later on.  Writing dirty
** free pages in this way was observed to make database operations go
** up to 10 times faster.
**
** If we are writing to temporary database, there is no need to preserve
** the integrity of the journal file, so we can save time and skip the
** fsync().
**
** Returns SQLITE_OK on success.  If the journal sync or a page write
** fails, the error code from that call is returned; pages not yet
** written at that point remain marked dirty.
*/
static int syncAllPages(Pager *pPager){
  PgHdr *pPg;
  Pgno lastPgno = 0;     /* Page most recently written; 0 means none yet */
  int rc = SQLITE_OK;

  /* Sync the journal before modifying the main database
  ** (assuming there is a journal and it needs to be synced.)
  */
  if( pPager->needSync ){
    if( !pPager->tempFile ){
      rc = sqliteOsSync(&pPager->jfd);
      if( rc!=0 ) return rc;
    }
    pPager->needSync = 0;
  }

  /* Write all dirty free pages to the disk in the order that they
  ** appear on the free list.  We have experimented with sorting the
  ** pages by page numbers so that they are written in order, but that
  ** does not appear to improve performance.
  */
  for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
    if( pPg->dirty ){
      /* The seek can be skipped only when this page immediately follows
      ** the page just written; the preceding write is expected to have
      ** left the file position there.  lastPgno must start at 0 so that
      ** the first dirty page always gets an explicit seek.
      */
      if( lastPgno==0 || pPg->pgno!=lastPgno+1 ){
        sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
      }
      rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
      if( rc!=SQLITE_OK ) break;
      pPg->dirty = 0;
      lastPgno = pPg->pgno;
    }
  }
  return rc;
}

/*
** Acquire a page.
**