Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Fix bug in log recovery (last frame in log was being ignored). Also remove an incorrect assert statement. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | wal |
Files: | files | file ages | folders |
SHA1: |
67d2a89ec2d593a077eb19a6ea2b06cb |
User & Date: | dan 2010-04-16 11:30:18.000 |
Context
2010-04-16
| ||
13:59 | Change the log file format to include a small (12 byte) header at the start of the file. (check-in: 9865d14d60 user: dan tags: wal) | |
11:30 | Fix bug in log recovery (last frame in log was being ignored). Also remove an incorrect assert statement. (check-in: 67d2a89ec2 user: dan tags: wal) | |
2010-04-15
| ||
16:45 | Allow writers to write dirty pages to the log mid-transaction in order to free memory. (check-in: ecd828f969 user: dan tags: wal) | |
Changes
Changes to src/log.c.
︙ | ︙ | |||
8 9 10 11 12 13 14 | #include <unistd.h> #include <fcntl.h> #include <sys/mman.h> typedef struct LogSummaryHdr LogSummaryHdr; typedef struct LogSummary LogSummary; | | | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | #include <unistd.h> #include <fcntl.h> #include <sys/mman.h> typedef struct LogSummaryHdr LogSummaryHdr; typedef struct LogSummary LogSummary; typedef struct LogIterator LogIterator; typedef struct LogLock LogLock; /* ** The following structure may be used to store the same data that ** is stored in the log-summary header. ** |
︙ | ︙ | |||
105 106 107 108 109 110 111 | ** This structure is used to implement an iterator that iterates through ** all frames in the log in database page order. Where two or more frames ** correspond to the same database page, the iterator visits only the ** frame most recently written to the log. ** ** The internals of this structure are only accessed by: ** | | | | | | | 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 | ** This structure is used to implement an iterator that iterates through ** all frames in the log in database page order. Where two or more frames ** correspond to the same database page, the iterator visits only the ** frame most recently written to the log. ** ** The internals of this structure are only accessed by: ** ** logIteratorInit() - Create a new iterator, ** logIteratorNext() - Step an iterator, ** logIteratorFree() - Free an iterator. ** ** This functionality is used by the checkpoint code (see logCheckpoint()). */ struct LogIterator { int nSegment; /* Size of LogIterator.aSegment[] array */ int nFinal; /* Elements in segment nSegment-1 */ struct LogSegment { int iNext; /* Next aIndex index */ u8 *aIndex; /* Pointer to index array */ u32 *aDbPage; /* Pointer to db page array */ } aSegment[1]; }; |
︙ | ︙ | |||
342 343 344 345 346 347 348 349 350 351 352 353 354 355 | u32 *aCksum, /* IN/OUT: Checksum values */ u32 *piPage, /* OUT: Database page number for frame */ u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */ int nData, /* Database page size (size of aData[]) */ u8 *aData, /* Pointer to page data (for checksum) */ u8 *aFrame /* Frame data */ ){ logChecksumBytes(aFrame, 12, aCksum); logChecksumBytes(aData, nData, aCksum); if( aCksum[0]!=sqlite3Get4byte(&aFrame[12]) || aCksum[1]!=sqlite3Get4byte(&aFrame[16]) ){ /* Checksum failed. */ | > > | 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 | u32 *aCksum, /* IN/OUT: Checksum values */ u32 *piPage, /* OUT: Database page number for frame */ u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */ int nData, /* Database page size (size of aData[]) */ u8 *aData, /* Pointer to page data (for checksum) */ u8 *aFrame /* Frame data */ ){ assert( LOG_FRAME_HDRSIZE==20 ); logChecksumBytes(aFrame, 12, aCksum); logChecksumBytes(aData, nData, aCksum); if( aCksum[0]!=sqlite3Get4byte(&aFrame[12]) || aCksum[1]!=sqlite3Get4byte(&aFrame[16]) ){ /* Checksum failed. */ |
︙ | ︙ | |||
509 510 511 512 513 514 515 | return SQLITE_NOMEM; } aData = &aFrame[LOG_FRAME_HDRSIZE]; /* Read all frames from the log file. */ iFrame = 0; iOffset = 0; | | | 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 | return SQLITE_NOMEM; } aData = &aFrame[LOG_FRAME_HDRSIZE]; /* Read all frames from the log file. */ iFrame = 0; iOffset = 0; for(iOffset=0; (iOffset+nFrame)<=nSize; iOffset+=nFrame){ u32 pgno; /* Database page number for frame */ u32 nTruncate; /* dbsize field from frame header */ int isValid; /* True if this frame is valid */ /* Read and decode the next log frame. */ rc = sqlite3OsRead(pFd, aFrame, nFrame, iOffset); if( rc!=SQLITE_OK ) break; |
︙ | ︙ | |||
709 710 711 712 713 714 715 | assert( !pSummary || pSummary->nRef==0 ); sqlite3_free(pSummary); } *ppLog = pRet; return rc; } | | | | 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 | assert( !pSummary || pSummary->nRef==0 ); sqlite3_free(pSummary); } *ppLog = pRet; return rc; } static int logIteratorNext( LogIterator *p, /* Iterator */ u32 *piPage, /* OUT: Next db page to write */ u32 *piFrame /* OUT: Log frame to read from */ ){ u32 iMin = *piPage; u32 iRet = 0xFFFFFFFF; int i; int nBlock = p->nFinal; |
︙ | ︙ | |||
740 741 742 743 744 745 746 | nBlock = 256; } *piPage = iRet; return (iRet==0xFFFFFFFF); } | | | | | | 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 | nBlock = 256; } *piPage = iRet; return (iRet==0xFFFFFFFF); } static LogIterator *logIteratorInit(Log *pLog){ u32 *aData = pLog->pSummary->aData; LogIterator *p; /* Return value */ int nSegment; /* Number of segments to merge */ u32 iLast; /* Last frame in log */ int nByte; /* Number of bytes to allocate */ int i; /* Iterator variable */ int nFinal; /* Number of unindexed entries */ struct LogSegment *pFinal; /* Final (unindexed) segment */ u8 *aTmp; /* Temp space used by merge-sort */ iLast = pLog->hdr.iLastPg; nSegment = (iLast >> 8) + 1; nFinal = (iLast & 0x000000FF); nByte = sizeof(LogIterator) + (nSegment-1)*sizeof(struct LogSegment) + 512; p = (LogIterator *)sqlite3_malloc(nByte); if( p ){ memset(p, 0, nByte); p->nSegment = nSegment; p->nFinal = nFinal; } for(i=0; i<nSegment-1; i++){ |
︙ | ︙ | |||
782 783 784 785 786 787 788 | logMergesort8(pFinal->aDbPage, aTmp, pFinal->aIndex, &nFinal); p->nFinal = nFinal; return p; } /* | | | | | | | 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 | logMergesort8(pFinal->aDbPage, aTmp, pFinal->aIndex, &nFinal); p->nFinal = nFinal; return p; } /* ** Free a log iterator allocated by logIteratorInit(). */ static void logIteratorFree(LogIterator *p){ sqlite3_free(p); } /* ** Checkpoint the contents of the log file. */ static int logCheckpoint( Log *pLog, /* Log connection */ sqlite3_file *pFd, /* File descriptor open on db file */ u8 *zBuf /* Temporary buffer to use */ ){ int rc; /* Return code */ int pgsz = pLog->hdr.pgsz; /* Database page-size */ LogIterator *pIter = 0; /* Log iterator context */ u32 iDbpage = 0; /* Next database page to write */ u32 iFrame = 0; /* Log frame containing data for iDbpage */ if( pLog->hdr.iLastPg==0 ){ return SQLITE_OK; } /* Allocate the iterator */ pIter = logIteratorInit(pLog); if( !pIter ) return SQLITE_NOMEM; /* Sync the log file to disk */ rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags); if( rc!=SQLITE_OK ) goto out; /* Iterate through the contents of the log, copying data to the db file. */ while( 0==logIteratorNext(pIter, &iDbpage, &iFrame) ){ rc = sqlite3OsRead(pLog->pFd, zBuf, pgsz, (iFrame-1) * (pgsz+LOG_FRAME_HDRSIZE) + LOG_FRAME_HDRSIZE ); if( rc!=SQLITE_OK ) goto out; rc = sqlite3OsWrite(pFd, zBuf, pgsz, (iDbpage-1)*pgsz); if( rc!=SQLITE_OK ) goto out; } |
︙ | ︙ | |||
857 858 859 860 861 862 863 | memset(zBuf, 0, LOG_FRAME_HDRSIZE); rc = sqlite3OsWrite(pLog->pFd, zBuf, LOG_FRAME_HDRSIZE, 0); if( rc!=SQLITE_OK ) goto out; rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags); #endif out: | | | 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 | memset(zBuf, 0, LOG_FRAME_HDRSIZE); rc = sqlite3OsWrite(pLog->pFd, zBuf, LOG_FRAME_HDRSIZE, 0); if( rc!=SQLITE_OK ) goto out; rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags); #endif out: logIteratorFree(pIter); return rc; } /* ** Close a connection to a log file. */ int sqlite3LogClose( |
︙ | ︙ |
Changes to src/pager.c.
︙ | ︙ | |||
3249 3250 3251 3252 3253 3254 3255 3256 3257 | static int pagerStress(void *p, PgHdr *pPg){ Pager *pPager = (Pager *)p; int rc = SQLITE_OK; assert( pPg->pPager==pPager ); assert( pPg->flags&PGHDR_DIRTY ); if( pagerUseLog(pPager) ){ /* Write a single frame for this page to the log. */ | > < | 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 | static int pagerStress(void *p, PgHdr *pPg){ Pager *pPager = (Pager *)p; int rc = SQLITE_OK; assert( pPg->pPager==pPager ); assert( pPg->flags&PGHDR_DIRTY ); pPg->pDirty = 0; if( pagerUseLog(pPager) ){ /* Write a single frame for this page to the log. */ rc = sqlite3LogFrames(pPager->pLog, pPager->pageSize, pPg, 0, 0, 0); }else{ /* The doNotSync flag is set by the sqlite3PagerWrite() function while it ** is journalling a set of two or more database pages that are stored ** on the same disk sector. Syncing the journal is not allowed while ** this is happening as it is important that all members of such a ** set of pages are synced to disk together. So, if the page this function |
︙ | ︙ | |||
3320 3321 3322 3323 3324 3325 3326 | rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg) ) ){ rc = subjournalPage(pPg); } /* Write the contents of the page out to the database file. */ if( rc==SQLITE_OK ){ | < | 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 | rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg) ) ){ rc = subjournalPage(pPg); } /* Write the contents of the page out to the database file. */ if( rc==SQLITE_OK ){ rc = pager_write_pagelist(pPg); } } /* Mark the page as clean. */ if( rc==SQLITE_OK ){ PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno)); |
︙ | ︙ |
Changes to test/wal.test.
︙ | ︙ | |||
616 617 618 619 620 621 622 623 624 625 | PRAGMA integrity_check; } } {17 ok} do_test wal-11.14 { list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] } {37 38} finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 | PRAGMA integrity_check; } } {17 ok} do_test wal-11.14 { list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] } {37 38} #------------------------------------------------------------------------- # This block of tests, wal-12.*, tests a problem... # reopen_db do_test wal-12.1 { execsql { PRAGMA page_size = 1024; CREATE TABLE t1(x, y); CREATE TABLE t2(x, y); INSERT INTO t1 VALUES('A', 1); } list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] } {0 5} do_test wal-12.2 { db close sqlite3_wal db test.db execsql { UPDATE t1 SET y = 0 WHERE x = 'A'; } list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] } {3 1} do_test wal-12.3 { execsql { INSERT INTO t2 VALUES('B', 1) } list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044] } {3 2} do_test wal-12.4 { file copy -force test.db test2.db file copy -force test.db-wal test2.db-wal sqlite3_wal db2 test2.db breakpoint execsql { SELECT * FROM t2 } db2 } {B 1} db2 close file copy -force test.db-wal A do_test wal-12.5 { execsql { PRAGMA checkpoint; UPDATE t2 SET y = 2 WHERE x = 'B'; PRAGMA checkpoint; UPDATE t1 SET y = 1 WHERE x = 'A'; PRAGMA checkpoint; UPDATE t1 SET y = 0 WHERE x = 'A'; SELECT * FROM t2; } } {B 2} file copy -force test.db-wal B do_test wal-12.4 { file copy -force test.db test2.db file copy -force test.db-wal test2.db-wal sqlite3_wal db2 test2.db execsql { SELECT 
* FROM t2 } db2 } {B 2} db2 close finish_test |