Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Refactor the sqlite3WalFrames() routine for clarity of presentation. Do the padded transaction sync as the write pointer crosses the final sector boundary instead of at the end, for efficiency. Always sync the WAL header immediately after it is written. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | statvfs |
Files: | files | file ages | folders |
SHA1: |
92c73b421b6242b09247dfb759777a53 |
User & Date: | drh 2011-12-20 20:13:25.386 |
Context
2011-12-20
| ||
22:18 | Remove the code that tries to detect OOO header writes on a WAL recovery. The code is made obsolete by syncing the WAL header. (check-in: 7ac713a14e user: drh tags: statvfs) | |
20:13 | Refactor the sqlite3WalFrames() routine for clarity of presentation. Do the padded transaction sync as the write pointer crosses the final sector boundary instead of at the end, for efficiency. Always sync the WAL header immediately after it is written. (check-in: 92c73b421b user: drh tags: statvfs) | |
2011-12-19
| ||
11:57 | Merge [21b76af6ed] into statvfs branch. (check-in: e694f7b166 user: dan tags: statvfs) | |
Changes
Changes to src/wal.c.
︙ | ︙ | |||
420 421 422 423 424 425 426 | i16 readLock; /* Which read lock is being held. -1 for none */ u8 syncFlags; /* Flags to use to sync header writes */ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 writeLock; /* True if in a write transaction */ u8 ckptLock; /* True if holding a checkpoint lock */ u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ u8 truncateOnCommit; /* True to truncate WAL file on commit */ | | | 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 | i16 readLock; /* Which read lock is being held. -1 for none */ u8 syncFlags; /* Flags to use to sync header writes */ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 writeLock; /* True if in a write transaction */ u8 ckptLock; /* True if holding a checkpoint lock */ u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ u8 truncateOnCommit; /* True to truncate WAL file on commit */ u8 syncHeader; /* Fsync the WAL header if true */ u8 padToSectorBoundary; /* Pad transactions out to the next sector */ WalIndexHdr hdr; /* Wal-index header for current transaction */ const char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ #ifdef SQLITE_DEBUG u8 lockError; /* True if a locking error has occurred */ #endif |
︙ | ︙ | |||
1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 | pRet->pVfs = pVfs; pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pDbFd = pDbFd; pRet->readLock = -1; pRet->mxWalSize = mxWalSize; pRet->zWalName = zWalName; pRet->padToSectorBoundary = 1; pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); /* Open file handle on the write-ahead log file. */ flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ pRet->readOnly = WAL_RDONLY; } if( rc!=SQLITE_OK ){ walIndexClose(pRet, 0); sqlite3OsClose(pRet->pWalFd); sqlite3_free(pRet); }else{ int iDC = sqlite3OsDeviceCharacteristics(pRet->pWalFd); | > | | 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 | pRet->pVfs = pVfs; pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pDbFd = pDbFd; pRet->readLock = -1; pRet->mxWalSize = mxWalSize; pRet->zWalName = zWalName; pRet->syncHeader = 1; pRet->padToSectorBoundary = 1; pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); /* Open file handle on the write-ahead log file. */ flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ pRet->readOnly = WAL_RDONLY; } if( rc!=SQLITE_OK ){ walIndexClose(pRet, 0); sqlite3OsClose(pRet->pWalFd); sqlite3_free(pRet); }else{ int iDC = sqlite3OsDeviceCharacteristics(pRet->pWalFd); if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; } if( iDC & SQLITE_IOCAP_ZERO_DAMAGE ){ pRet->padToSectorBoundary = 0; } *ppWal = pRet; WALTRACE(("WAL%d: opened\n", pRet)); } return rc; } |
︙ | ︙ | |||
2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 | testcase( (rc&0xff)==SQLITE_IOERR ); testcase( rc==SQLITE_PROTOCOL ); testcase( rc==SQLITE_OK ); } return rc; } /* ** Write iAmt bytes of content into the WAL file beginning at iOffset. ** | > > > > > > > > > > > > > > | | < < > | | < < < < < < < < | < | > > > | | < > | < > | > > > > > > > > > > > > > > > > > > > > > > > < | > > > > > > > | 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 | testcase( (rc&0xff)==SQLITE_IOERR ); testcase( rc==SQLITE_PROTOCOL ); testcase( rc==SQLITE_OK ); } return rc; } /* ** Information about the current state of the WAL file and where ** the next fsync should occur - passed from sqlite3WalFrames() into ** walWriteToLog(). */ typedef struct WalWriter { Wal *pWal; /* The complete WAL information */ sqlite3_file *pFd; /* The WAL file to which we write */ sqlite3_int64 iSyncPoint; /* Fsync at this offset */ int syncFlags; /* Flags for the fsync */ int szPage; /* Size of one page */ } WalWriter; /* ** Write iAmt bytes of content into the WAL file beginning at iOffset. ** Do a sync when crossing the p->iSyncPoint boundary. ** ** In other words, if iSyncPoint is in between iOffset and iOffset+iAmt, ** first write the part before iSyncPoint, then sync, then write the ** rest. */ static int walWriteToLog( WalWriter *p, /* WAL to write to */ void *pContent, /* Content to be written */ int iAmt, /* Number of bytes to write */ sqlite3_int64 iOffset /* Start writing at this offset */ ){ int rc; if( iOffset<p->iSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ int iFirstAmt = (int)(p->iSyncPoint - iOffset); rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); if( rc ) return rc; iOffset += iFirstAmt; iAmt -= iFirstAmt; pContent = (void*)(iFirstAmt + (char*)pContent); assert( p->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) ); rc = sqlite3OsSync(p->pFd, p->syncFlags); if( rc ) return rc; } rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); return rc; } /* ** Write out a single frame of the WAL */ static int walWriteOneFrame( WalWriter *p, /* Where to write the frame */ PgHdr *pPage, /* The page of the frame to be written */ int nTruncate, /* The commit flag. Usually 0. >0 for commit */ sqlite3_int64 iOffset /* Byte offset at which to write */ ){ int rc; /* Result code from subfunctions */ void *pData; /* Data actually written */ u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ #if defined(SQLITE_HAS_CODEC) if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM; #else pData = pPage->pData; #endif walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); if( rc ) return rc; /* Write the page data */ rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); return rc; } /* ** Write a set of frames to the log. The caller must hold the write-lock ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). */ int sqlite3WalFrames( Wal *pWal, /* Wal handle to write to */ int szPage, /* Database page-size in bytes */ PgHdr *pList, /* List of dirty pages to write */ Pgno nTruncate, /* Database size after this commit */ int isCommit, /* True if this is a commit */ int sync_flags /* Flags to pass to OsSync() (or 0) */ ){ int rc; /* Used to catch return codes */ u32 iFrame; /* Next frame address */ PgHdr *p; /* Iterator to run through pList with. */ PgHdr *pLast = 0; /* Last frame in list */ int nExtra = 0; /* Number of extra copies of last page */ int szFrame; /* The size of a single frame */ i64 iOffset; /* Next byte to write in WAL file */ WalWriter w; /* The writer */ assert( pList ); assert( pWal->writeLock ); /* If this frame set completes a transaction, then nTruncate>0. If ** nTruncate==0 then this frame set does not complete the transaction. */ assert( (isCommit!=0)==(nTruncate!=0) ); #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); } #endif |
︙ | ︙ | |||
2734 2735 2736 2737 2738 2739 2740 | pWal->truncateOnCommit = 1; rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); if( rc!=SQLITE_OK ){ return rc; } | | > > > > > > > > > > > > | | | | < < < | > > > | | < < < < | | < | < | < < < < < < < < < < | < < < | < > > | > > > > > > > > > > > > | < | < < < | < < < < < < < < | < | < | < < < < | < < | > > > > | | | | | 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 | pWal->truncateOnCommit = 1; rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); if( rc!=SQLITE_OK ){ return rc; } /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless ** all syncing is turned off by PRAGMA synchronous=OFF). Otherwise ** an out-of-order write following a WAL restart could result in ** database corruption. See the ticket: ** ** http://localhost:591/sqlite/info/ff5be73dee */ if( pWal->syncHeader && sync_flags ){ rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK); if( rc ) return rc; } } assert( (int)pWal->szPage==szPage ); /* Setup information needed to write frames into the WAL */ w.pWal = pWal; w.pFd = pWal->pWalFd; w.iSyncPoint = 0; w.syncFlags = sync_flags; w.szPage = szPage; iOffset = walFrameOffset(iFrame+1, szPage); szFrame = szPage + WAL_FRAME_HDRSIZE; /* Write all frames into the log file exactly once */ for(p=pList; p; p=p->pDirty){ int nDbSize; /* 0 normally. Positive == commit flag */ iFrame++; assert( iOffset==walFrameOffset(iFrame, szPage) ); nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; rc = walWriteOneFrame(&w, p, nDbSize, iOffset); if( rc ) return rc; pLast = p; iOffset += szFrame; } /* If this is the end of a transaction, then we might need to pad ** the transaction and/or sync the WAL file. ** ** Padding and syncing only occur if this set of frames complete a ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL ** or synchonous==OFF, then no padding or syncing are needed. ** ** If SQLITE_IOCAP_ZERO_DAMAGE is defined, then padding is not needed ** and only the sync is done. If padding is needed, then the final ** frame is repeated (with its commit mark) until the next sector ** boundary is crossed. Only the part of the WAL prior to the last ** sector boundary is synced; the part of the last frame that extends ** past the sector boundary is written after the sync. */ if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){ if( pWal->padToSectorBoundary ){ int sectorSize = sqlite3OsSectorSize(pWal->pWalFd); w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; while( iOffset<w.iSyncPoint ){ rc = walWriteOneFrame(&w, pLast, nTruncate, iOffset); if( rc ) return rc; iOffset += szFrame; nExtra++; } } rc = sqlite3OsSync(w.pFd, sync_flags & SQLITE_SYNC_MASK); } /* If this frame set completes the first transaction in the WAL and ** if PRAGMA journal_size_limit is set, then truncate the WAL to the ** journal size limit, if possible. */ if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){ i64 sz = pWal->mxWalSize; if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ sz = walFrameOffset(iFrame+nExtra+1, szPage); } walLimitSize(pWal, sz); pWal->truncateOnCommit = 0; } /* Append data to the wal-index. It is not necessary to lock the ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index ** guarantees that there are no other writers, and no data that may ** be in use by existing readers is being overwritten. */ iFrame = pWal->hdr.mxFrame; for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ iFrame++; rc = walIndexAppend(pWal, iFrame, p->pgno); } while( nExtra>0 && rc==SQLITE_OK ){ iFrame++; nExtra--; rc = walIndexAppend(pWal, iFrame, pLast->pgno); } if( rc==SQLITE_OK ){ /* Update the private copy of the header. */ pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); testcase( szPage<=32768 ); |
︙ | ︙ |