Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Refactoring some variable names in wal.c. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 1d201ff51f7c5ecdf71a91ed25204b71 |
User & Date: | drh 2010-05-19 17:49:50.000 |
Context
2010-05-19
| ||
18:08 | Revise the checksumming algorithm in wal.c. More variable refactoring. (check-in: 542b90eba6 user: drh tags: trunk) | |
17:49 | Refactoring some variable names in wal.c. (check-in: 1d201ff51f user: drh tags: trunk) | |
01:53 | Add a large comment to wal.c describing the WAL and wal-index file formats. (check-in: a71a22b52f user: drh tags: trunk) | |
Changes
Changes to src/wal.c.
︙ | ︙ | |||
183 184 185 186 187 188 189 | ** The following object stores a copy of the wal-index header. ** ** Member variables iCheck1 and iCheck2 contain the checksum for the ** last frame written to the wal, or 2 and 3 respectively if the log ** is currently empty. */ struct WalIndexHdr { | | | | | | | | 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 | ** The following object stores a copy of the wal-index header. ** ** Member variables iCheck1 and iCheck2 contain the checksum for the ** last frame written to the wal, or 2 and 3 respectively if the log ** is currently empty. */ struct WalIndexHdr { u32 iChange; /* Counter incremented each transaction */ u32 szPage; /* Database page size in bytes */ u32 mxFrame; /* Index of last valid frame in the WAL */ u32 nPage; /* Size of database in pages */ u32 iCheck1; /* Checksum value 1 */ u32 iCheck2; /* Checksum value 2 */ }; /* Size of serialized WalIndexHdr object. */ #define WALINDEX_HDR_NFIELD (sizeof(WalIndexHdr) / sizeof(u32)) /* A block of 16 bytes beginning at WALINDEX_LOCK_OFFSET is reserved ** for locks. Since some systems only feature mandatory file-locks, we |
︙ | ︙ | |||
210 211 212 213 214 215 216 | #define WAL_FRAME_HDRSIZE 16 /* Size of write ahead log header */ #define WAL_HDRSIZE 12 /* ** Return the offset of frame iFrame in the write-ahead log file, | | | | | 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 | #define WAL_FRAME_HDRSIZE 16 /* Size of write ahead log header */ #define WAL_HDRSIZE 12 /* ** Return the offset of frame iFrame in the write-ahead log file, ** assuming a database page size of szPage bytes. The offset returned ** is to the start of the write-ahead log frame-header. */ #define walFrameOffset(iFrame, szPage) ( \ WAL_HDRSIZE + ((iFrame)-1)*((szPage)+WAL_FRAME_HDRSIZE) \ ) /* ** An open write-ahead log file is represented by an instance of the ** following object. */ struct Wal { |
︙ | ︙ | |||
645 646 647 648 649 650 651 | if( nSize>WAL_FRAME_HDRSIZE ){ u8 aBuf[WAL_FRAME_HDRSIZE]; /* Buffer to load first frame header into */ u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ int nFrame; /* Number of bytes at aFrame */ u8 *aData; /* Pointer to data part of aFrame buffer */ int iFrame; /* Index of last frame read */ i64 iOffset; /* Next offset to read from log file */ | | | | | | | | | 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 | if( nSize>WAL_FRAME_HDRSIZE ){ u8 aBuf[WAL_FRAME_HDRSIZE]; /* Buffer to load first frame header into */ u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ int nFrame; /* Number of bytes at aFrame */ u8 *aData; /* Pointer to data part of aFrame buffer */ int iFrame; /* Index of last frame read */ i64 iOffset; /* Next offset to read from log file */ int szPage; /* Page size according to the log */ u32 aCksum[2]; /* Running checksum */ /* Read in the first frame header in the file (to determine the ** database page size). */ rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ return rc; } /* If the database page size is not a power of two, or is greater than ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid data. */ szPage = sqlite3Get4byte(&aBuf[0]); if( szPage&(szPage-1) || szPage>SQLITE_MAX_PAGE_SIZE || szPage<512 ){ goto finished; } aCksum[0] = sqlite3Get4byte(&aBuf[4]); aCksum[1] = sqlite3Get4byte(&aBuf[8]); /* Malloc a buffer to read frames into. */ nFrame = szPage + WAL_FRAME_HDRSIZE; aFrame = (u8 *)sqlite3_malloc(nFrame); if( !aFrame ){ return SQLITE_NOMEM; } aData = &aFrame[WAL_FRAME_HDRSIZE]; /* Read all frames from the log file. 
*/ iFrame = 0; for(iOffset=WAL_HDRSIZE; (iOffset+nFrame)<=nSize; iOffset+=nFrame){ u32 pgno; /* Database page number for frame */ u32 nTruncate; /* dbsize field from frame header */ int isValid; /* True if this frame is valid */ /* Read and decode the next log frame. */ rc = sqlite3OsRead(pWal->pWalFd, aFrame, nFrame, iOffset); if( rc!=SQLITE_OK ) break; isValid = walDecodeFrame(aCksum, &pgno, &nTruncate, szPage, aData, aFrame); if( !isValid ) break; rc = walIndexAppend(pWal, ++iFrame, pgno); if( rc!=SQLITE_OK ) break; /* If nTruncate is non-zero, this is a commit record. */ if( nTruncate ){ hdr.iCheck1 = aCksum[0]; hdr.iCheck2 = aCksum[1]; hdr.mxFrame = iFrame; hdr.nPage = nTruncate; hdr.szPage = szPage; } } sqlite3_free(aFrame); }else{ hdr.iCheck1 = 2; hdr.iCheck2 = 3; |
︙ | ︙ | |||
981 982 983 984 985 986 987 | static int walCheckpoint( Wal *pWal, /* Wal connection */ int sync_flags, /* Flags for OsSync() (or 0) */ int nBuf, /* Size of zBuf in bytes */ u8 *zBuf /* Temporary buffer to use */ ){ int rc; /* Return code */ | | | | | | | | 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 | static int walCheckpoint( Wal *pWal, /* Wal connection */ int sync_flags, /* Flags for OsSync() (or 0) */ int nBuf, /* Size of zBuf in bytes */ u8 *zBuf /* Temporary buffer to use */ ){ int rc; /* Return code */ int szPage = pWal->hdr.szPage; /* Database page-size */ WalIterator *pIter = 0; /* Wal iterator context */ u32 iDbpage = 0; /* Next database page to write */ u32 iFrame = 0; /* Wal frame containing data for iDbpage */ /* Allocate the iterator */ rc = walIteratorInit(pWal, &pIter); if( rc!=SQLITE_OK || pWal->hdr.mxFrame==0 ){ goto out; } if( pWal->hdr.szPage!=nBuf ){ rc = SQLITE_CORRUPT_BKPT; goto out; } /* Sync the log file to disk */ if( sync_flags ){ rc = sqlite3OsSync(pWal->pWalFd, sync_flags); if( rc!=SQLITE_OK ) goto out; } /* Iterate through the contents of the log, copying data to the db file. */ while( 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE ); if( rc!=SQLITE_OK ) goto out; rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, (iDbpage-1)*szPage); if( rc!=SQLITE_OK ) goto out; } /* Truncate the database file */ rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage)); if( rc!=SQLITE_OK ) goto out; /* Sync the database file. If successful, update the wal-index. */ if( sync_flags ){ rc = sqlite3OsSync(pWal->pDbFd, sync_flags); if( rc!=SQLITE_OK ) goto out; } |
︙ | ︙ | |||
1304 1305 1306 1307 1308 1309 1310 | /* Search the hash table or tables for an entry matching page number ** pgno. Each iteration of the following for() loop searches one ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). ** ** This code may run concurrently to the code in walIndexAppend() ** that adds entries to the wal-index (and possibly to this hash | | | 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 | /* Search the hash table or tables for an entry matching page number ** pgno. Each iteration of the following for() loop searches one ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). ** ** This code may run concurrently to the code in walIndexAppend() ** that adds entries to the wal-index (and possibly to this hash ** table). This means the value just read from the hash ** slot (aHash[iKey]) may have been added before or after the ** current read transaction was opened. Values added after the ** read transaction was opened may have been written incorrectly - ** i.e. these slots may contain garbage data. However, we assume ** that any slots written before the current read transaction was ** opened remain unmodified. ** |
︙ | ︙ | |||
1327 1328 1329 1330 1331 1332 1333 | ** This condition filters out entries that were added to the hash ** table after the current read-transaction had started. ** ** (iFrame>iRead): ** This filters out a dangerous class of garbage data. The ** garbage hash slot may refer to a frame with the correct page ** number, but not the most recent version of the frame. For | | | | | 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 | ** This condition filters out entries that were added to the hash ** table after the current read-transaction had started. ** ** (iFrame>iRead): ** This filters out a dangerous class of garbage data. The ** garbage hash slot may refer to a frame with the correct page ** number, but not the most recent version of the frame. For ** example, if at the start of the read-transaction the WAL ** contains three copies of the desired page in frames 2, 3 and 4, ** the hash table may contain the following: ** ** { ..., 2, 3, 4, 99, 99, ..... } ** ** The correct answer is to read data from frame 4. But a ** dirty-read may potentially cause the hash-table to appear as ** follows to the reader: ** ** { ..., 2, 3, 4, 3, 99, ..... } ** ** Without this part of the if(...) clause, the reader might ** incorrectly read data from frame 3 instead of 4. This would be ** an error. ** ** It is not actually clear to the developers that such a dirty-read ** can occur. But if it does, it should not cause any problems. |
︙ | ︙ | |||
1387 1388 1389 1390 1391 1392 1393 | #endif /* If iRead is non-zero, then it is the log frame number that contains the ** required page. Read and return data from the log file. */ walIndexUnmap(pWal); if( iRead ){ | | | 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 | #endif /* If iRead is non-zero, then it is the log frame number that contains the ** required page. Read and return data from the log file. */ walIndexUnmap(pWal); if( iRead ){ i64 iOffset = walFrameOffset(iRead, pWal->hdr.szPage) + WAL_FRAME_HDRSIZE; *pInWal = 1; return sqlite3OsRead(pWal->pWalFd, pOut, nOut, iOffset); } *pInWal = 0; return SQLITE_OK; } |
︙ | ︙ | |||
1487 1488 1489 1490 1491 1492 1493 | int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame){ int rc = SQLITE_OK; u8 aCksum[8]; assert( pWal->lockState==SQLITE_SHM_WRITE ); pWal->hdr.mxFrame = iFrame; if( iFrame>0 ){ | | | | 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 | int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame){ int rc = SQLITE_OK; u8 aCksum[8]; assert( pWal->lockState==SQLITE_SHM_WRITE ); pWal->hdr.mxFrame = iFrame; if( iFrame>0 ){ i64 iOffset = walFrameOffset(iFrame, pWal->hdr.szPage) + sizeof(u32)*2; rc = sqlite3OsRead(pWal->pWalFd, aCksum, sizeof(aCksum), iOffset); pWal->hdr.iCheck1 = sqlite3Get4byte(&aCksum[0]); pWal->hdr.iCheck2 = sqlite3Get4byte(&aCksum[4]); } return rc; } /* ** Write a set of frames to the log. The caller must hold the write-lock ** on the log file (obtained using sqlite3WalWriteLock()). */ int sqlite3WalFrames( Wal *pWal, /* Wal handle to write to */ int szPage, /* Database page-size in bytes */ PgHdr *pList, /* List of dirty pages to write */ Pgno nTruncate, /* Database size after this commit */ int isCommit, /* True if this is a commit */ int sync_flags /* Flags to pass to OsSync() (or 0) */ ){ int rc; /* Used to catch return codes */ u32 iFrame; /* Next frame address */ |
︙ | ︙ | |||
1528 1529 1530 1531 1532 1533 1534 | /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. See comments at the top of ** this source file for a description of the WAL header format. */ assert( WAL_FRAME_HDRSIZE>=WAL_HDRSIZE ); iFrame = pWal->hdr.mxFrame; if( iFrame==0 ){ | | | | | | | | | | 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 | /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. See comments at the top of ** this source file for a description of the WAL header format. */ assert( WAL_FRAME_HDRSIZE>=WAL_HDRSIZE ); iFrame = pWal->hdr.mxFrame; if( iFrame==0 ){ sqlite3Put4byte(aFrame, szPage); sqlite3_randomness(8, &aFrame[4]); pWal->hdr.iCheck1 = sqlite3Get4byte(&aFrame[4]); pWal->hdr.iCheck2 = sqlite3Get4byte(&aFrame[8]); rc = sqlite3OsWrite(pWal->pWalFd, aFrame, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ return rc; } } aCksum[0] = pWal->hdr.iCheck1; aCksum[1] = pWal->hdr.iCheck2; /* Write the log file. */ for(p=pList; p; p=p->pDirty){ u32 nDbsize; /* Db-size field for frame header */ i64 iOffset; /* Write offset in log file */ iOffset = walFrameOffset(++iFrame, szPage); /* Populate and write the frame header */ nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0; walEncodeFrame(aCksum, p->pgno, nDbsize, szPage, p->pData, aFrame); rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset); if( rc!=SQLITE_OK ){ return rc; } /* Write the page data */ rc = sqlite3OsWrite(pWal->pWalFd, p->pData, szPage, iOffset+sizeof(aFrame)); if( rc!=SQLITE_OK ){ return rc; } pLast = p; } /* Sync the log file if the 'isSync' flag was specified. 
*/ if( sync_flags ){ i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd); i64 iOffset = walFrameOffset(iFrame+1, szPage); assert( isCommit ); assert( iSegment>0 ); iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment); while( iOffset<iSegment ){ walEncodeFrame(aCksum,pLast->pgno,nTruncate,szPage,pLast->pData,aFrame); rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset); if( rc!=SQLITE_OK ){ return rc; } iOffset += WAL_FRAME_HDRSIZE; rc = sqlite3OsWrite(pWal->pWalFd, pLast->pData, szPage, iOffset); if( rc!=SQLITE_OK ){ return rc; } nLast++; iOffset += szPage; } rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } assert( pWal->pWiData==0 ); /* Append data to the wal-index. It is not necessary to lock the |
︙ | ︙ | |||
1611 1612 1613 1614 1615 1616 1617 | iFrame++; nLast--; rc = walIndexAppend(pWal, iFrame, pLast->pgno); } if( rc==SQLITE_OK ){ /* Update the private copy of the header. */ | | | 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 | iFrame++; nLast--; rc = walIndexAppend(pWal, iFrame, pLast->pgno); } if( rc==SQLITE_OK ){ /* Update the private copy of the header. */ pWal->hdr.szPage = szPage; pWal->hdr.mxFrame = iFrame; if( isCommit ){ pWal->hdr.iChange++; pWal->hdr.nPage = nTruncate; } pWal->hdr.iCheck1 = aCksum[0]; pWal->hdr.iCheck2 = aCksum[1]; |
︙ | ︙ |